1 // Early register allocation pass.
2 // Copyright (C) 2023-2025 Free Software Foundation, Inc.
4 // This file is part of GCC.
6 // GCC is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation; either version 3, or (at your option) any later
11 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 // You should have received a copy of the GNU General Public License
17 // along with GCC; see the file COPYING3. If not see
18 // <http://www.gnu.org/licenses/>.
20 // This pass implements a simple form of early register allocation.
21 // It is restricted to FP/SIMD registers, and it only allocates
22 // a region of FP/SIMD usage if it can do so without any spilling.
23 // It punts on anything too complicated, leaving it to the real
24 // register allocator.
26 // There are two main purposes:
28 // (1) The pass runs before scheduling. It therefore has a chance to
29 // bag a spill-free allocation, if there is one, before scheduling
30 // moves things around.
32 // (2) The pass can make use of strided register operations, such as the
33 // strided forms of LD1 and ST1 in SME2.
35 // The allocator works at the level of individual FPRs, rather than whole
36 // pseudo registers. It is mostly intended to help optimize ACLE code.
38 // The pass is very simplistic. There are many things that could be improved.
39 #define IN_TARGET_CODE 1
41 #define INCLUDE_ALGORITHM
42 #define INCLUDE_FUNCTIONAL
46 #include "coretypes.h"
51 #include "tree-pass.h"
55 #include "print-rtl.h"
56 #include "insn-attr.h"
57 #include "insn-opinit.h"
61 class simple_iterator
: public wrapper_iterator
<T
>
64 using wrapper_iterator
<T
>::wrapper_iterator
;
66 simple_iterator
&operator-- () { --this->m_contents
; return *this; }
67 simple_iterator
operator-- (int) { return this->m_contents
--; }
68 simple_iterator
&operator++ () { ++this->m_contents
; return *this; }
69 simple_iterator
operator++ (int) { return this->m_contents
++; }
72 using namespace rtl_ssa
;
75 const pass_data pass_data_early_ra
=
79 OPTGROUP_NONE
, // optinfo_flags
81 0, // properties_required
82 0, // properties_provided
83 0, // properties_destroyed
84 0, // todo_flags_start
85 TODO_df_finish
, // todo_flags_finish
88 using allocno_iterator
= simple_iterator
<unsigned int>;
90 // Class that represents one run of the pass.
94 early_ra (function
*fn
);
99 // Whether to test only things that are required for correctness,
100 // or whether to take optimization heuristics into account as well.
101 enum test_strictness
{ CORRECTNESS_ONLY
, ALL_REASONS
};
103 static_assert (MAX_RECOG_OPERANDS
<= 32, "Operand mask is 32 bits");
104 using operand_mask
= uint32_t;
106 // Points in the function are represented using "program points".
107 // The program points are allocated in reverse order, with smaller
108 // numbers indicating later points. These special values indicate
109 // the start and end of a region.
110 static constexpr unsigned int START_OF_REGION
= ~0U;
111 static constexpr unsigned int END_OF_REGION
= 0U;
113 // An invalid allocno index, used to represent no allocno.
114 static constexpr unsigned int INVALID_ALLOCNO
= ~0U;
116 // Enumerates the single FPR sizes that matter for register allocation.
117 // Anything smaller than 64 bits is treated as FPR_D.
125 // A live range for an FPR, containing program points [START_POINT,
126 // END_POINT]. If ALLOCNO is not INVALID_ALLOCNO, the FPR is known
127 // to be equal to ALLOCNO for the duration of the live range.
128 struct fpr_range_info
130 unsigned int start_point
;
131 unsigned int end_point
;
132 unsigned int allocno
;
135 // Flags used in pseudo_reg_info.
137 // Whether the pseudo register occurs in one instruction alternative that
138 // matches (respectively) V0-V7, V0-V15, V0-V31 or a non-FP register.
139 static constexpr unsigned int ALLOWS_FPR8
= 1U << 0;
140 static constexpr unsigned int ALLOWS_FPR16
= 1U << 1;
141 static constexpr unsigned int ALLOWS_FPR32
= 1U << 2;
142 static constexpr unsigned int ALLOWS_NONFPR
= 1U << 3;
144 // Likewise whether the register occurs in an instruction that requires
145 // the associated register type.
146 static constexpr unsigned int NEEDS_FPR8
= 1U << 4;
147 static constexpr unsigned int NEEDS_FPR16
= 1U << 5;
148 static constexpr unsigned int NEEDS_FPR32
= 1U << 6;
149 static constexpr unsigned int NEEDS_NONFPR
= 1U << 7;
151 // Whether the pseudo register is copied to or from a hard FP register.
152 static constexpr unsigned int HAS_FPR_COPY
= 1U << 8;
154 // Whether the pseudo register is copied to or from a hard non-FP register.
155 static constexpr unsigned int HAS_NONFPR_COPY
= 1U << 9;
157 // Whether the pseudo register is used as a multi-register vector operand
158 // to an instruction that supports strided accesses, and whether it is used
159 // as a multi-register vector operand in some other non-move instruction.
160 static constexpr unsigned int HAS_FLEXIBLE_STRIDE
= 1U << 10;
161 static constexpr unsigned int HAS_FIXED_STRIDE
= 1U << 11;
163 // Whether we have decided not to allocate the pseudo register.
164 static constexpr unsigned int IGNORE_REG
= 1U << 12;
166 // Flags that should be propagated across moves between pseudo registers.
167 static constexpr unsigned int PSEUDO_COPY_FLAGS
= ~(HAS_FLEXIBLE_STRIDE
170 // Information about a copy between two registers.
173 // The two registers, in order.
174 unsigned int regnos
[2];
176 // Index I gives the index of the next reg_copy_info involving REGNOS[I],
178 unsigned int next_copies
[2];
181 // Information about a pseudo register.
182 struct pseudo_reg_info
184 // Flags describing how the register is used, defined above.
185 unsigned int flags
: 16;
187 // The mode of the pseudo register, cached for convenience.
188 machine_mode mode
: 16;
190 // The index of the first copy, or 0 if none.
191 unsigned int first_copy
;
194 // Information about a group of allocnos that have a fixed offset
195 // relative to each other. The allocnos in each group must be allocated
198 // Allocnos that can share the same hard register are eventually
199 // chained together. These chains represent edges on a graph of
200 // allocnos, such that two allocnos joined by an edge use the same FPR.
201 // These chains are formed between individual allocnos rather than
202 // whole groups, although the system is required to be self-consistent.
203 // Each clique in the graph has at least one "full-width" allocno group
204 // that has one allocno for every FPR that needs to be allocated to
207 // One group of allocnos is chosen as the "color representative" of
208 // each clique in the graph. This group will be a full-width group.
210 struct allocno_group_info
212 array_slice
<unsigned int> chain_heads ();
213 array_slice
<allocno_info
> allocnos ();
214 allocno_group_info
*color_rep ();
215 allocno_info
*allocno (unsigned int);
217 // The color representative of the containing clique.
218 allocno_group_info
*m_color_rep
;
220 // The pseudo register associated with this allocno, or INVALID_REGNUM
224 // The offset of the first allocno (and thus this group) from the start
226 unsigned int color_rep_offset
: 8;
228 // The number of allocnos in the group, and thus the number of FPRs
229 // that need to be allocated.
230 unsigned int size
: 8;
232 // The gap between FPRs in the group. This is normally 1, but can be
233 // higher if we've decided to use strided multi-register accesses.
234 unsigned int stride
: 4;
236 // Used temporarily while deciding which allocnos should have non-unit
237 // strides; see find_strided_accesses for details.
238 int consecutive_pref
: 4;
239 int strided_polarity
: 2;
241 // The largest size of FPR needed by references to the allocno group.
242 fpr_size_info fpr_size
: 2;
244 // True if all non-move accesses can be converted to strided form.
245 unsigned int has_flexible_stride
: 1;
247 // True if we've assigned a color index to this group.
248 unsigned int has_color
: 1;
250 // The mask of FPRs that would make valid choices for the first allocno,
251 // taking the requirements of all the allocnos in the group into account.
252 unsigned int fpr_candidates
;
254 // The index of the color that has been assigned to the containing clique.
258 // Represents a single FPR-sized quantity that needs to be allocated.
259 // Each allocno is identified by index (for compactness).
261 // Quantities that span multiple FPRs are assigned groups of consecutive
262 // allocnos. Quantities that occupy a single FPR are assigned their own
266 allocno_group_info
*group ();
268 bool is_equiv_to (unsigned int);
270 // The allocno's unique identifier.
273 // The offset of this allocno into the containing group.
274 unsigned int offset
: 8;
276 // The number of allocnos in the containing group.
277 unsigned int group_size
: 8;
279 // If the allocno has an affinity with at least one hard register
280 // (so that choosing that hard register would avoid a copy), this is
281 // the number of one such hard register, otherwise it is
282 // FIRST_PSEUDO_REGISTER.
283 unsigned int hard_regno
: 8;
285 // Set to 1 if the allocno has a single definition or 2 if it has more.
286 unsigned int num_defs
: 2;
288 // True if, at START_POINT, another allocno is copied to this one.
289 // See callers of record_copy for what counts as a copy.
290 unsigned int is_copy_dest
: 1;
292 // True if, at START_POINT, another allocno is copied to this one,
293 // and if the allocnos at both ends of the copy chain have an affinity
294 // with the same hard register.
295 unsigned int is_strong_copy_dest
: 1;
297 // True if, at END_POINT, this allocno is copied to another one,
298 // and both allocnos have an affinity with the same hard register.
299 unsigned int is_strong_copy_src
: 1;
301 // True if the allocno is subject to an earlyclobber at END_POINT,
302 // so that it cannot be tied to the destination of the instruction.
303 unsigned int is_earlyclobbered
: 1;
305 // True if this allocno is known to be equivalent to related_allocno
306 // for the whole of this allocno's lifetime.
307 unsigned int is_equiv
: 1;
309 // The inclusive range of program points spanned by the allocno.
310 // START_POINT >= END_POINT.
311 unsigned int start_point
;
312 unsigned int end_point
;
314 // If, at END_POINT, this allocno is copied to another allocno, this
315 // is the index of that allocno, otherwise it is INVALID_ALLOCNO.
316 // See callers of record_copy for what counts as a copy.
317 unsigned int copy_dest
;
319 // If this field is not INVALID_ALLOCNO, it indicates one of two things:
321 // - if is_equiv, this allocno is equivalent to related_allocno for
322 // the whole of this allocno's lifetime.
324 // - if !is_equiv, this allocno's live range is a subrange of
325 // related_allocno's and we have committed to making this allocno
326 // share whatever register related_allocno uses.
327 unsigned int related_allocno
;
331 // The program point at which the allocno was last defined,
332 // or START_OF_REGION if none. This is only used temporarily
333 // while recording allocnos; after that, chain_next below is
335 unsigned int last_def_point
;
337 // The next chained allocno in program order (i.e. at lower program
338 // points), or INVALID_ALLOCNO if none.
339 unsigned int chain_next
;
344 // The program point before start_point at which the allocno was
345 // last used, or END_OF_REGION if none. This is only used temporarily
346 // while recording allocnos; after that, chain_prev below is used
348 unsigned int last_use_point
;
350 // The previous chained allocno in program order (i.e. at higher
351 // program points), or INVALID_ALLOCNO if none.
352 unsigned int chain_prev
;
356 // Information about a full allocno group or a subgroup of it.
357 // The subgroup can be empty to indicate "none".
358 struct allocno_subgroup
360 array_slice
<allocno_info
> allocnos ();
361 allocno_info
*allocno (unsigned int);
363 // True if a subgroup is present.
364 operator bool () const { return count
; }
366 // The containing group.
367 allocno_group_info
*group
;
369 // The offset of the subgroup from the start of GROUP.
372 // The number of allocnos in the subgroup.
376 // Represents information about a copy between an allocno and an FPR.
377 // This establishes an affinity between the allocno and the FPR.
378 struct allocno_copy_info
380 // The allocno involved in the copy.
381 unsigned int allocno
;
383 // The FPR involved in the copy, relative to V0_REGNUM.
384 unsigned int fpr
: 16;
386 // A measure of how strong the affinity between the allocno and FPR is.
387 unsigned int weight
: 16;
390 // Information about a possible allocno chain.
391 struct chain_candidate_info
393 // The candidate target allocno.
394 allocno_info
*allocno
;
396 // A rating of the candidate (higher is better).
400 // Information about an allocno color.
403 // The color's unique identifier.
406 // The allocated hard register, when known.
407 unsigned int hard_regno
;
409 // The clique's representative group.
410 allocno_group_info
*group
;
412 // The number of FPR preferences recorded in fpr_preferences.
413 unsigned int num_fpr_preferences
;
415 // Weights in favor of choosing each FPR as the first register for GROUP.
416 int8_t fpr_preferences
[32];
419 template<typename T
, typename
... Ts
>
420 T
*region_allocate (Ts
...);
422 allocno_info
*chain_prev (allocno_info
*);
423 allocno_info
*chain_next (allocno_info
*);
425 void dump_pseudo_regs ();
426 void dump_fpr_ranges ();
428 void dump_allocnos ();
431 iterator_range
<allocno_iterator
> get_group_allocnos (unsigned int);
433 void preprocess_move (rtx
, rtx
);
434 void process_pseudo_reg_constraints (rtx_insn
*);
435 void preprocess_insns ();
437 int fpr_preference (unsigned int);
438 void propagate_pseudo_reg_info ();
440 void choose_fpr_pseudos ();
442 void start_new_region ();
445 void record_live_range_failure (T
);
447 void record_allocation_failure (T
);
449 allocno_group_info
*create_allocno_group (unsigned int, unsigned int);
450 allocno_subgroup
get_allocno_subgroup (rtx
);
451 void record_fpr_use (unsigned int);
452 void record_fpr_def (unsigned int);
453 void record_allocno_use (allocno_info
*);
454 void record_allocno_def (allocno_info
*);
455 allocno_info
*find_related_start (allocno_info
*, allocno_info
*, bool);
456 void accumulate_defs (allocno_info
*, allocno_info
*);
457 void record_copy (rtx
, rtx
, bool = false);
458 void record_constraints (rtx_insn
*);
459 void record_artificial_refs (unsigned int);
460 void record_insn_defs (rtx_insn
*);
461 void record_insn_call (rtx_call_insn
*);
462 void record_insn_uses (rtx_insn
*);
464 bool consider_strong_copy_src_chain (allocno_info
*);
465 int strided_polarity_pref (allocno_info
*, allocno_info
*);
466 void find_strided_accesses ();
468 template<unsigned int allocno_info::*field
>
469 static int cmp_increasing (const void *, const void *);
470 bool is_chain_candidate (allocno_info
*, allocno_info
*, test_strictness
);
471 int rate_chain (allocno_info
*, allocno_info
*);
472 static int cmp_chain_candidates (const void *, const void *);
473 void chain_allocnos (unsigned int &, unsigned int &);
474 void merge_fpr_info (allocno_group_info
*, allocno_group_info
*,
476 void set_single_color_rep (allocno_info
*, allocno_group_info
*,
478 void set_color_rep (allocno_group_info
*, allocno_group_info
*,
480 bool try_to_chain_allocnos (allocno_info
*, allocno_info
*);
481 void create_color (allocno_group_info
*);
484 bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info
*);
485 bool call_in_range_p (unsigned int, unsigned int, unsigned int);
486 unsigned int partial_fpr_clobbers (unsigned int, fpr_size_info
);
488 void process_copies ();
490 static int cmp_allocation_order (const void *, const void *);
491 void allocate_colors ();
492 allocno_info
*find_independent_subchain (allocno_info
*);
493 color_info
*find_oldest_color (unsigned int, unsigned int);
494 void broaden_colors ();
495 void finalize_allocation ();
497 bool replace_regs (rtx_insn
*, df_ref
);
498 int try_enforce_constraints (rtx_insn
*, vec
<std::pair
<int, int>> &);
499 void enforce_constraints (rtx_insn
*);
500 bool maybe_convert_to_strided_access (rtx_insn
*);
501 void apply_allocation ();
503 void process_region ();
504 bool is_dead_insn (rtx_insn
*);
505 bool could_split_region_here ();
506 void process_block (basic_block
, bool);
507 void process_blocks ();
509 // ----------------------------------------------------------------------
511 // The function we're operating on.
514 // Information about each pseudo register, indexed by REGNO.
515 auto_vec
<pseudo_reg_info
> m_pseudo_regs
;
517 // All recorded register copies.
518 auto_vec
<reg_copy_info
> m_pseudo_reg_copies
;
520 // The set of pseudos that we've decided to allocate an FPR to.
521 auto_bitmap m_fpr_pseudos
;
523 // ----------------------------------------------------------------------
525 // An obstack for allocating information that is referenced by the member
527 obstack m_region_obstack
;
528 void *m_region_alloc_start
;
530 // ----------------------------------------------------------------------
532 // The basic block that we're currently processing.
533 basic_block m_current_bb
;
535 // The lowest-numbered program point in the current basic block.
536 unsigned int m_current_bb_point
;
538 // The program point that we're currently processing (described above).
539 unsigned int m_current_point
;
541 // The set of allocnos that are currently live.
542 auto_bitmap m_live_allocnos
;
544 // The set of FPRs that are currently live.
545 unsigned int m_live_fprs
;
547 // A unique one-based identifier for the current region.
548 unsigned int m_current_region
;
550 // The region in which each FPR was last used, or 0 if none.
551 unsigned int m_fpr_recency
[32];
553 // ----------------------------------------------------------------------
555 // A mask of the FPRs that have already been allocated.
556 unsigned int m_allocated_fprs
;
558 // A mask of the FPRs that must be at least partially preserved by the
560 unsigned int m_call_preserved_fprs
;
562 // True if we have so far been able to track the live ranges of individual
564 bool m_accurate_live_ranges
;
566 // True if we haven't yet failed to allocate the current region.
567 bool m_allocation_successful
;
569 // A map from pseudo registers to the first allocno in their associated
571 hash_map
<int_hash
<unsigned int, INVALID_REGNUM
>,
572 allocno_group_info
*> m_regno_to_group
;
574 // All recorded copies between allocnos and FPRs.
575 auto_vec
<allocno_copy_info
> m_allocno_copies
;
577 // All allocnos, by index.
578 auto_vec
<allocno_info
*> m_allocnos
;
580 // All allocnos, by increasing START_POINT.
581 auto_vec
<allocno_info
*> m_sorted_allocnos
;
583 // Allocnos for which is_shared is true.
584 auto_vec
<allocno_info
*> m_shared_allocnos
;
586 // All colors, by index.
587 auto_vec
<color_info
*> m_colors
;
589 // The instruction ranges that make up the current region,
590 // as half-open ranges [LAST, FIRST).
591 auto_vec
<std::pair
<rtx_insn
*, rtx_insn
*>> m_insn_ranges
;
593 // The live ranges of each FPR, in order of increasing program point.
594 auto_vec
<fpr_range_info
> m_fpr_ranges
[32];
596 // For each function call id, a list of program points at which a call
597 // to such a function is made. Each list is in order of increasing
599 auto_vec
<unsigned int> m_call_points
[NUM_ABI_IDS
];
601 // A list of instructions that can be removed if allocation succeeds.
602 auto_vec
<rtx_insn
*> m_dead_insns
;
605 // True if PAT is something that would typically be treated as a move.
607 is_move_set (rtx pat
)
609 if (GET_CODE (pat
) != SET
)
612 rtx dest
= SET_DEST (pat
);
614 dest
= SUBREG_REG (dest
);
615 if (!OBJECT_P (dest
))
618 rtx src
= SET_SRC (pat
);
620 src
= SUBREG_REG (src
);
621 if (!OBJECT_P (src
) && !CONSTANT_P (src
))
627 // Return true if operand OP is likely to match OP_ALT after register
630 likely_operand_match_p (const operand_alternative
&op_alt
, rtx op
)
632 // Empty constraints match everything.
633 const char *constraint
= op_alt
.constraint
;
634 if (constraint
[0] == 0 || constraint
[0] == ',')
639 char c
= *constraint
;
640 int len
= CONSTRAINT_LEN (c
, constraint
);
641 if (c
== 0 || c
== ',')
647 auto cn
= lookup_constraint (constraint
);
648 switch (get_constraint_type (cn
))
651 if (REG_P (op
) || SUBREG_P (op
))
656 case CT_SPECIAL_MEMORY
:
657 case CT_RELAXED_MEMORY
:
665 if (constraint_satisfied_p (op
, cn
))
673 if (op_alt
.matches
>= 0)
675 rtx other
= recog_data
.operand
[op_alt
.matches
];
676 if ((REG_P (other
) || SUBREG_P (other
))
677 && (REG_P (op
) || SUBREG_P (op
)))
683 // Return true if the operands of the current instruction are likely to
686 likely_alternative_match_p (const operand_alternative
*op_alt
)
688 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
689 if (!likely_operand_match_p (op_alt
[i
], recog_data
.operand
[i
]))
694 // Return the sum of how disparaged OP_ALT is.
696 count_rejects (const operand_alternative
*op_alt
)
699 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
700 reject
+= op_alt
[opno
].reject
;
704 // Allocate a T from the region obstack.
705 template<typename T
, typename
... Ts
>
707 early_ra::region_allocate (Ts
... args
)
709 static_assert (std::is_trivially_destructible
<T
>::value
,
710 "destructor won't be called");
711 void *addr
= obstack_alloc (&m_region_obstack
, sizeof (T
));
712 return new (addr
) T (std::forward
<Ts
> (args
)...);
715 early_ra::early_ra (function
*fn
) : m_fn (fn
), m_live_fprs (0)
717 gcc_obstack_init (&m_region_obstack
);
718 m_region_alloc_start
= obstack_alloc (&m_region_obstack
, 0);
719 bitmap_tree_view (m_live_allocnos
);
722 early_ra::~early_ra ()
724 obstack_free (&m_region_obstack
, nullptr);
727 // Return an array that, for each allocno A in the group, contains the index
728 // of the allocno at the head of A's chain (that is, the one with the highest
729 // START_POINT). The index is INVALID_ALLOCNO if the chain is empty.
730 inline array_slice
<unsigned int>
731 early_ra::allocno_group_info::chain_heads ()
733 auto *start
= reinterpret_cast<unsigned int *> (this + 1);
734 return { start
, size
};
737 // Return the array of allocnos in the group.
738 inline array_slice
<early_ra::allocno_info
>
739 early_ra::allocno_group_info::allocnos ()
741 gcc_checking_assert (regno
!= INVALID_REGNUM
);
742 auto *chain_end
= reinterpret_cast<unsigned int *> (this + 1) + size
;
743 auto *allocno_start
= reinterpret_cast<allocno_info
*> (chain_end
);
744 return { allocno_start
, size
};
747 // Return the group's color representative.
748 inline early_ra::allocno_group_info
*
749 early_ra::allocno_group_info::color_rep ()
751 gcc_checking_assert (m_color_rep
->m_color_rep
== m_color_rep
);
755 // Return the group that contains the allocno.
756 inline early_ra::allocno_group_info
*
757 early_ra::allocno_info::group ()
759 auto *chain_end
= reinterpret_cast<unsigned int *> (this - offset
);
760 return reinterpret_cast<allocno_group_info
*> (chain_end
- group_size
) - 1;
763 // Return true if this allocno's live range is a subrange of related_allocno's
764 // and if we have committed to making this allocno share whatever register
765 // related_allocno uses.
767 early_ra::allocno_info::is_shared ()
769 return related_allocno
!= INVALID_ALLOCNO
&& !is_equiv
;
772 // Return true if this allocno is known to be equivalent to ALLOCNO.
774 early_ra::allocno_info::is_equiv_to (unsigned int allocno
)
776 return is_equiv
&& related_allocno
== allocno
;
779 // Return the allocnos in the subgroup.
780 inline array_slice
<early_ra::allocno_info
>
781 early_ra::allocno_subgroup::allocnos ()
785 return { &group
->allocnos ()[start
], count
};
788 // Return allocno I in the subgroup, with 0 being the first.
789 inline early_ra::allocno_info
*
790 early_ra::allocno_subgroup::allocno (unsigned int i
)
792 return &group
->allocnos ()[start
+ i
];
795 // Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
796 inline early_ra::allocno_info
*
797 early_ra::chain_prev (allocno_info
*allocno
)
799 if (allocno
->chain_prev
!= INVALID_ALLOCNO
)
800 return m_allocnos
[allocno
->chain_prev
];
804 // Return the next (later) allocno in ALLOCNO's chain, or null if none.
805 inline early_ra::allocno_info
*
806 early_ra::chain_next (allocno_info
*allocno
)
808 if (allocno
->chain_next
!= INVALID_ALLOCNO
)
809 return m_allocnos
[allocno
->chain_next
];
813 // Dump the information in m_pseudo_regs.
815 early_ra::dump_pseudo_regs ()
817 fprintf (dump_file
, "\nPseudos:\n");
818 fprintf (dump_file
, " %6s %6s %6s %6s %6s %6s %8s %s\n",
819 "Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
820 "FPRness", "Copies");
821 pseudo_reg_info unused_reg
= {};
822 for (unsigned int regno
= FIRST_PSEUDO_REGISTER
;
823 regno
< m_pseudo_regs
.length (); ++regno
)
825 const auto ®
= m_pseudo_regs
[regno
];
826 if (memcmp (®
, &unused_reg
, sizeof (reg
)) == 0)
829 fprintf (dump_file
, " %6d %6s %6s %6s %6s %6s %8d", regno
,
830 reg
.flags
& NEEDS_FPR8
? "Req"
831 : reg
.flags
& ALLOWS_FPR8
? "OK" : "-",
832 reg
.flags
& NEEDS_FPR16
? "Req"
833 : reg
.flags
& ALLOWS_FPR16
? "OK" : "-",
834 reg
.flags
& NEEDS_FPR32
? "Req"
835 : reg
.flags
& ALLOWS_FPR32
? "OK" : "-",
836 reg
.flags
& NEEDS_NONFPR
? "Req"
837 : reg
.flags
& ALLOWS_NONFPR
? "OK" : "-",
838 ~reg
.flags
& HAS_FLEXIBLE_STRIDE
? "-"
839 : reg
.flags
& HAS_FIXED_STRIDE
? "Some" : "All",
840 fpr_preference (regno
));
841 if (reg
.flags
& HAS_FPR_COPY
)
842 fprintf (dump_file
, " FPR");
843 if (reg
.flags
& HAS_NONFPR_COPY
)
844 fprintf (dump_file
, " Non-FPR");
845 unsigned int copyi
= reg
.first_copy
;
848 const auto ©
= m_pseudo_reg_copies
[copyi
];
849 if (copy
.regnos
[0] == regno
)
851 fprintf (dump_file
, " r%d", copy
.regnos
[1]);
852 copyi
= copy
.next_copies
[0];
856 fprintf (dump_file
, " r%d", copy
.regnos
[0]);
857 copyi
= copy
.next_copies
[1];
860 fprintf (dump_file
, "\n");
864 // Dump the information in m_fpr_ranges.
866 early_ra::dump_fpr_ranges ()
868 fprintf (dump_file
, "\nFPR live ranges:\n");
869 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
871 auto &intervals
= m_fpr_ranges
[fpr
];
872 if (intervals
.is_empty ())
875 fprintf (dump_file
, " %2d", fpr
);
876 for (unsigned int i
= 0; i
< intervals
.length (); ++i
)
878 auto &interval
= intervals
[i
];
879 if (i
&& (i
% 4) == 0)
880 fprintf (dump_file
, "\n ");
881 fprintf (dump_file
, " [ %6d %6d ]", interval
.start_point
,
884 fprintf (dump_file
, "\n");
888 // Dump the information in m_allocno_copies.
890 early_ra::dump_copies ()
892 fprintf (dump_file
, "\nCopies:\n");
893 fprintf (dump_file
, " %8s %3s %6s\n",
894 "Allocno", "FPR", "Weight");
895 for (const auto ©
: m_allocno_copies
)
896 fprintf (dump_file
, " %8d %3d %6d\n", copy
.allocno
,
897 copy
.fpr
, copy
.weight
);
900 // Dump the information in m_allocnos.
902 early_ra::dump_allocnos ()
904 char buffer
[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
905 fprintf (dump_file
, "\nAllocno groups:\n");
907 " %12s %12s %4s %6s %8s %s\n",
908 "Ids", "Regno", "Size", "Stride", "Cands", "Heads");
909 for (unsigned int ai
= 0; ai
< m_allocnos
.length (); ++ai
)
911 auto *allocno
= m_allocnos
[ai
];
912 if (allocno
->offset
!= 0)
914 auto *group
= allocno
->group ();
915 snprintf (buffer
, sizeof (buffer
), "[%d:%d]", allocno
->id
,
916 allocno
->id
+ group
->size
- 1);
917 fprintf (dump_file
, " %12s", buffer
);
918 snprintf (buffer
, sizeof (buffer
), "r%d[0:%d]", group
->regno
,
920 fprintf (dump_file
, " %12s %4s %6d %08x", buffer
,
921 group
->fpr_size
== FPR_D
? "D"
922 : group
->fpr_size
== FPR_Q
? "Q" : "Z",
924 group
->fpr_candidates
);
925 for (auto head
: group
->chain_heads ())
926 if (head
== INVALID_ALLOCNO
)
927 fprintf (dump_file
, " -");
929 fprintf (dump_file
, " %d", head
);
930 fprintf (dump_file
, "\n");
933 fprintf (dump_file
, "\nAllocno chains:\n");
934 fprintf (dump_file
, " %5s %12s %12s %6s %5s %5s %6s %5s\n",
935 "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "Shared", "FPR");
936 for (unsigned int ai
= 0; ai
< m_allocnos
.length (); ++ai
)
938 auto *allocno
= m_allocnos
[ai
];
939 if (allocno
->chain_prev
!= INVALID_ALLOCNO
)
941 const char *prefix
= "=>";
944 auto *group
= allocno
->group ();
945 fprintf (dump_file
, " %2s", prefix
);
946 fprintf (dump_file
, " %5d", allocno
->id
);
947 snprintf (buffer
, sizeof (buffer
), "r%d[%d]", group
->regno
,
949 fprintf (dump_file
, " %12s", buffer
);
950 snprintf (buffer
, sizeof (buffer
), "[%d,%d]",
951 allocno
->start_point
, allocno
->end_point
);
952 fprintf (dump_file
, " %11s%s %6s", buffer
,
953 allocno
->is_earlyclobbered
? "*" : " ",
954 allocno
->is_strong_copy_dest
? "Strong"
955 : allocno
->is_copy_dest
? "Yes" : "-");
956 if (allocno
->copy_dest
== INVALID_ALLOCNO
)
957 fprintf (dump_file
, " %5s", "-");
959 fprintf (dump_file
, " %5d", allocno
->copy_dest
);
960 if (allocno
->is_equiv
)
961 fprintf (dump_file
, " %5d", allocno
->related_allocno
);
963 fprintf (dump_file
, " %5s", "-");
964 if (allocno
->is_shared ())
965 fprintf (dump_file
, " %6d", allocno
->related_allocno
);
967 fprintf (dump_file
, " %6s", "-");
968 if (allocno
->hard_regno
== FIRST_PSEUDO_REGISTER
)
969 fprintf (dump_file
, " %5s", "-");
971 fprintf (dump_file
, " %5s", reg_names
[allocno
->hard_regno
]);
972 fprintf (dump_file
, "\n");
973 if (allocno
->chain_next
== INVALID_ALLOCNO
)
975 allocno
= m_allocnos
[allocno
->chain_next
];
981 // Dump the information in m_colors.
983 early_ra::dump_colors ()
985 fprintf (dump_file
, "\nColors:\n");
986 for (unsigned int i
= 0; i
< m_colors
.length (); ++i
)
988 auto *color
= m_colors
[i
];
992 fprintf (dump_file
, " color %d:\n", i
);
993 fprintf (dump_file
, " chains:\n");
994 auto heads
= color
->group
->chain_heads ();
995 for (unsigned int i
= 0; i
< color
->group
->size
; ++i
)
997 fprintf (dump_file
, " %2d:", i
);
999 while (ai
!= INVALID_ALLOCNO
)
1001 auto *allocno
= m_allocnos
[ai
];
1002 fprintf (dump_file
, " r%d[%d]", allocno
->group ()->regno
,
1004 ai
= allocno
->chain_next
;
1006 fprintf (dump_file
, "\n");
1008 fprintf (dump_file
, " FPR candidates:");
1009 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
1010 fprintf (dump_file
, "%s%c", fpr
% 8 ? "" : " ",
1011 color
->group
->fpr_candidates
& (1U << fpr
) ? 'Y' : '-');
1012 fprintf (dump_file
, "\n");
1013 fprintf (dump_file
, " FPR preferences:");
1014 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
1015 if (color
->fpr_preferences
[fpr
])
1016 fprintf (dump_file
, " %d(%d)", fpr
, color
->fpr_preferences
[fpr
]);
1017 fprintf (dump_file
, "\n");
1021 // Record any necessary information about a move from SRC to DEST.
1023 early_ra::preprocess_move (rtx dest
, rtx src
)
1025 if (SUBREG_P (dest
))
1026 dest
= SUBREG_REG (dest
);
1031 src
= SUBREG_REG (src
);
1035 // Sort the registers by increasing REGNO.
1036 rtx regs
[] = { dest
, src
};
1037 if (REGNO (dest
) > REGNO (src
))
1038 std::swap (regs
[0], regs
[1]);
1039 unsigned int regno0
= REGNO (regs
[0]);
1040 unsigned int regno1
= REGNO (regs
[1]);
1042 // Ignore moves between hard registers.
1043 if (HARD_REGISTER_NUM_P (regno1
))
1046 // For moves between hard registers and pseudos, just record the type
1047 // of hard register involved.
1048 auto ®1
= m_pseudo_regs
[regno1
];
1049 reg1
.mode
= GET_MODE (regs
[1]);
1050 if (HARD_REGISTER_NUM_P (regno0
))
1052 reg1
.flags
|= (FP_REGNUM_P (regno0
) ? HAS_FPR_COPY
: HAS_NONFPR_COPY
);
1056 // Record a move between two pseudo registers.
1057 auto ®0
= m_pseudo_regs
[regno0
];
1058 reg0
.mode
= GET_MODE (regs
[0]);
1061 copy
.regnos
[0] = regno0
;
1062 copy
.regnos
[1] = regno1
;
1063 copy
.next_copies
[0] = reg0
.first_copy
;
1064 copy
.next_copies
[1] = reg1
.first_copy
;
1066 reg0
.first_copy
= reg1
.first_copy
= m_pseudo_reg_copies
.length ();
1067 m_pseudo_reg_copies
.safe_push (copy
);
1070 // Return true if INSN has a multi-vector operand and if that operand
1071 // could be converted to strided form.
1073 is_stride_candidate (rtx_insn
*insn
)
1075 if (recog_memoized (insn
) < 0)
1078 auto stride_type
= get_attr_stride_type (insn
);
1079 return (TARGET_STREAMING_SME2
1080 && (stride_type
== STRIDE_TYPE_LD1_CONSECUTIVE
1081 || stride_type
== STRIDE_TYPE_ST1_CONSECUTIVE
));
1084 // Go through the constraints of INSN, which has already been extracted,
1085 // and record any relevant information about pseudo registers.
1087 early_ra::process_pseudo_reg_constraints (rtx_insn
*insn
)
1089 extract_insn (insn
);
1090 preprocess_constraints (insn
);
1092 // Flags that describe any multi-register vector operands.
1093 unsigned int insn_flags
= (is_stride_candidate (insn
)
1094 ? HAS_FLEXIBLE_STRIDE
1095 : HAS_FIXED_STRIDE
);
1097 auto alts
= get_preferred_alternatives (insn
);
1099 int operand_matches
[MAX_RECOG_OPERANDS
];
1100 unsigned int operand_flags
[MAX_RECOG_OPERANDS
];
1101 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1103 operand_matches
[i
] = -1;
1104 operand_flags
[i
] = 0;
1107 // Extract information from the constraints, considering all plausible
1109 for (int altno
= 0; altno
< recog_data
.n_alternatives
; ++altno
)
1111 if (!(alts
& ALTERNATIVE_BIT (altno
)))
1114 auto *op_alt
= &recog_op_alt
[altno
* recog_data
.n_operands
];
1115 if (!likely_alternative_match_p (op_alt
))
1118 // Use SRC_OPNO's constraints to derive information about DEST_OPNO.
1119 auto record_operand
= [&](int src_opno
, int dest_opno
)
1121 int matches
= op_alt
[src_opno
].matches
;
1123 operand_matches
[dest_opno
] = matches
;
1125 auto cl
= alternative_class (op_alt
, src_opno
);
1128 if (reg_class_subset_p (cl
, FP_REGS
))
1129 operand_flags
[dest_opno
] |= ALLOWS_FPR32
;
1130 if (reg_class_subset_p (cl
, FP_LO_REGS
))
1131 operand_flags
[dest_opno
] |= ALLOWS_FPR16
;
1132 if (reg_class_subset_p (cl
, FP_LO8_REGS
))
1133 operand_flags
[dest_opno
] |= ALLOWS_FPR8
;
1134 if (!reg_classes_intersect_p (cl
, FP_REGS
))
1135 operand_flags
[dest_opno
] |= ALLOWS_NONFPR
;
1139 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1141 record_operand (i
, i
);
1142 if (recog_data
.constraints
[i
][0] == '%')
1144 record_operand (i
, i
+ 1);
1145 record_operand (i
+ 1, i
);
1150 // Process the information we collected above.
1151 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1153 rtx op
= recog_data
.operand
[i
];
1154 machine_mode orig_mode
= GET_MODE (op
);
1156 op
= SUBREG_REG (op
);
1158 // Record the accumulated information in m_pseudo_regs.
1159 if (REG_P (op
) && !HARD_REGISTER_P (op
))
1161 // The flags so far just describe what at least one alternative
1162 // would accept. Calculate the associated NEEDS_* information.
1163 auto flags
= operand_flags
[i
];
1164 if (!(flags
& ALLOWS_FPR32
) && (flags
& ALLOWS_NONFPR
))
1165 flags
|= NEEDS_NONFPR
;
1166 else if ((flags
& ALLOWS_FPR32
) && !(flags
& ALLOWS_NONFPR
))
1168 if (flags
& ALLOWS_FPR8
)
1169 flags
|= NEEDS_FPR8
;
1170 if (flags
& ALLOWS_FPR16
)
1171 flags
|= NEEDS_FPR16
;
1172 flags
|= NEEDS_FPR32
;
1175 // Look for multi-register vector operands.
1176 if (VECTOR_MODE_P (orig_mode
)
1177 && targetm
.hard_regno_mode_ok (V0_REGNUM
, orig_mode
)
1178 && hard_regno_nregs (V0_REGNUM
, orig_mode
) > 1)
1179 flags
|= insn_flags
;
1181 m_pseudo_regs
[REGNO (op
)].flags
|= flags
;
1182 m_pseudo_regs
[REGNO (op
)].mode
= GET_MODE (op
);
1185 // Treat matching constraints as equivalent to moves.
1186 if (operand_matches
[i
] >= 0)
1187 preprocess_move (recog_data
.operand
[operand_matches
[i
]], op
);
1191 // Make one pass through the instructions, collecting information that
1192 // will be needed later.
1194 early_ra::preprocess_insns ()
1196 m_pseudo_regs
.safe_grow_cleared (max_reg_num ());
1197 m_pseudo_reg_copies
.safe_push (reg_copy_info ());
1198 for (rtx_insn
*insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
1200 if (!NONDEBUG_INSN_P (insn
))
1203 // Mark all registers that occur in addresses as needing a GPR.
1204 vec_rtx_properties properties
;
1205 properties
.add_insn (insn
, true);
1206 for (rtx_obj_reference ref
: properties
.refs ())
1208 && ref
.in_address ()
1209 && !HARD_REGISTER_NUM_P (ref
.regno
))
1210 m_pseudo_regs
[ref
.regno
].flags
|= ALLOWS_NONFPR
| NEEDS_NONFPR
;
1212 if (GET_CODE (PATTERN (insn
)) == USE
1213 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
1216 rtx set
= single_set (insn
);
1217 if (set
&& is_move_set (set
))
1218 preprocess_move (SET_DEST (set
), SET_SRC (set
));
1220 process_pseudo_reg_constraints (insn
);
1224 // Return a signed integer that says (roughly) how strong an affinity
1225 // pseudo register REGNO has with FPRs. A positive value indicates
1226 // that we should try to allocate an FPR, a negative value indicates
1227 // that we shouldn't, and 0 indicates neutrality.
1229 early_ra::fpr_preference (unsigned int regno
)
1231 auto mode
= m_pseudo_regs
[regno
].mode
;
1232 auto flags
= m_pseudo_regs
[regno
].flags
;
1233 if (flags
& IGNORE_REG
)
1235 else if (mode
== VOIDmode
|| !targetm
.hard_regno_mode_ok (V0_REGNUM
, mode
))
1237 else if (flags
& HAS_FLEXIBLE_STRIDE
)
1239 else if (flags
& NEEDS_FPR32
)
1241 else if (!(flags
& ALLOWS_FPR32
) && (flags
& ALLOWS_NONFPR
))
1243 else if ((flags
& HAS_FPR_COPY
) && !(flags
& HAS_NONFPR_COPY
))
1245 else if ((flags
& HAS_NONFPR_COPY
) && !(flags
& HAS_FPR_COPY
))
1251 // Propagate information about pseudo-registers along copy edges,
1252 // while doing so doesn't create conflicting FPR preferences.
1254 early_ra::propagate_pseudo_reg_info ()
1256 struct stack_entry
{ unsigned int regno
, copyi
; };
1258 auto_vec
<stack_entry
, 32> stack
;
1259 for (unsigned int i
= FIRST_PSEUDO_REGISTER
;
1260 i
< m_pseudo_regs
.length (); ++i
)
1262 auto start
= m_pseudo_regs
[i
].first_copy
;
1266 stack
.quick_push ({ i
, start
});
1267 while (!stack
.is_empty ())
1269 auto entry
= stack
.pop ();
1270 auto ©
= m_pseudo_reg_copies
[entry
.copyi
];
1271 auto src_regno
= entry
.regno
;
1272 auto dest_regno
= (src_regno
== copy
.regnos
[1]
1275 auto next_copyi
= (src_regno
== copy
.regnos
[1]
1276 ? copy
.next_copies
[1]
1277 : copy
.next_copies
[0]);
1279 stack
.safe_push ({ src_regno
, next_copyi
});
1281 auto &src_reg
= m_pseudo_regs
[src_regno
];
1282 auto &dest_reg
= m_pseudo_regs
[dest_regno
];
1284 if (src_reg
.flags
& ~dest_reg
.flags
& PSEUDO_COPY_FLAGS
)
1286 auto src_preference
= fpr_preference (src_regno
);
1287 auto dest_preference
= fpr_preference (dest_regno
);
1288 if ((src_preference
>= 0 && dest_preference
>= 0)
1289 || (src_preference
<= 0 && dest_preference
<= 0))
1291 dest_reg
.flags
|= (src_reg
.flags
& PSEUDO_COPY_FLAGS
);
1292 stack
.safe_push ({ dest_regno
, dest_reg
.first_copy
});
1299 // Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
1302 early_ra::choose_fpr_pseudos ()
1304 for (unsigned int i
= FIRST_PSEUDO_REGISTER
;
1305 i
< m_pseudo_regs
.length (); ++i
)
1306 if (fpr_preference (i
) > 0)
1307 bitmap_set_bit (m_fpr_pseudos
, i
);
1310 // Clear out information about the previous CFG region (if any)
1311 // and set up the data for a new region.
1313 early_ra::start_new_region ()
1315 obstack_free (&m_region_obstack
, m_region_alloc_start
);
1316 m_regno_to_group
.empty ();
1317 m_allocno_copies
.truncate (0);
1318 m_allocnos
.truncate (0);
1319 m_sorted_allocnos
.truncate (0);
1320 m_shared_allocnos
.truncate (0);
1321 m_colors
.truncate (0);
1322 m_insn_ranges
.truncate (0);
1323 for (auto &fpr_ranges
: m_fpr_ranges
)
1324 fpr_ranges
.truncate (0);
1325 for (auto &call_points
: m_call_points
)
1326 call_points
.truncate (0);
1327 gcc_assert (bitmap_empty_p (m_live_allocnos
) && m_live_fprs
== 0);
1328 m_dead_insns
.truncate (0);
1329 m_allocated_fprs
= 0;
1330 m_call_preserved_fprs
= 0;
1331 m_accurate_live_ranges
= true;
1332 m_allocation_successful
= true;
1333 m_current_region
+= 1;
1336 // Record that we can no longer track the liveness of individual allocnos.
1337 // Call DUMP to dump the reason to a dump file.
1338 template<typename T
>
1340 early_ra::record_live_range_failure (T dump
)
1342 if (!m_accurate_live_ranges
)
1345 m_accurate_live_ranges
= false;
1346 m_allocation_successful
= false;
1347 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1349 fprintf (dump_file
, "Unable to track live ranges further: ");
1351 fprintf (dump_file
, "\n");
1355 // Record that the allocation of the current region has filed. Call DUMP to
1356 // dump the reason to a dump file.
1357 template<typename T
>
1359 early_ra::record_allocation_failure (T dump
)
1361 if (!m_allocation_successful
)
1364 m_allocation_successful
= false;
1365 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1367 fprintf (dump_file
, "Not allocating region: ");
1369 fprintf (dump_file
, "\n");
1373 // Create and return an allocno group of size SIZE for register REGNO.
1374 // REGNO can be INVALID_REGNUM if the group just exists to allow
1375 // other groups to be chained together, and does not have any new
1376 // allocnos of its own.
1377 early_ra::allocno_group_info
*
1378 early_ra::create_allocno_group (unsigned int regno
, unsigned int size
)
1380 static_assert (alignof (unsigned int) == alignof (allocno_info
),
1381 "allocno_info alignment");
1382 unsigned int num_allocnos
= (regno
!= INVALID_REGNUM
? size
: 0);
1384 // Allocate an allocno_group_info, followed by an array of chain heads,
1385 // followed by the allocnos themselves.
1386 size_t alloc_size
= (sizeof (allocno_group_info
)
1387 + size
* sizeof (unsigned int)
1388 + num_allocnos
* sizeof (allocno_info
));
1389 void *data
= obstack_alloc (&m_region_obstack
, alloc_size
);
1391 // Initialize the group.
1392 auto *group
= reinterpret_cast<allocno_group_info
*> (data
);
1393 memset (group
, 0, sizeof (*group
));
1394 group
->m_color_rep
= group
;
1395 group
->regno
= regno
;
1398 group
->fpr_size
= FPR_D
;
1399 group
->fpr_candidates
= ~0U;
1401 // Initialize the chain heads.
1402 auto heads
= group
->chain_heads ();
1403 for (unsigned int i
= 0; i
< heads
.size (); ++i
)
1404 heads
[i
] = (i
< num_allocnos
? m_allocnos
.length () + i
: INVALID_ALLOCNO
);
1406 // Initialize the allocnos.
1407 if (num_allocnos
> 0)
1409 auto allocnos
= group
->allocnos ();
1410 memset (allocnos
.begin (), 0, num_allocnos
* sizeof (allocno_info
));
1411 for (unsigned int i
= 0; i
< num_allocnos
; ++i
)
1413 auto *allocno
= &allocnos
[i
];
1414 allocno
->id
= m_allocnos
.length ();
1415 allocno
->offset
= i
;
1416 allocno
->group_size
= size
;
1417 allocno
->hard_regno
= FIRST_PSEUDO_REGISTER
;
1418 allocno
->start_point
= END_OF_REGION
;
1419 allocno
->end_point
= START_OF_REGION
;
1420 allocno
->copy_dest
= INVALID_ALLOCNO
;
1421 allocno
->related_allocno
= INVALID_ALLOCNO
;
1422 allocno
->chain_next
= INVALID_ALLOCNO
;
1423 allocno
->chain_prev
= INVALID_ALLOCNO
;
1424 m_allocnos
.safe_push (allocno
);
1430 // If REG refers to a pseudo register that might be allocated to FPRs,
1431 // return the associated range of allocnos, creating new ones if necessary.
1432 // Return an empty range otherwise.
1433 early_ra::allocno_subgroup
1434 early_ra::get_allocno_subgroup (rtx reg
)
1436 if (GET_CODE (reg
) == SUBREG
)
1438 allocno_subgroup inner
= get_allocno_subgroup (SUBREG_REG (reg
));
1442 if (!targetm
.can_change_mode_class (GET_MODE (SUBREG_REG (reg
)),
1443 GET_MODE (reg
), FP_REGS
))
1445 record_live_range_failure ([&](){
1446 fprintf (dump_file
, "cannot refer to r%d:%s in mode %s",
1447 REGNO (SUBREG_REG (reg
)),
1448 GET_MODE_NAME (GET_MODE (SUBREG_REG (reg
))),
1449 GET_MODE_NAME (GET_MODE (reg
)));
1454 if (!targetm
.modes_tieable_p (GET_MODE (SUBREG_REG (reg
)),
1456 record_allocation_failure ([&](){
1457 fprintf (dump_file
, "r%d's mode %s is not tieable to mode %s",
1458 REGNO (SUBREG_REG (reg
)),
1459 GET_MODE_NAME (GET_MODE (SUBREG_REG (reg
))),
1460 GET_MODE_NAME (GET_MODE (reg
)));
1464 subreg_get_info (V0_REGNUM
, GET_MODE (SUBREG_REG (reg
)),
1465 SUBREG_BYTE (reg
), GET_MODE (reg
), &info
);
1466 if (!info
.representable_p
)
1468 record_live_range_failure ([&](){
1469 fprintf (dump_file
, "subreg of r%d is invalid for V0",
1470 REGNO (SUBREG_REG (reg
)));
1475 inner
.start
+= info
.offset
;
1476 inner
.count
= info
.nregs
;
1480 if (!REG_P (reg
) || HARD_REGISTER_P (reg
))
1483 unsigned int regno
= REGNO (reg
);
1484 if (fpr_preference (regno
) <= 0)
1487 unsigned int count
= hard_regno_nregs (V0_REGNUM
, GET_MODE (reg
));
1489 auto &entry
= m_regno_to_group
.get_or_insert (regno
, &existed
);
1492 auto *group
= create_allocno_group (regno
, count
);
1493 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1495 auto allocnos
= group
->allocnos ();
1496 fprintf (dump_file
, "Creating allocnos [%d:%d] for r%d\n",
1497 allocnos
.front ().id
, allocnos
.back ().id
, regno
);
1500 auto reg_bits
= GET_MODE_BITSIZE (GET_MODE (reg
));
1501 auto fpr_bits
= exact_div (reg_bits
, count
);
1502 auto flags
= m_pseudo_regs
[regno
].flags
;
1504 // Punt for now if there is a choice to be made between using an
1505 // FPR and a non-FPR.
1506 if ((flags
& NEEDS_NONFPR
)
1507 || ((flags
& ALLOWS_NONFPR
)
1508 && !FLOAT_MODE_P (GET_MODE (reg
))
1509 && !VECTOR_MODE_P (GET_MODE (reg
))))
1510 record_allocation_failure ([&](){
1511 fprintf (dump_file
, "r%d has FPR and non-FPR references", regno
);
1514 if (flags
& ALLOWS_FPR8
)
1515 group
->fpr_candidates
&= 0xff;
1516 else if (flags
& ALLOWS_FPR16
)
1517 group
->fpr_candidates
&= 0xffff;
1518 group
->fpr_candidates
&= ~0U >> (count
- 1);
1520 group
->has_flexible_stride
= ((flags
& HAS_FLEXIBLE_STRIDE
) != 0
1521 && (flags
& HAS_FIXED_STRIDE
) == 0);
1523 group
->fpr_size
= (maybe_gt (fpr_bits
, 128) ? FPR_Z
1524 : maybe_gt (fpr_bits
, 64) ? FPR_Q
: FPR_D
);
1528 return { entry
, 0, count
};
1531 // Record a use of FPR REGNO at the current program point, as part of
1532 // a backwards walk over a block.
1534 early_ra::record_fpr_use (unsigned int regno
)
1536 gcc_assert (IN_RANGE (regno
, V0_REGNUM
, V31_REGNUM
));
1537 unsigned int offset
= regno
- V0_REGNUM
;
1538 if (!(m_live_fprs
& (1U << offset
)))
1540 m_fpr_ranges
[offset
].safe_push ({ START_OF_REGION
, m_current_point
,
1542 m_live_fprs
|= 1U << offset
;
1546 // Record a definition of FPR REGNO at the current program point, as part of
1547 // a backwards walk over a block.
1549 early_ra::record_fpr_def (unsigned int regno
)
1551 gcc_assert (IN_RANGE (regno
, V0_REGNUM
, V31_REGNUM
));
1552 unsigned int offset
= regno
- V0_REGNUM
;
1554 // The definition completes the current live range. If the result
1555 // of the definition is used, the live range extends to the last use.
1556 // Otherwise the live range is just a momentary blip at the current point.
1557 auto &ranges
= m_fpr_ranges
[offset
];
1558 if (m_live_fprs
& (1U << offset
))
1560 ranges
.last ().start_point
= m_current_point
;
1561 m_live_fprs
&= ~(1U << offset
);
1564 ranges
.safe_push ({ m_current_point
, m_current_point
, INVALID_ALLOCNO
});
1567 // Record a use of allocno ALLOCNO at the current program point, as part
1568 // of a backwards walk over a block.
1570 early_ra::record_allocno_use (allocno_info
*allocno
)
1572 if (allocno
->start_point
== m_current_point
)
1575 gcc_checking_assert (!allocno
->is_shared ());
1576 bitmap_set_bit (m_live_allocnos
, allocno
->id
);
1577 if (allocno
->end_point
> m_current_point
)
1579 allocno
->end_point
= m_current_point
;
1580 allocno
->last_def_point
= START_OF_REGION
;
1581 allocno
->last_use_point
= END_OF_REGION
;
1584 allocno
->last_use_point
= allocno
->start_point
;
1585 allocno
->start_point
= m_current_point
;
1586 allocno
->is_copy_dest
= false;
1587 allocno
->is_strong_copy_src
= false;
1588 allocno
->related_allocno
= INVALID_ALLOCNO
;
1589 allocno
->is_equiv
= false;
1592 // Record a definition of the allocno with index AI at the current program
1593 // point, as part of a backwards walk over a block. The allocno is known
1596 early_ra::record_allocno_def (allocno_info
*allocno
)
1598 gcc_checking_assert (!allocno
->is_shared ());
1599 allocno
->last_use_point
= allocno
->start_point
;
1600 allocno
->last_def_point
= m_current_point
;
1601 allocno
->start_point
= m_current_point
;
1602 allocno
->num_defs
= MIN (allocno
->num_defs
+ 1, 2);
1603 gcc_checking_assert (!allocno
->is_copy_dest
1604 && !allocno
->is_strong_copy_src
);
1605 if (!bitmap_clear_bit (m_live_allocnos
, allocno
->id
))
1609 // SRC_ALLOCNO is copied or tied to DEST_ALLOCNO; IS_EQUIV is true if the
1610 // two allocnos are known to be equal. See whether we can mark a chain of
1611 // allocnos ending at DEST_ALLOCNO as related to SRC_ALLOCNO. Return the
1612 // start of the chain if so, otherwise return null.
1614 // If IS_EQUIV, a chain that contains just DEST_ALLOCNO should be treated
1615 // as an equivalence. Otherwise the chain should be shared with SRC_ALLOCNO.
1617 // Sharing chains are a rather hacky workaround for the fact that we
1618 // don't collect segmented live ranges, and that in the end we want to do
1619 // simple interval graph coloring.
1620 early_ra::allocno_info
*
1621 early_ra::find_related_start (allocno_info
*dest_allocno
,
1622 allocno_info
*src_allocno
, bool is_equiv
)
1624 allocno_info
*res
= nullptr;
1627 if (src_allocno
->end_point
> dest_allocno
->end_point
)
1628 // The src allocno dies first.
1631 if (src_allocno
->num_defs
!= 0)
1633 if (dest_allocno
->end_point
< m_current_bb_point
)
1634 // We don't currently track enough information to handle multiple
1635 // definitions across basic block boundaries.
1638 if (src_allocno
->last_def_point
>= dest_allocno
->end_point
)
1639 // There is another definition during the destination's live range.
1644 if (dest_allocno
->num_defs
== 1)
1645 // dest_allocno is equivalent to src_allocno for dest_allocno's
1646 // entire live range. Fall back to that if we can't establish
1652 if (src_allocno
->last_use_point
>= dest_allocno
->end_point
)
1653 // src_allocno is live during dest_allocno's live range,
1654 // and the two allocnos do not necessarily have the same value.
1658 if (dest_allocno
->group_size
!= 1
1659 // Account for definitions by shared registers.
1660 || dest_allocno
->num_defs
> 1
1661 || DF_REG_DEF_COUNT (dest_allocno
->group ()->regno
) != 1)
1662 // Currently only single allocnos that are defined once can
1663 // share registers with non-equivalent allocnos. This could be
1664 // relaxed, but at the time of writing, aggregates are not valid
1665 // SSA names and so generally only use a single pseudo throughout
1669 if (dest_allocno
->copy_dest
== src_allocno
->id
)
1670 // We've found a complete and valid sharing chain.
1671 return dest_allocno
;
1673 if (dest_allocno
->copy_dest
== INVALID_ALLOCNO
)
1676 auto *next_allocno
= m_allocnos
[dest_allocno
->copy_dest
];
1677 if (!is_chain_candidate (dest_allocno
, next_allocno
, ALL_REASONS
))
1680 dest_allocno
= next_allocno
;
1685 // Add FROM_ALLOCNO's definition information to TO_ALLOCNO's.
1687 early_ra::accumulate_defs (allocno_info
*to_allocno
,
1688 allocno_info
*from_allocno
)
1690 if (from_allocno
->num_defs
> 0)
1692 to_allocno
->num_defs
= MIN (from_allocno
->num_defs
1693 + to_allocno
->num_defs
, 2);
1694 to_allocno
->last_def_point
= MAX (to_allocno
->last_def_point
,
1695 from_allocno
->last_def_point
);
1699 // Record any relevant allocno-related information for an actual or imagined
1700 // copy from SRC to DEST. FROM_MOVE_P is true if the copy was an explicit
1701 // move instruction, false if it represents one way of satisfying the previous
1702 // instruction's constraints.
1704 early_ra::record_copy (rtx dest
, rtx src
, bool from_move_p
)
1706 auto dest_range
= get_allocno_subgroup (dest
);
1707 auto src_range
= get_allocno_subgroup (src
);
1711 && FP_REGNUM_P (REGNO (src
)))
1713 // A copy from an FPR to an allocno group.
1714 unsigned int fpr
= REGNO (src
) - V0_REGNUM
;
1715 m_allocno_copies
.safe_push ({ dest_range
.allocno (0)->id
, fpr
,
1716 dest_range
.count
});
1718 // If the allocno at the other end of the chain of copies from DEST
1719 // has a copy to the same FPR, record that all intervening copy chains
1720 // could become "strong" ones. This indicates that picking the FPR
1721 // avoids a copy at both ends.
1722 unsigned int hard_regno
= REGNO (src
);
1723 for (auto &dest_allocno
: dest_range
.allocnos ())
1724 if (dest_allocno
.hard_regno
== hard_regno
++)
1725 dest_allocno
.is_strong_copy_src
= true;
1727 else if (from_move_p
1730 && FP_REGNUM_P (REGNO (dest
)))
1732 // A copy from an allocno group to an FPR.
1733 unsigned int fpr
= REGNO (dest
) - V0_REGNUM
;
1734 m_allocno_copies
.safe_push ({ src_range
.allocno (0)->id
, fpr
,
1736 for (auto &src_allocno
: src_range
.allocnos ())
1738 // If the copy comes from a move, see whether the destination
1739 // FPR is known to be equal to the source allocno for the FPR's
1741 if (from_move_p
&& src_allocno
.num_defs
== 0)
1743 auto &last_range
= m_fpr_ranges
[fpr
].last ();
1744 if (last_range
.end_point
>= src_allocno
.end_point
)
1745 last_range
.allocno
= src_allocno
.id
;
1747 src_allocno
.hard_regno
= V0_REGNUM
+ fpr
;
1751 else if (src_range
&& dest_range
)
1753 // A copy between two allocno groups. We can only have a mismatched
1754 // number of FPRs for imaginary, non-move copies. In that case
1755 // the matching happens on the common lowparts.
1756 gcc_assert (!from_move_p
|| src_range
.count
== dest_range
.count
);
1757 unsigned int count
= std::min (src_range
.count
, dest_range
.count
);
1758 if (WORDS_BIG_ENDIAN
)
1760 src_range
.start
+= src_range
.count
- count
;
1761 dest_range
.start
+= dest_range
.count
- count
;
1763 src_range
.count
= count
;
1764 dest_range
.count
= count
;
1766 // Ignore (imaginary non-move) copies if the destination is still live.
1767 for (auto &dest_allocno
: dest_range
.allocnos ())
1768 if (bitmap_bit_p (m_live_allocnos
, dest_allocno
.id
))
1771 for (unsigned int i
= 0; i
< src_range
.count
; ++i
)
1773 auto *dest_allocno
= dest_range
.allocno (i
);
1774 auto *src_allocno
= src_range
.allocno (i
);
1775 if (src_allocno
->end_point
> dest_allocno
->start_point
)
1777 gcc_assert (src_allocno
->copy_dest
== INVALID_ALLOCNO
1778 || src_allocno
->copy_dest
== dest_allocno
->id
);
1779 src_allocno
->copy_dest
= dest_allocno
->id
;
1780 src_allocno
->hard_regno
= dest_allocno
->hard_regno
;
1781 dest_allocno
->is_copy_dest
= 1;
1783 else if (auto *start_allocno
= find_related_start (dest_allocno
,
1787 auto *next_allocno
= dest_allocno
;
1790 next_allocno
->related_allocno
= src_allocno
->id
;
1791 next_allocno
->is_equiv
= (start_allocno
== dest_allocno
1793 // If we're sharing two allocnos that are not equivalent,
1794 // carry across the definition information. This is needed
1795 // to prevent multiple incompatible attempts to share with
1796 // the same register.
1797 if (next_allocno
->is_shared ())
1798 accumulate_defs (src_allocno
, next_allocno
);
1799 src_allocno
->last_use_point
1800 = MAX (src_allocno
->last_use_point
,
1801 next_allocno
->last_use_point
);
1803 if (next_allocno
== start_allocno
)
1805 next_allocno
= m_allocnos
[next_allocno
->copy_dest
];
1812 // Record any relevant allocno-related information about the constraints
1813 // on INSN, which has already been extracted.
1815 early_ra::record_constraints (rtx_insn
*insn
)
1817 preprocess_constraints (insn
);
1819 int operand_matches
[MAX_RECOG_OPERANDS
];
1820 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1821 operand_matches
[i
] = -1;
1823 auto alts
= get_preferred_alternatives (insn
);
1824 bool any_ok
= recog_data
.n_alternatives
== 0;
1826 // The set of output operands that are earlyclobber in at least one
1828 operand_mask earlyclobber_operands
= 0;
1830 // The set of output operands that are matched to inputs in at least
1832 operand_mask matched_operands
= 0;
1834 // The set of output operands that are not matched to inputs in at least
1836 operand_mask unmatched_operands
= 0;
1838 // The set of input operands that are matched to outputs in at least one
1839 // alternative, or that overlap with such an input if the output is not
1840 // earlyclobber. The latter part of the condition copes with things
1841 // like y = x * x, where the first x is tied to the destination, and where
1842 // y is not earlyclobber.
1843 operand_mask matches_operands
= 0;
1845 for (int altno
= 0; altno
< recog_data
.n_alternatives
; ++altno
)
1847 if (!(alts
& ALTERNATIVE_BIT (altno
)))
1850 auto *op_alt
= &recog_op_alt
[altno
* recog_data
.n_operands
];
1851 if (!likely_alternative_match_p (op_alt
))
1856 // Update the information for operand DEST_OPNO based on the constraint
1857 // information for operand SRC_OPNO. The numbers can be different for
1858 // swapped commutative operands.
1859 auto record_operand
= [&](int src_opno
, int dest_opno
)
1861 int matches
= op_alt
[src_opno
].matches
;
1862 // A matched earlyclobber cannot be used if the same operand value
1863 // occurs in an unmatched operand. E.g. for y = x * x, a matched
1864 // earlyclobber on the first input does not cover the second input.
1867 rtx op
= recog_data
.operand
[dest_opno
];
1868 operand_mask overlaps
= 0;
1869 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
1871 && !recog_data
.is_operator
[i
]
1872 && recog_data
.operand_type
[i
] != OP_OUT
1873 && reg_overlap_mentioned_p (op
, recog_data
.operand
[i
]))
1874 overlaps
|= 1U << i
;
1875 if (!op_alt
[matches
].earlyclobber
|| overlaps
== 0)
1877 operand_matches
[dest_opno
] = matches
;
1878 matches_operands
|= (1U << dest_opno
) | overlaps
;
1883 auto reject
= count_rejects (op_alt
);
1884 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
1886 operand_mask op_mask
= operand_mask (1) << opno
;
1888 if (recog_data
.operand_type
[opno
] != OP_IN
)
1890 if (reject
== 0 && op_alt
[opno
].matched
>= 0)
1891 matched_operands
|= op_mask
;
1893 unmatched_operands
|= op_mask
;
1896 if (op_alt
[opno
].earlyclobber
)
1897 earlyclobber_operands
|= op_mask
;
1899 // Punt for now on scratches. If we wanted to handle them,
1900 // we'd need to create allocnos for them, like IRA does.
1901 rtx op
= recog_data
.operand
[opno
];
1902 if (GET_CODE (op
) == SCRATCH
1903 && reg_classes_intersect_p (op_alt
[opno
].cl
, FP_REGS
))
1904 record_allocation_failure ([&](){
1905 fprintf (dump_file
, "insn %d has FPR match_scratch",
1909 // Record filter information, which applies to the first register
1911 if (auto filters
= alternative_register_filters (op_alt
, opno
))
1912 if (auto range
= get_allocno_subgroup (recog_data
.operand
[opno
]))
1913 for (unsigned int fpr
= range
.start
; fpr
< 32; ++fpr
)
1914 if (!test_register_filters (filters
, fpr
))
1915 range
.group
->fpr_candidates
&= ~(1U << (fpr
- range
.start
));
1919 // Record possible matched operands.
1920 record_operand (opno
, opno
);
1921 if (recog_data
.constraints
[opno
][0] == '%')
1923 record_operand (opno
, opno
+ 1);
1924 record_operand (opno
+ 1, opno
);
1932 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1933 fprintf (dump_file
, " -- no match\n");
1934 record_allocation_failure ([&](){
1935 fprintf (dump_file
, "no matching constraints for insn %d",
1940 rtx dest_op
= NULL_RTX
;
1941 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
1943 rtx op
= recog_data
.operand
[opno
];
1944 auto op_mask
= operand_mask (1) << opno
;
1945 // Record if there is an output operand that is "independent" of the
1946 // inputs, in the sense that it is never earlyclobber and is never
1947 // matched to an input. See the comment below for how this is used.
1948 if (recog_data
.operand_type
[opno
] == OP_OUT
1949 && (earlyclobber_operands
& op_mask
) == 0
1950 && (matched_operands
& op_mask
) == 0)
1955 // We sometimes decide not to allocate pseudos even if they meet
1956 // the normal FPR preference criteria; see the setters of IGNORE_REG
1957 // for details. However, the premise is that we should only ignore
1958 // definitions of such pseudos if they are independent of the other
1959 // operands in the instruction.
1960 else if (recog_data
.operand_type
[opno
] != OP_IN
1962 && !HARD_REGISTER_P (op
)
1963 && (m_pseudo_regs
[REGNO (op
)].flags
& IGNORE_REG
) != 0)
1964 record_allocation_failure ([&](){
1965 fprintf (dump_file
, "ignored register r%d depends on input operands",
1970 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
1972 auto op_mask
= operand_mask (1) << opno
;
1973 rtx op
= recog_data
.operand
[opno
];
1974 int matches
= operand_matches
[opno
];
1976 // Punt for now on operands that already have a fixed choice of
1977 // register, since we don't have IRA's ability to find an alternative.
1978 // It's better if earlier passes don't create this kind of situation.
1979 if (REG_P (op
) && FP_REGNUM_P (REGNO (op
)))
1980 record_allocation_failure ([&](){
1981 fprintf (dump_file
, "operand %d of insn %d refers directly to %s",
1982 opno
, INSN_UID (insn
), reg_names
[REGNO (op
)]);
1985 // Treat input operands as being earlyclobbered if an output is
1986 // sometimes earlyclobber and if the input never matches an output.
1987 // Do the same if there is an output that is always matched to an
1988 // input, and if this operand doesn't match that input. In both
1989 // cases, tying the input and the output would lead to an impossible
1990 // combination (or at least one that is difficult to reload).
1991 if (recog_data
.operand_type
[opno
] != OP_OUT
1992 && ((earlyclobber_operands
&& matches
< 0)
1993 || ((matched_operands
& ~unmatched_operands
)
1994 && !(matches_operands
& op_mask
))))
1995 for (auto &allocno
: get_allocno_subgroup (op
).allocnos ())
1996 if (allocno
.end_point
+ 1 == m_current_point
)
1997 allocno
.is_earlyclobbered
= true;
1999 // Create copies between operands that can be tied. This (deliberately)
2000 // might add several copies to the same destination register; later code
2001 // can then choose between them based on other criteria.
2003 // If there is an output operand that is never matched or earlyclobber,
2004 // and an input operand that never matches an output operand, create
2005 // a tentative copy between them. This allows hard register preferences
2006 // to be transmitted along the copy chains.
2008 record_copy (recog_data
.operand
[matches
], op
);
2009 else if (dest_op
&& recog_data
.operand_type
[opno
] == OP_IN
)
2010 record_copy (dest_op
, op
);
2014 // If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the
2015 // start of the current basic block, otherwise model the artificial uses
2016 // and defs at the end of the basic block. This is done as part of a
2017 // backwards walk, so defs should be processed before uses.
2019 early_ra::record_artificial_refs (unsigned int flags
)
2023 FOR_EACH_ARTIFICIAL_DEF (ref
, m_current_bb
->index
)
2024 if ((DF_REF_FLAGS (ref
) & DF_REF_AT_TOP
) == flags
2025 && IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
2026 record_fpr_def (DF_REF_REGNO (ref
));
2027 m_current_point
+= 1;
2029 FOR_EACH_ARTIFICIAL_USE (ref
, m_current_bb
->index
)
2030 if ((DF_REF_FLAGS (ref
) & DF_REF_AT_TOP
) == flags
2031 && IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
2032 record_fpr_use (DF_REF_REGNO (ref
));
2033 m_current_point
+= 1;
2038 // - X is a SUBREG, in which case it is a SUBREG of some REG Y
2040 // - one 64-bit word of Y can be modified while preserving all other words
2042 // - X refers to no more than one 64-bit word of Y
2044 // - assigning FPRs to Y would put more than one 64-bit word in each FPR
2046 // For example, this is true of:
2048 // - (subreg:DI (reg:TI R) 0) and
2049 // - (subreg:DI (reg:TI R) 8)
2051 // but is not true of:
2053 // - (subreg:V2SI (reg:V2x2SI R) 0) or
2054 // - (subreg:V2SI (reg:V2x2SI R) 8).
2056 allocno_assignment_is_rmw (rtx x
)
2058 if (partial_subreg_p (x
))
2060 auto outer_mode
= GET_MODE (x
);
2061 auto inner_mode
= GET_MODE (SUBREG_REG (x
));
2062 if (known_eq (REGMODE_NATURAL_SIZE (inner_mode
), 0U + UNITS_PER_WORD
)
2063 && known_lt (GET_MODE_SIZE (outer_mode
), UNITS_PER_VREG
))
2065 auto nregs
= targetm
.hard_regno_nregs (V0_REGNUM
, inner_mode
);
2066 if (maybe_ne (nregs
* UNITS_PER_WORD
, GET_MODE_SIZE (inner_mode
)))
2073 // Called as part of a backwards walk over a block. Model the definitions
2074 // in INSN, excluding partial call clobbers.
2076 early_ra::record_insn_defs (rtx_insn
*insn
)
2080 FOR_EACH_INSN_DEF (ref
, insn
)
2081 if (IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
2082 record_fpr_def (DF_REF_REGNO (ref
));
2085 rtx reg
= DF_REF_REG (ref
);
2086 auto range
= get_allocno_subgroup (reg
);
2087 for (auto &allocno
: range
.allocnos ())
2089 // Make sure that assigning to the DF_REF_REG clobbers the
2090 // whole of this allocno, not just some of it.
2091 if (allocno_assignment_is_rmw (reg
))
2093 record_live_range_failure ([&](){
2094 fprintf (dump_file
, "read-modify-write of allocno %d",
2100 // If the destination is unused, record a momentary blip
2101 // in its live range.
2102 if (!bitmap_bit_p (m_live_allocnos
, allocno
.id
))
2103 record_allocno_use (&allocno
);
2104 record_allocno_def (&allocno
);
2107 m_current_point
+= 1;
2110 // Called as part of a backwards walk over a block. Model the call made
2111 // by INSN as a separate phase in the evaluation of the insn. Any partial
2112 // call clobbers happen at that point, rather than in the definition or use
2113 // phase of the insn.
2115 early_ra::record_insn_call (rtx_call_insn
*insn
)
2117 function_abi abi
= insn_callee_abi (insn
);
2118 m_call_points
[abi
.id ()].safe_push (m_current_point
);
2119 m_current_point
+= 1;
2122 // Called as part of a backwards walk over a block. Model the uses in INSN.
2123 // We can ignore READ_MODIFY_WRITE uses of plain subregs, since we track the
2124 // FPR-sized parts of them individually.
2126 early_ra::record_insn_uses (rtx_insn
*insn
)
2130 FOR_EACH_INSN_USE (ref
, insn
)
2131 if (IN_RANGE (DF_REF_REGNO (ref
), V0_REGNUM
, V31_REGNUM
))
2132 record_fpr_use (DF_REF_REGNO (ref
));
2133 else if (!DF_REF_FLAGS_IS_SET (ref
, DF_REF_READ_WRITE
)
2134 || DF_REF_FLAGS_IS_SET (ref
, DF_REF_STRICT_LOW_PART
)
2135 || DF_REF_FLAGS_IS_SET (ref
, DF_REF_ZERO_EXTRACT
))
2137 auto range
= get_allocno_subgroup (DF_REF_REG (ref
));
2138 for (auto &allocno
: range
.allocnos ())
2139 record_allocno_use (&allocno
);
2141 m_current_point
+= 1;
2144 // ALLOCNO->is_strong_copy_src is true. See whether ALLOCNO heads a
2145 // natural chain that has an affinity with the same hard register at
2148 early_ra::consider_strong_copy_src_chain (allocno_info
*allocno
)
2150 auto *src_allocno
= allocno
;
2151 while (src_allocno
->copy_dest
!= INVALID_ALLOCNO
)
2153 auto *dest_allocno
= m_allocnos
[src_allocno
->copy_dest
];
2154 if (dest_allocno
->start_point
> src_allocno
->end_point
2155 || dest_allocno
->hard_regno
!= src_allocno
->hard_regno
)
2157 gcc_checking_assert (dest_allocno
->is_copy_dest
);
2158 src_allocno
= dest_allocno
;
2161 while (allocno
->copy_dest
!= INVALID_ALLOCNO
)
2163 allocno
->is_strong_copy_src
= 1;
2164 allocno
= m_allocnos
[allocno
->copy_dest
];
2165 allocno
->is_strong_copy_dest
= 1;
2170 // ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being
2171 // chained together. See whether chaining them requires the containing
2172 // groups to have the same stride, or whether it requires them to have
2173 // different strides. Return 1 if they should have the same stride,
2174 // -1 if they should have different strides, or 0 if it doesn't matter.
2176 early_ra::strided_polarity_pref (allocno_info
*allocno1
,
2177 allocno_info
*allocno2
)
2179 if (allocno1
->offset
+ 1 < allocno1
->group_size
2180 && allocno2
->offset
+ 1 < allocno2
->group_size
)
2182 if (is_chain_candidate (allocno1
+ 1, allocno2
+ 1, ALL_REASONS
))
2188 if (allocno1
->offset
> 0 && allocno2
->offset
> 0)
2190 if (is_chain_candidate (allocno1
- 1, allocno2
- 1, ALL_REASONS
))
2199 // Decide which groups should be strided. Also complete "strong" copy chains.
2201 early_ra::find_strided_accesses ()
2203 // This function forms a graph of allocnos, linked by equivalences and
2204 // natural copy chains. It temporarily uses chain_next to record the
2205 // reverse of equivalence edges (related_allocno) and chain_prev to record
2206 // the reverse of copy edges (copy_dest).
2207 unsigned int allocno_info::*links
[] = {
2208 &allocno_info::chain_next
,
2209 &allocno_info::chain_prev
,
2210 &allocno_info::copy_dest
,
2211 &allocno_info::related_allocno
2214 // Set up the temporary reverse edges. Check for strong copy chains.
2215 for (unsigned int i
= m_allocnos
.length (); i
-- > 0; )
2217 auto *allocno1
= m_allocnos
[i
];
2218 if (allocno1
->copy_dest
!= INVALID_ALLOCNO
)
2219 m_allocnos
[allocno1
->copy_dest
]->chain_prev
= allocno1
->id
;
2220 if (allocno1
->related_allocno
!= INVALID_ALLOCNO
)
2221 m_allocnos
[allocno1
->related_allocno
]->chain_next
= allocno1
->id
;
2223 if (allocno1
->is_strong_copy_src
2224 && !allocno1
->is_copy_dest
2225 && !consider_strong_copy_src_chain (allocno1
))
2226 allocno1
->is_strong_copy_src
= false;
2229 // Partition the graph into cliques based on edges that have the following
2232 // - the edge joins two allocnos whose groups have a free choice between
2233 // consecutive and strided allocations.
2235 // - the two groups have a relative strided polarity preference (that is
2236 // they should make the same choice between consecutive and strided,
2237 // or they should make opposite choices).
2239 // Assign relative polarities to each group connected in this way.
2241 // The aim is to discover natural move-free striding choices, which will
2242 // often exist in carefully written ACLE code.
2243 unsigned int num_edges
= m_allocnos
.length () * ARRAY_SIZE (links
);
2244 auto_sbitmap
visited_edges (num_edges
);
2245 bitmap_clear (visited_edges
);
2247 auto_vec
<unsigned int, 32> worklist
;
2248 for (unsigned int i
= 0; i
< num_edges
; ++i
)
2250 if (!bitmap_set_bit (visited_edges
, i
))
2252 worklist
.quick_push (i
);
2253 while (!worklist
.is_empty ())
2255 auto ei
= worklist
.pop ();
2256 auto *allocno1
= m_allocnos
[ei
/ ARRAY_SIZE (links
)];
2257 auto ai2
= allocno1
->*links
[ei
% ARRAY_SIZE (links
)];
2258 if (ai2
== INVALID_ALLOCNO
)
2261 auto *allocno2
= m_allocnos
[ai2
];
2262 auto *group1
= allocno1
->group ();
2263 auto *group2
= allocno2
->group ();
2264 if (!group1
->has_flexible_stride
|| !group2
->has_flexible_stride
)
2267 int pref
= strided_polarity_pref (allocno1
, allocno2
);
2271 for (auto *group
: { group1
, group2
})
2272 for (auto &allocno
: group
->allocnos ())
2273 for (unsigned int j
= 0; j
< ARRAY_SIZE (links
); ++j
)
2274 if (bitmap_set_bit (visited_edges
, allocno
.id
* 4 + j
))
2275 worklist
.safe_push (allocno
.id
* 4 + j
);
2277 if (group1
->strided_polarity
)
2278 group2
->strided_polarity
= group1
->strided_polarity
* pref
;
2279 else if (group2
->strided_polarity
)
2280 group1
->strided_polarity
= group2
->strided_polarity
* pref
;
2283 group1
->strided_polarity
= 1;
2284 group2
->strided_polarity
= pref
;
2289 // Now look for edges between allocnos in multi-register groups where:
2291 // - the two groups have a relative strided polarity preference (as above).
2293 // - one group (G1) has a free choice between consecutive and strided
2296 // - the other group (G2) must use consecutive allocations.
2298 // Update G1's individual preference for strided or consecutive allocations
2299 // based on G2. If the previous loop chose a polarity for G1, work out
2300 // whether it is better for polarity 1 or -1 to correspond to consecutive
2302 int consecutive_pref
= 0;
2303 for (unsigned int i
= m_allocnos
.length (); i
-- > 0; )
2305 auto *allocno1
= m_allocnos
[i
];
2306 for (auto link
: links
)
2308 auto ai2
= allocno1
->*link
;
2309 if (ai2
== INVALID_ALLOCNO
)
2312 auto *allocno2
= m_allocnos
[ai2
];
2313 auto *group1
= allocno1
->group ();
2314 auto *group2
= allocno2
->group ();
2315 if (group1
->has_flexible_stride
== group2
->has_flexible_stride
)
2318 int pref
= strided_polarity_pref (allocno1
, allocno2
);
2322 auto *group
= (group1
->has_flexible_stride
? group1
: group2
);
2323 consecutive_pref
+= group
->strided_polarity
* pref
;
2324 group
->consecutive_pref
+= pref
;
2328 // If it doesn't matter whether polarity 1 or -1 corresponds to consecutive
2329 // allocation, arbitrarily pick 1.
2330 if (consecutive_pref
== 0)
2331 consecutive_pref
= 1;
2333 // Record which multi-register groups should use strided allocations.
2334 // Clear out the temporary edges.
2335 for (unsigned int ai
= 0; ai
< m_allocnos
.length (); ++ai
)
2337 auto *allocno
= m_allocnos
[ai
];
2338 allocno
->chain_prev
= INVALID_ALLOCNO
;
2339 allocno
->chain_next
= INVALID_ALLOCNO
;
2341 if (allocno
->offset
!= 0)
2344 auto *group
= allocno
->group ();
2345 if (!group
->has_flexible_stride
)
2348 bool make_strided
= (group
->strided_polarity
2349 ? (consecutive_pref
* group
->strided_polarity
) < 0
2350 : group
->consecutive_pref
< 0);
2351 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2352 fprintf (dump_file
, "Allocno [%d:%d]: strided polarity %d,"
2353 " consecutive pref %d, %s\n",
2354 allocno
->id
, allocno
->id
+ group
->size
- 1,
2355 group
->strided_polarity
, group
->consecutive_pref
,
2356 make_strided
? "making strided" : "keeping consecutive");
2360 // 2-register groups have a stride of 8 FPRs and must start in
2361 // registers matching the mask 0x17. 4-register groups have a stride
2362 // of 4 FPRs and must start in registers matching the mask 0x13.
2363 group
->stride
= group
->size
== 2 ? 8 : 4;
2364 gcc_checking_assert (group
->fpr_candidates
2365 == (group
->size
== 2 ? 0x55555555 : 0x11111111));
2366 group
->fpr_candidates
= (group
->size
== 2 ? 0xff00ff : 0xf000f);
2370 // Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=>
2371 // result that puts allocnos in order of increasing FIELD.
2372 template<unsigned int early_ra::allocno_info::*field
>
2374 early_ra::cmp_increasing (const void *allocno1_ptr
, const void *allocno2_ptr
)
2376 auto *allocno1
= *(allocno_info
*const *) allocno1_ptr
;
2377 auto *allocno2
= *(allocno_info
*const *) allocno2_ptr
;
2379 if (allocno1
->*field
!= allocno2
->*field
)
2380 return allocno1
->*field
< allocno2
->*field
? -1 : 1;
2381 return (allocno1
->id
< allocno2
->id
? -1
2382 : allocno1
->id
== allocno2
->id
? 0 : 1);
2385 // Return true if we should consider chaining ALLOCNO1 onto the head
2386 // of ALLOCNO2. STRICTNESS says whether we should take copy-elision
2387 // heuristics into account, or whether we should just consider things
2388 // that matter for correctness.
2390 // This is just a local test of the two allocnos; it doesn't guarantee
2391 // that chaining them would give a self-consistent system.
2393 early_ra::is_chain_candidate (allocno_info
*allocno1
, allocno_info
*allocno2
,
2394 test_strictness strictness
)
2396 if (allocno2
->is_shared ())
2399 while (allocno1
->is_equiv
)
2400 allocno1
= m_allocnos
[allocno1
->related_allocno
];
2402 if (allocno2
->start_point
>= allocno1
->end_point
2403 && !allocno2
->is_equiv_to (allocno1
->id
))
2406 if (allocno1
->is_earlyclobbered
2407 && allocno1
->end_point
== allocno2
->start_point
+ 1)
2410 if (strictness
== ALL_REASONS
&& allocno2
->is_copy_dest
)
2412 if (allocno1
->copy_dest
!= allocno2
->id
)
2414 if (allocno2
->is_strong_copy_dest
&& !allocno1
->is_strong_copy_src
)
2420 // We're trying to chain allocno ALLOCNO1 to a later allocno.
2421 // Rate how good a choice ALLOCNO2 would be, with higher being better.
2423 early_ra::rate_chain (allocno_info
*allocno1
, allocno_info
*allocno2
)
2426 if (allocno2
->is_strong_copy_dest
)
2428 else if (allocno2
->is_copy_dest
)
2431 // Prefer well-aligned matches.
2432 auto *group1
= allocno1
->group ();
2433 auto *group2
= allocno2
->group ();
2434 if (group1
->stride
== 1 && group2
->stride
== 1)
2436 unsigned int min_size
= std::min (group1
->color_rep ()->size
,
2437 group2
->color_rep ()->size
);
2438 if ((group1
->color_rep_offset
+ allocno1
->offset
) % min_size
2439 == (group2
->color_rep_offset
+ allocno2
->offset
) % min_size
)
2447 // Sort the chain_candidate_infos at ARG1 and ARG2 in order of decreasing
2450 early_ra::cmp_chain_candidates (const void *arg1
, const void *arg2
)
2452 auto &candidate1
= *(const chain_candidate_info
*) arg1
;
2453 auto &candidate2
= *(const chain_candidate_info
*) arg2
;
2454 if (candidate1
.score
!= candidate2
.score
)
2455 return candidate1
.score
> candidate2
.score
? -1 : 1;
2457 // Prefer to increase the gap between uses of the allocated register,
2458 // to give the scheduler more freedom.
2459 auto *allocno1
= candidate1
.allocno
;
2460 auto *allocno2
= candidate2
.allocno
;
2461 if (allocno1
->start_point
!= allocno2
->start_point
)
2462 return allocno1
->start_point
< allocno2
->start_point
? -1 : 1;
2464 if (allocno1
!= allocno2
)
2465 return allocno1
->id
< allocno2
->id
? -1 : 1;
2470 // Join the chains of allocnos that start at HEADI1 and HEADI2.
2471 // HEADI1 is either empty or a single allocno.
2473 early_ra::chain_allocnos (unsigned int &headi1
, unsigned int &headi2
)
2475 if (headi1
== INVALID_ALLOCNO
)
2477 else if (headi2
== INVALID_ALLOCNO
)
2481 auto *head1
= m_allocnos
[headi1
];
2482 auto *head2
= m_allocnos
[headi2
];
2483 gcc_checking_assert (head1
->chain_next
== INVALID_ALLOCNO
2484 && head1
->chain_prev
== INVALID_ALLOCNO
2485 && head2
->chain_prev
== INVALID_ALLOCNO
);
2488 && m_allocnos
[head1
->related_allocno
]->copy_dest
== headi2
)
2490 head1
->is_copy_dest
= head2
->is_copy_dest
;
2491 head1
->is_strong_copy_dest
= head2
->is_strong_copy_dest
;
2492 m_allocnos
[head1
->related_allocno
]->copy_dest
= headi1
;
2494 head1
->chain_next
= headi2
;
2495 head2
->chain_prev
= headi1
;
2501 // Add GROUP2's FPR information to GROUP1's, given that GROUP2 starts
2502 // OFFSET allocnos into GROUP2.
2504 early_ra::merge_fpr_info (allocno_group_info
*group1
,
2505 allocno_group_info
*group2
,
2506 unsigned int offset
)
2508 group1
->fpr_size
= std::max (group1
->fpr_size
, group2
->fpr_size
);
2509 group1
->fpr_candidates
&= (group2
->fpr_candidates
2510 >> (offset
* group1
->stride
));
2513 // Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
2514 // ends being at allocno offset REP_OFFSET from the start of REP.
2516 early_ra::set_single_color_rep (allocno_info
*allocno
, allocno_group_info
*rep
,
2517 unsigned int rep_offset
)
2519 auto *group
= allocno
->group ();
2520 if (group
->m_color_rep
== rep
)
2523 group
->m_color_rep
= rep
;
2524 gcc_checking_assert (multiple_p (group
->stride
, rep
->stride
));
2525 unsigned int factor
= group
->stride
/ rep
->stride
;
2526 gcc_checking_assert (rep_offset
>= allocno
->offset
* factor
);
2527 group
->color_rep_offset
= rep_offset
- allocno
->offset
* factor
;
2528 merge_fpr_info (rep
, group
, group
->color_rep_offset
);
2531 // REP1 and REP2 are color representatives. Change REP1's color representative
2532 // to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2.
2534 early_ra::set_color_rep (allocno_group_info
*rep1
, allocno_group_info
*rep2
,
2535 unsigned int rep2_offset
)
2537 gcc_checking_assert (rep1
!= rep2
2538 && rep2
->m_color_rep
== rep2
2539 && multiple_p (rep1
->stride
, rep2
->stride
));
2541 auto heads1
= rep1
->chain_heads ();
2542 auto heads2
= rep2
->chain_heads ();
2543 for (unsigned int i1
= 0; i1
< heads1
.size (); ++i1
)
2544 if (heads1
[i1
] != INVALID_ALLOCNO
)
2546 unsigned int i2
= rep2_offset
+ i1
* rep1
->stride
/ rep2
->stride
;
2547 if (heads2
[i2
] == INVALID_ALLOCNO
)
2548 heads2
[i2
] = heads1
[i1
];
2550 gcc_checking_assert (heads2
[i2
] == heads1
[i1
]);
2551 set_single_color_rep (m_allocnos
[heads1
[i1
]], rep2
, i2
);
2555 // Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2.
2556 // Return true on success.
2558 early_ra::try_to_chain_allocnos (allocno_info
*allocno1
,
2559 allocno_info
*allocno2
)
2561 auto *group1
= allocno1
->group ()->color_rep ();
2562 auto *group2
= allocno2
->group ()->color_rep ();
2564 // Avoid trying to tie different subgroups of the same group. This can
2565 // happen if the parts of a register are defined and used piecemeal.
2566 if (group1
== group2
)
2569 // The stride (in FPRs) between allocnos of each color representative.
2570 auto fpr_stride1
= group1
->stride
;
2571 auto fpr_stride2
= group2
->stride
;
2573 // The offset (in FPRs) of each allocno group from its color representative.
2574 auto fpr_offset1
= allocno1
->group ()->color_rep_offset
* fpr_stride1
;
2575 auto fpr_offset2
= allocno2
->group ()->color_rep_offset
* fpr_stride2
;
2577 // The offset (in FPRs) of each allocno from its color representative.
2578 fpr_offset1
+= allocno1
->offset
* allocno1
->group ()->stride
;
2579 fpr_offset2
+= allocno2
->offset
* allocno2
->group ()->stride
;
2581 // The FPR overlap is in multiples of the larger stride.
2582 auto max_fpr_stride
= std::max (fpr_stride1
, fpr_stride2
);
2583 auto min_fpr_offset
= std::min (fpr_offset1
, fpr_offset2
);
2584 auto fpr_overlap_offset
= ROUND_DOWN (min_fpr_offset
, max_fpr_stride
);
2586 // The offset (in FPRs) of the start of the overlapping region from
2587 // each color representative.
2588 fpr_offset1
-= fpr_overlap_offset
;
2589 fpr_offset2
-= fpr_overlap_offset
;
2591 // The number of FPRs in each color representative after the start
2592 // of the overlapping region.
2593 auto fpr_after1
= (group1
->size
- 1) * fpr_stride1
- fpr_offset1
;
2594 auto fpr_after2
= (group2
->size
- 1) * fpr_stride2
- fpr_offset2
;
2596 auto min_fpr_after
= std::min (fpr_after1
, fpr_after2
);
2598 // The number of overlapping allocnos.
2599 auto allocno_overlap_size
= min_fpr_after
/ max_fpr_stride
+ 1;
2601 // The offset (in allocnos) of the overlapping region from the start
2602 // of each color representative.
2603 auto allocno_offset1
= fpr_offset1
/ fpr_stride1
;
2604 auto allocno_offset2
= fpr_offset2
/ fpr_stride2
;
2606 // The stride (in allocnos) between overlapping allocnos.
2607 auto allocno_stride1
= max_fpr_stride
/ fpr_stride1
;
2608 auto allocno_stride2
= max_fpr_stride
/ fpr_stride2
;
2610 // Reject combinations that are impossible to allocate.
2611 auto fprs1
= group1
->fpr_candidates
;
2612 auto fprs2
= group2
->fpr_candidates
;
2613 if (fpr_offset1
> fpr_offset2
)
2614 fprs2
>>= (fpr_offset1
- fpr_offset2
);
2616 fprs1
>>= (fpr_offset2
- fpr_offset1
);
2617 if ((fprs1
& fprs2
) == 0)
2619 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2620 fprintf (dump_file
, " - cannot chain %d->%d, no FPRs in common"
2621 " (%08x@%d and %08x@%d)\n", allocno1
->id
, allocno2
->id
,
2622 group1
->fpr_candidates
, fpr_offset1
,
2623 group2
->fpr_candidates
, fpr_offset2
);
2627 // Check whether the chain can be formed.
2628 auto heads1
= group1
->chain_heads ();
2629 auto heads2
= group2
->chain_heads ();
2630 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2632 auto headi1
= heads1
[allocno_offset1
+ i
* allocno_stride1
];
2633 auto headi2
= heads2
[allocno_offset2
+ i
* allocno_stride2
];
2634 if (headi1
!= INVALID_ALLOCNO
&& headi2
!= INVALID_ALLOCNO
)
2636 auto *head1
= m_allocnos
[headi1
];
2637 auto *head2
= m_allocnos
[headi2
];
2638 if (head1
->chain_next
!= INVALID_ALLOCNO
)
2640 if (!is_chain_candidate (head1
, head2
, CORRECTNESS_ONLY
))
2645 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2647 fprintf (dump_file
, " - chaining allocnos [");
2648 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2649 fprintf (dump_file
, "%s%d", i
? "," : "",
2650 heads1
[allocno_offset1
+ i
* allocno_stride1
]);
2651 fprintf (dump_file
, "] and [");
2652 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2653 fprintf (dump_file
, "%s%d", i
? "," : "",
2654 heads2
[allocno_offset2
+ i
* allocno_stride2
]);
2655 fprintf (dump_file
, "]\n");
2658 // Chain the allocnos, updating the chain heads.
2659 for (unsigned int i
= 0; i
< allocno_overlap_size
; ++i
)
2660 chain_allocnos (heads1
[allocno_offset1
+ i
* allocno_stride1
],
2661 heads2
[allocno_offset2
+ i
* allocno_stride2
]);
2663 // Pick a color representative for the merged groups.
2664 allocno_group_info
*new_rep
;
2665 if (allocno_offset1
== 0
2666 && group1
->size
== allocno_overlap_size
* allocno_stride1
2667 && multiple_p (fpr_stride1
, fpr_stride2
))
2669 // The first group fits within the second.
2670 set_color_rep (group1
, group2
, allocno_offset2
);
2673 else if (allocno_offset2
== 0
2674 && group2
->size
== allocno_overlap_size
* allocno_stride2
2675 && multiple_p (fpr_stride2
, fpr_stride1
))
2677 // The second group fits within the first.
2678 set_color_rep (group2
, group1
, allocno_offset1
);
2683 // We need a new group that is big enough to span both groups.
2684 // The new group always has an FPR stride of 1.
2685 auto max_fpr_offset
= std::max (fpr_offset1
, fpr_offset2
);
2686 auto max_fpr_after
= std::max (fpr_after1
, fpr_after2
);
2687 auto new_size
= max_fpr_offset
+ max_fpr_after
+ 1;
2688 new_rep
= create_allocno_group (INVALID_REGNUM
, new_size
);
2690 set_color_rep (group1
, new_rep
, max_fpr_offset
- fpr_offset1
);
2691 set_color_rep (group2
, new_rep
, max_fpr_offset
- fpr_offset2
);
2694 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2696 fprintf (dump_file
, " - new frontier [");
2697 auto new_heads
= new_rep
->chain_heads ();
2698 for (unsigned int i
= 0; i
< new_heads
.size (); ++i
)
2701 fprintf (dump_file
, ",");
2702 if (new_heads
[i
] == INVALID_ALLOCNO
)
2703 fprintf (dump_file
, "-");
2705 fprintf (dump_file
, "%d", new_heads
[i
]);
2707 fprintf (dump_file
, "]\n");
2713 // Create a color_info for color representative GROUP.
2715 early_ra::create_color (allocno_group_info
*group
)
2717 auto *color
= region_allocate
<color_info
> ();
2718 color
->id
= m_colors
.length ();
2719 color
->hard_regno
= FIRST_PSEUDO_REGISTER
;
2720 color
->group
= group
;
2722 gcc_checking_assert (group
->m_color_rep
== group
);
2723 group
->has_color
= true;
2724 group
->color
= m_colors
.length ();
2726 m_colors
.safe_push (color
);
2729 // Form allocnos into chains. Create colors for each resulting clique.
2731 early_ra::form_chains ()
2733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2734 fprintf (dump_file
, "\nChaining allocnos:\n");
2736 // Perform (modified) interval graph coloring. First sort by
2737 // increasing start point.
2738 m_sorted_allocnos
.reserve (m_allocnos
.length ());
2739 m_sorted_allocnos
.splice (m_allocnos
);
2740 m_sorted_allocnos
.qsort (cmp_increasing
<&allocno_info::start_point
>);
2742 // During this phase, color representatives are only correct for
2743 // unprocessed allocno groups (where the color representative is
2744 // the group itself) and for groups that contain a current chain head.
2745 unsigned int ti
= 0;
2746 auto_vec
<chain_candidate_info
> candidates
;
2747 for (unsigned int hi
= 0; hi
< m_sorted_allocnos
.length (); ++hi
)
2749 auto *allocno1
= m_sorted_allocnos
[hi
];
2750 if (allocno1
->chain_next
!= INVALID_ALLOCNO
)
2753 // Record conflicts with direct uses for FPR hard registers.
2754 auto *group1
= allocno1
->group ();
2755 for (unsigned int fpr
= allocno1
->offset
; fpr
< 32; ++fpr
)
2756 if (fpr_conflicts_with_allocno_p (fpr
, allocno1
))
2757 group1
->fpr_candidates
&= ~(1U << (fpr
- allocno1
->offset
));
2759 // Record conflicts due to partially call-clobbered registers.
2760 // (Full clobbers are handled by the previous loop.)
2761 for (unsigned int abi_id
= 0; abi_id
< NUM_ABI_IDS
; ++abi_id
)
2762 if (call_in_range_p (abi_id
, allocno1
->start_point
,
2763 allocno1
->end_point
))
2765 auto fprs
= partial_fpr_clobbers (abi_id
, group1
->fpr_size
);
2766 group1
->fpr_candidates
&= ~fprs
>> allocno1
->offset
;
2769 if (allocno1
->is_shared ())
2771 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2772 fprintf (dump_file
, " Allocno %d shares the same hard register"
2773 " as allocno %d\n", allocno1
->id
,
2774 allocno1
->related_allocno
);
2775 auto *allocno2
= m_allocnos
[allocno1
->related_allocno
];
2776 merge_fpr_info (allocno2
->group (), group1
, allocno2
->offset
);
2777 m_shared_allocnos
.safe_push (allocno1
);
2781 // Find earlier allocnos (in processing order) that could be chained
2783 candidates
.truncate (0);
2784 for (unsigned int sci
= ti
; sci
< hi
; ++sci
)
2786 auto *allocno2
= m_sorted_allocnos
[sci
];
2787 if (allocno2
->chain_prev
== INVALID_ALLOCNO
)
2789 if (!is_chain_candidate (allocno1
, allocno2
, ALL_REASONS
))
2791 chain_candidate_info candidate
;
2792 candidate
.allocno
= allocno2
;
2793 candidate
.score
= rate_chain (allocno1
, allocno2
);
2794 candidates
.safe_push (candidate
);
2800 // Sort the candidates by decreasing score.
2801 candidates
.qsort (cmp_chain_candidates
);
2802 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2804 fprintf (dump_file
, " Chain candidates for %d:", allocno1
->id
);
2805 for (auto &candidate
: candidates
)
2806 fprintf (dump_file
, " %d(%d)", candidate
.allocno
->id
,
2808 fprintf (dump_file
, "\n");
2811 // Pick the first candidate that works.
2812 for (auto &candidate
: candidates
)
2813 if (try_to_chain_allocnos (allocno1
, candidate
.allocno
))
2817 // Create color_infos for each group. Make sure that each group's
2818 // color representative is up to date.
2819 for (unsigned int hi
= m_sorted_allocnos
.length (); hi
-- > 0; )
2821 auto *allocno
= m_sorted_allocnos
[hi
];
2822 if (allocno
->is_shared ())
2825 auto *rep
= allocno
->group ()->color_rep ();
2830 auto heads
= rep
->chain_heads ();
2831 for (unsigned int i
= 0; i
< heads
.size (); ++i
)
2833 unsigned int ai
= heads
[i
];
2834 while (ai
!= INVALID_ALLOCNO
)
2836 allocno
= m_allocnos
[ai
];
2837 set_single_color_rep (allocno
, rep
, i
* rep
->stride
);
2838 ai
= allocno
->chain_next
;
2844 // Return true if the given FPR (starting at 0) conflicts with allocno
2847 early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr
,
2848 allocno_info
*allocno
)
2850 auto &ranges
= m_fpr_ranges
[fpr
];
2851 unsigned int start_i
= 0;
2852 unsigned int end_i
= ranges
.length ();
2853 while (start_i
< end_i
)
2855 unsigned int mid_i
= (start_i
+ end_i
) / 2;
2856 auto &range
= ranges
[mid_i
];
2857 if (allocno
->end_point
> range
.start_point
)
2858 start_i
= mid_i
+ 1;
2859 else if (allocno
->start_point
< range
.end_point
)
2863 if (range
.allocno
!= allocno
->id
)
2865 // The FPR is equivalent to ALLOCNO for this particular range.
2866 // See whether ALLOCNO conflicts with a neighboring range.
2868 && ranges
[mid_i
- 1].start_point
>= allocno
->end_point
)
2870 if (mid_i
+ 1 < ranges
.length ()
2871 && ranges
[mid_i
+ 1].end_point
<= allocno
->start_point
)
2879 // Return true if there is a call with ABI identifier ABI_ID in the inclusive
2880 // program point range [START_POINT, END_POINT].
2882 early_ra::call_in_range_p (unsigned int abi_id
, unsigned int start_point
,
2883 unsigned int end_point
)
2885 auto &points
= m_call_points
[abi_id
];
2886 unsigned int start_i
= 0;
2887 unsigned int end_i
= points
.length ();
2888 while (start_i
< end_i
)
2890 unsigned int mid_i
= (start_i
+ end_i
) / 2;
2891 auto point
= points
[mid_i
];
2892 if (end_point
> point
)
2893 start_i
= mid_i
+ 1;
2894 else if (start_point
< point
)
2902 // Return the set of FPRs for which a value of size SIZE will be clobbered
2903 // by a call to a function with ABI identifier ABI_ID, but would not be
2904 // for some smaller size. The set therefore excludes FPRs that are
2905 // fully-clobbered, like V0 in the base ABI.
2907 early_ra::partial_fpr_clobbers (unsigned int abi_id
, fpr_size_info size
)
2909 auto &abi
= function_abis
[abi_id
];
2910 unsigned int clobbers
= 0;
2911 machine_mode mode
= (size
== FPR_D
? V8QImode
2912 : size
== FPR_Q
? V16QImode
: VNx16QImode
);
2913 for (unsigned int regno
= V0_REGNUM
; regno
<= V31_REGNUM
; ++regno
)
2914 if (!abi
.clobbers_full_reg_p (regno
)
2915 && abi
.clobbers_reg_p (mode
, regno
))
2916 clobbers
|= 1U << (regno
- V0_REGNUM
);
2920 // Process copies between pseudo registers and hard registers and update
2921 // the FPR preferences for the associated colors.
2923 early_ra::process_copies ()
2925 for (auto ©
: m_allocno_copies
)
2927 auto *allocno
= m_allocnos
[copy
.allocno
];
2928 auto *group
= allocno
->group ();
2929 auto offset
= group
->color_rep_offset
+ allocno
->offset
;
2930 if (offset
> copy
.fpr
)
2933 unsigned int fpr
= copy
.fpr
- offset
;
2934 auto *color
= m_colors
[group
->color_rep ()->color
];
2935 color
->fpr_preferences
[fpr
] = MIN (color
->fpr_preferences
[fpr
]
2936 + copy
.weight
, 127);
2937 color
->num_fpr_preferences
+= copy
.weight
;
2941 // Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
2942 // result that puts colors in allocation order.
2944 early_ra::cmp_allocation_order (const void *color1_ptr
, const void *color2_ptr
)
2946 auto *color1
= *(color_info
*const *) color1_ptr
;
2947 auto *color2
= *(color_info
*const *) color2_ptr
;
2949 // Allocate bigger groups before smaller groups.
2950 if (color1
->group
->size
!= color2
->group
->size
)
2951 return color1
->group
->size
> color2
->group
->size
? -1 : 1;
2953 // Allocate groups with stronger FPR preferences before groups with weaker
2955 if (color1
->num_fpr_preferences
!= color2
->num_fpr_preferences
)
2956 return color1
->num_fpr_preferences
> color2
->num_fpr_preferences
? -1 : 1;
2958 return (color1
->id
< color2
->id
? -1
2959 : color1
->id
== color2
->id
? 0 : 1);
2962 // Allocate a register to each color. If we run out of registers,
2963 // give up on doing a full allocation of the FPR-based pseudos in the
2966 early_ra::allocate_colors ()
2968 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2969 fprintf (dump_file
, "\nAllocating registers:\n");
2971 auto_vec
<color_info
*> sorted_colors
;
2972 sorted_colors
.safe_splice (m_colors
);
2973 sorted_colors
.qsort (cmp_allocation_order
);
2975 for (unsigned int i
= 0; i
< 32; ++i
)
2976 if (!crtl
->abi
->clobbers_full_reg_p (V0_REGNUM
+ i
))
2977 m_call_preserved_fprs
|= 1U << i
;
2979 for (auto *color
: sorted_colors
)
2981 unsigned int candidates
= color
->group
->fpr_candidates
;
2982 for (unsigned int i
= 0; i
< color
->group
->size
; ++i
)
2983 candidates
&= ~(m_allocated_fprs
>> i
);
2984 unsigned int best
= INVALID_REGNUM
;
2985 int best_weight
= 0;
2986 unsigned int best_recency
= 0;
2987 for (unsigned int fpr
= 0; fpr
<= 32U - color
->group
->size
; ++fpr
)
2989 if ((candidates
& (1U << fpr
)) == 0)
2991 int weight
= color
->fpr_preferences
[fpr
];
2992 unsigned int recency
= 0;
2993 // Account for registers that the current function must preserve.
2994 for (unsigned int i
= 0; i
< color
->group
->size
; ++i
)
2996 if (m_call_preserved_fprs
& (1U << (fpr
+ i
)))
2998 recency
= MAX (recency
, m_fpr_recency
[fpr
+ i
]);
3000 // Prefer higher-numbered registers in the event of a tie.
3001 // This should tend to keep lower-numbered registers free
3002 // for allocnos that require V0-V7 or V0-V15.
3003 if (best
== INVALID_REGNUM
3004 || best_weight
< weight
3005 || (best_weight
== weight
&& recency
<= best_recency
))
3008 best_weight
= weight
;
3009 best_recency
= recency
;
3013 if (best
== INVALID_REGNUM
)
3015 record_allocation_failure ([&](){
3016 fprintf (dump_file
, "no free register for color %d", color
->id
);
3021 color
->hard_regno
= best
+ V0_REGNUM
;
3022 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3023 fprintf (dump_file
, " Allocating [v%d:v%d] to color %d\n",
3024 best
, best
+ color
->group
->size
- 1, color
->id
);
3025 m_allocated_fprs
|= ((1U << color
->group
->size
) - 1) << best
;
3029 // See if ALLOCNO ends a subchain of single registers that can be split
3030 // off without affecting the rest of the chain, and without introducing
3031 // any moves. Return the start of the chain if so (which might be ALLOCNO
3032 // itself), otherwise return null.
3033 early_ra::allocno_info
*
3034 early_ra::find_independent_subchain (allocno_info
*allocno
)
3036 // Make sure ALLOCNO ends a natural subchain.
3037 if (auto *next_allocno
= chain_next (allocno
))
3038 if (next_allocno
->start_point
+ 1 >= allocno
->end_point
)
3041 // Check the allocnos in the purported subchain and find the other end.
3044 auto *group
= allocno
->group ();
3045 if (group
->m_color_rep
== group
)
3047 if (group
->size
!= 1)
3050 auto *prev_allocno
= chain_prev (allocno
);
3051 if (!prev_allocno
|| allocno
->start_point
+ 1 < prev_allocno
->end_point
)
3054 allocno
= prev_allocno
;
3058 // Search the colors starting at index FIRST_COLOR whose FPRs do not belong
3059 // to FPR_CONFLICTS. Return the first such color that has no group. If all
3060 // such colors have groups, instead return the color with the latest
3061 // (smallest) start point.
3062 early_ra::color_info
*
3063 early_ra::find_oldest_color (unsigned int first_color
,
3064 unsigned int fpr_conflicts
)
3066 color_info
*best
= nullptr;
3067 unsigned int best_start_point
= ~0U;
3068 unsigned int best_recency
= 0;
3069 for (unsigned int ci
= first_color
; ci
< m_colors
.length (); ++ci
)
3071 auto *color
= m_colors
[ci
];
3072 unsigned int fpr
= color
->hard_regno
- V0_REGNUM
;
3073 if (fpr_conflicts
& (1U << fpr
))
3075 unsigned int start_point
= 0;
3078 auto chain_head
= color
->group
->chain_heads ()[0];
3079 start_point
= m_allocnos
[chain_head
]->start_point
;
3081 unsigned int recency
= m_fpr_recency
[fpr
];
3083 || best_start_point
> start_point
3084 || (best_start_point
== start_point
&& recency
< best_recency
))
3087 best_start_point
= start_point
;
3088 best_recency
= recency
;
3094 // If there are some spare FPRs that can be reused without introducing saves,
3095 // restores, or moves, use them to "broaden" the allocation, in order to give
3096 // the scheduler more freedom. This is particularly useful for forming LDPs
3099 early_ra::broaden_colors ()
3101 // Create dummy colors for every leftover FPR that can be used cheaply.
3102 unsigned int first_color
= m_colors
.length ();
3103 for (unsigned int fpr
= 0; fpr
< 32; ++fpr
)
3104 if (((m_allocated_fprs
| m_call_preserved_fprs
) & (1U << fpr
)) == 0)
3106 auto *color
= region_allocate
<color_info
> ();
3107 color
->id
= m_colors
.length ();
3108 color
->hard_regno
= V0_REGNUM
+ fpr
;
3109 color
->group
= nullptr;
3110 m_colors
.safe_push (color
);
3113 // Exit early if there are no spare FPRs.
3114 if (first_color
== m_colors
.length ())
3117 // Go through the allocnos in order, seeing if there is a subchain of
3118 // single-FPR allocnos that can be split off from the containingg clique.
3119 // Allocate such subchains to the new colors on an oldest-first basis.
3120 for (auto *allocno
: m_sorted_allocnos
)
3121 if (auto *start_allocno
= find_independent_subchain (allocno
))
3123 unsigned int fpr_conflicts
= 0;
3124 auto *member
= allocno
;
3127 fpr_conflicts
|= ~member
->group ()->fpr_candidates
;
3128 if (member
== start_allocno
)
3130 member
= m_allocnos
[member
->chain_prev
];
3133 auto *color
= find_oldest_color (first_color
, fpr_conflicts
);
3139 auto *group
= allocno
->group ();
3140 color
->group
= group
;
3141 group
->color
= color
->id
;
3142 group
->chain_heads ()[0] = INVALID_ALLOCNO
;
3146 auto chain_head
= color
->group
->chain_heads ()[0];
3147 auto start_point
= m_allocnos
[chain_head
]->start_point
;
3148 if (start_point
>= allocno
->end_point
)
3149 // Allocating to COLOR isn't viable, and it was the best
3150 // option available.
3153 auto *next_allocno
= chain_next (allocno
);
3154 if (!next_allocno
|| next_allocno
->start_point
<= start_point
)
3155 // The current allocation gives at least as much scheduling
3156 // freedom as COLOR would.
3160 // Unlink the chain.
3161 if (auto *next_allocno
= chain_next (allocno
))
3162 next_allocno
->chain_prev
= start_allocno
->chain_prev
;
3163 if (auto *prev_allocno
= chain_prev (start_allocno
))
3164 prev_allocno
->chain_next
= allocno
->chain_next
;
3166 // Make the subchain use COLOR.
3167 allocno
->chain_next
= color
->group
->chain_heads ()[0];
3168 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3169 fprintf (dump_file
, "Moving to optional color %d (register %s):",
3170 color
->id
, reg_names
[color
->hard_regno
]);
3173 auto *group
= allocno
->group ();
3174 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3175 fprintf (dump_file
, " r%d", group
->regno
);
3176 group
->m_color_rep
= color
->group
;
3177 group
->color_rep_offset
= 0;
3178 if (allocno
== start_allocno
)
3180 allocno
= m_allocnos
[allocno
->chain_prev
];
3182 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3183 fprintf (dump_file
, "\n");
3184 color
->group
->chain_heads ()[0] = start_allocno
->id
;
3188 // Record the final choice of hard register for each allocno.
3190 early_ra::finalize_allocation ()
3192 for (auto *color
: m_colors
)
3195 unsigned int fpr
= color
->hard_regno
- V0_REGNUM
;
3196 for (unsigned int i
= 0; i
< color
->group
->size
; ++i
)
3197 m_fpr_recency
[fpr
+ i
] = m_current_region
;
3199 for (auto *allocno
: m_allocnos
)
3201 if (allocno
->is_shared ())
3203 auto *group
= allocno
->group ();
3204 auto *rep
= group
->color_rep ();
3205 auto rep_regno
= m_colors
[rep
->color
]->hard_regno
;
3206 auto group_regno
= rep_regno
+ group
->color_rep_offset
;
3207 allocno
->hard_regno
= group_regno
+ allocno
->offset
* group
->stride
;
3209 for (auto *allocno
: m_shared_allocnos
)
3210 allocno
->hard_regno
= m_allocnos
[allocno
->related_allocno
]->hard_regno
;
3213 // Replace any allocno references in REFS with the allocated register.
3214 // INSN is the instruction that contains REFS.
3216 early_ra::replace_regs (rtx_insn
*insn
, df_ref refs
)
3218 bool changed
= false;
3219 for (df_ref ref
= refs
; ref
; ref
= DF_REF_NEXT_LOC (ref
))
3221 auto range
= get_allocno_subgroup (DF_REF_REG (ref
));
3225 auto new_regno
= range
.allocno (0)->hard_regno
;
3226 if (new_regno
== FIRST_PSEUDO_REGISTER
)
3228 // Reset a debug instruction if, after DCE, the only remaining
3229 // references to a register are in such instructions.
3230 gcc_assert (DEBUG_INSN_P (insn
));
3231 INSN_VAR_LOCATION_LOC (insn
) = gen_rtx_UNKNOWN_VAR_LOC ();
3234 *DF_REF_LOC (ref
) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref
)), new_regno
);
3240 // Try to make INSN match its FPR-related constraints. If this needs
3241 // a source operand (SRC) to be copied to a destination operand (DEST)
3242 // before INSN, add the associated (DEST, SRC) pairs to MOVES.
3244 // Return -1 on failure, otherwise return a ?/!-style reject count.
3245 // The reject count doesn't model the moves, just the internal alternative
3248 early_ra::try_enforce_constraints (rtx_insn
*insn
,
3249 vec
<std::pair
<int, int>> &moves
)
3251 if (!constrain_operands (0, get_preferred_alternatives (insn
)))
3254 // Pick the alternative with the lowest cost.
3256 auto alts
= get_preferred_alternatives (insn
);
3257 for (int altno
= 0; altno
< recog_data
.n_alternatives
; ++altno
)
3259 if (!(alts
& ALTERNATIVE_BIT (altno
)))
3262 auto *op_alt
= &recog_op_alt
[altno
* recog_data
.n_operands
];
3263 if (!likely_alternative_match_p (op_alt
))
3266 auto_vec
<std::pair
<int, int>, 4> new_moves
;
3267 for (int opno
= 0; opno
< recog_data
.n_operands
; ++opno
)
3269 rtx op
= recog_data
.operand
[opno
];
3271 && FP_REGNUM_P (REGNO (op
))
3272 && op_alt
[opno
].matched
>= 0)
3274 rtx old_src
= recog_data
.operand
[op_alt
[opno
].matched
];
3275 if (!operands_match_p (op
, old_src
))
3277 for (int i
= 0; i
< recog_data
.n_operands
; ++i
)
3280 rtx other
= recog_data
.operand
[i
];
3281 if (reg_overlap_mentioned_p (op
, other
))
3289 new_moves
.safe_push ({ opno
, op_alt
[opno
].matched
});
3293 int cost
= count_rejects (op_alt
) + new_moves
.length () * 7;
3294 if (best
< 0 || cost
< best
)
3298 moves
.safe_splice (new_moves
);
3304 // Make INSN matches its FPR-related constraints.
3306 early_ra::enforce_constraints (rtx_insn
*insn
)
3308 extract_insn (insn
);
3309 preprocess_constraints (insn
);
3311 // First try with the operands they are.
3312 auto_vec
<std::pair
<int, int>, 4> moves
;
3313 int cost
= try_enforce_constraints (insn
, moves
);
3315 // Next try taking advantage of commutativity.
3316 for (int opno
= 0; opno
< recog_data
.n_operands
- 1; ++opno
)
3317 if (recog_data
.constraints
[opno
][0] == '%')
3319 std::swap (*recog_data
.operand_loc
[opno
],
3320 *recog_data
.operand_loc
[opno
+ 1]);
3321 std::swap (recog_data
.operand
[opno
],
3322 recog_data
.operand
[opno
+ 1]);
3323 auto_vec
<std::pair
<int, int>, 4> swapped_moves
;
3324 int swapped_cost
= try_enforce_constraints (insn
, swapped_moves
);
3325 if (swapped_cost
>= 0 && (cost
< 0 || swapped_cost
< cost
))
3327 cost
= swapped_cost
;
3329 moves
.safe_splice (swapped_moves
);
3333 std::swap (*recog_data
.operand_loc
[opno
],
3334 *recog_data
.operand_loc
[opno
+ 1]);
3335 std::swap (recog_data
.operand
[opno
],
3336 recog_data
.operand
[opno
+ 1]);
3340 // The allocation should ensure that there is at least one valid combination.
3341 // It's too late to back out now if not.
3342 gcc_assert (cost
>= 0);
3343 for (int i
= 0; i
< recog_data
.n_dups
; ++i
)
3345 int dup_of
= recog_data
.dup_num
[i
];
3346 rtx new_op
= *recog_data
.operand_loc
[dup_of
];
3347 if (new_op
!= recog_data
.operand
[dup_of
])
3348 *recog_data
.dup_loc
[i
] = copy_rtx (new_op
);
3350 for (auto move
: moves
)
3352 int dest_opno
= move
.first
;
3353 int src_opno
= move
.second
;
3354 rtx dest
= recog_data
.operand
[dest_opno
];
3355 rtx old_src
= recog_data
.operand
[src_opno
];
3356 rtx new_src
= lowpart_subreg (GET_MODE (old_src
), dest
, GET_MODE (dest
));
3357 emit_insn_before (gen_move_insn (new_src
, old_src
), insn
);
3358 *recog_data
.operand_loc
[src_opno
] = new_src
;
3362 // See whether INSN is an instruction that operates on multi-register vectors,
3363 // and if we have decided to make it use strided rather than consecutive
3364 // accesses. Update the pattern and return true if so.
3366 early_ra::maybe_convert_to_strided_access (rtx_insn
*insn
)
3368 if (!NONJUMP_INSN_P (insn
) || recog_memoized (insn
) < 0)
3371 auto stride_type
= get_attr_stride_type (insn
);
3372 rtx pat
= PATTERN (insn
);
3374 if (TARGET_STREAMING_SME2
&& stride_type
== STRIDE_TYPE_LD1_CONSECUTIVE
)
3375 op
= SET_DEST (pat
);
3376 else if (TARGET_STREAMING_SME2
&& stride_type
== STRIDE_TYPE_ST1_CONSECUTIVE
)
3377 op
= XVECEXP (SET_SRC (pat
), 0, 1);
3381 auto range
= get_allocno_subgroup (op
);
3382 if (!range
|| range
.group
->stride
== 1)
3385 gcc_assert (range
.start
== 0 && range
.count
== range
.group
->size
);
3386 auto elt_mode
= GET_MODE_INNER (GET_MODE (op
));
3387 auto single_mode
= aarch64_full_sve_mode (elt_mode
).require ();
3388 auto_vec
<rtx
, 4> regs
;
3389 for (unsigned int i
= 0; i
< range
.count
; ++i
)
3390 regs
.quick_push (gen_rtx_REG (single_mode
, range
.allocno (i
)->hard_regno
));
3392 extract_insn (insn
);
3393 if (stride_type
== STRIDE_TYPE_LD1_CONSECUTIVE
)
3395 auto unspec
= XINT (SET_SRC (pat
), 1);
3396 if (range
.count
== 2)
3397 pat
= gen_aarch64_strided2 (unspec
, GET_MODE (op
), regs
[0], regs
[1],
3398 recog_data
.operand
[1],
3399 recog_data
.operand
[2]);
3401 pat
= gen_aarch64_strided4 (unspec
, GET_MODE (op
),
3402 regs
[0], regs
[1], regs
[2], regs
[3],
3403 recog_data
.operand
[1],
3404 recog_data
.operand
[2]);
3406 else if (stride_type
== STRIDE_TYPE_ST1_CONSECUTIVE
)
3408 auto unspec
= XINT (SET_SRC (pat
), 1);
3409 if (range
.count
== 2)
3410 pat
= gen_aarch64_strided2 (unspec
, GET_MODE (op
),
3411 recog_data
.operand
[0],
3412 recog_data
.operand
[2], regs
[0], regs
[1]);
3414 pat
= gen_aarch64_strided4 (unspec
, GET_MODE (op
),
3415 recog_data
.operand
[0],
3416 recog_data
.operand
[2],
3417 regs
[0], regs
[1], regs
[2], regs
[3]);
3418 // Ensure correct sharing for the source memory.
3420 // ??? Why doesn't the generator get this right?
3421 XVECEXP (SET_SRC (pat
), 0, XVECLEN (SET_SRC (pat
), 0) - 1)
3422 = *recog_data
.dup_loc
[0];
3426 PATTERN (insn
) = pat
;
3427 INSN_CODE (insn
) = -1;
3428 df_insn_rescan (insn
);
3432 // We've successfully allocated the current region. Apply the allocation
3433 // to the instructions.
3435 early_ra::apply_allocation ()
3437 for (auto *insn
: m_dead_insns
)
3438 set_insn_deleted (insn
);
3441 for (auto insn_range
: m_insn_ranges
)
3442 for (rtx_insn
*insn
= insn_range
.first
;
3443 insn
!= insn_range
.second
;
3446 prev
= PREV_INSN (insn
);
3450 bool changed
= maybe_convert_to_strided_access (insn
);
3451 changed
|= replace_regs (insn
, DF_INSN_DEFS (insn
));
3452 changed
|= replace_regs (insn
, DF_INSN_USES (insn
));
3453 if (changed
&& NONDEBUG_INSN_P (insn
))
3455 if (GET_CODE (PATTERN (insn
)) != USE
3456 && GET_CODE (PATTERN (insn
)) != CLOBBER
3457 && !is_move_set (PATTERN (insn
)))
3458 enforce_constraints (insn
);
3460 // A REG_EQUIV note establishes an equivalence throughout
3461 // the function, but here we're reusing hard registers for
3462 // multiple pseudo registers. We also no longer need REG_EQUIV
3463 // notes that record potential spill locations, since we've
3464 // allocated the pseudo register without spilling.
3465 rtx
*ptr
= ®_NOTES (insn
);
3467 if (REG_NOTE_KIND (*ptr
) == REG_EQUIV
)
3468 *ptr
= XEXP (*ptr
, 1);
3470 ptr
= &XEXP (*ptr
, 1);
3472 changed
|= replace_regs (insn
, DF_INSN_EQ_USES (insn
));
3474 df_insn_rescan (insn
);
3477 for (auto *insn
: m_dead_insns
)
3481 // Try to allocate the current region. Update the instructions if successful.
3483 early_ra::process_region ()
3485 for (auto *allocno
: m_allocnos
)
3487 allocno
->chain_next
= INVALID_ALLOCNO
;
3488 allocno
->chain_prev
= INVALID_ALLOCNO
;
3491 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3498 find_strided_accesses ();
3500 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3505 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3510 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3514 if (!m_allocation_successful
)
3518 finalize_allocation ();
3520 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3522 fprintf (dump_file
, "\nAllocation successful\nFinal allocation:\n");
3527 apply_allocation ();
3530 // Return true if INSN would become dead if we successfully allocate the
3533 early_ra::is_dead_insn (rtx_insn
*insn
)
3535 rtx set
= single_set (insn
);
3539 rtx dest
= SET_DEST (set
);
3540 auto dest_range
= get_allocno_subgroup (dest
);
3544 for (auto &allocno
: dest_range
.allocnos ())
3545 if (bitmap_bit_p (m_live_allocnos
, allocno
.id
))
3548 if (side_effects_p (set
))
3551 /* If we can't delete dead exceptions and the insn throws,
3552 then the instruction is not dead. */
3553 if (!cfun
->can_delete_dead_exceptions
3554 && !insn_nothrow_p (insn
))
3560 // Return true if we could split a single-block region into two regions
3561 // at the current program point. This is true if the block up to this
3562 // point is worth treating as an independent region and if no registers
3563 // that would affect the allocation are currently live.
3565 early_ra::could_split_region_here ()
3567 return (m_accurate_live_ranges
3568 && bitmap_empty_p (m_live_allocnos
)
3570 && !m_allocnos
.is_empty ());
3573 // Return true if any of the pseudos defined by INSN are also defined
3576 defines_multi_def_pseudo (rtx_insn
*insn
)
3580 FOR_EACH_INSN_DEF (ref
, insn
)
3582 unsigned int regno
= DF_REF_REGNO (ref
);
3583 if (!HARD_REGISTER_NUM_P (regno
) && DF_REG_DEF_COUNT (regno
) > 1)
3589 // Build up information about block BB. IS_ISOLATED is true if the
3590 // block is not part of a larger region.
3592 early_ra::process_block (basic_block bb
, bool is_isolated
)
3595 m_current_point
+= 1;
3596 m_current_bb_point
= m_current_point
;
3598 // Process live-out FPRs.
3599 bitmap live_out
= df_get_live_out (bb
);
3600 for (unsigned int regno
= V0_REGNUM
; regno
<= V31_REGNUM
; ++regno
)
3601 if (bitmap_bit_p (live_out
, regno
))
3602 record_fpr_use (regno
);
3604 // Process live-out allocnos. We don't track individual FPR liveness
3605 // across block boundaries, so we have to assume that the whole pseudo
3606 // register is live.
3609 EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb
), m_fpr_pseudos
,
3610 FIRST_PSEUDO_REGISTER
, regno
, bi
)
3612 auto range
= get_allocno_subgroup (regno_reg_rtx
[regno
]);
3613 for (auto &allocno
: range
.allocnos ())
3614 record_allocno_use (&allocno
);
3617 m_current_point
+= 1;
3619 record_artificial_refs (0);
3621 bool is_first
= true;
3622 rtx_insn
*start_insn
= BB_END (bb
);
3624 FOR_BB_INSNS_REVERSE (bb
, insn
)
3626 if (!NONDEBUG_INSN_P (insn
))
3629 // CLOBBERs are used to prevent pseudos from being upwards exposed.
3630 // We can ignore them if allocation is successful.
3631 if (GET_CODE (PATTERN (insn
)) == CLOBBER
)
3633 if (get_allocno_subgroup (XEXP (PATTERN (insn
), 0)))
3634 m_dead_insns
.safe_push (insn
);
3638 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3641 fprintf (dump_file
, "\nBlock %d:\n", bb
->index
);
3642 fprintf (dump_file
, "%6d:", m_current_point
);
3643 pretty_printer rtl_slim_pp
;
3644 rtl_slim_pp
.set_output_stream (dump_file
);
3645 print_insn (&rtl_slim_pp
, insn
, 1);
3646 pp_flush (&rtl_slim_pp
);
3647 fprintf (dump_file
, "\n");
3651 if (is_dead_insn (insn
))
3653 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3654 fprintf (dump_file
, "%14s -- dead\n", "");
3655 m_dead_insns
.safe_push (insn
);
3659 record_insn_defs (insn
);
3661 // If we've decided not to allocate the current region, and if
3662 // all relevant registers are now dead, consider splitting the
3663 // block into two regions at this point.
3665 // This is useful when a sequence of vector code ends in a
3666 // vector-to-scalar-integer operation. We wouldn't normally
3667 // want to allocate the scalar integer, since IRA is much better
3668 // at handling cross-file moves. But if nothing of relevance is
3669 // live between the death of the final vector and the birth of the
3670 // scalar integer, we can try to split the region at that point.
3671 // We'd then allocate the vector input and leave the real RA
3672 // to handle the scalar output.
3674 && !m_allocation_successful
3675 && could_split_region_here ()
3676 && !defines_multi_def_pseudo (insn
))
3678 // Mark that we've deliberately chosen to leave the output
3679 // pseudos to the real RA.
3681 FOR_EACH_INSN_DEF (ref
, insn
)
3683 unsigned int regno
= DF_REF_REGNO (ref
);
3684 if (!HARD_REGISTER_NUM_P (regno
))
3686 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3687 fprintf (dump_file
, "Ignoring register r%d\n", regno
);
3688 m_pseudo_regs
[regno
].flags
|= IGNORE_REG
;
3689 bitmap_clear_bit (m_fpr_pseudos
, regno
);
3693 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3694 fprintf (dump_file
, "\nStarting new region at insn %d\n",
3697 // Start a new region and replay the definitions. The replay
3698 // is needed if the instruction sets a hard FP register.
3699 start_new_region ();
3700 record_insn_defs (insn
);
3704 if (auto *call_insn
= dyn_cast
<rtx_call_insn
*> (insn
))
3705 record_insn_call (call_insn
);
3706 record_insn_uses (insn
);
3707 rtx pat
= PATTERN (insn
);
3708 if (is_move_set (pat
))
3709 record_copy (SET_DEST (pat
), SET_SRC (pat
), true);
3712 extract_insn (insn
);
3713 record_constraints (insn
);
3717 // See whether we have a complete region, with no remaining live
3719 if (is_isolated
&& could_split_region_here ())
3721 rtx_insn
*prev_insn
= PREV_INSN (insn
);
3722 if (m_allocation_successful
)
3724 m_insn_ranges
.safe_push ({ start_insn
, prev_insn
});
3727 start_new_region ();
3729 start_insn
= prev_insn
;
3732 m_insn_ranges
.safe_push ({ start_insn
, BB_HEAD (bb
) });
3734 record_artificial_refs (DF_REF_AT_TOP
);
3736 // Process live-in FPRs.
3737 bitmap live_in
= df_get_live_in (bb
);
3738 for (unsigned int regno
= V0_REGNUM
; regno
<= V31_REGNUM
; ++regno
)
3739 if (bitmap_bit_p (live_in
, regno
)
3740 && (m_live_fprs
& (1U << (regno
- V0_REGNUM
))))
3741 record_fpr_def (regno
);
3743 // Process live-in allocnos.
3744 EXECUTE_IF_AND_IN_BITMAP (live_in
, m_fpr_pseudos
,
3745 FIRST_PSEUDO_REGISTER
, regno
, bi
)
3747 auto range
= get_allocno_subgroup (regno_reg_rtx
[regno
]);
3748 for (auto &allocno
: range
.allocnos ())
3749 if (bitmap_bit_p (m_live_allocnos
, allocno
.id
))
3750 record_allocno_def (&allocno
);
3753 m_current_point
+= 1;
3755 bitmap_clear (m_live_allocnos
);
3759 // Divide the function into regions, such that there no edges into or out
3760 // of the region have live "FPR pseudos".
3762 early_ra::process_blocks ()
3764 auto_sbitmap
visited (last_basic_block_for_fn (m_fn
));
3765 auto_sbitmap
fpr_pseudos_live_out (last_basic_block_for_fn (m_fn
));
3766 auto_sbitmap
fpr_pseudos_live_in (last_basic_block_for_fn (m_fn
));
3768 bitmap_clear (visited
);
3769 bitmap_clear (fpr_pseudos_live_out
);
3770 bitmap_clear (fpr_pseudos_live_in
);
3772 // Record which blocks have live FPR pseudos on entry and exit.
3774 FOR_EACH_BB_FN (bb
, m_fn
)
3776 if (bitmap_intersect_p (df_get_live_out (bb
), m_fpr_pseudos
))
3777 bitmap_set_bit (fpr_pseudos_live_out
, bb
->index
);
3778 if (bitmap_intersect_p (df_get_live_in (bb
), m_fpr_pseudos
))
3779 bitmap_set_bit (fpr_pseudos_live_in
, bb
->index
);
3782 // This is incremented by 1 at the start of each region.
3783 m_current_region
= 0;
3784 memset (m_fpr_recency
, 0, sizeof (m_fpr_recency
));
3786 struct stack_node
{ edge_iterator ei
; basic_block bb
; };
3788 auto_vec
<stack_node
, 32> stack
;
3789 auto_vec
<basic_block
, 32> region
;
3791 // Go through the function in reverse postorder and process the region
3792 // containing each block.
3793 unsigned int n_blocks
= df_get_n_blocks (DF_FORWARD
);
3794 int *order
= df_get_postorder (DF_FORWARD
);
3795 for (unsigned int bbi
= 0; bbi
< n_blocks
; ++bbi
)
3797 basic_block bb
= BASIC_BLOCK_FOR_FN (m_fn
, order
[bbi
]);
3798 if (bb
->index
< NUM_FIXED_BLOCKS
)
3801 if (!bitmap_set_bit (visited
, bb
->index
))
3804 // Process forward edges before backward edges (so push backward
3805 // edges first). Build the region in an approximation of reverse
3807 if (bitmap_bit_p (fpr_pseudos_live_in
, bb
->index
))
3808 stack
.quick_push ({ ei_start (bb
->preds
), nullptr });
3809 if (bitmap_bit_p (fpr_pseudos_live_out
, bb
->index
))
3810 stack
.quick_push ({ ei_start (bb
->succs
), bb
});
3812 region
.safe_push (bb
);
3813 while (!stack
.is_empty ())
3815 auto &node
= stack
.last ();
3816 if (ei_end_p (node
.ei
))
3819 region
.safe_push (node
.bb
);
3823 edge e
= ei_edge (node
.ei
);
3826 // A forward edge from a node that has not yet been added
3828 if (bitmap_bit_p (fpr_pseudos_live_in
, e
->dest
->index
)
3829 && bitmap_set_bit (visited
, e
->dest
->index
))
3831 stack
.safe_push ({ ei_start (e
->dest
->preds
), nullptr });
3832 if (bitmap_bit_p (fpr_pseudos_live_out
, e
->dest
->index
))
3833 stack
.safe_push ({ ei_start (e
->dest
->succs
), e
->dest
});
3835 region
.safe_push (e
->dest
);
3842 // A backward edge from a node that has already been added
3844 if (bitmap_bit_p (fpr_pseudos_live_out
, e
->src
->index
)
3845 && bitmap_set_bit (visited
, e
->src
->index
))
3847 if (bitmap_bit_p (fpr_pseudos_live_in
, e
->src
->index
))
3848 stack
.safe_push ({ ei_start (e
->src
->preds
), nullptr });
3849 stack
.safe_push ({ ei_start (e
->src
->succs
), e
->src
});
3856 m_current_point
= 2;
3857 start_new_region ();
3859 if (region
.is_empty ())
3860 process_block (bb
, true);
3863 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3865 fprintf (dump_file
, "\nRegion (from %d):", bb
->index
);
3866 for (unsigned int j
= 0; j
< region
.length (); ++j
)
3867 fprintf (dump_file
, " %d", region
[j
]->index
);
3868 fprintf (dump_file
, "\n");
3870 for (unsigned int j
= 0; j
< region
.length (); ++j
)
3872 basic_block bb
= region
[j
];
3874 = ((j
== 0 && !bitmap_bit_p (fpr_pseudos_live_out
, bb
->index
))
3875 || (j
== region
.length () - 1
3876 && !bitmap_bit_p (fpr_pseudos_live_in
, bb
->index
)));
3877 process_block (bb
, is_isolated
);
3880 region
.truncate (0);
3882 if (!m_allocnos
.is_empty () && m_allocation_successful
)
3887 // Run the pass on the current function.
3889 early_ra::execute ()
3893 preprocess_insns ();
3894 propagate_pseudo_reg_info ();
3895 choose_fpr_pseudos ();
3896 if (bitmap_empty_p (m_fpr_pseudos
))
3899 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3900 dump_pseudo_regs ();
3906 class pass_early_ra
: public rtl_opt_pass
3909 pass_early_ra (gcc::context
*ctxt
)
3910 : rtl_opt_pass (pass_data_early_ra
, ctxt
)
3913 // opt_pass methods:
3914 virtual bool gate (function
*);
3915 virtual unsigned int execute (function
*);
3919 pass_early_ra::gate (function
*)
3921 // Require a vector ISA to be enabled.
3922 if (!TARGET_SIMD
&& !TARGET_SVE
)
3925 if (aarch64_early_ra
== AARCH64_EARLY_RA_NONE
)
3928 if (aarch64_early_ra
== AARCH64_EARLY_RA_STRIDED
3929 && !TARGET_STREAMING_SME2
)
3936 pass_early_ra::execute (function
*fn
)
3938 early_ra (fn
).execute ();
3944 // Create a new instance of the pass.
3946 make_pass_aarch64_early_ra (gcc::context
*ctxt
)
3948 return new pass_early_ra (ctxt
);