// Early register allocation pass.
// Copyright (C) 2023-2024 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
//
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.
// This pass implements a simple form of early register allocation.
// It is restricted to FP/SIMD registers, and it only allocates
// a region of FP/SIMD usage if it can do so without any spilling.
// It punts on anything too complicated, leaving it to the real
// register allocator.
//
// There are two main purposes:
//
// (1) The pass runs before scheduling.  It therefore has a chance to
//     bag a spill-free allocation, if there is one, before scheduling
//     moves things around.
//
// (2) The pass can make use of strided register operations, such as the
//     strided forms of LD1 and ST1 in SME2.
//
// The allocator works at the level of individual FPRs, rather than whole
// pseudo registers.  It is mostly intended to help optimize ACLE code.
//
// The pass is very simplistic.  There are many things that could be improved.
#define IN_TARGET_CODE 1

#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-pass.h"
#include "print-rtl.h"
#include "insn-attr.h"
#include "insn-opinit.h"
template<typename T>
class simple_iterator : public wrapper_iterator<T>
{
public:
  using wrapper_iterator<T>::wrapper_iterator;

  simple_iterator &operator-- () { --this->m_contents; return *this; }
  simple_iterator operator-- (int) { return this->m_contents--; }
  simple_iterator &operator++ () { ++this->m_contents; return *this; }
  simple_iterator operator++ (int) { return this->m_contents++; }
};
using namespace rtl_ssa;

const pass_data pass_data_early_ra =
{
  RTL_PASS, // type
  "early_ra", // name
  OPTGROUP_NONE, // optinfo_flags
  TV_NONE, // tv_id
  0, // properties_required
  0, // properties_provided
  0, // properties_destroyed
  0, // todo_flags_start
  TODO_df_finish, // todo_flags_finish
};

using allocno_iterator = simple_iterator<unsigned int>;
// Class that represents one run of the pass.
class early_ra
{
public:
  early_ra (function *fn);
  ~early_ra ();

private:
  // Whether to test only things that are required for correctness,
  // or whether to take optimization heuristics into account as well.
  enum test_strictness { CORRECTNESS_ONLY, ALL_REASONS };
  static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
  using operand_mask = uint32_t;
  // Points in the function are represented using "program points".
  // The program points are allocated in reverse order, with smaller
  // numbers indicating later points.  These special values indicate
  // the start and end of a region.
  static constexpr unsigned int START_OF_REGION = ~0U;
  static constexpr unsigned int END_OF_REGION = 0U;
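  // For example, if instruction A comes before instruction B in a region,
  // A is assigned the higher program point, so a value set by A and last
  // used by B has START_POINT >= END_POINT in the live ranges below.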
  // An invalid allocno index, used to represent no allocno.
  static constexpr unsigned int INVALID_ALLOCNO = ~0U;

  // Enumerates the single FPR sizes that matter for register allocation.
  // Anything smaller than 64 bits is treated as FPR_D.
  enum fpr_size_info { FPR_D, FPR_Q, FPR_Z };
  // A live range for an FPR, containing program points [START_POINT,
  // END_POINT].  If ALLOCNO is not INVALID_ALLOCNO, the FPR is known
  // to be equal to ALLOCNO for the duration of the live range.
  struct fpr_range_info
  {
    unsigned int start_point;
    unsigned int end_point;
    unsigned int allocno;
  };
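  // For example, a hypothetical entry { 10, 4, 3 } would say that the FPR
  // is live from point 10 down to point 4 and is known to hold the same
  // value as allocno 3 throughout that range.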
  // Flags used in pseudo_reg_info.
  //
  // Whether the pseudo register occurs in one instruction alternative that
  // matches (respectively) V0-V7, V0-V15, V0-V31 or a non-FP register.
  static constexpr unsigned int ALLOWS_FPR8 = 1U << 0;
  static constexpr unsigned int ALLOWS_FPR16 = 1U << 1;
  static constexpr unsigned int ALLOWS_FPR32 = 1U << 2;
  static constexpr unsigned int ALLOWS_NONFPR = 1U << 3;

  // Likewise whether the register occurs in an instruction that requires
  // the associated register type.
  static constexpr unsigned int NEEDS_FPR8 = 1U << 4;
  static constexpr unsigned int NEEDS_FPR16 = 1U << 5;
  static constexpr unsigned int NEEDS_FPR32 = 1U << 6;
  static constexpr unsigned int NEEDS_NONFPR = 1U << 7;

  // Whether the pseudo register is copied to or from a hard FP register.
  static constexpr unsigned int HAS_FPR_COPY = 1U << 8;

  // Whether the pseudo register is copied to or from a hard non-FP register.
  static constexpr unsigned int HAS_NONFPR_COPY = 1U << 9;

  // Whether the pseudo register is used as a multi-register vector operand
  // to an instruction that supports strided accesses, and whether it is used
  // as a multi-register vector operand in some other non-move instruction.
  static constexpr unsigned int HAS_FLEXIBLE_STRIDE = 1U << 10;
  static constexpr unsigned int HAS_FIXED_STRIDE = 1U << 11;

  // Flags that should be propagated across moves between pseudo registers.
  static constexpr unsigned int PSEUDO_COPY_FLAGS = ~(HAS_FLEXIBLE_STRIDE
						      | HAS_FIXED_STRIDE);
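  // A pseudo-to-pseudo move therefore propagates the ALLOWS_*, NEEDS_*
  // and HAS_*_COPY bits, but deliberately not the stride flags, which
  // describe individual accesses rather than the value being moved.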
  // Information about a copy between two registers.
  struct reg_copy_info
  {
    // The two registers, in order.
    unsigned int regnos[2];

    // Index I gives the index of the next reg_copy_info involving REGNOS[I],
    // or 0 if none.
    unsigned int next_copies[2];
  };
  // Information about a pseudo register.
  struct pseudo_reg_info
  {
    // Flags describing how the register is used, defined above.
    unsigned int flags : 16;

    // The mode of the pseudo register, cached for convenience.
    machine_mode mode : 16;

    // The index of the first copy, or 0 if none.
    unsigned int first_copy;
  };
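  // The copies involving a register REGNO thus form an intrusive linked
  // list: m_pseudo_regs[REGNO].first_copy indexes the first reg_copy_info,
  // and copy.next_copies[I] continues the list, where I selects whichever
  // of copy.regnos[] equals REGNO.  Index 0 acts as the list terminator,
  // which is why entry 0 of m_pseudo_reg_copies is left unused.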
  // Information about a group of allocnos that have a fixed offset
  // relative to each other.  The allocnos in each group must be allocated
  // together.
  //
  // Allocnos that can share the same hard register are eventually
  // chained together.  These chains represent edges on a graph of
  // allocnos, such that two allocnos joined by an edge use the same FPR.
  // These chains are formed between individual allocnos rather than
  // whole groups, although the system is required to be self-consistent.
  // Each clique in the graph has at least one "full-width" allocno group
  // that has one allocno for every FPR that needs to be allocated to
  // the clique.
  //
  // One group of allocnos is chosen as the "color representative" of
  // each clique in the graph.  This group will be a full-width group.
  struct allocno_group_info
  {
    array_slice<unsigned int> chain_heads ();
    array_slice<allocno_info> allocnos ();
    allocno_group_info *color_rep ();
    allocno_info *allocno (unsigned int);

    // The color representative of the containing clique.
    allocno_group_info *m_color_rep;

    // The pseudo register associated with this allocno, or INVALID_REGNUM
    // if none.
    unsigned int regno;

    // The offset of the first allocno (and thus this group) from the start
    // of the color representative.
    unsigned int color_rep_offset : 8;

    // The number of allocnos in the group, and thus the number of FPRs
    // that need to be allocated.
    unsigned int size : 8;

    // The gap between FPRs in the group.  This is normally 1, but can be
    // higher if we've decided to use strided multi-register accesses.
    unsigned int stride : 4;

    // Used temporarily while deciding which allocnos should have non-unit
    // strides; see find_strided_accesses for details.
    int consecutive_pref : 4;
    int strided_polarity : 2;

    // The largest size of FPR needed by references to the allocno group.
    fpr_size_info fpr_size : 2;

    // True if all non-move accesses can be converted to strided form.
    unsigned int has_flexible_stride : 1;

    // True if we've assigned a color index to this group.
    unsigned int has_color : 1;

    // The mask of FPRs that would make valid choices for the first allocno,
    // taking the requirements of all the allocnos in the group into account.
    unsigned int fpr_candidates;

    // The index of the color that has been assigned to the containing clique.
    unsigned int color;
  };
  // Represents a single FPR-sized quantity that needs to be allocated.
  // Each allocno is identified by index (for compactness).
  //
  // Quantities that span multiple FPRs are assigned groups of consecutive
  // allocnos.  Quantities that occupy a single FPR are assigned their own
  // group.
  struct allocno_info
  {
    allocno_group_info *group ();
    bool is_shared ();
    bool is_equiv_to (unsigned int);

    // The allocno's unique identifier.
    unsigned int id;

    // The offset of this allocno into the containing group.
    unsigned int offset : 8;

    // The number of allocnos in the containing group.
    unsigned int group_size : 8;

    // If the allocno has an affinity with at least one hard register
    // (so that choosing that hard register would avoid a copy), this is
    // the number of one such hard register, otherwise it is
    // FIRST_PSEUDO_REGISTER.
    unsigned int hard_regno : 8;

    // Set to 1 if the allocno has a single definition or 2 if it has more.
    unsigned int num_defs : 2;

    // True if, at START_POINT, another allocno is copied to this one.
    // See callers of record_copy for what counts as a copy.
    unsigned int is_copy_dest : 1;

    // True if, at START_POINT, another allocno is copied to this one,
    // and if the allocnos at both ends of the copy chain have an affinity
    // with the same hard register.
    unsigned int is_strong_copy_dest : 1;

    // True if, at END_POINT, this allocno is copied to another one,
    // and both allocnos have an affinity with the same hard register.
    unsigned int is_strong_copy_src : 1;

    // True if the allocno is subject to an earlyclobber at END_POINT,
    // so that it cannot be tied to the destination of the instruction.
    unsigned int is_earlyclobbered : 1;

    // True if this allocno is known to be equivalent to related_allocno
    // for the whole of this allocno's lifetime.
    unsigned int is_equiv : 1;

    // The inclusive range of program points spanned by the allocno.
    // START_POINT >= END_POINT.
    unsigned int start_point;
    unsigned int end_point;

    // If, at END_POINT, this allocno is copied to another allocno, this
    // is the index of that allocno, otherwise it is INVALID_ALLOCNO.
    // See callers of record_copy for what counts as a copy.
    unsigned int copy_dest;

    // If this field is not INVALID_ALLOCNO, it indicates one of two things:
    //
    // - if is_equiv, this allocno is equivalent to related_allocno for
    //   the whole of this allocno's lifetime.
    //
    // - if !is_equiv, this allocno's live range is a subrange of
    //   related_allocno's and we have committed to making this allocno
    //   share whatever register related_allocno uses.
    unsigned int related_allocno;

    union
    {
      // The program point at which the allocno was last defined,
      // or START_OF_REGION if none.  This is only used temporarily
      // while recording allocnos; after that, chain_next below is
      // used instead.
      unsigned int last_def_point;

      // The next chained allocno in program order (i.e. at lower program
      // points), or INVALID_ALLOCNO if none.
      unsigned int chain_next;
    };

    union
    {
      // The program point before start_point at which the allocno was
      // last used, or END_OF_REGION if none.  This is only used temporarily
      // while recording allocnos; after that, chain_prev below is used
      // instead.
      unsigned int last_use_point;

      // The previous chained allocno in program order (i.e. at higher
      // program points), or INVALID_ALLOCNO if none.
      unsigned int chain_prev;
    };
  };
  // Information about a full allocno group or a subgroup of it.
  // The subgroup can be empty to indicate "none".
  struct allocno_subgroup
  {
    array_slice<allocno_info> allocnos ();
    allocno_info *allocno (unsigned int);

    // True if a subgroup is present.
    operator bool () const { return count; }

    // The containing group.
    allocno_group_info *group;

    // The offset of the subgroup from the start of GROUP.
    unsigned int start;

    // The number of allocnos in the subgroup.
    unsigned int count;
  };
  // Represents information about a copy between an allocno and an FPR.
  // This establishes an affinity between the allocno and the FPR.
  struct allocno_copy_info
  {
    // The allocno involved in the copy.
    unsigned int allocno;

    // The FPR involved in the copy, relative to V0_REGNUM.
    unsigned int fpr : 16;

    // A measure of how strong the affinity between the allocno and FPR is.
    unsigned int weight : 16;
  };
  // Information about a possible allocno chain.
  struct chain_candidate_info
  {
    // The candidate target allocno.
    allocno_info *allocno;

    // A rating of the candidate (higher is better).
    int score;
  };
  // Information about an allocno color.
  struct color_info
  {
    // The color's unique identifier.
    unsigned int id;

    // The allocated hard register, when known.
    unsigned int hard_regno;

    // The clique's representative group.
    allocno_group_info *group;

    // The number of FPR preferences recorded in fpr_preferences.
    unsigned int num_fpr_preferences;

    // Weights in favor of choosing each FPR as the first register for GROUP.
    int8_t fpr_preferences[32];
  };
  template<typename T, typename... Ts>
  T *region_allocate (Ts...);

  allocno_info *chain_prev (allocno_info *);
  allocno_info *chain_next (allocno_info *);

  void dump_pseudo_regs ();
  void dump_fpr_ranges ();
  void dump_copies ();
  void dump_allocnos ();
  void dump_colors ();

  iterator_range<allocno_iterator> get_group_allocnos (unsigned int);

  void preprocess_move (rtx, rtx);
  void process_pseudo_reg_constraints (rtx_insn *);
  void preprocess_insns ();

  int fpr_preference (unsigned int);
  void propagate_pseudo_reg_info ();
  void choose_fpr_pseudos ();

  void start_new_region ();

  allocno_group_info *create_allocno_group (unsigned int, unsigned int);
  allocno_subgroup get_allocno_subgroup (rtx);
  void record_fpr_use (unsigned int);
  void record_fpr_def (unsigned int);
  void record_allocno_use (allocno_info *);
  void record_allocno_def (allocno_info *);
  allocno_info *find_related_start (allocno_info *, allocno_info *, bool);
  void accumulate_defs (allocno_info *, allocno_info *);
  void record_copy (rtx, rtx, bool = false);
  void record_constraints (rtx_insn *);
  void record_artificial_refs (unsigned int);
  void record_insn_refs (rtx_insn *);

  bool consider_strong_copy_src_chain (allocno_info *);
  int strided_polarity_pref (allocno_info *, allocno_info *);
  void find_strided_accesses ();

  template<unsigned int allocno_info::*field>
  static int cmp_increasing (const void *, const void *);
  bool is_chain_candidate (allocno_info *, allocno_info *, test_strictness);
  int rate_chain (allocno_info *, allocno_info *);
  static int cmp_chain_candidates (const void *, const void *);
  void chain_allocnos (unsigned int &, unsigned int &);
  void merge_fpr_info (allocno_group_info *, allocno_group_info *,
		       unsigned int);
  void set_single_color_rep (allocno_info *, allocno_group_info *,
			     unsigned int);
  void set_color_rep (allocno_group_info *, allocno_group_info *,
		      unsigned int);
  bool try_to_chain_allocnos (allocno_info *, allocno_info *);
  void create_color (allocno_group_info *);

  bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info *);
  bool call_in_range_p (unsigned int, unsigned int, unsigned int);
  unsigned int partial_fpr_clobbers (unsigned int, fpr_size_info);

  void process_copies ();

  static int cmp_allocation_order (const void *, const void *);
  void allocate_colors ();
  allocno_info *find_independent_subchain (allocno_info *);
  color_info *find_oldest_color (unsigned int, unsigned int);
  void broaden_colors ();
  void finalize_allocation ();

  bool replace_regs (rtx_insn *, df_ref);
  int try_enforce_constraints (rtx_insn *, vec<std::pair<int, int>> &);
  void enforce_constraints (rtx_insn *);
  bool maybe_convert_to_strided_access (rtx_insn *);
  void apply_allocation ();

  void process_region ();
  bool is_dead_insn (rtx_insn *);
  void process_block (basic_block, bool);
  void process_blocks ();
  // ----------------------------------------------------------------------

  // The function we're operating on.
  function *m_fn;

  // Information about each pseudo register, indexed by REGNO.
  auto_vec<pseudo_reg_info> m_pseudo_regs;

  // All recorded register copies.
  auto_vec<reg_copy_info> m_pseudo_reg_copies;

  // The set of pseudos that we've decided to allocate an FPR to.
  auto_bitmap m_fpr_pseudos;

  // ----------------------------------------------------------------------

  // An obstack for allocating information that is referenced by the member
  // variables below.
  obstack m_region_obstack;
  void *m_region_alloc_start;

  // ----------------------------------------------------------------------

  // The basic block that we're currently processing.
  basic_block m_current_bb;

  // The lowest-numbered program point in the current basic block.
  unsigned int m_current_bb_point;

  // The program point that we're currently processing (described above).
  unsigned int m_current_point;

  // The set of allocnos that are currently live.
  auto_bitmap m_live_allocnos;

  // The set of FPRs that are currently live.
  unsigned int m_live_fprs;

  // A unique one-based identifier for the current region.
  unsigned int m_current_region;

  // The region in which each FPR was last used, or 0 if none.
  unsigned int m_fpr_recency[32];

  // ----------------------------------------------------------------------

  // A mask of the FPRs that have already been allocated.
  unsigned int m_allocated_fprs;

  // A mask of the FPRs that must be at least partially preserved by the
  // current function.
  unsigned int m_call_preserved_fprs;

  // True if we haven't yet failed to allocate the current region.
  bool m_allocation_successful;

  // A map from pseudo registers to the first allocno in their associated
  // allocno groups.
  hash_map<int_hash<unsigned int, INVALID_REGNUM>,
	   allocno_group_info *> m_regno_to_group;

  // All recorded copies between allocnos and FPRs.
  auto_vec<allocno_copy_info> m_allocno_copies;

  // All allocnos, by index.
  auto_vec<allocno_info *> m_allocnos;

  // All allocnos, by increasing START_POINT.
  auto_vec<allocno_info *> m_sorted_allocnos;

  // Allocnos for which is_shared is true.
  auto_vec<allocno_info *> m_shared_allocnos;

  // All colors, by index.
  auto_vec<color_info *> m_colors;

  // The instruction ranges that make up the current region,
  // as half-open ranges [LAST, FIRST).
  auto_vec<std::pair<rtx_insn *, rtx_insn *>> m_insn_ranges;

  // The live ranges of each FPR, in order of increasing program point.
  auto_vec<fpr_range_info> m_fpr_ranges[32];

  // For each function call id, a list of program points at which a call
  // to such a function is made.  Each list is in order of increasing
  // program point.
  auto_vec<unsigned int> m_call_points[NUM_ABI_IDS];

  // A list of instructions that can be removed if allocation succeeds.
  auto_vec<rtx_insn *> m_dead_insns;
};
// True if PAT is something that would typically be treated as a move.
static bool
is_move_set (rtx pat)
{
  if (GET_CODE (pat) != SET)
    return false;

  rtx dest = SET_DEST (pat);
  if (SUBREG_P (dest))
    dest = SUBREG_REG (dest);
  if (!OBJECT_P (dest))
    return false;

  rtx src = SET_SRC (pat);
  if (SUBREG_P (src))
    src = SUBREG_REG (src);
  if (!OBJECT_P (src) && !CONSTANT_P (src))
    return false;

  return true;
}
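// For example, (set (reg:V4SI 92) (reg:V4SI 93)) and
// (set (reg:DI 95) (const_int 1)) count as moves, whereas
// (set (reg:V4SI 92) (plus:V4SI ...)) does not, since the PLUS source
// is neither an object nor a constant.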
// Return true if operand OP is likely to match OP_ALT after register
// allocation.
static bool
likely_operand_match_p (const operand_alternative &op_alt, rtx op)
{
  // Empty constraints match everything.
  const char *constraint = op_alt.constraint;
  if (constraint[0] == 0 || constraint[0] == ',')
    return true;

  for (;;)
    {
      char c = *constraint;
      int len = CONSTRAINT_LEN (c, constraint);
      if (c == 0 || c == ',')
	break;

      auto cn = lookup_constraint (constraint);
      switch (get_constraint_type (cn))
	{
	case CT_REGISTER:
	  if (REG_P (op) || SUBREG_P (op))
	    return true;
	  break;

	case CT_MEMORY:
	case CT_SPECIAL_MEMORY:
	case CT_RELAXED_MEMORY:
	  if (MEM_P (op))
	    return true;
	  break;

	default:
	  if (constraint_satisfied_p (op, cn))
	    return true;
	  break;
	}

      constraint += len;
    }

  if (op_alt.matches >= 0)
    {
      rtx other = recog_data.operand[op_alt.matches];
      if ((REG_P (other) || SUBREG_P (other))
	  && (REG_P (op) || SUBREG_P (op)))
	return true;
    }
  return false;
}
// Return true if the operands of the current instruction are likely to
// match OP_ALT.
static bool
likely_alternative_match_p (const operand_alternative *op_alt)
{
  for (int i = 0; i < recog_data.n_operands; ++i)
    if (!likely_operand_match_p (op_alt[i], recog_data.operand[i]))
      return false;
  return true;
}
// Return the sum of how disparaged OP_ALT is.
static int
count_rejects (const operand_alternative *op_alt)
{
  int reject = 0;
  for (int opno = 0; opno < recog_data.n_operands; ++opno)
    reject += op_alt[opno].reject;
  return reject;
}
// Allocate a T from the region obstack.
template<typename T, typename... Ts>
T *
early_ra::region_allocate (Ts... args)
{
  static_assert (std::is_trivially_destructible<T>::value,
		 "destructor won't be called");
  void *addr = obstack_alloc (&m_region_obstack, sizeof (T));
  return new (addr) T (std::forward<Ts> (args)...);
}
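// As a hypothetical example, "auto *p = region_allocate<unsigned int> (42);"
// would place an unsigned int on m_region_obstack.  start_new_region later
// frees the whole obstack in one go rather than destroying individual
// objects, which is why the static_assert above requires trivial
// destructibility.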
early_ra::early_ra (function *fn) : m_fn (fn), m_live_fprs (0)
{
  gcc_obstack_init (&m_region_obstack);
  m_region_alloc_start = obstack_alloc (&m_region_obstack, 0);
  bitmap_tree_view (m_live_allocnos);
}
early_ra::~early_ra ()
{
  obstack_free (&m_region_obstack, nullptr);
}
// Return an array that, for each allocno A in the group, contains the index
// of the allocno at the head of A's chain (that is, the one with the highest
// START_POINT).  The index is INVALID_ALLOCNO if the chain is empty.
inline array_slice<unsigned int>
early_ra::allocno_group_info::chain_heads ()
{
  auto *start = reinterpret_cast<unsigned int *> (this + 1);
  return { start, size };
}
// Return the array of allocnos in the group.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_group_info::allocnos ()
{
  gcc_checking_assert (regno != INVALID_REGNUM);
  auto *chain_end = reinterpret_cast<unsigned int *> (this + 1) + size;
  auto *allocno_start = reinterpret_cast<allocno_info *> (chain_end);
  return { allocno_start, size };
}
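// The casts in chain_heads and allocnos rely on the layout produced by
// create_allocno_group:
//
//   [ allocno_group_info | chain head array | allocno array ]
//
// so "this + 1" is the start of the chain-head array, and the allocnos
// (if any) follow immediately after its SIZE elements.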
// Return the group's color representative.
inline early_ra::allocno_group_info *
early_ra::allocno_group_info::color_rep ()
{
  gcc_checking_assert (m_color_rep->m_color_rep == m_color_rep);
  return m_color_rep;
}
// Return the group that contains the allocno.
inline early_ra::allocno_group_info *
early_ra::allocno_info::group ()
{
  auto *chain_end = reinterpret_cast<unsigned int *> (this - offset);
  return reinterpret_cast<allocno_group_info *> (chain_end - group_size) - 1;
}
// Return true if this allocno's live range is a subrange of related_allocno's
// and if we have committed to making this allocno share whatever register
// related_allocno uses.
inline bool
early_ra::allocno_info::is_shared ()
{
  return related_allocno != INVALID_ALLOCNO && !is_equiv;
}
// Return true if this allocno is known to be equivalent to ALLOCNO.
inline bool
early_ra::allocno_info::is_equiv_to (unsigned int allocno)
{
  return is_equiv && related_allocno == allocno;
}
// Return the allocnos in the subgroup.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_subgroup::allocnos ()
{
  if (!count)
    return {};
  return { &group->allocnos ()[start], count };
}
// Return allocno I in the subgroup, with 0 being the first.
inline early_ra::allocno_info *
early_ra::allocno_subgroup::allocno (unsigned int i)
{
  return &group->allocnos ()[start + i];
}
// Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_prev (allocno_info *allocno)
{
  if (allocno->chain_prev != INVALID_ALLOCNO)
    return m_allocnos[allocno->chain_prev];
  return nullptr;
}
// Return the next (later) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_next (allocno_info *allocno)
{
  if (allocno->chain_next != INVALID_ALLOCNO)
    return m_allocnos[allocno->chain_next];
  return nullptr;
}
// Dump the information in m_pseudo_regs.
void
early_ra::dump_pseudo_regs ()
{
  fprintf (dump_file, "\nPseudos:\n");
  fprintf (dump_file, " %6s %6s %6s %6s %6s %6s %8s %s\n",
	   "Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
	   "FPRness", "Copies");
  pseudo_reg_info unused_reg = {};
  for (unsigned int regno = FIRST_PSEUDO_REGISTER;
       regno < m_pseudo_regs.length (); ++regno)
    {
      const auto &reg = m_pseudo_regs[regno];
      if (memcmp (&reg, &unused_reg, sizeof (reg)) == 0)
	continue;

      fprintf (dump_file, " %6d %6s %6s %6s %6s %6s %8d", regno,
	       reg.flags & NEEDS_FPR8 ? "Req"
	       : reg.flags & ALLOWS_FPR8 ? "OK" : "-",
	       reg.flags & NEEDS_FPR16 ? "Req"
	       : reg.flags & ALLOWS_FPR16 ? "OK" : "-",
	       reg.flags & NEEDS_FPR32 ? "Req"
	       : reg.flags & ALLOWS_FPR32 ? "OK" : "-",
	       reg.flags & NEEDS_NONFPR ? "Req"
	       : reg.flags & ALLOWS_NONFPR ? "OK" : "-",
	       ~reg.flags & HAS_FLEXIBLE_STRIDE ? "-"
	       : reg.flags & HAS_FIXED_STRIDE ? "Some" : "All",
	       fpr_preference (regno));
      if (reg.flags & HAS_FPR_COPY)
	fprintf (dump_file, " FPR");
      if (reg.flags & HAS_NONFPR_COPY)
	fprintf (dump_file, " Non-FPR");
      unsigned int copyi = reg.first_copy;
      while (copyi)
	{
	  const auto &copy = m_pseudo_reg_copies[copyi];
	  if (copy.regnos[0] == regno)
	    {
	      fprintf (dump_file, " r%d", copy.regnos[1]);
	      copyi = copy.next_copies[0];
	    }
	  else
	    {
	      fprintf (dump_file, " r%d", copy.regnos[0]);
	      copyi = copy.next_copies[1];
	    }
	}
      fprintf (dump_file, "\n");
    }
}
// Dump the information in m_fpr_ranges.
void
early_ra::dump_fpr_ranges ()
{
  fprintf (dump_file, "\nFPR live ranges:\n");
  for (unsigned int fpr = 0; fpr < 32; ++fpr)
    {
      auto &intervals = m_fpr_ranges[fpr];
      if (intervals.is_empty ())
	continue;

      fprintf (dump_file, " %2d", fpr);
      for (unsigned int i = 0; i < intervals.length (); ++i)
	{
	  auto &interval = intervals[i];
	  if (i && (i % 4) == 0)
	    fprintf (dump_file, "\n ");
	  fprintf (dump_file, " [ %6d %6d ]", interval.start_point,
		   interval.end_point);
	}
      fprintf (dump_file, "\n");
    }
}
// Dump the information in m_allocno_copies.
void
early_ra::dump_copies ()
{
  fprintf (dump_file, "\nCopies:\n");
  fprintf (dump_file, " %8s %3s %6s\n",
	   "Allocno", "FPR", "Weight");
  for (const auto &copy : m_allocno_copies)
    fprintf (dump_file, " %8d %3d %6d\n", copy.allocno,
	     copy.fpr, copy.weight);
}
// Dump the information in m_allocnos.
void
early_ra::dump_allocnos ()
{
  char buffer[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
  fprintf (dump_file, "\nAllocno groups:\n");
  fprintf (dump_file,
	   " %12s %12s %4s %6s %8s %s\n",
	   "Ids", "Regno", "Size", "Stride", "Cands", "Heads");
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      if (allocno->offset != 0)
	continue;
      auto *group = allocno->group ();
      snprintf (buffer, sizeof (buffer), "[%d:%d]", allocno->id,
		allocno->id + group->size - 1);
      fprintf (dump_file, " %12s", buffer);
      snprintf (buffer, sizeof (buffer), "r%d[0:%d]", group->regno,
		group->size - 1);
      fprintf (dump_file, " %12s %4s %6d %08x", buffer,
	       group->fpr_size == FPR_D ? "D"
	       : group->fpr_size == FPR_Q ? "Q" : "Z",
	       group->stride,
	       group->fpr_candidates);
      for (auto head : group->chain_heads ())
	if (head == INVALID_ALLOCNO)
	  fprintf (dump_file, " -");
	else
	  fprintf (dump_file, " %d", head);
      fprintf (dump_file, "\n");
    }

  fprintf (dump_file, "\nAllocno chains:\n");
  fprintf (dump_file, " %5s %12s %12s %6s %5s %5s %6s %5s\n",
	   "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "Shared", "FPR");
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      if (allocno->chain_prev != INVALID_ALLOCNO)
	continue;
      const char *prefix = "=>";
      for (;;)
	{
	  auto *group = allocno->group ();
	  fprintf (dump_file, " %2s", prefix);
	  fprintf (dump_file, " %5d", allocno->id);
	  snprintf (buffer, sizeof (buffer), "r%d[%d]", group->regno,
		    allocno->offset);
	  fprintf (dump_file, " %12s", buffer);
	  snprintf (buffer, sizeof (buffer), "[%d,%d]",
		    allocno->start_point, allocno->end_point);
	  fprintf (dump_file, " %11s%s %6s", buffer,
		   allocno->is_earlyclobbered ? "*" : " ",
		   allocno->is_strong_copy_dest ? "Strong"
		   : allocno->is_copy_dest ? "Yes" : "-");
	  if (allocno->copy_dest == INVALID_ALLOCNO)
	    fprintf (dump_file, " %5s", "-");
	  else
	    fprintf (dump_file, " %5d", allocno->copy_dest);
	  if (allocno->is_equiv)
	    fprintf (dump_file, " %5d", allocno->related_allocno);
	  else
	    fprintf (dump_file, " %5s", "-");
	  if (allocno->is_shared ())
	    fprintf (dump_file, " %6d", allocno->related_allocno);
	  else
	    fprintf (dump_file, " %6s", "-");
	  if (allocno->hard_regno == FIRST_PSEUDO_REGISTER)
	    fprintf (dump_file, " %5s", "-");
	  else
	    fprintf (dump_file, " %5s", reg_names[allocno->hard_regno]);
	  fprintf (dump_file, "\n");
	  if (allocno->chain_next == INVALID_ALLOCNO)
	    break;
	  allocno = m_allocnos[allocno->chain_next];
	  prefix = "";
	}
    }
}
// Dump the information in m_colors.
void
early_ra::dump_colors ()
{
  fprintf (dump_file, "\nColors:\n");
  for (unsigned int i = 0; i < m_colors.length (); ++i)
    {
      auto *color = m_colors[i];

      fprintf (dump_file, " color %d:\n", i);
      fprintf (dump_file, " chains:\n");
      auto heads = color->group->chain_heads ();
      for (unsigned int i = 0; i < color->group->size; ++i)
	{
	  fprintf (dump_file, " %2d:", i);
	  auto ai = heads[i];
	  while (ai != INVALID_ALLOCNO)
	    {
	      auto *allocno = m_allocnos[ai];
	      fprintf (dump_file, " r%d[%d]", allocno->group ()->regno,
		       allocno->offset);
	      ai = allocno->chain_next;
	    }
	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, " FPR candidates:");
      for (unsigned int fpr = 0; fpr < 32; ++fpr)
	fprintf (dump_file, "%s%c", fpr % 8 ? "" : " ",
		 color->group->fpr_candidates & (1U << fpr) ? 'Y' : '-');
      fprintf (dump_file, "\n");
      fprintf (dump_file, " FPR preferences:");
      for (unsigned int fpr = 0; fpr < 32; ++fpr)
	if (color->fpr_preferences[fpr])
	  fprintf (dump_file, " %d(%d)", fpr, color->fpr_preferences[fpr]);
      fprintf (dump_file, "\n");
    }
}
// Record any necessary information about a move from SRC to DEST.
void
early_ra::preprocess_move (rtx dest, rtx src)
{
  if (SUBREG_P (dest))
    dest = SUBREG_REG (dest);
  if (!REG_P (dest))
    return;

  if (SUBREG_P (src))
    src = SUBREG_REG (src);
  if (!REG_P (src))
    return;

  // Sort the registers by increasing REGNO.
  rtx regs[] = { dest, src };
  if (REGNO (dest) > REGNO (src))
    std::swap (regs[0], regs[1]);
  unsigned int regno0 = REGNO (regs[0]);
  unsigned int regno1 = REGNO (regs[1]);

  // Ignore moves between hard registers.
  if (HARD_REGISTER_NUM_P (regno1))
    return;

  // For moves between hard registers and pseudos, just record the type
  // of hard register involved.
  auto &reg1 = m_pseudo_regs[regno1];
  reg1.mode = GET_MODE (regs[1]);
  if (HARD_REGISTER_NUM_P (regno0))
    {
      reg1.flags |= (FP_REGNUM_P (regno0) ? HAS_FPR_COPY : HAS_NONFPR_COPY);
      return;
    }

  // Record a move between two pseudo registers.
  auto &reg0 = m_pseudo_regs[regno0];
  reg0.mode = GET_MODE (regs[0]);

  reg_copy_info copy;
  copy.regnos[0] = regno0;
  copy.regnos[1] = regno1;
  copy.next_copies[0] = reg0.first_copy;
  copy.next_copies[1] = reg1.first_copy;

  reg0.first_copy = reg1.first_copy = m_pseudo_reg_copies.length ();
  m_pseudo_reg_copies.safe_push (copy);
}
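// For example, a move (set (reg:V4SI 100) (reg:V4SI v1)) gives pseudo 100
// the HAS_FPR_COPY flag, whereas a move between two pseudos is pushed onto
// m_pseudo_reg_copies and threaded onto both registers' copy lists via
// next_copies.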
// Return true if INSN has a multi-vector operand and if that operand
// could be converted to strided form.
static bool
is_stride_candidate (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  auto stride_type = get_attr_stride_type (insn);
  return (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
	  || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
}
// Go through the constraints of INSN, which has already been extracted,
// and record any relevant information about pseudo registers.
void
early_ra::process_pseudo_reg_constraints (rtx_insn *insn)
{
  extract_insn (insn);
  preprocess_constraints (insn);

  // Flags that describe any multi-register vector operands.
  unsigned int insn_flags = (is_stride_candidate (insn)
			     ? HAS_FLEXIBLE_STRIDE
			     : HAS_FIXED_STRIDE);

  auto alts = get_preferred_alternatives (insn);

  int operand_matches[MAX_RECOG_OPERANDS];
  unsigned int operand_flags[MAX_RECOG_OPERANDS];
  for (int i = 0; i < recog_data.n_operands; ++i)
    {
      operand_matches[i] = -1;
      operand_flags[i] = 0;
    }

  // Extract information from the constraints, considering all plausible
  // alternatives.
  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
    {
      if (!(alts & ALTERNATIVE_BIT (altno)))
	continue;

      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
      if (!likely_alternative_match_p (op_alt))
	continue;

      // Use SRC_OPNO's constraints to derive information about DEST_OPNO.
      auto record_operand = [&](int src_opno, int dest_opno)
	{
	  int matches = op_alt[src_opno].matches;
	  if (matches >= 0)
	    operand_matches[dest_opno] = matches;

	  auto cl = alternative_class (op_alt, src_opno);
	  if (cl != NO_REGS)
	    {
	      if (reg_class_subset_p (cl, FP_REGS))
		operand_flags[dest_opno] |= ALLOWS_FPR32;
	      if (reg_class_subset_p (cl, FP_LO_REGS))
		operand_flags[dest_opno] |= ALLOWS_FPR16;
	      if (reg_class_subset_p (cl, FP_LO8_REGS))
		operand_flags[dest_opno] |= ALLOWS_FPR8;
	      if (!reg_classes_intersect_p (cl, FP_REGS))
		operand_flags[dest_opno] |= ALLOWS_NONFPR;
	    }
	};

      for (int i = 0; i < recog_data.n_operands; ++i)
	{
	  record_operand (i, i);
	  if (recog_data.constraints[i][0] == '%')
	    {
	      record_operand (i, i + 1);
	      record_operand (i + 1, i);
	    }
	}
    }

  // Process the information we collected above.
  for (int i = 0; i < recog_data.n_operands; ++i)
    {
      rtx op = recog_data.operand[i];
      machine_mode orig_mode = GET_MODE (op);
      if (SUBREG_P (op))
	op = SUBREG_REG (op);

      // Record the accumulated information in m_pseudo_regs.
      if (REG_P (op) && !HARD_REGISTER_P (op))
	{
	  // The flags so far just describe what at least one alternative
	  // would accept.  Calculate the associated NEEDS_* information.
	  auto flags = operand_flags[i];
	  if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
	    flags |= NEEDS_NONFPR;
	  else if ((flags & ALLOWS_FPR32) && !(flags & ALLOWS_NONFPR))
	    {
	      if (flags & ALLOWS_FPR8)
		flags |= NEEDS_FPR8;
	      if (flags & ALLOWS_FPR16)
		flags |= NEEDS_FPR16;
	      flags |= NEEDS_FPR32;
	    }

	  // Look for multi-register vector operands.
	  if (VECTOR_MODE_P (orig_mode)
	      && targetm.hard_regno_mode_ok (V0_REGNUM, orig_mode)
	      && hard_regno_nregs (V0_REGNUM, orig_mode) > 1)
	    flags |= insn_flags;

	  m_pseudo_regs[REGNO (op)].flags |= flags;
	  m_pseudo_regs[REGNO (op)].mode = GET_MODE (op);
	}

      // Treat matching constraints as equivalent to moves.
      if (operand_matches[i] >= 0)
	preprocess_move (recog_data.operand[operand_matches[i]], op);
    }
}
// Make one pass through the instructions, collecting information that
// will be needed later.
void
early_ra::preprocess_insns ()
{
  m_pseudo_regs.safe_grow_cleared (max_reg_num ());
  m_pseudo_reg_copies.safe_push (reg_copy_info ());
  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      // Mark all registers that occur in addresses as needing a GPR.
      vec_rtx_properties properties;
      properties.add_insn (insn, true);
      for (rtx_obj_reference ref : properties.refs ())
	if (ref.is_reg ()
	    && ref.in_address ()
	    && !HARD_REGISTER_NUM_P (ref.regno))
	  m_pseudo_regs[ref.regno].flags |= ALLOWS_NONFPR | NEEDS_NONFPR;

      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	continue;

      rtx set = single_set (insn);
      if (set && is_move_set (set))
	preprocess_move (SET_DEST (set), SET_SRC (set));
      else
	process_pseudo_reg_constraints (insn);
    }
}
// Return a signed integer that says (roughly) how strong an affinity
// pseudo register REGNO has with FPRs.  A positive value indicates
// that we should try to allocate an FPR, a negative value indicates
// that we shouldn't, and 0 indicates neutrality.
int
early_ra::fpr_preference (unsigned int regno)
{
  auto mode = m_pseudo_regs[regno].mode;
  auto flags = m_pseudo_regs[regno].flags;
  if (mode == VOIDmode || !targetm.hard_regno_mode_ok (V0_REGNUM, mode))
    return -3;
  else if (flags & HAS_FLEXIBLE_STRIDE)
    return 3;
  else if (flags & NEEDS_FPR32)
    return 2;
  else if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
    return -2;
  else if ((flags & HAS_FPR_COPY) && !(flags & HAS_NONFPR_COPY))
    return 1;
  else if ((flags & HAS_NONFPR_COPY) && !(flags & HAS_FPR_COPY))
    return -1;
  else
    return 0;
}
// Propagate information about pseudo-registers along copy edges,
// while doing so doesn't create conflicting FPR preferences.
void
early_ra::propagate_pseudo_reg_info ()
{
  struct stack_entry { unsigned int regno, copyi; };

  auto_vec<stack_entry, 32> stack;
  for (unsigned int i = FIRST_PSEUDO_REGISTER;
       i < m_pseudo_regs.length (); ++i)
    {
      auto start = m_pseudo_regs[i].first_copy;
      if (!start)
	continue;

      stack.quick_push ({ i, start });
      while (!stack.is_empty ())
	{
	  auto entry = stack.pop ();
	  auto &copy = m_pseudo_reg_copies[entry.copyi];
	  auto src_regno = entry.regno;
	  auto dest_regno = (src_regno == copy.regnos[1]
			     ? copy.regnos[0]
			     : copy.regnos[1]);
	  auto next_copyi = (src_regno == copy.regnos[1]
			     ? copy.next_copies[1]
			     : copy.next_copies[0]);
	  if (next_copyi)
	    stack.safe_push ({ src_regno, next_copyi });

	  auto &src_reg = m_pseudo_regs[src_regno];
	  auto &dest_reg = m_pseudo_regs[dest_regno];

	  if (src_reg.flags & ~dest_reg.flags & PSEUDO_COPY_FLAGS)
	    {
	      auto src_preference = fpr_preference (src_regno);
	      auto dest_preference = fpr_preference (dest_regno);
	      if ((src_preference >= 0 && dest_preference >= 0)
		  || (src_preference <= 0 && dest_preference <= 0))
		{
		  dest_reg.flags |= (src_reg.flags & PSEUDO_COPY_FLAGS);
		  stack.safe_push ({ dest_regno, dest_reg.first_copy });
		}
	    }
	}
    }
}
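// For example, if r100 is copied to r101 and r101 to r102, and only r100
// has HAS_FPR_COPY set, the flag spreads first to r101 and then to r102,
// stopping at any register whose existing FPR preference points the
// opposite way.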
// Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
// accordingly.
void
early_ra::choose_fpr_pseudos ()
{
  for (unsigned int i = FIRST_PSEUDO_REGISTER;
       i < m_pseudo_regs.length (); ++i)
    if (fpr_preference (i) > 0)
      bitmap_set_bit (m_fpr_pseudos, i);
}
// Clear out information about the previous CFG region (if any)
// and set up the data for a new region.
void
early_ra::start_new_region ()
{
  obstack_free (&m_region_obstack, m_region_alloc_start);
  m_regno_to_group.empty ();
  m_allocno_copies.truncate (0);
  m_allocnos.truncate (0);
  m_sorted_allocnos.truncate (0);
  m_shared_allocnos.truncate (0);
  m_colors.truncate (0);
  m_insn_ranges.truncate (0);
  for (auto &fpr_ranges : m_fpr_ranges)
    fpr_ranges.truncate (0);
  for (auto &call_points : m_call_points)
    call_points.truncate (0);
  gcc_assert (bitmap_empty_p (m_live_allocnos) && m_live_fprs == 0);
  m_dead_insns.truncate (0);
  m_allocated_fprs = 0;
  m_call_preserved_fprs = 0;
  m_allocation_successful = true;
  m_current_region += 1;
}
// Create and return an allocno group of size SIZE for register REGNO.
// REGNO can be INVALID_REGNUM if the group just exists to allow
// other groups to be chained together, and does not have any new
// allocnos of its own.
early_ra::allocno_group_info *
early_ra::create_allocno_group (unsigned int regno, unsigned int size)
{
  static_assert (alignof (unsigned int) == alignof (allocno_info),
		 "allocno_info alignment");
  unsigned int num_allocnos = (regno != INVALID_REGNUM ? size : 0);

  // Allocate an allocno_group_info, followed by an array of chain heads,
  // followed by the allocnos themselves.
  size_t alloc_size = (sizeof (allocno_group_info)
		       + size * sizeof (unsigned int)
		       + num_allocnos * sizeof (allocno_info));
  void *data = obstack_alloc (&m_region_obstack, alloc_size);

  // Initialize the group.
  auto *group = reinterpret_cast<allocno_group_info *> (data);
  memset (group, 0, sizeof (*group));
  group->m_color_rep = group;
  group->regno = regno;
  group->size = size;
  group->stride = 1;
  group->fpr_size = FPR_D;
  group->fpr_candidates = ~0U;

  // Initialize the chain heads.
  auto heads = group->chain_heads ();
  for (unsigned int i = 0; i < heads.size (); ++i)
    heads[i] = (i < num_allocnos ? m_allocnos.length () + i : INVALID_ALLOCNO);

  // Initialize the allocnos.
  if (num_allocnos > 0)
    {
      auto allocnos = group->allocnos ();
      memset (allocnos.begin (), 0, num_allocnos * sizeof (allocno_info));
      for (unsigned int i = 0; i < num_allocnos; ++i)
	{
	  auto *allocno = &allocnos[i];
	  allocno->id = m_allocnos.length ();
	  allocno->offset = i;
	  allocno->group_size = size;
	  allocno->hard_regno = FIRST_PSEUDO_REGISTER;
	  allocno->start_point = END_OF_REGION;
	  allocno->end_point = START_OF_REGION;
	  allocno->copy_dest = INVALID_ALLOCNO;
	  allocno->related_allocno = INVALID_ALLOCNO;
	  allocno->chain_next = INVALID_ALLOCNO;
	  allocno->chain_prev = INVALID_ALLOCNO;
	  m_allocnos.safe_push (allocno);
	}
    }
  return group;
}
// If REG refers to a pseudo register that might be allocated to FPRs,
// return the associated range of allocnos, creating new ones if necessary.
// Return an empty range otherwise.
early_ra::allocno_subgroup
early_ra::get_allocno_subgroup (rtx reg)
{
  if (GET_CODE (reg) == SUBREG)
    {
      allocno_subgroup inner = get_allocno_subgroup (SUBREG_REG (reg));
      if (!inner)
	return {};

      if (!targetm.modes_tieable_p (GET_MODE (SUBREG_REG (reg)),
				    GET_MODE (reg)))
	{
	  m_allocation_successful = false;
	  return {};
	}

      subreg_info info;
      subreg_get_info (V0_REGNUM, GET_MODE (SUBREG_REG (reg)),
		       SUBREG_BYTE (reg), GET_MODE (reg), &info);
      if (!info.representable_p)
	{
	  m_allocation_successful = false;
	  return {};
	}

      inner.start += info.offset;
      inner.count = info.nregs;
      return inner;
    }

  if (!REG_P (reg) || HARD_REGISTER_P (reg))
    return {};

  unsigned int regno = REGNO (reg);
  if (fpr_preference (regno) <= 0)
    return {};

  unsigned int count = hard_regno_nregs (V0_REGNUM, GET_MODE (reg));
  bool existed;
  auto &entry = m_regno_to_group.get_or_insert (regno, &existed);
  if (!existed)
    {
      auto *group = create_allocno_group (regno, count);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  auto allocnos = group->allocnos ();
	  fprintf (dump_file, "Creating allocnos [%d:%d] for r%d\n",
		   allocnos.front ().id, allocnos.back ().id, regno);
	}

      auto reg_bits = GET_MODE_BITSIZE (GET_MODE (reg));
      auto fpr_bits = exact_div (reg_bits, count);
      auto flags = m_pseudo_regs[regno].flags;

      // Punt for now if there is a choice to be made between using an
      // FPR and a non-FPR.
      if ((flags & NEEDS_NONFPR)
	  || ((flags & ALLOWS_NONFPR)
	      && !FLOAT_MODE_P (GET_MODE (reg))
	      && !VECTOR_MODE_P (GET_MODE (reg))))
	m_allocation_successful = false;

      if (flags & ALLOWS_FPR8)
	group->fpr_candidates &= 0xff;
      else if (flags & ALLOWS_FPR16)
	group->fpr_candidates &= 0xffff;
      group->fpr_candidates &= ~0U >> (count - 1);

      group->has_flexible_stride = ((flags & HAS_FLEXIBLE_STRIDE) != 0
				    && (flags & HAS_FIXED_STRIDE) == 0);

      group->fpr_size = (maybe_gt (fpr_bits, 128) ? FPR_Z
			 : maybe_gt (fpr_bits, 64) ? FPR_Q : FPR_D);

      entry = group;
    }
  return { entry, 0, count };
}
// Record a use of FPR REGNO at the current program point, as part of
// a backwards walk over a block.
void
early_ra::record_fpr_use (unsigned int regno)
{
  gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
  unsigned int offset = regno - V0_REGNUM;
  if (!(m_live_fprs & (1U << offset)))
    {
      m_fpr_ranges[offset].safe_push ({ START_OF_REGION, m_current_point,
					INVALID_ALLOCNO });
      m_live_fprs |= 1U << offset;
    }
}
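// Note the backwards-walk convention here: a use opens a live range whose
// start is provisionally START_OF_REGION, and the walk later reaches the
// defining instruction, which closes the range by setting start_point
// (see record_fpr_def below).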
// Record a definition of FPR REGNO at the current program point, as part of
// a backwards walk over a block.
void
early_ra::record_fpr_def (unsigned int regno)
{
  gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
  unsigned int offset = regno - V0_REGNUM;

  // The definition completes the current live range.  If the result
  // of the definition is used, the live range extends to the last use.
  // Otherwise the live range is just a momentary blip at the current point.
  auto &ranges = m_fpr_ranges[offset];
  if (m_live_fprs & (1U << offset))
    {
      ranges.last ().start_point = m_current_point;
      m_live_fprs &= ~(1U << offset);
    }
  else
    ranges.safe_push ({ m_current_point, m_current_point, INVALID_ALLOCNO });
}
// Record a use of allocno ALLOCNO at the current program point, as part
// of a backwards walk over a block.
void
early_ra::record_allocno_use (allocno_info *allocno)
{
  if (allocno->start_point == m_current_point)
    return;

  gcc_checking_assert (!allocno->is_shared ());
  bitmap_set_bit (m_live_allocnos, allocno->id);
  if (allocno->end_point > m_current_point)
    {
      allocno->end_point = m_current_point;
      allocno->last_def_point = START_OF_REGION;
      allocno->last_use_point = END_OF_REGION;
    }
  else
    allocno->last_use_point = allocno->start_point;
  allocno->start_point = m_current_point;
  allocno->is_copy_dest = false;
  allocno->is_strong_copy_src = false;
  allocno->related_allocno = INVALID_ALLOCNO;
  allocno->is_equiv = false;
}
// Record a definition of the allocno with index AI at the current program
// point, as part of a backwards walk over a block.  The allocno is known
// to be live.
void
early_ra::record_allocno_def (allocno_info *allocno)
{
  gcc_checking_assert (!allocno->is_shared ());
  allocno->last_use_point = allocno->start_point;
  allocno->last_def_point = m_current_point;
  allocno->start_point = m_current_point;
  allocno->num_defs = MIN (allocno->num_defs + 1, 2);
  gcc_checking_assert (!allocno->is_copy_dest
		       && !allocno->is_strong_copy_src);
  if (!bitmap_clear_bit (m_live_allocnos, allocno->id))
    gcc_unreachable ();
}
// SRC_ALLOCNO is copied or tied to DEST_ALLOCNO; IS_EQUIV is true if the
// two allocnos are known to be equal.  See whether we can mark a chain of
// allocnos ending at DEST_ALLOCNO as related to SRC_ALLOCNO.  Return the
// start of the chain if so, otherwise return null.
//
// If IS_EQUIV, a chain that contains just DEST_ALLOCNO should be treated
// as an equivalence.  Otherwise the chain should be shared with SRC_ALLOCNO.
//
// Sharing chains are a rather hacky workaround for the fact that we
// don't collect segmented live ranges, and that in the end we want to do
// simple interval graph coloring.
early_ra::allocno_info *
early_ra::find_related_start (allocno_info *dest_allocno,
			      allocno_info *src_allocno, bool is_equiv)
{
  allocno_info *res = nullptr;
  for (;;)
    {
      if (src_allocno->end_point > dest_allocno->end_point)
	// The src allocno dies first.
	return res;

      if (src_allocno->num_defs != 0)
	{
	  if (dest_allocno->end_point < m_current_bb_point)
	    // We don't currently track enough information to handle multiple
	    // definitions across basic block boundaries.
	    return res;

	  if (src_allocno->last_def_point >= dest_allocno->end_point)
	    // There is another definition during the destination's live range.
	    return res;
	}
      if (is_equiv)
	{
	  if (dest_allocno->num_defs == 1)
	    // dest_allocno is equivalent to src_allocno for dest_allocno's
	    // entire live range.  Fall back to that if we can't establish
	    // a sharing chain.
	    res = dest_allocno;
	}
      else
	{
	  if (src_allocno->last_use_point >= dest_allocno->end_point)
	    // src_allocno is live during dest_allocno's live range,
	    // and the two allocnos do not necessarily have the same value.
	    return res;
	}

      if (dest_allocno->group_size != 1
	  // Account for definitions by shared registers.
	  || dest_allocno->num_defs > 1
	  || DF_REG_DEF_COUNT (dest_allocno->group ()->regno) != 1)
	// Currently only single allocnos that are defined once can
	// share registers with non-equivalent allocnos.  This could be
	// relaxed, but at the time of writing, aggregates are not valid
	// SSA names and so generally only use a single pseudo throughout
	// their lifetime.
	return res;

      if (dest_allocno->copy_dest == src_allocno->id)
	// We've found a complete and valid sharing chain.
	return dest_allocno;

      if (dest_allocno->copy_dest == INVALID_ALLOCNO)
	return res;

      auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
      if (!is_chain_candidate (dest_allocno, next_allocno, ALL_REASONS))
	return res;

      dest_allocno = next_allocno;
      is_equiv = false;
    }
}
// Add FROM_ALLOCNO's definition information to TO_ALLOCNO's.
void
early_ra::accumulate_defs (allocno_info *to_allocno,
			   allocno_info *from_allocno)
{
  if (from_allocno->num_defs > 0)
    {
      to_allocno->num_defs = MIN (from_allocno->num_defs
				  + to_allocno->num_defs, 2);
      to_allocno->last_def_point = MAX (to_allocno->last_def_point,
					from_allocno->last_def_point);
    }
}
// Record any relevant allocno-related information for an actual or imagined
// copy from SRC to DEST.  FROM_MOVE_P is true if the copy was an explicit
// move instruction, false if it represents one way of satisfying the previous
// instruction's constraints.
void
early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
{
  auto dest_range = get_allocno_subgroup (dest);
  auto src_range = get_allocno_subgroup (src);
  if (from_move_p
      && dest_range
      && REG_P (src)
      && FP_REGNUM_P (REGNO (src)))
    {
      // A copy from an FPR to an allocno group.
      unsigned int fpr = REGNO (src) - V0_REGNUM;
      m_allocno_copies.safe_push ({ dest_range.allocno (0)->id, fpr,
				    dest_range.count });

      // If the allocno at the other end of the chain of copies from DEST
      // has a copy to the same FPR, record that all intervening copy chains
      // could become "strong" ones.  This indicates that picking the FPR
      // avoids a copy at both ends.
      unsigned int hard_regno = REGNO (src);
      for (auto &dest_allocno : dest_range.allocnos ())
	if (dest_allocno.hard_regno == hard_regno++)
	  dest_allocno.is_strong_copy_src = true;
    }
  else if (from_move_p
	   && src_range
	   && REG_P (dest)
	   && FP_REGNUM_P (REGNO (dest)))
    {
      // A copy from an allocno group to an FPR.
      unsigned int fpr = REGNO (dest) - V0_REGNUM;
      m_allocno_copies.safe_push ({ src_range.allocno (0)->id, fpr,
				    src_range.count });
      for (auto &src_allocno : src_range.allocnos ())
	{
	  // If the copy comes from a move, see whether the destination
	  // FPR is known to be equal to the source allocno for the FPR's
	  // current live range.
	  if (from_move_p && src_allocno.num_defs == 0)
	    {
	      auto &last_range = m_fpr_ranges[fpr].last ();
	      if (last_range.end_point >= src_allocno.end_point)
		last_range.allocno = src_allocno.id;
	    }
	  src_allocno.hard_regno = V0_REGNUM + fpr;
	  fpr += 1;
	}
    }
  else if (src_range && dest_range)
    {
      // A copy between two allocno groups.  We can only have a mismatched
      // number of FPRs for imaginary, non-move copies.  In that case
      // the matching happens on the common lowparts.
      gcc_assert (!from_move_p || src_range.count == dest_range.count);
      unsigned int count = std::min (src_range.count, dest_range.count);
      if (WORDS_BIG_ENDIAN)
	{
	  src_range.start += src_range.count - count;
	  dest_range.start += dest_range.count - count;
	}
      src_range.count = count;
      dest_range.count = count;

      // Ignore (imaginary non-move) copies if the destination is still live.
      for (auto &dest_allocno : dest_range.allocnos ())
	if (bitmap_bit_p (m_live_allocnos, dest_allocno.id))
	  return;

      for (unsigned int i = 0; i < src_range.count; ++i)
	{
	  auto *dest_allocno = dest_range.allocno (i);
	  auto *src_allocno = src_range.allocno (i);
	  if (src_allocno->end_point > dest_allocno->start_point)
	    {
	      gcc_assert (src_allocno->copy_dest == INVALID_ALLOCNO
			  || src_allocno->copy_dest == dest_allocno->id);
	      src_allocno->copy_dest = dest_allocno->id;
	      src_allocno->hard_regno = dest_allocno->hard_regno;
	      dest_allocno->is_copy_dest = 1;
	    }
	  else if (auto *start_allocno = find_related_start (dest_allocno,
							     src_allocno,
							     from_move_p))
	    {
	      auto *next_allocno = dest_allocno;
	      for (;;)
		{
		  next_allocno->related_allocno = src_allocno->id;
		  next_allocno->is_equiv = (start_allocno == dest_allocno
					    && from_move_p);
		  // If we're sharing two allocnos that are not equivalent,
		  // carry across the definition information.  This is needed
		  // to prevent multiple incompatible attempts to share with
		  // the same register.
		  if (next_allocno->is_shared ())
		    accumulate_defs (src_allocno, next_allocno);
		  src_allocno->last_use_point
		    = MAX (src_allocno->last_use_point,
			   next_allocno->last_use_point);

		  if (next_allocno == start_allocno)
		    break;
		  next_allocno = m_allocnos[next_allocno->copy_dest];
		}
	    }
	}
    }
}
1737 // Record any relevant allocno-related information about the constraints
1738 // on INSN, which has already been extracted.
1740 early_ra::record_constraints (rtx_insn
*insn
)
  preprocess_constraints (insn);

  int operand_matches[MAX_RECOG_OPERANDS];
  for (int i = 0; i < recog_data.n_operands; ++i)
    operand_matches[i] = -1;

  auto alts = get_preferred_alternatives (insn);
  bool any_ok = recog_data.n_alternatives == 0;

  // The set of output operands that are earlyclobber in at least one
  // alternative.
  operand_mask earlyclobber_operands = 0;

  // The set of output operands that are matched to inputs in at least
  // one alternative.
  operand_mask matched_operands = 0;

  // The set of output operands that are not matched to inputs in at least
  // one alternative.
  operand_mask unmatched_operands = 0;

  // The set of input operands that are matched to outputs in at least one
  // alternative, or that overlap with such an input if the output is not
  // earlyclobber.  The latter part of the condition copes with things
  // like y = x * x, where the first x is tied to the destination, and where
  // y is not earlyclobber.
  operand_mask matches_operands = 0;

  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
    {
      if (!(alts & ALTERNATIVE_BIT (altno)))
        continue;

      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
      if (!likely_alternative_match_p (op_alt))
        continue;

      any_ok = true;

      // Update the information for operand DEST_OPNO based on the constraint
      // information for operand SRC_OPNO.  The numbers can be different for
      // swapped commutative operands.
      auto record_operand = [&](int src_opno, int dest_opno)
        {
          int matches = op_alt[src_opno].matches;
          if (matches < 0)
            return;

          // A matched earlyclobber cannot be used if the same operand value
          // occurs in an unmatched operand.  E.g. for y = x * x, a matched
          // earlyclobber on the first input does not cover the second input.
          rtx op = recog_data.operand[dest_opno];
          operand_mask overlaps = 0;
          for (int i = 0; i < recog_data.n_operands; ++i)
            if (i != dest_opno
                && !recog_data.is_operator[i]
                && recog_data.operand_type[i] != OP_OUT
                && reg_overlap_mentioned_p (op, recog_data.operand[i]))
              overlaps |= 1U << i;
          if (!op_alt[matches].earlyclobber || overlaps == 0)
            {
              operand_matches[dest_opno] = matches;
              matches_operands |= (1U << dest_opno) | overlaps;
            }
        };

      auto reject = count_rejects (op_alt);
      for (int opno = 0; opno < recog_data.n_operands; ++opno)
        {
          operand_mask op_mask = operand_mask (1) << opno;

          if (recog_data.operand_type[opno] != OP_IN)
            {
              if (reject == 0 && op_alt[opno].matched >= 0)
                matched_operands |= op_mask;
              else
                unmatched_operands |= op_mask;
            }

          if (op_alt[opno].earlyclobber)
            earlyclobber_operands |= op_mask;

          // Punt for now on scratches.  If we wanted to handle them,
          // we'd need to create allocnos for them, like IRA does.
          rtx op = recog_data.operand[opno];
          if (GET_CODE (op) == SCRATCH
              && reg_classes_intersect_p (op_alt[opno].cl, FP_REGS))
            m_allocation_successful = false;

          // Record filter information, which applies to the first register
          // in the group.
          if (auto filters = alternative_register_filters (op_alt, opno))
            if (auto range = get_allocno_subgroup (recog_data.operand[opno]))
              for (unsigned int fpr = range.start; fpr < 32; ++fpr)
                if (!test_register_filters (filters, fpr))
                  range.group->fpr_candidates &= ~(1U << (fpr - range.start));

          // Record possible matched operands.
          record_operand (opno, opno);
          if (recog_data.constraints[opno][0] == '%')
            {
              record_operand (opno, opno + 1);
              record_operand (opno + 1, opno);
            }
        }
    }

  if (!any_ok)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " -- no match\n");
      m_allocation_successful = false;
    }

  // Record if there is an output operand that is never earlyclobber and never
  // matched to an input.  See the comment below for how this is used.
  rtx dest_op = NULL_RTX;
  for (int opno = 0; opno < recog_data.n_operands; ++opno)
    {
      auto op_mask = operand_mask (1) << opno;
      if (recog_data.operand_type[opno] == OP_OUT
          && (earlyclobber_operands & op_mask) == 0
          && (matched_operands & op_mask) == 0)
        {
          dest_op = recog_data.operand[opno];
          break;
        }
    }

  for (int opno = 0; opno < recog_data.n_operands; ++opno)
    {
      auto op_mask = operand_mask (1) << opno;
      rtx op = recog_data.operand[opno];
      int matches = operand_matches[opno];

      // Punt for now on operands that already have a fixed choice of
      // register, since we don't have IRA's ability to find an alternative.
      // It's better if earlier passes don't create this kind of situation.
      if (REG_P (op) && FP_REGNUM_P (REGNO (op)))
        m_allocation_successful = false;

      // Treat input operands as being earlyclobbered if an output is
      // sometimes earlyclobber and if the input never matches an output.
      // Do the same if there is an output that is always matched to an
      // input, and if this operand doesn't match that input.  In both
      // cases, tying the input and the output would lead to an impossible
      // combination (or at least one that is difficult to reload).
      if (recog_data.operand_type[opno] != OP_OUT
          && ((earlyclobber_operands && matches < 0)
              || ((matched_operands & ~unmatched_operands)
                  && !(matches_operands & op_mask))))
        for (auto &allocno : get_allocno_subgroup (op).allocnos ())
          if (allocno.end_point + 1 == m_current_point)
            allocno.is_earlyclobbered = true;

      // Create copies between operands that can be tied.  This (deliberately)
      // might add several copies to the same destination register; later code
      // can then choose between them based on other criteria.
      //
      // If there is an output operand that is never matched or earlyclobber,
      // and an input operand that never matches an output operand, create
      // a tentative copy between them.  This allows hard register preferences
      // to be transmitted along the copy chains.
      if (matches >= 0)
        record_copy (recog_data.operand[matches], op);
      else if (dest_op && recog_data.operand_type[opno] == OP_IN)
        record_copy (dest_op, op);
    }
}
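
// For example, in a fused multiply-add whose destination is tied to the
// addend operand, the destination contributes to MATCHED_OPERANDS, the
// addend contributes to MATCHES_OPERANDS, and the multiplicands do not.
// The final loop above therefore marks the multiplicands' allocnos as
// earlyclobbered, which stops them from being chained to (and so tied
// to) the destination's allocnos.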

// If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the
// start of the current basic block, otherwise model the artificial uses
// and defs at the end of the basic block.  This is done as part of a
// backwards walk, so defs should be processed before uses.
void
early_ra::record_artificial_refs (unsigned int flags)
{
  df_ref ref;

  FOR_EACH_ARTIFICIAL_DEF (ref, m_current_bb->index)
    if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
        && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
      record_fpr_def (DF_REF_REGNO (ref));
  m_current_point += 1;

  FOR_EACH_ARTIFICIAL_USE (ref, m_current_bb->index)
    if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
        && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
      record_fpr_use (DF_REF_REGNO (ref));
  m_current_point += 1;
}

// Model the register references in INSN as part of a backwards walk.
void
early_ra::record_insn_refs (rtx_insn *insn)
{
  df_ref ref;

  // Record all definitions, excluding partial call clobbers.
  FOR_EACH_INSN_DEF (ref, insn)
    if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
      record_fpr_def (DF_REF_REGNO (ref));
    else
      {
        auto range = get_allocno_subgroup (DF_REF_REG (ref));
        for (auto &allocno : range.allocnos ())
          {
            // If the destination is unused, record a momentary blip
            // in its live range.
            if (!bitmap_bit_p (m_live_allocnos, allocno.id))
              record_allocno_use (&allocno);
            record_allocno_def (&allocno);
          }
      }
  m_current_point += 1;

  // Model the call made by a call insn as a separate phase in the
  // evaluation of the insn.  Any partial call clobbers happen at that
  // point, rather than in the definition or use phase of the insn.
  if (auto *call_insn = dyn_cast<rtx_call_insn *> (insn))
    {
      function_abi abi = insn_callee_abi (call_insn);
      m_call_points[abi.id ()].safe_push (m_current_point);
      m_current_point += 1;
    }

  // Record all uses.  We can ignore READ_MODIFY_WRITE uses of plain subregs,
  // since we track the FPR-sized parts of them individually.
  FOR_EACH_INSN_USE (ref, insn)
    if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
      record_fpr_use (DF_REF_REGNO (ref));
    else if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE)
             || DF_REF_FLAGS_IS_SET (ref, DF_REF_STRICT_LOW_PART)
             || DF_REF_FLAGS_IS_SET (ref, DF_REF_ZERO_EXTRACT))
      {
        auto range = get_allocno_subgroup (DF_REF_REG (ref));
        for (auto &allocno : range.allocnos ())
          record_allocno_use (&allocno);
      }
  m_current_point += 1;
}
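
// For example, during the backwards walk a call insn occupies three
// consecutive program points: the definitions are processed first,
// then the call itself (where the point is recorded in M_CALL_POINTS),
// and then the uses.  A value that is live across the call therefore
// spans the middle point, which is what call_in_range_p later tests
// when applying partial_fpr_clobbers.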

// ALLOCNO->is_strong_copy_src is true.  See whether ALLOCNO heads a
// natural chain that has an affinity with the same hard register at
// both ends.
bool
early_ra::consider_strong_copy_src_chain (allocno_info *allocno)
{
  auto *src_allocno = allocno;
  while (src_allocno->copy_dest != INVALID_ALLOCNO)
    {
      auto *dest_allocno = m_allocnos[src_allocno->copy_dest];
      if (dest_allocno->start_point > src_allocno->end_point
          || dest_allocno->hard_regno != src_allocno->hard_regno)
        return false;
      gcc_checking_assert (dest_allocno->is_copy_dest);
      src_allocno = dest_allocno;
    }

  while (allocno->copy_dest != INVALID_ALLOCNO)
    {
      allocno->is_strong_copy_src = 1;
      allocno = m_allocnos[allocno->copy_dest];
      allocno->is_strong_copy_dest = 1;
    }
  return true;
}

// ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being
// chained together.  See whether chaining them requires the containing
// groups to have the same stride, or whether it requires them to have
// different strides.  Return 1 if they should have the same stride,
// -1 if they should have different strides, or 0 if it doesn't matter.
int
early_ra::strided_polarity_pref (allocno_info *allocno1,
                                 allocno_info *allocno2)
{
  if (allocno1->offset + 1 < allocno1->group_size
      && allocno2->offset + 1 < allocno2->group_size)
    {
      if (is_chain_candidate (allocno1 + 1, allocno2 + 1, ALL_REASONS))
        return 1;
      else
        return -1;
    }

  if (allocno1->offset > 0 && allocno2->offset > 0)
    {
      if (is_chain_candidate (allocno1 - 1, allocno2 - 1, ALL_REASONS))
        return 1;
      else
        return -1;
    }

  return 0;
}
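
// For example, if ALLOCNO1 and ALLOCNO2 are both the first allocno of
// larger groups, the first test above asks whether their immediate
// successors could also be chained.  If so, the two groups want matching
// strides, so that successive allocnos stay aligned; if not, they want
// opposite strides.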

// Decide which groups should be strided.  Also complete "strong" copy chains.
void
early_ra::find_strided_accesses ()
{
  // This function forms a graph of allocnos, linked by equivalences and
  // natural copy chains.  It temporarily uses chain_next to record the
  // reverse of equivalence edges (related_allocno) and chain_prev to record
  // the reverse of copy edges (copy_dest).
  unsigned int allocno_info::*links[] = {
    &allocno_info::chain_next,
    &allocno_info::chain_prev,
    &allocno_info::copy_dest,
    &allocno_info::related_allocno
  };

  // Set up the temporary reverse edges.  Check for strong copy chains.
  for (unsigned int i = m_allocnos.length (); i-- > 0; )
    {
      auto *allocno1 = m_allocnos[i];
      if (allocno1->copy_dest != INVALID_ALLOCNO)
        m_allocnos[allocno1->copy_dest]->chain_prev = allocno1->id;
      if (allocno1->related_allocno != INVALID_ALLOCNO)
        m_allocnos[allocno1->related_allocno]->chain_next = allocno1->id;

      if (allocno1->is_strong_copy_src
          && !allocno1->is_copy_dest
          && !consider_strong_copy_src_chain (allocno1))
        allocno1->is_strong_copy_src = false;
    }

  // Partition the graph into cliques based on edges that have the following
  // properties:
  //
  // - the edge joins two allocnos whose groups have a free choice between
  //   consecutive and strided allocations.
  //
  // - the two groups have a relative strided polarity preference (that is
  //   they should make the same choice between consecutive and strided,
  //   or they should make opposite choices).
  //
  // Assign relative polarities to each group connected in this way.
  //
  // The aim is to discover natural move-free striding choices, which will
  // often exist in carefully written ACLE code.
  unsigned int num_edges = m_allocnos.length () * ARRAY_SIZE (links);
  auto_sbitmap visited_edges (num_edges);
  bitmap_clear (visited_edges);

  auto_vec<unsigned int, 32> worklist;
  for (unsigned int i = 0; i < num_edges; ++i)
    {
      if (!bitmap_set_bit (visited_edges, i))
        continue;
      worklist.quick_push (i);
      while (!worklist.is_empty ())
        {
          auto ei = worklist.pop ();
          auto *allocno1 = m_allocnos[ei / ARRAY_SIZE (links)];
          auto ai2 = allocno1->*links[ei % ARRAY_SIZE (links)];
          if (ai2 == INVALID_ALLOCNO)
            continue;

          auto *allocno2 = m_allocnos[ai2];
          auto *group1 = allocno1->group ();
          auto *group2 = allocno2->group ();
          if (!group1->has_flexible_stride || !group2->has_flexible_stride)
            continue;

          int pref = strided_polarity_pref (allocno1, allocno2);
          if (pref == 0)
            continue;

          for (auto *group : { group1, group2 })
            for (auto &allocno : group->allocnos ())
              for (unsigned int j = 0; j < ARRAY_SIZE (links); ++j)
                if (bitmap_set_bit (visited_edges, allocno.id * 4 + j))
                  worklist.safe_push (allocno.id * 4 + j);

          if (group1->strided_polarity)
            group2->strided_polarity = group1->strided_polarity * pref;
          else if (group2->strided_polarity)
            group1->strided_polarity = group2->strided_polarity * pref;
          else
            {
              group1->strided_polarity = 1;
              group2->strided_polarity = pref;
            }
        }
    }

  // Now look for edges between allocnos in multi-register groups where:
  //
  // - the two groups have a relative strided polarity preference (as above).
  //
  // - one group (G1) has a free choice between consecutive and strided
  //   allocations.
  //
  // - the other group (G2) must use consecutive allocations.
  //
  // Update G1's individual preference for strided or consecutive allocations
  // based on G2.  If the previous loop chose a polarity for G1, work out
  // whether it is better for polarity 1 or -1 to correspond to consecutive
  // allocation.
  int consecutive_pref = 0;
  for (unsigned int i = m_allocnos.length (); i-- > 0; )
    {
      auto *allocno1 = m_allocnos[i];
      for (auto link : links)
        {
          auto ai2 = allocno1->*link;
          if (ai2 == INVALID_ALLOCNO)
            continue;

          auto *allocno2 = m_allocnos[ai2];
          auto *group1 = allocno1->group ();
          auto *group2 = allocno2->group ();
          if (group1->has_flexible_stride == group2->has_flexible_stride)
            continue;

          int pref = strided_polarity_pref (allocno1, allocno2);
          if (pref == 0)
            continue;

          auto *group = (group1->has_flexible_stride ? group1 : group2);
          consecutive_pref += group->strided_polarity * pref;
          group->consecutive_pref += pref;
        }
    }

  // If it doesn't matter whether polarity 1 or -1 corresponds to consecutive
  // allocation, arbitrarily pick 1.
  if (consecutive_pref == 0)
    consecutive_pref = 1;

  // Record which multi-register groups should use strided allocations.
  // Clear out the temporary edges.
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      allocno->chain_prev = INVALID_ALLOCNO;
      allocno->chain_next = INVALID_ALLOCNO;

      if (allocno->offset != 0)
        continue;

      auto *group = allocno->group ();
      if (!group->has_flexible_stride)
        continue;

      bool make_strided = (group->strided_polarity
                           ? (consecutive_pref * group->strided_polarity) < 0
                           : group->consecutive_pref < 0);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Allocno [%d:%d]: strided polarity %d,"
                 " consecutive pref %d, %s\n",
                 allocno->id, allocno->id + group->size - 1,
                 group->strided_polarity, group->consecutive_pref,
                 make_strided ? "making strided" : "keeping consecutive");
      if (!make_strided)
        continue;

      // 2-register groups have a stride of 8 FPRs and must start in
      // registers matching the mask 0xff00ff.  4-register groups have a
      // stride of 4 FPRs and must start in registers matching the mask
      // 0xf000f.
      group->stride = group->size == 2 ? 8 : 4;
      gcc_checking_assert (group->fpr_candidates
                           == (group->size == 2 ? 0x55555555 : 0x11111111));
      group->fpr_candidates = (group->size == 2 ? 0xff00ff : 0xf000f);
    }
}
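
// For reference: the SME2 strided LD1 and ST1 forms access two-vector
// tuples { Zt, Zt+8 } with t in {0-7, 16-23} and four-vector tuples
// { Zt, Zt+4, Zt+8, Zt+12 } with t in {0-3, 16-19}, which is where the
// strides and candidate masks chosen above come from.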

// Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=>
// result that puts allocnos in order of increasing FIELD.
template<unsigned int early_ra::allocno_info::*field>
int
early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
{
  auto *allocno1 = *(allocno_info *const *) allocno1_ptr;
  auto *allocno2 = *(allocno_info *const *) allocno2_ptr;

  if (allocno1->*field != allocno2->*field)
    return allocno1->*field < allocno2->*field ? -1 : 1;
  return (allocno1->id < allocno2->id ? -1
          : allocno1->id == allocno2->id ? 0 : 1);
}

// Return true if we should consider chaining ALLOCNO1 onto the head
// of ALLOCNO2.  STRICTNESS says whether we should take copy-elision
// heuristics into account, or whether we should just consider things
// that matter for correctness.
//
// This is just a local test of the two allocnos; it doesn't guarantee
// that chaining them would give a self-consistent system.
bool
early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2,
                              test_strictness strictness)
{
  if (allocno2->is_shared ())
    return false;

  while (allocno1->is_equiv)
    allocno1 = m_allocnos[allocno1->related_allocno];

  if (allocno2->start_point >= allocno1->end_point
      && !allocno2->is_equiv_to (allocno1->id))
    return false;

  if (allocno1->is_earlyclobbered
      && allocno1->end_point == allocno2->start_point + 1)
    return false;

  if (strictness == ALL_REASONS && allocno2->is_copy_dest)
    {
      if (allocno1->copy_dest != allocno2->id)
        return false;
      if (allocno2->is_strong_copy_dest && !allocno1->is_strong_copy_src)
        return false;
    }
  return true;
}

// We're trying to chain allocno ALLOCNO1 to a later allocno.
// Rate how good a choice ALLOCNO2 would be, with higher being better.
int
early_ra::rate_chain (allocno_info *allocno1, allocno_info *allocno2)
{
  int score = 0;
  if (allocno2->is_strong_copy_dest)
    score += 256;
  else if (allocno2->is_copy_dest)
    score += 128;

  // Prefer well-aligned matches.
  auto *group1 = allocno1->group ();
  auto *group2 = allocno2->group ();
  if (group1->stride == 1 && group2->stride == 1)
    {
      unsigned int min_size = std::min (group1->color_rep ()->size,
                                        group2->color_rep ()->size);
      if ((group1->color_rep_offset + allocno1->offset) % min_size
          == (group2->color_rep_offset + allocno2->offset) % min_size)
        score += min_size;
      else
        score -= min_size;
    }
  return score;
}

// Sort the chain_candidate_infos at ARG1 and ARG2 in order of decreasing
// score.
int
early_ra::cmp_chain_candidates (const void *arg1, const void *arg2)
{
  auto &candidate1 = *(const chain_candidate_info *) arg1;
  auto &candidate2 = *(const chain_candidate_info *) arg2;
  if (candidate1.score != candidate2.score)
    return candidate1.score > candidate2.score ? -1 : 1;

  // Prefer to increase the gap between uses of the allocated register,
  // to give the scheduler more freedom.
  auto *allocno1 = candidate1.allocno;
  auto *allocno2 = candidate2.allocno;
  if (allocno1->start_point != allocno2->start_point)
    return allocno1->start_point < allocno2->start_point ? -1 : 1;

  if (allocno1 != allocno2)
    return allocno1->id < allocno2->id ? -1 : 1;

  return 0;
}

// Join the chains of allocnos that start at HEADI1 and HEADI2.
// HEADI1 is either empty or a single allocno.
void
early_ra::chain_allocnos (unsigned int &headi1, unsigned int &headi2)
{
  if (headi1 == INVALID_ALLOCNO)
    headi1 = headi2;
  else if (headi2 == INVALID_ALLOCNO)
    headi2 = headi1;
  else
    {
      auto *head1 = m_allocnos[headi1];
      auto *head2 = m_allocnos[headi2];
      gcc_checking_assert (head1->chain_next == INVALID_ALLOCNO
                           && head1->chain_prev == INVALID_ALLOCNO
                           && head2->chain_prev == INVALID_ALLOCNO);

      if (head1->is_equiv
          && m_allocnos[head1->related_allocno]->copy_dest == headi2)
        {
          head1->is_copy_dest = head2->is_copy_dest;
          head1->is_strong_copy_dest = head2->is_strong_copy_dest;
          m_allocnos[head1->related_allocno]->copy_dest = headi1;
        }
      head1->chain_next = headi2;
      head2->chain_prev = headi1;

      headi2 = headi1;
    }
}

// Add GROUP2's FPR information to GROUP1's, given that GROUP2 starts
// OFFSET allocnos into GROUP1.
void
early_ra::merge_fpr_info (allocno_group_info *group1,
                          allocno_group_info *group2,
                          unsigned int offset)
{
  group1->fpr_size = std::max (group1->fpr_size, group2->fpr_size);
  group1->fpr_candidates &= (group2->fpr_candidates
                             >> (offset * group1->stride));
}

// Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
// ends up being at allocno offset REP_OFFSET from the start of REP.
void
early_ra::set_single_color_rep (allocno_info *allocno, allocno_group_info *rep,
                                unsigned int rep_offset)
{
  auto *group = allocno->group ();
  if (group->m_color_rep == rep)
    return;

  group->m_color_rep = rep;
  gcc_checking_assert (multiple_p (group->stride, rep->stride));
  unsigned int factor = group->stride / rep->stride;
  gcc_checking_assert (rep_offset >= allocno->offset * factor);
  group->color_rep_offset = rep_offset - allocno->offset * factor;
  merge_fpr_info (rep, group, group->color_rep_offset);
}

// REP1 and REP2 are color representatives.  Change REP1's color representative
// to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2.
void
early_ra::set_color_rep (allocno_group_info *rep1, allocno_group_info *rep2,
                         unsigned int rep2_offset)
{
  gcc_checking_assert (rep1 != rep2
                       && rep2->m_color_rep == rep2
                       && multiple_p (rep1->stride, rep2->stride));

  auto heads1 = rep1->chain_heads ();
  auto heads2 = rep2->chain_heads ();
  for (unsigned int i1 = 0; i1 < heads1.size (); ++i1)
    if (heads1[i1] != INVALID_ALLOCNO)
      {
        unsigned int i2 = rep2_offset + i1 * rep1->stride / rep2->stride;
        if (heads2[i2] == INVALID_ALLOCNO)
          heads2[i2] = heads1[i1];
        else
          gcc_checking_assert (heads2[i2] == heads1[i1]);
        set_single_color_rep (m_allocnos[heads1[i1]], rep2, i2);
      }
}

// Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2.
// Return true on success.
bool
early_ra::try_to_chain_allocnos (allocno_info *allocno1,
                                 allocno_info *allocno2)
{
  auto *group1 = allocno1->group ()->color_rep ();
  auto *group2 = allocno2->group ()->color_rep ();

  // Avoid trying to tie different subgroups of the same group.  This can
  // happen if the parts of a register are defined and used piecemeal.
  if (group1 == group2)
    return false;

  // The stride (in FPRs) between allocnos of each color representative.
  auto fpr_stride1 = group1->stride;
  auto fpr_stride2 = group2->stride;

  // The offset (in FPRs) of each allocno group from its color representative.
  auto fpr_offset1 = allocno1->group ()->color_rep_offset * fpr_stride1;
  auto fpr_offset2 = allocno2->group ()->color_rep_offset * fpr_stride2;

  // The offset (in FPRs) of each allocno from its color representative.
  fpr_offset1 += allocno1->offset * allocno1->group ()->stride;
  fpr_offset2 += allocno2->offset * allocno2->group ()->stride;

  // The FPR overlap is in multiples of the larger stride.
  auto max_fpr_stride = std::max (fpr_stride1, fpr_stride2);
  auto min_fpr_offset = std::min (fpr_offset1, fpr_offset2);
  auto fpr_overlap_offset = ROUND_DOWN (min_fpr_offset, max_fpr_stride);

  // The offset (in FPRs) of the start of the overlapping region from
  // each color representative.
  fpr_offset1 -= fpr_overlap_offset;
  fpr_offset2 -= fpr_overlap_offset;

  // The number of FPRs in each color representative after the start
  // of the overlapping region.
  auto fpr_after1 = (group1->size - 1) * fpr_stride1 - fpr_offset1;
  auto fpr_after2 = (group2->size - 1) * fpr_stride2 - fpr_offset2;

  auto min_fpr_after = std::min (fpr_after1, fpr_after2);

  // The number of overlapping allocnos.
  auto allocno_overlap_size = min_fpr_after / max_fpr_stride + 1;

  // The offset (in allocnos) of the overlapping region from the start
  // of each color representative.
  auto allocno_offset1 = fpr_offset1 / fpr_stride1;
  auto allocno_offset2 = fpr_offset2 / fpr_stride2;

  // The stride (in allocnos) between overlapping allocnos.
  auto allocno_stride1 = max_fpr_stride / fpr_stride1;
  auto allocno_stride2 = max_fpr_stride / fpr_stride2;

  // Reject combinations that are impossible to allocate.
  auto fprs1 = group1->fpr_candidates;
  auto fprs2 = group2->fpr_candidates;
  if (fpr_offset1 > fpr_offset2)
    fprs2 >>= (fpr_offset1 - fpr_offset2);
  else
    fprs1 >>= (fpr_offset2 - fpr_offset1);
  if ((fprs1 & fprs2) == 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " - cannot chain %d->%d, no FPRs in common"
                 " (%08x@%d and %08x@%d)\n", allocno1->id, allocno2->id,
                 group1->fpr_candidates, fpr_offset1,
                 group2->fpr_candidates, fpr_offset2);
      return false;
    }

  // Check whether the chain can be formed.
  auto heads1 = group1->chain_heads ();
  auto heads2 = group2->chain_heads ();
  for (unsigned int i = 0; i < allocno_overlap_size; ++i)
    {
      auto headi1 = heads1[allocno_offset1 + i * allocno_stride1];
      auto headi2 = heads2[allocno_offset2 + i * allocno_stride2];
      if (headi1 != INVALID_ALLOCNO && headi2 != INVALID_ALLOCNO)
        {
          auto *head1 = m_allocnos[headi1];
          auto *head2 = m_allocnos[headi2];
          if (head1->chain_next != INVALID_ALLOCNO)
            return false;
          if (!is_chain_candidate (head1, head2, CORRECTNESS_ONLY))
            return false;
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - chaining allocnos [");
      for (unsigned int i = 0; i < allocno_overlap_size; ++i)
        fprintf (dump_file, "%s%d", i ? "," : "",
                 heads1[allocno_offset1 + i * allocno_stride1]);
      fprintf (dump_file, "] and [");
      for (unsigned int i = 0; i < allocno_overlap_size; ++i)
        fprintf (dump_file, "%s%d", i ? "," : "",
                 heads2[allocno_offset2 + i * allocno_stride2]);
      fprintf (dump_file, "]\n");
    }

  // Chain the allocnos, updating the chain heads.
  for (unsigned int i = 0; i < allocno_overlap_size; ++i)
    chain_allocnos (heads1[allocno_offset1 + i * allocno_stride1],
                    heads2[allocno_offset2 + i * allocno_stride2]);

  // Pick a color representative for the merged groups.
  allocno_group_info *new_rep;
  if (allocno_offset1 == 0
      && group1->size == allocno_overlap_size * allocno_stride1
      && multiple_p (fpr_stride1, fpr_stride2))
    {
      // The first group fits within the second.
      set_color_rep (group1, group2, allocno_offset2);
      new_rep = group2;
    }
  else if (allocno_offset2 == 0
           && group2->size == allocno_overlap_size * allocno_stride2
           && multiple_p (fpr_stride2, fpr_stride1))
    {
      // The second group fits within the first.
      set_color_rep (group2, group1, allocno_offset1);
      new_rep = group1;
    }
  else
    {
      // We need a new group that is big enough to span both groups.
      // The new group always has an FPR stride of 1.
      auto max_fpr_offset = std::max (fpr_offset1, fpr_offset2);
      auto max_fpr_after = std::max (fpr_after1, fpr_after2);
      auto new_size = max_fpr_offset + max_fpr_after + 1;
      new_rep = create_allocno_group (INVALID_REGNUM, new_size);

      set_color_rep (group1, new_rep, max_fpr_offset - fpr_offset1);
      set_color_rep (group2, new_rep, max_fpr_offset - fpr_offset2);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - new frontier [");
      auto new_heads = new_rep->chain_heads ();
      for (unsigned int i = 0; i < new_heads.size (); ++i)
        {
          if (i)
            fprintf (dump_file, ",");
          if (new_heads[i] == INVALID_ALLOCNO)
            fprintf (dump_file, "-");
          else
            fprintf (dump_file, "%d", new_heads[i]);
        }
      fprintf (dump_file, "]\n");
    }

  return true;
}

// Create a color_info for color representative GROUP.
void
early_ra::create_color (allocno_group_info *group)
{
  auto *color = region_allocate<color_info> ();
  color->id = m_colors.length ();
  color->hard_regno = FIRST_PSEUDO_REGISTER;
  color->group = group;

  gcc_checking_assert (group->m_color_rep == group);
  group->has_color = true;
  group->color = m_colors.length ();

  m_colors.safe_push (color);
}

// Form allocnos into chains.  Create colors for each resulting clique.
void
early_ra::form_chains ()
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nChaining allocnos:\n");

  // Perform (modified) interval graph coloring.  First sort by
  // increasing start point.
  m_sorted_allocnos.reserve (m_allocnos.length ());
  m_sorted_allocnos.splice (m_allocnos);
  m_sorted_allocnos.qsort (cmp_increasing<&allocno_info::start_point>);

  // During this phase, color representatives are only correct for
  // unprocessed allocno groups (where the color representative is
  // the group itself) and for groups that contain a current chain head.
  unsigned int ti = 0;
  auto_vec<chain_candidate_info> candidates;
  for (unsigned int hi = 0; hi < m_sorted_allocnos.length (); ++hi)
    {
      auto *allocno1 = m_sorted_allocnos[hi];
      if (allocno1->chain_next != INVALID_ALLOCNO)
        continue;

      // Record conflicts with direct uses of FPR hard registers.
      auto *group1 = allocno1->group ();
      for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
        if (fpr_conflicts_with_allocno_p (fpr, allocno1))
          group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));

      // Record conflicts due to partially call-clobbered registers.
      // (Full clobbers are handled by the previous loop.)
      for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
        if (call_in_range_p (abi_id, allocno1->start_point,
                             allocno1->end_point))
          {
            auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
            group1->fpr_candidates &= ~fprs >> allocno1->offset;
          }

      if (allocno1->is_shared ())
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " Allocno %d shares the same hard register"
                     " as allocno %d\n", allocno1->id,
                     allocno1->related_allocno);
          auto *allocno2 = m_allocnos[allocno1->related_allocno];
          merge_fpr_info (allocno2->group (), group1, allocno2->offset);
          m_shared_allocnos.safe_push (allocno1);
          continue;
        }

      // Find earlier allocnos (in processing order) that could be chained
      // to this one.
      candidates.truncate (0);
      for (unsigned int sci = ti; sci < hi; ++sci)
        {
          auto *allocno2 = m_sorted_allocnos[sci];
          if (allocno2->chain_prev == INVALID_ALLOCNO)
            {
              if (!is_chain_candidate (allocno1, allocno2, ALL_REASONS))
                continue;
              chain_candidate_info candidate;
              candidate.allocno = allocno2;
              candidate.score = rate_chain (allocno1, allocno2);
              candidates.safe_push (candidate);
            }
          else if (sci == ti)
            ++ti;
        }

      // Sort the candidates by decreasing score.
      candidates.qsort (cmp_chain_candidates);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " Chain candidates for %d:", allocno1->id);
          for (auto &candidate : candidates)
            fprintf (dump_file, " %d(%d)", candidate.allocno->id,
                     candidate.score);
          fprintf (dump_file, "\n");
        }

      // Pick the first candidate that works.
      for (auto &candidate : candidates)
        if (try_to_chain_allocnos (allocno1, candidate.allocno))
          break;
    }

  // Create color_infos for each group.  Make sure that each group's
  // color representative is up to date.
  for (unsigned int hi = m_sorted_allocnos.length (); hi-- > 0; )
    {
      auto *allocno = m_sorted_allocnos[hi];
      if (allocno->is_shared ())
        continue;

      auto *rep = allocno->group ()->color_rep ();
      if (rep->has_color)
        continue;

      create_color (rep);
      auto heads = rep->chain_heads ();
      for (unsigned int i = 0; i < heads.size (); ++i)
        {
          unsigned int ai = heads[i];
          while (ai != INVALID_ALLOCNO)
            {
              allocno = m_allocnos[ai];
              set_single_color_rep (allocno, rep, i * rep->stride);
              ai = allocno->chain_next;
            }
        }
    }
}

// Return true if the given FPR (starting at 0) conflicts with allocno
// ALLOCNO.
bool
early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr,
                                        allocno_info *allocno)
{
  auto &ranges = m_fpr_ranges[fpr];
  unsigned int start_i = 0;
  unsigned int end_i = ranges.length ();
  while (start_i < end_i)
    {
      unsigned int mid_i = (start_i + end_i) / 2;
      auto &range = ranges[mid_i];
      if (allocno->end_point > range.start_point)
        start_i = mid_i + 1;
      else if (allocno->start_point < range.end_point)
        end_i = mid_i;
      else
        {
          if (range.allocno != allocno->id)
            return true;
          // The FPR is equivalent to ALLOCNO for this particular range.
          // See whether ALLOCNO conflicts with a neighboring range.
          if (mid_i > 0
              && ranges[mid_i - 1].start_point >= allocno->end_point)
            return true;
          if (mid_i + 1 < ranges.length ()
              && ranges[mid_i + 1].end_point <= allocno->start_point)
            return true;
          return false;
        }
    }
  return false;
}
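
// Reminder: program points are numbered in reverse, so each range in
// M_FPR_RANGES has START_POINT >= END_POINT, and the ranges are stored
// with the latest (lowest-numbered) range first.  The binary search
// above relies on that ordering.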

// Return true if there is a call with ABI identifier ABI_ID in the inclusive
// program point range [START_POINT, END_POINT].
bool
early_ra::call_in_range_p (unsigned int abi_id, unsigned int start_point,
                           unsigned int end_point)
{
  auto &points = m_call_points[abi_id];
  unsigned int start_i = 0;
  unsigned int end_i = points.length ();
  while (start_i < end_i)
    {
      unsigned int mid_i = (start_i + end_i) / 2;
      auto point = points[mid_i];
      if (end_point > point)
        start_i = mid_i + 1;
      else if (start_point < point)
        end_i = mid_i;
      else
        return true;
    }
  return false;
}

// Return the set of FPRs for which a value of size SIZE will be clobbered
// by a call to a function with ABI identifier ABI_ID, but would not be
// for some smaller size.  The set therefore excludes FPRs that are
// fully-clobbered, like V0 in the base ABI.
unsigned int
early_ra::partial_fpr_clobbers (unsigned int abi_id, fpr_size_info size)
{
  auto &abi = function_abis[abi_id];
  unsigned int clobbers = 0;
  machine_mode mode = (size == FPR_D ? V8QImode
                       : size == FPR_Q ? V16QImode : VNx16QImode);
  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
    if (!abi.clobbers_full_reg_p (regno)
        && abi.clobbers_reg_p (mode, regno))
      clobbers |= 1U << (regno - V0_REGNUM);
  return clobbers;
}
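
// For example, in the base AAPCS64 ABI, V8-V15 preserve only their low
// 64 bits across calls.  FPR_Q and FPR_SVE values in those registers are
// therefore partially clobbered, giving the mask 0xff00, whereas FPR_D
// values in them survive.  V0-V7 and V16-V31 are fully clobbered and so
// are never included in the result.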

// Process copies between pseudo registers and hard registers and update
// the FPR preferences for the associated colors.
void
early_ra::process_copies ()
{
  for (auto &copy : m_allocno_copies)
    {
      auto *allocno = m_allocnos[copy.allocno];
      auto *group = allocno->group ();
      auto offset = group->color_rep_offset + allocno->offset;
      if (offset > copy.fpr)
        continue;

      unsigned int fpr = copy.fpr - offset;
      auto *color = m_colors[group->color_rep ()->color];
      color->fpr_preferences[fpr] = MIN (color->fpr_preferences[fpr]
                                         + copy.weight, 127);
      color->num_fpr_preferences += copy.weight;
    }
}

// Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
// result that puts colors in allocation order.
int
early_ra::cmp_allocation_order (const void *color1_ptr, const void *color2_ptr)
{
  auto *color1 = *(color_info *const *) color1_ptr;
  auto *color2 = *(color_info *const *) color2_ptr;

  // Allocate bigger groups before smaller groups.
  if (color1->group->size != color2->group->size)
    return color1->group->size > color2->group->size ? -1 : 1;

  // Allocate groups with stronger FPR preferences before groups with weaker
  // preferences.
  if (color1->num_fpr_preferences != color2->num_fpr_preferences)
    return color1->num_fpr_preferences > color2->num_fpr_preferences ? -1 : 1;

  return (color1->id < color2->id ? -1
          : color1->id == color2->id ? 0 : 1);
}

// Allocate a register to each color.  If we run out of registers,
// give up on doing a full allocation of the FPR-based pseudos in the
// region.
void
early_ra::allocate_colors ()
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nAllocating registers:\n");

  auto_vec<color_info *> sorted_colors;
  sorted_colors.safe_splice (m_colors);
  sorted_colors.qsort (cmp_allocation_order);

  for (unsigned int i = 0; i < 32; ++i)
    if (!crtl->abi->clobbers_full_reg_p (V0_REGNUM + i))
      m_call_preserved_fprs |= 1U << i;

  for (auto *color : sorted_colors)
    {
      unsigned int candidates = color->group->fpr_candidates;
      for (unsigned int i = 0; i < color->group->size; ++i)
        candidates &= ~(m_allocated_fprs >> i);
      unsigned int best = INVALID_REGNUM;
      int best_weight = 0;
      unsigned int best_recency = 0;
      for (unsigned int fpr = 0; fpr <= 32U - color->group->size; ++fpr)
        {
          if ((candidates & (1U << fpr)) == 0)
            continue;
          int weight = color->fpr_preferences[fpr];
          unsigned int recency = 0;
          // Account for registers that the current function must preserve.
          for (unsigned int i = 0; i < color->group->size; ++i)
            {
              if (m_call_preserved_fprs & (1U << (fpr + i)))
                weight -= 1;
              recency = MAX (recency, m_fpr_recency[fpr + i]);
            }
          // Prefer higher-numbered registers in the event of a tie.
          // This should tend to keep lower-numbered registers free
          // for allocnos that require V0-V7 or V0-V15.
          if (best == INVALID_REGNUM
              || best_weight < weight
              || (best_weight == weight && recency <= best_recency))
            {
              best = fpr;
              best_weight = weight;
              best_recency = recency;
            }
        }

      if (best == INVALID_REGNUM)
        {
          m_allocation_successful = false;
          return;
        }

      color->hard_regno = best + V0_REGNUM;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " Allocating [v%d:v%d] to color %d\n",
                 best, best + color->group->size - 1, color->id);
      m_allocated_fprs |= ((1U << color->group->size) - 1) << best;
    }
}
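
// Note the tie-breaking above: when two candidate FPRs have equal
// weight, the "recency <= best_recency" test keeps the register that
// was used by the least recent region, and otherwise the ascending scan
// makes the higher-numbered register win.  This tends to keep V0-V7
// free for operands whose constraints require the low registers.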

// See if ALLOCNO ends a subchain of single registers that can be split
// off without affecting the rest of the chain, and without introducing
// any moves.  Return the start of the chain if so (which might be ALLOCNO
// itself), otherwise return null.
early_ra::allocno_info *
early_ra::find_independent_subchain (allocno_info *allocno)
{
  // Make sure ALLOCNO ends a natural subchain.
  if (auto *next_allocno = chain_next (allocno))
    if (next_allocno->start_point + 1 >= allocno->end_point)
      return nullptr;

  // Check the allocnos in the purported subchain and find the other end.
  for (;;)
    {
      auto *group = allocno->group ();
      if (group->m_color_rep == group)
        return nullptr;
      if (group->size != 1)
        return nullptr;

      auto *prev_allocno = chain_prev (allocno);
      if (!prev_allocno || allocno->start_point + 1 < prev_allocno->end_point)
        return allocno;

      allocno = prev_allocno;
    }
}
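
// Since program points decrease as execution moves forwards, the first
// test above rejects ALLOCNO if its successor in the chain becomes live
// as soon as ALLOCNO dies; splitting the chain there would lose a tie
// that avoids a move.  Conversely, a gap between an allocno's start and
// its predecessor's death marks a point at which the chain can be split
// off cheaply.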

// Search the colors starting at index FIRST_COLOR whose FPRs do not belong
// to FPR_CONFLICTS.  Return the first such color that has no group.  If all
// such colors have groups, instead return the color with the latest
// (smallest) start point.
early_ra::color_info *
early_ra::find_oldest_color (unsigned int first_color,
                             unsigned int fpr_conflicts)
{
  color_info *best = nullptr;
  unsigned int best_start_point = ~0U;
  unsigned int best_recency = 0;
  for (unsigned int ci = first_color; ci < m_colors.length (); ++ci)
    {
      auto *color = m_colors[ci];
      unsigned int fpr = color->hard_regno - V0_REGNUM;
      if (fpr_conflicts & (1U << fpr))
        continue;
      unsigned int start_point = 0;
      if (color->group)
        {
          auto chain_head = color->group->chain_heads ()[0];
          start_point = m_allocnos[chain_head]->start_point;
        }
      unsigned int recency = m_fpr_recency[fpr];
      if (!best
          || best_start_point > start_point
          || (best_start_point == start_point && recency < best_recency))
        {
          best = color;
          best_start_point = start_point;
          best_recency = recency;
        }
    }
  return best;
}

// If there are some spare FPRs that can be reused without introducing saves,
// restores, or moves, use them to "broaden" the allocation, in order to give
// the scheduler more freedom.  This is particularly useful for forming LDPs
// and STPs.
void
early_ra::broaden_colors ()
{
  // Create dummy colors for every leftover FPR that can be used cheaply.
  unsigned int first_color = m_colors.length ();
  for (unsigned int fpr = 0; fpr < 32; ++fpr)
    if (((m_allocated_fprs | m_call_preserved_fprs) & (1U << fpr)) == 0)
      {
        auto *color = region_allocate<color_info> ();
        color->id = m_colors.length ();
        color->hard_regno = V0_REGNUM + fpr;
        color->group = nullptr;
        m_colors.safe_push (color);
      }

  // Exit early if there are no spare FPRs.
  if (first_color == m_colors.length ())
    return;

  // Go through the allocnos in order, seeing if there is a subchain of
  // single-FPR allocnos that can be split off from the containing clique.
  // Allocate such subchains to the new colors on an oldest-first basis.
  for (auto *allocno : m_sorted_allocnos)
    if (auto *start_allocno = find_independent_subchain (allocno))
      {
        unsigned int fpr_conflicts = 0;
        auto *member = allocno;
        for (;;)
          {
            fpr_conflicts |= ~member->group ()->fpr_candidates;
            if (member == start_allocno)
              break;
            member = m_allocnos[member->chain_prev];
          }

        auto *color = find_oldest_color (first_color, fpr_conflicts);
        if (!color)
          continue;

        if (!color->group)
          {
            auto *group = allocno->group ();
            color->group = group;
            group->color = color->id;
            group->chain_heads ()[0] = INVALID_ALLOCNO;
          }
        else
          {
            auto chain_head = color->group->chain_heads ()[0];
            auto start_point = m_allocnos[chain_head]->start_point;
            if (start_point >= allocno->end_point)
              // Allocating to COLOR isn't viable, and it was the best
              // option available.
              continue;

            auto *next_allocno = chain_next (allocno);
            if (!next_allocno || next_allocno->start_point <= start_point)
              // The current allocation gives at least as much scheduling
              // freedom as COLOR would.
              continue;
          }

        // Unlink the chain.
        if (auto *next_allocno = chain_next (allocno))
          next_allocno->chain_prev = start_allocno->chain_prev;
        if (auto *prev_allocno = chain_prev (start_allocno))
          prev_allocno->chain_next = allocno->chain_next;

        // Make the subchain use COLOR.
        allocno->chain_next = color->group->chain_heads ()[0];
        if (dump_file && (dump_flags & TDF_DETAILS))
          fprintf (dump_file, "Moving to optional color %d (register %s):",
                   color->id, reg_names[color->hard_regno]);
        for (;;)
          {
            auto *group = allocno->group ();
            if (dump_file && (dump_flags & TDF_DETAILS))
              fprintf (dump_file, " r%d", group->regno);
            group->m_color_rep = color->group;
            group->color_rep_offset = 0;
            if (allocno == start_allocno)
              break;
            allocno = m_allocnos[allocno->chain_prev];
          }
        if (dump_file && (dump_flags & TDF_DETAILS))
          fprintf (dump_file, "\n");
        color->group->chain_heads ()[0] = start_allocno->id;
      }
}

// Record the final choice of hard register for each allocno.
void
early_ra::finalize_allocation ()
{
  for (auto *color : m_colors)
    if (color->group)
      {
        unsigned int fpr = color->hard_regno - V0_REGNUM;
        for (unsigned int i = 0; i < color->group->size; ++i)
          m_fpr_recency[fpr + i] = m_current_region;
      }
  for (auto *allocno : m_allocnos)
    {
      if (allocno->is_shared ())
        continue;
      auto *group = allocno->group ();
      auto *rep = group->color_rep ();
      auto rep_regno = m_colors[rep->color]->hard_regno;
      auto group_regno = rep_regno + group->color_rep_offset;
      allocno->hard_regno = group_regno + allocno->offset * group->stride;
    }
  for (auto *allocno : m_shared_allocnos)
    allocno->hard_regno = m_allocnos[allocno->related_allocno]->hard_regno;
}

// Replace any allocno references in REFS with the allocated register.
// INSN is the instruction that contains REFS.
bool
early_ra::replace_regs (rtx_insn *insn, df_ref refs)
{
  bool changed = false;
  for (df_ref ref = refs; ref; ref = DF_REF_NEXT_LOC (ref))
    {
      auto range = get_allocno_subgroup (DF_REF_REG (ref));
      if (!range)
        continue;

      auto new_regno = range.allocno (0)->hard_regno;
      if (new_regno == FIRST_PSEUDO_REGISTER)
        {
          // Reset a debug instruction if, after DCE, the only remaining
          // references to a register are in such instructions.
          gcc_assert (DEBUG_INSN_P (insn));
          INSN_VAR_LOCATION_LOC (insn) = gen_rtx_UNKNOWN_VAR_LOC ();
          return true;
        }
      *DF_REF_LOC (ref) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref)), new_regno);
      changed = true;
    }
  return changed;
}

// Try to make INSN match its FPR-related constraints.  If this needs
// a source operand (SRC) to be copied to a destination operand (DEST)
// before INSN, add the associated (DEST, SRC) pairs to MOVES.
//
// Return -1 on failure, otherwise return a ?/!-style reject count.
// The reject count doesn't model the moves, just the internal alternative
// preferences.
int
early_ra::try_enforce_constraints (rtx_insn *insn,
                                   vec<std::pair<int, int>> &moves)
{
  if (!constrain_operands (0, get_preferred_alternatives (insn)))
    return -1;

  // Pick the alternative with the lowest cost.
  int best = -1;
  auto alts = get_preferred_alternatives (insn);
  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
    {
      if (!(alts & ALTERNATIVE_BIT (altno)))
        continue;

      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
      if (!likely_alternative_match_p (op_alt))
        continue;

      auto_vec<std::pair<int, int>, 4> new_moves;
      bool ok = true;
      for (int opno = 0; opno < recog_data.n_operands && ok; ++opno)
        {
          rtx op = recog_data.operand[opno];
          if (REG_P (op)
              && FP_REGNUM_P (REGNO (op))
              && op_alt[opno].matched >= 0)
            {
              rtx old_src = recog_data.operand[op_alt[opno].matched];
              if (!operands_match_p (op, old_src))
                {
                  // The copy would be emitted before INSN, so punt if it
                  // would clobber a value that INSN still needs.
                  for (int i = 0; i < recog_data.n_operands; ++i)
                    if (i != opno && i != op_alt[opno].matched)
                      {
                        rtx other = recog_data.operand[i];
                        if (reg_overlap_mentioned_p (op, other))
                          {
                            ok = false;
                            break;
                          }
                      }
                  if (ok)
                    new_moves.safe_push ({ opno, op_alt[opno].matched });
                }
            }
        }
      if (!ok)
        continue;

      int cost = count_rejects (op_alt) + new_moves.length () * 7;
      if (best < 0 || cost < best)
        {
          best = cost;
          moves.truncate (0);
          moves.safe_splice (new_moves);
        }
    }
  return best;
}

// Make INSN match its FPR-related constraints.
void
early_ra::enforce_constraints (rtx_insn *insn)
{
  extract_insn (insn);
  preprocess_constraints (insn);

  // First try with the operands as they are.
  auto_vec<std::pair<int, int>, 4> moves;
  int cost = try_enforce_constraints (insn, moves);

  // Next try taking advantage of commutativity.
  for (int opno = 0; opno < recog_data.n_operands - 1; ++opno)
    if (recog_data.constraints[opno][0] == '%')
      {
        std::swap (*recog_data.operand_loc[opno],
                   *recog_data.operand_loc[opno + 1]);
        std::swap (recog_data.operand[opno],
                   recog_data.operand[opno + 1]);
        auto_vec<std::pair<int, int>, 4> swapped_moves;
        int swapped_cost = try_enforce_constraints (insn, swapped_moves);
        if (swapped_cost >= 0 && (cost < 0 || swapped_cost < cost))
          {
            cost = swapped_cost;
            moves.truncate (0);
            moves.safe_splice (swapped_moves);
          }
        else
          {
            std::swap (*recog_data.operand_loc[opno],
                       *recog_data.operand_loc[opno + 1]);
            std::swap (recog_data.operand[opno],
                       recog_data.operand[opno + 1]);
          }
      }

  // The allocation should ensure that there is at least one valid combination.
  // It's too late to back out now if not.
  gcc_assert (cost >= 0);
  for (int i = 0; i < recog_data.n_dups; ++i)
    {
      int dup_of = recog_data.dup_num[i];
      rtx new_op = *recog_data.operand_loc[dup_of];
      if (new_op != recog_data.operand[dup_of])
        *recog_data.dup_loc[i] = copy_rtx (new_op);
    }
  for (auto move : moves)
    {
      int dest_opno = move.first;
      int src_opno = move.second;
      rtx dest = recog_data.operand[dest_opno];
      rtx old_src = recog_data.operand[src_opno];
      rtx new_src = lowpart_subreg (GET_MODE (old_src), dest, GET_MODE (dest));
      emit_insn_before (gen_move_insn (new_src, old_src), insn);
      *recog_data.operand_loc[src_opno] = new_src;
    }
}

// See whether INSN is an instruction that operates on multi-register vectors,
// and if we have decided to make it use strided rather than consecutive
// accesses.  Update the pattern and return true if so.
bool
early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
{
  if (!NONJUMP_INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  auto stride_type = get_attr_stride_type (insn);
  rtx pat = PATTERN (insn);
  rtx op;
  if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
    op = SET_DEST (pat);
  else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
    op = XVECEXP (SET_SRC (pat), 0, 1);
  else
    return false;

  auto range = get_allocno_subgroup (op);
  if (!range || range.group->stride == 1)
    return false;

  gcc_assert (range.start == 0 && range.count == range.group->size);
  auto elt_mode = GET_MODE_INNER (GET_MODE (op));
  auto single_mode = aarch64_full_sve_mode (elt_mode).require ();
  auto_vec<rtx, 4> regs;
  for (unsigned int i = 0; i < range.count; ++i)
    regs.quick_push (gen_rtx_REG (single_mode, range.allocno (i)->hard_regno));

  extract_insn (insn);
  if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
    {
      auto unspec = XINT (SET_SRC (pat), 1);
      if (range.count == 2)
        pat = gen_aarch64_strided2 (unspec, GET_MODE (op), regs[0], regs[1],
                                    recog_data.operand[1],
                                    recog_data.operand[2]);
      else
        pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
                                    regs[0], regs[1], regs[2], regs[3],
                                    recog_data.operand[1],
                                    recog_data.operand[2]);
    }
  else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
    {
      auto unspec = XINT (SET_SRC (pat), 1);
      if (range.count == 2)
        pat = gen_aarch64_strided2 (unspec, GET_MODE (op),
                                    recog_data.operand[0],
                                    recog_data.operand[2], regs[0], regs[1]);
      else
        pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
                                    recog_data.operand[0],
                                    recog_data.operand[2],
                                    regs[0], regs[1], regs[2], regs[3]);
      // Ensure correct sharing for the source memory.
      //
      // ??? Why doesn't the generator get this right?
      XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1)
        = *recog_data.dup_loc[0];
    }

  PATTERN (insn) = pat;
  INSN_CODE (insn) = -1;
  df_insn_rescan (insn);
  return true;
}
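
// For example, a four-vector consecutive LD1 whose destination group was
// given a stride of 4 by find_strided_accesses is rewritten here into the
// strided form that loads { Zt, Zt+4, Zt+8, Zt+12 }, with REGS holding
// the individually allocated single-vector registers.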

// We've successfully allocated the current region.  Apply the allocation
// to the instructions.
void
early_ra::apply_allocation ()
{
  for (auto *insn : m_dead_insns)
    set_insn_deleted (insn);

  rtx_insn *prev;
  for (auto insn_range : m_insn_ranges)
    for (rtx_insn *insn = insn_range.first;
         insn != insn_range.second;
         insn = prev)
      {
        prev = PREV_INSN (insn);
        if (!INSN_P (insn))
          continue;

        bool changed = maybe_convert_to_strided_access (insn);
        changed |= replace_regs (insn, DF_INSN_DEFS (insn));
        changed |= replace_regs (insn, DF_INSN_USES (insn));
        if (changed && NONDEBUG_INSN_P (insn))
          {
            if (GET_CODE (PATTERN (insn)) != USE
                && GET_CODE (PATTERN (insn)) != CLOBBER
                && !is_move_set (PATTERN (insn)))
              enforce_constraints (insn);

            // A REG_EQUIV note establishes an equivalence throughout
            // the function, but here we're reusing hard registers for
            // multiple pseudo registers.  We also no longer need REG_EQUIV
            // notes that record potential spill locations, since we've
            // allocated the pseudo register without spilling.
            rtx *ptr = &REG_NOTES (insn);
            while (*ptr)
              if (REG_NOTE_KIND (*ptr) == REG_EQUIV)
                *ptr = XEXP (*ptr, 1);
              else
                ptr = &XEXP (*ptr, 1);
          }
        changed |= replace_regs (insn, DF_INSN_EQ_USES (insn));
        if (changed)
          df_insn_rescan (insn);
      }

  for (auto *insn : m_dead_insns)
    delete_insn (insn);
}

// Try to allocate the current region.  Update the instructions if successful.
void
early_ra::process_region ()
{
  for (auto *allocno : m_allocnos)
    {
      allocno->chain_next = INVALID_ALLOCNO;
      allocno->chain_prev = INVALID_ALLOCNO;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      dump_fpr_ranges ();
      dump_copies ();
      dump_allocnos ();
    }

  find_strided_accesses ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_allocnos ();

  form_chains ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_allocnos ();

  process_copies ();
  allocate_colors ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_colors ();

  if (!m_allocation_successful)
    return;

  broaden_colors ();
  finalize_allocation ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nAllocation successful\nFinal allocation:\n");
      dump_allocnos ();
      dump_colors ();
    }

  apply_allocation ();
}

// Return true if INSN would become dead if we successfully allocate the
// current region.
bool
early_ra::is_dead_insn (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return false;

  rtx dest = SET_DEST (set);
  auto dest_range = get_allocno_subgroup (dest);
  if (!dest_range)
    return false;

  for (auto &allocno : dest_range.allocnos ())
    if (bitmap_bit_p (m_live_allocnos, allocno.id))
      return false;

  if (side_effects_p (set))
    return false;

  /* If we can't delete dead exceptions and the insn throws,
     then the instruction is not dead.  */
  if (!cfun->can_delete_dead_exceptions
      && !insn_nothrow_p (insn))
    return false;

  return true;
}

// Build up information about block BB.  IS_ISOLATED is true if the
// block is not part of a larger region.
void
early_ra::process_block (basic_block bb, bool is_isolated)
{
  m_current_bb = bb;
  m_current_point += 1;
  m_current_bb_point = m_current_point;

  // Process live-out FPRs.
  bitmap live_out = df_get_live_out (bb);
  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
    if (bitmap_bit_p (live_out, regno))
      record_fpr_use (regno);

  // Process live-out allocnos.  We don't track individual FPR liveness
  // across block boundaries, so we have to assume that the whole pseudo
  // register is live.
  unsigned int regno;
  bitmap_iterator bi;
  EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb), m_fpr_pseudos,
                            FIRST_PSEUDO_REGISTER, regno, bi)
    {
      auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
      for (auto &allocno : range.allocnos ())
        record_allocno_use (&allocno);
    }

  m_current_point += 1;

  record_artificial_refs (0);

  bool is_first = true;
  rtx_insn *start_insn = BB_END (bb);
  rtx_insn *insn;
  FOR_BB_INSNS_REVERSE (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
        continue;

      // CLOBBERs are used to prevent pseudos from being upwards exposed.
      // We can ignore them if allocation is successful.
      if (GET_CODE (PATTERN (insn)) == CLOBBER)
        {
          if (get_allocno_subgroup (XEXP (PATTERN (insn), 0)))
            m_dead_insns.safe_push (insn);
          continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (is_first)
            fprintf (dump_file, "\nBlock %d:\n", bb->index);
          fprintf (dump_file, "%6d:", m_current_point);
          pretty_printer rtl_slim_pp;
          rtl_slim_pp.set_output_stream (dump_file);
          print_insn (&rtl_slim_pp, insn, 1);
          pp_flush (&rtl_slim_pp);
          fprintf (dump_file, "\n");
        }
      is_first = false;

      if (is_dead_insn (insn))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "%14s -- dead\n", "");
          m_dead_insns.safe_push (insn);
        }
      else
        {
          record_insn_refs (insn);
          rtx pat = PATTERN (insn);
          if (is_move_set (pat))
            record_copy (SET_DEST (pat), SET_SRC (pat), true);
          else
            {
              extract_insn (insn);
              record_constraints (insn);
            }
        }

      // See whether we have a complete region, with no remaining live
      // allocnos.
      if (is_isolated
          && bitmap_empty_p (m_live_allocnos)
          && m_live_fprs == 0
          && m_allocation_successful
          && !m_allocnos.is_empty ())
        {
          rtx_insn *prev_insn = PREV_INSN (insn);
          m_insn_ranges.safe_push ({ start_insn, prev_insn });
          process_region ();
          start_new_region ();
          is_first = true;
          start_insn = prev_insn;
        }
    }
  m_insn_ranges.safe_push ({ start_insn, BB_HEAD (bb) });

  record_artificial_refs (DF_REF_AT_TOP);

  // Process live-in FPRs.
  bitmap live_in = df_get_live_in (bb);
  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
    if (bitmap_bit_p (live_in, regno)
        && (m_live_fprs & (1U << (regno - V0_REGNUM))))
      record_fpr_def (regno);

  // Process live-in allocnos.
  EXECUTE_IF_AND_IN_BITMAP (live_in, m_fpr_pseudos,
                            FIRST_PSEUDO_REGISTER, regno, bi)
    {
      auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
      for (auto &allocno : range.allocnos ())
        if (bitmap_bit_p (m_live_allocnos, allocno.id))
          record_allocno_def (&allocno);
    }

  m_current_point += 1;

  bitmap_clear (m_live_allocnos);
  m_live_fprs = 0;
}

// Divide the function into regions, such that no edges into or out
// of a region have live "FPR pseudos".
void
early_ra::process_blocks ()
{
  auto_sbitmap visited (last_basic_block_for_fn (m_fn));
  auto_sbitmap fpr_pseudos_live_out (last_basic_block_for_fn (m_fn));
  auto_sbitmap fpr_pseudos_live_in (last_basic_block_for_fn (m_fn));

  bitmap_clear (visited);
  bitmap_clear (fpr_pseudos_live_out);
  bitmap_clear (fpr_pseudos_live_in);

  // Record which blocks have live FPR pseudos on entry and exit.
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fn)
    {
      if (bitmap_intersect_p (df_get_live_out (bb), m_fpr_pseudos))
        bitmap_set_bit (fpr_pseudos_live_out, bb->index);
      if (bitmap_intersect_p (df_get_live_in (bb), m_fpr_pseudos))
        bitmap_set_bit (fpr_pseudos_live_in, bb->index);
    }

  // This is incremented by 1 at the start of each region.
  m_current_region = 0;
  memset (m_fpr_recency, 0, sizeof (m_fpr_recency));

  struct stack_node { edge_iterator ei; basic_block bb; };

  auto_vec<stack_node, 32> stack;
  auto_vec<basic_block, 32> region;

  // Go through the function in reverse postorder and process the region
  // containing each block.
  unsigned int n_blocks = df_get_n_blocks (DF_FORWARD);
  int *order = df_get_postorder (DF_FORWARD);
  for (unsigned int bbi = 0; bbi < n_blocks; ++bbi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, order[bbi]);
      if (bb->index < NUM_FIXED_BLOCKS)
        continue;

      if (!bitmap_set_bit (visited, bb->index))
        continue;

      // Process forward edges before backward edges (so push backward
      // edges first).  Build the region in an approximation of reverse
      // program order.
      if (bitmap_bit_p (fpr_pseudos_live_in, bb->index))
        stack.quick_push ({ ei_start (bb->preds), nullptr });
      if (bitmap_bit_p (fpr_pseudos_live_out, bb->index))
        stack.quick_push ({ ei_start (bb->succs), bb });
      else
        region.safe_push (bb);
      while (!stack.is_empty ())
        {
          auto &node = stack.last ();
          if (ei_end_p (node.ei))
            {
              if (node.bb)
                region.safe_push (node.bb);
              stack.pop ();
              continue;
            }
          edge e = ei_edge (node.ei);
          ei_next (&node.ei);
          if (node.bb)
            {
              // A forward edge from a node that has not yet been added
              // to the region.
              if (bitmap_bit_p (fpr_pseudos_live_in, e->dest->index)
                  && bitmap_set_bit (visited, e->dest->index))
                {
                  stack.safe_push ({ ei_start (e->dest->preds), nullptr });
                  if (bitmap_bit_p (fpr_pseudos_live_out, e->dest->index))
                    stack.safe_push ({ ei_start (e->dest->succs), e->dest });
                  else
                    region.safe_push (e->dest);
                }
            }
          else
            {
              // A backward edge from a node that has already been added
              // to the region.
              if (bitmap_bit_p (fpr_pseudos_live_out, e->src->index)
                  && bitmap_set_bit (visited, e->src->index))
                {
                  if (bitmap_bit_p (fpr_pseudos_live_in, e->src->index))
                    stack.safe_push ({ ei_start (e->src->preds), nullptr });
                  stack.safe_push ({ ei_start (e->src->succs), e->src });
                }
            }
        }

      m_current_point = 2;
      start_new_region ();

      if (region.is_empty ())
        process_block (bb, true);
      else
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "\nRegion (from %d):", bb->index);
              for (unsigned int j = 0; j < region.length (); ++j)
                fprintf (dump_file, " %d", region[j]->index);
              fprintf (dump_file, "\n");
            }
          for (unsigned int j = 0; j < region.length (); ++j)
            {
              basic_block bb = region[j];
              bool is_isolated
                = ((j == 0 && !bitmap_bit_p (fpr_pseudos_live_out, bb->index))
                   || (j == region.length () - 1
                       && !bitmap_bit_p (fpr_pseudos_live_in, bb->index)));
              process_block (bb, is_isolated);
            }
        }
      region.truncate (0);

      if (!m_allocnos.is_empty () && m_allocation_successful)
        process_region ();
    }
}

// Run the pass on the current function.
void
early_ra::execute ()
{
  df_analyze ();

  preprocess_insns ();
  propagate_pseudo_reg_info ();
  choose_fpr_pseudos ();
  if (bitmap_empty_p (m_fpr_pseudos))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_pseudo_regs ();

  process_blocks ();
  df_verify ();
}

class pass_early_ra : public rtl_opt_pass
{
public:
  pass_early_ra (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_early_ra, ctxt)
  {}

  // opt_pass methods:
  virtual bool gate (function *);
  virtual unsigned int execute (function *);
};

bool
pass_early_ra::gate (function *)
{
  // Require a vector ISA to be enabled.
  if (!TARGET_SIMD && !TARGET_SVE)
    return false;

  if (aarch64_early_ra == AARCH64_EARLY_RA_NONE)
    return false;

  if (aarch64_early_ra == AARCH64_EARLY_RA_STRIDED
      && !TARGET_STREAMING_SME2)
    return false;

  return true;
}

unsigned int
pass_early_ra::execute (function *fn)
{
  early_ra (fn).execute ();
  return 0;
}

} // end anonymous namespace

// Create a new instance of the pass.
rtl_opt_pass *
make_pass_aarch64_early_ra (gcc::context *ctxt)
{
  return new pass_early_ra (ctxt);
}