// Early register allocation pass.
// Copyright (C) 2023-2025 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
//
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// This pass implements a simple form of early register allocation.
// It is restricted to FP/SIMD registers, and it only allocates
// a region of FP/SIMD usage if it can do so without any spilling.
// It punts on anything too complicated, leaving it to the real
// register allocator.
//
// There are two main purposes:
//
// (1) The pass runs before scheduling.  It therefore has a chance to
//     bag a spill-free allocation, if there is one, before scheduling
//     moves things around.
//
// (2) The pass can make use of strided register operations, such as the
//     strided forms of LD1 and ST1 in SME2.
//
// The allocator works at the level of individual FPRs, rather than whole
// pseudo registers.  It is mostly intended to help optimize ACLE code.
//
// The pass is very simplistic.  There are many things that could be improved.
#define IN_TARGET_CODE 1

#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#define INCLUDE_ARRAY
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "df.h"
#include "rtl-ssa.h"
#include "tree-pass.h"
#include "target.h"
#include "expr.h"
#include "cfgrtl.h"
#include "print-rtl.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "reload.h"

template<typename T>
class simple_iterator : public wrapper_iterator<T>
{
public:
  using wrapper_iterator<T>::wrapper_iterator;

  simple_iterator &operator-- () { --this->m_contents; return *this; }
  simple_iterator operator-- (int) { return this->m_contents--; }
  simple_iterator &operator++ () { ++this->m_contents; return *this; }
  simple_iterator operator++ (int) { return this->m_contents++; }
};

using namespace rtl_ssa;

namespace {
const pass_data pass_data_early_ra =
{
  RTL_PASS, // type
  "early_ra", // name
  OPTGROUP_NONE, // optinfo_flags
  TV_NONE, // tv_id
  0, // properties_required
  0, // properties_provided
  0, // properties_destroyed
  0, // todo_flags_start
  TODO_df_finish, // todo_flags_finish
};

using allocno_iterator = simple_iterator<unsigned int>;

// Class that represents one run of the pass.
class early_ra
{
public:
  early_ra (function *fn);
  ~early_ra ();
  void execute ();

private:
  // Whether to test only things that are required for correctness,
  // or whether to take optimization heuristics into account as well.
  enum test_strictness { CORRECTNESS_ONLY, ALL_REASONS };

  static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
  using operand_mask = uint32_t;

  // Points in the function are represented using "program points".
  // The program points are allocated in reverse order, with smaller
  // numbers indicating later points.  These special values indicate
  // the start and end of a region.
  static constexpr unsigned int START_OF_REGION = ~0U;
  static constexpr unsigned int END_OF_REGION = 0U;
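
  // For example, if a region ends with instructions I1, I2 and I3
  // (in program order), the backwards walk sees I3 first, so I3 gets
  // a lower program point than I2, and I2 a lower point than I1.
  // Live ranges [START_POINT, END_POINT] therefore have
  // START_POINT >= END_POINT.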

  // An invalid allocno index, used to represent no allocno.
  static constexpr unsigned int INVALID_ALLOCNO = ~0U;

  // Enumerates the single FPR sizes that matter for register allocation.
  // Anything smaller than 64 bits is treated as FPR_D.
  enum fpr_size_info
  {
    FPR_D,
    FPR_Q,
    FPR_Z
  };

  // A live range for an FPR, containing program points [START_POINT,
  // END_POINT].  If ALLOCNO is not INVALID_ALLOCNO, the FPR is known
  // to be equal to ALLOCNO for the duration of the live range.
  struct fpr_range_info
  {
    unsigned int start_point;
    unsigned int end_point;
    unsigned int allocno;
  };

  // Flags used in pseudo_reg_info.
  //
  // Whether the pseudo register occurs in one instruction alternative that
  // matches (respectively) V0-V7, V0-V15, V0-V31 or a non-FP register.
  static constexpr unsigned int ALLOWS_FPR8 = 1U << 0;
  static constexpr unsigned int ALLOWS_FPR16 = 1U << 1;
  static constexpr unsigned int ALLOWS_FPR32 = 1U << 2;
  static constexpr unsigned int ALLOWS_NONFPR = 1U << 3;

  // Likewise whether the register occurs in an instruction that requires
  // the associated register type.
  static constexpr unsigned int NEEDS_FPR8 = 1U << 4;
  static constexpr unsigned int NEEDS_FPR16 = 1U << 5;
  static constexpr unsigned int NEEDS_FPR32 = 1U << 6;
  static constexpr unsigned int NEEDS_NONFPR = 1U << 7;

  // Whether the pseudo register is copied to or from a hard FP register.
  static constexpr unsigned int HAS_FPR_COPY = 1U << 8;

  // Whether the pseudo register is copied to or from a hard non-FP register.
  static constexpr unsigned int HAS_NONFPR_COPY = 1U << 9;

  // Whether the pseudo register is used as a multi-register vector operand
  // to an instruction that supports strided accesses, and whether it is used
  // as a multi-register vector operand in some other non-move instruction.
  static constexpr unsigned int HAS_FLEXIBLE_STRIDE = 1U << 10;
  static constexpr unsigned int HAS_FIXED_STRIDE = 1U << 11;

  // Whether we have decided not to allocate the pseudo register.
  static constexpr unsigned int IGNORE_REG = 1U << 12;

  // Flags that should be propagated across moves between pseudo registers.
  static constexpr unsigned int PSEUDO_COPY_FLAGS = ~(HAS_FLEXIBLE_STRIDE
						      | HAS_FIXED_STRIDE);
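
  // For example, if pseudo P1 has HAS_FPR_COPY set and is copied to
  // pseudo P2, propagate_pseudo_reg_info below may give P2 HAS_FPR_COPY
  // too, whereas the two stride flags above always stay specific to the
  // pseudo whose uses they describe.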

  // Information about a copy between two registers.
  struct reg_copy_info
  {
    // The two registers, in order.
    unsigned int regnos[2];

    // Index I gives the index of the next reg_copy_info involving REGNOS[I],
    // or 0 if none.
    unsigned int next_copies[2];
  };

  // Information about a pseudo register.
  struct pseudo_reg_info
  {
    // Flags describing how the register is used, defined above.
    unsigned int flags : 16;

    // The mode of the pseudo register, cached for convenience.
    machine_mode mode : 16;

    // The index of the first copy, or 0 if none.
    unsigned int first_copy;
  };

  // Information about a group of allocnos that have a fixed offset
  // relative to each other.  The allocnos in each group must be allocated
  // together.
  //
  // Allocnos that can share the same hard register are eventually
  // chained together.  These chains represent edges on a graph of
  // allocnos, such that two allocnos joined by an edge use the same FPR.
  // These chains are formed between individual allocnos rather than
  // whole groups, although the system is required to be self-consistent.
  // Each clique in the graph has at least one "full-width" allocno group
  // that has one allocno for every FPR that needs to be allocated to
  // the clique.
  //
  // One group of allocnos is chosen as the "color representative" of
  // each clique in the graph.  This group will be a full-width group.
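  //
  // For example, if a four-FPR group is copied to two two-FPR groups,
  // chaining each two-FPR group to the corresponding allocnos of the
  // four-FPR group puts all three groups in the same clique; the
  // four-FPR group is then a full-width group that can serve as the
  // clique's color representative.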
  struct allocno_info;
  struct allocno_group_info
  {
    array_slice<unsigned int> chain_heads ();
    array_slice<allocno_info> allocnos ();
    allocno_group_info *color_rep ();
    allocno_info *allocno (unsigned int);

    // The color representative of the containing clique.
    allocno_group_info *m_color_rep;

    // The pseudo register associated with this allocno, or INVALID_REGNUM
    // if none.
    unsigned int regno;

    // The offset of the first allocno (and thus this group) from the start
    // of color_rep.
    unsigned int color_rep_offset : 8;

    // The number of allocnos in the group, and thus the number of FPRs
    // that need to be allocated.
    unsigned int size : 8;

    // The gap between FPRs in the group.  This is normally 1, but can be
    // higher if we've decided to use strided multi-register accesses.
    unsigned int stride : 4;

    // Used temporarily while deciding which allocnos should have non-unit
    // strides; see find_strided_accesses for details.
    int consecutive_pref : 4;
    int strided_polarity : 2;

    // The largest size of FPR needed by references to the allocno group.
    fpr_size_info fpr_size : 2;

    // True if all non-move accesses can be converted to strided form.
    unsigned int has_flexible_stride : 1;

    // True if we've assigned a color index to this group.
    unsigned int has_color : 1;

    // The mask of FPRs that would make valid choices for the first allocno,
    // taking the requirements of all the allocnos in the group into account.
    unsigned int fpr_candidates;

    // The index of the color that has been assigned to the containing clique.
    unsigned int color;
  };

  // Represents a single FPR-sized quantity that needs to be allocated.
  // Each allocno is identified by index (for compactness).
  //
  // Quantities that span multiple FPRs are assigned groups of consecutive
  // allocnos.  Quantities that occupy a single FPR are assigned their own
  // group.
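  //
  // For example, a pseudo register whose mode occupies two FPRs (such as
  // a two-vector tuple) gets a group of two consecutive allocnos, while
  // a DFmode pseudo gets a single-allocno group of its own.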
  struct allocno_info
  {
    allocno_group_info *group ();
    bool is_shared ();
    bool is_equiv_to (unsigned int);

    // The allocno's unique identifier.
    unsigned int id;

    // The offset of this allocno into the containing group.
    unsigned int offset : 8;

    // The number of allocnos in the containing group.
    unsigned int group_size : 8;

    // If the allocno has an affinity with at least one hard register
    // (so that choosing that hard register would avoid a copy), this is
    // the number of one such hard register, otherwise it is
    // FIRST_PSEUDO_REGISTER.
    unsigned int hard_regno : 8;

    // Set to 1 if the allocno has a single definition or 2 if it has more.
    unsigned int num_defs : 2;

    // True if, at START_POINT, another allocno is copied to this one.
    // See callers of record_copy for what counts as a copy.
    unsigned int is_copy_dest : 1;

    // True if, at START_POINT, another allocno is copied to this one,
    // and if the allocnos at both ends of the copy chain have an affinity
    // with the same hard register.
    unsigned int is_strong_copy_dest : 1;

    // True if, at END_POINT, this allocno is copied to another one,
    // and both allocnos have an affinity with the same hard register.
    unsigned int is_strong_copy_src : 1;

    // True if the allocno is subject to an earlyclobber at END_POINT,
    // so that it cannot be tied to the destination of the instruction.
    unsigned int is_earlyclobbered : 1;

    // True if this allocno is known to be equivalent to related_allocno
    // for the whole of this allocno's lifetime.
    unsigned int is_equiv : 1;

    // The inclusive range of program points spanned by the allocno.
    // START_POINT >= END_POINT.
    unsigned int start_point;
    unsigned int end_point;

    // If, at END_POINT, this allocno is copied to another allocno, this
    // is the index of that allocno, otherwise it is INVALID_ALLOCNO.
    // See callers of record_copy for what counts as a copy.
    unsigned int copy_dest;

    // If this field is not INVALID_ALLOCNO, it indicates one of two things:
    //
    // - if is_equiv, this allocno is equivalent to related_allocno for
    //   the whole of this allocno's lifetime.
    //
    // - if !is_equiv, this allocno's live range is a subrange of
    //   related_allocno's and we have committed to making this allocno
    //   share whatever register related_allocno uses.
    unsigned int related_allocno;

    union
    {
      // The program point at which the allocno was last defined,
      // or START_OF_REGION if none.  This is only used temporarily
      // while recording allocnos; after that, chain_next below is
      // used instead.
      unsigned int last_def_point;

      // The next chained allocno in program order (i.e. at lower program
      // points), or INVALID_ALLOCNO if none.
      unsigned int chain_next;
    };

    union
    {
      // The program point before start_point at which the allocno was
      // last used, or END_OF_REGION if none.  This is only used temporarily
      // while recording allocnos; after that, chain_prev below is used
      // instead.
      unsigned int last_use_point;

      // The previous chained allocno in program order (i.e. at higher
      // program points), or INVALID_ALLOCNO if none.
      unsigned int chain_prev;
    };
  };

  // Information about a full allocno group or a subgroup of it.
  // The subgroup can be empty to indicate "none".
  struct allocno_subgroup
  {
    array_slice<allocno_info> allocnos ();
    allocno_info *allocno (unsigned int);

    // True if a subgroup is present.
    operator bool () const { return count; }

    // The containing group.
    allocno_group_info *group;

    // The offset of the subgroup from the start of GROUP.
    unsigned int start;

    // The number of allocnos in the subgroup.
    unsigned int count;
  };

  // Represents information about a copy between an allocno and an FPR.
  // This establishes an affinity between the allocno and the FPR.
  struct allocno_copy_info
  {
    // The allocno involved in the copy.
    unsigned int allocno;

    // The FPR involved in the copy, relative to V0_REGNUM.
    unsigned int fpr : 16;

    // A measure of how strong the affinity between the allocno and FPR is.
    unsigned int weight : 16;
  };

  // Information about a possible allocno chain.
  struct chain_candidate_info
  {
    // The candidate target allocno.
    allocno_info *allocno;

    // A rating of the candidate (higher is better).
    int score;
  };

  // Information about an allocno color.
  struct color_info
  {
    // The color's unique identifier.
    int id;

    // The allocated hard register, when known.
    unsigned int hard_regno;

    // The clique's representative group.
    allocno_group_info *group;

    // The number of FPR preferences recorded in fpr_preferences.
    unsigned int num_fpr_preferences;

    // Weights in favor of choosing each FPR as the first register for GROUP.
    int8_t fpr_preferences[32];
  };

  template<typename T, typename... Ts>
  T *region_allocate (Ts...);

  allocno_info *chain_prev (allocno_info *);
  allocno_info *chain_next (allocno_info *);

  void dump_pseudo_regs ();
  void dump_fpr_ranges ();
  void dump_copies ();
  void dump_allocnos ();
  void dump_colors ();

  iterator_range<allocno_iterator> get_group_allocnos (unsigned int);

  void preprocess_move (rtx, rtx);
  void process_pseudo_reg_constraints (rtx_insn *);
  void preprocess_insns ();

  int fpr_preference (unsigned int);
  void propagate_pseudo_reg_info ();

  void choose_fpr_pseudos ();

  void start_new_region ();

  template<typename T>
  void record_live_range_failure (T);
  template<typename T>
  void record_allocation_failure (T);

  allocno_group_info *create_allocno_group (unsigned int, unsigned int);
  allocno_subgroup get_allocno_subgroup (rtx);
  void record_fpr_use (unsigned int);
  void record_fpr_def (unsigned int);
  void record_allocno_use (allocno_info *);
  void record_allocno_def (allocno_info *);
  allocno_info *find_related_start (allocno_info *, allocno_info *, bool);
  void accumulate_defs (allocno_info *, allocno_info *);
  void record_copy (rtx, rtx, bool = false);
  void record_constraints (rtx_insn *);
  void record_artificial_refs (unsigned int);
  void record_insn_defs (rtx_insn *);
  void record_insn_call (rtx_call_insn *);
  void record_insn_uses (rtx_insn *);

  bool consider_strong_copy_src_chain (allocno_info *);
  int strided_polarity_pref (allocno_info *, allocno_info *);
  void find_strided_accesses ();

  template<unsigned int allocno_info::*field>
  static int cmp_increasing (const void *, const void *);
  bool is_chain_candidate (allocno_info *, allocno_info *, test_strictness);
  int rate_chain (allocno_info *, allocno_info *);
  static int cmp_chain_candidates (const void *, const void *);
  void chain_allocnos (unsigned int &, unsigned int &);
  void merge_fpr_info (allocno_group_info *, allocno_group_info *,
                       unsigned int);
  void set_single_color_rep (allocno_info *, allocno_group_info *,
                             unsigned int);
  void set_color_rep (allocno_group_info *, allocno_group_info *,
                      unsigned int);
  bool try_to_chain_allocnos (allocno_info *, allocno_info *);
  void create_color (allocno_group_info *);
  void form_chains ();

  bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info *);
  bool call_in_range_p (unsigned int, unsigned int, unsigned int);
  unsigned int partial_fpr_clobbers (unsigned int, fpr_size_info);

  void process_copies ();

  static int cmp_allocation_order (const void *, const void *);
  void allocate_colors ();
  allocno_info *find_independent_subchain (allocno_info *);
  color_info *find_oldest_color (unsigned int, unsigned int);
  void broaden_colors ();
  void finalize_allocation ();

  bool replace_regs (rtx_insn *, df_ref);
  int try_enforce_constraints (rtx_insn *, vec<std::pair<int, int>> &);
  void enforce_constraints (rtx_insn *);
  bool maybe_convert_to_strided_access (rtx_insn *);
  void apply_allocation ();

  void process_region ();
  bool is_dead_insn (rtx_insn *);
  bool could_split_region_here ();
  void process_block (basic_block, bool);
  void process_blocks ();

  // ----------------------------------------------------------------------

  // The function we're operating on.
  function *m_fn;

  // Information about each pseudo register, indexed by REGNO.
  auto_vec<pseudo_reg_info> m_pseudo_regs;

  // All recorded register copies.
  auto_vec<reg_copy_info> m_pseudo_reg_copies;

  // The set of pseudos that we've decided to allocate an FPR to.
  auto_bitmap m_fpr_pseudos;

  // ----------------------------------------------------------------------

  // An obstack for allocating information that is referenced by the member
  // variables below.
  obstack m_region_obstack;
  void *m_region_alloc_start;

  // ----------------------------------------------------------------------

  // The basic block that we're currently processing.
  basic_block m_current_bb;

  // The lowest-numbered program point in the current basic block.
  unsigned int m_current_bb_point;

  // The program point that we're currently processing (described above).
  unsigned int m_current_point;

  // The set of allocnos that are currently live.
  auto_bitmap m_live_allocnos;

  // The set of FPRs that are currently live.
  unsigned int m_live_fprs;

  // A unique one-based identifier for the current region.
  unsigned int m_current_region;

  // The region in which each FPR was last used, or 0 if none.
  unsigned int m_fpr_recency[32];

  // ----------------------------------------------------------------------

  // A mask of the FPRs that have already been allocated.
  unsigned int m_allocated_fprs;

  // A mask of the FPRs that must be at least partially preserved by the
  // current function.
  unsigned int m_call_preserved_fprs;

  // True if we have so far been able to track the live ranges of individual
  // allocnos.
  bool m_accurate_live_ranges;

  // True if we haven't yet failed to allocate the current region.
  bool m_allocation_successful;

  // A map from pseudo registers to the first allocno in their associated
  // allocno groups.
  hash_map<int_hash<unsigned int, INVALID_REGNUM>,
           allocno_group_info *> m_regno_to_group;

  // All recorded copies between allocnos and FPRs.
  auto_vec<allocno_copy_info> m_allocno_copies;

  // All allocnos, by index.
  auto_vec<allocno_info *> m_allocnos;

  // All allocnos, by increasing START_POINT.
  auto_vec<allocno_info *> m_sorted_allocnos;

  // Allocnos for which is_shared is true.
  auto_vec<allocno_info *> m_shared_allocnos;

  // All colors, by index.
  auto_vec<color_info *> m_colors;

  // The instruction ranges that make up the current region,
  // as half-open ranges [LAST, FIRST).
  auto_vec<std::pair<rtx_insn *, rtx_insn *>> m_insn_ranges;

  // The live ranges of each FPR, in order of increasing program point.
  auto_vec<fpr_range_info> m_fpr_ranges[32];

  // For each function call id, a list of program points at which a call
  // to such a function is made.  Each list is in order of increasing
  // program point.
  auto_vec<unsigned int> m_call_points[NUM_ABI_IDS];

  // A list of instructions that can be removed if allocation succeeds.
  auto_vec<rtx_insn *> m_dead_insns;
};

// True if PAT is something that would typically be treated as a move.
static inline bool
is_move_set (rtx pat)
{
  if (GET_CODE (pat) != SET)
    return false;

  rtx dest = SET_DEST (pat);
  if (SUBREG_P (dest))
    dest = SUBREG_REG (dest);
  if (!OBJECT_P (dest))
    return false;

  rtx src = SET_SRC (pat);
  if (SUBREG_P (src))
    src = SUBREG_REG (src);
  if (!OBJECT_P (src) && !CONSTANT_P (src))
    return false;

  return true;
}
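
// For example, (set (reg:V4SI 92) (reg:V4SI 93)) and
// (set (reg:SI 94) (const_int 1)) would both count as moves under this
// definition, whereas (set (reg:V4SI 92) (plus:V4SI ...)) would not,
// since a PLUS is neither an object nor a constant.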

// Return true if operand OP is likely to match OP_ALT after register
// allocation.
static bool
likely_operand_match_p (const operand_alternative &op_alt, rtx op)
{
  // Empty constraints match everything.
  const char *constraint = op_alt.constraint;
  if (constraint[0] == 0 || constraint[0] == ',')
    return true;

  for (;;)
    {
      char c = *constraint;
      int len = CONSTRAINT_LEN (c, constraint);
      if (c == 0 || c == ',')
        break;

      if (c == 'X')
        return true;

      auto cn = lookup_constraint (constraint);
      switch (get_constraint_type (cn))
        {
        case CT_REGISTER:
          if (REG_P (op) || SUBREG_P (op))
            return true;
          break;

        case CT_MEMORY:
        case CT_SPECIAL_MEMORY:
        case CT_RELAXED_MEMORY:
          if (MEM_P (op))
            return true;
          break;

        case CT_CONST_INT:
        case CT_ADDRESS:
        case CT_FIXED_FORM:
          if (constraint_satisfied_p (op, cn))
            return true;
          break;
        }

      constraint += len;
    }

  if (op_alt.matches >= 0)
    {
      rtx other = recog_data.operand[op_alt.matches];
      if ((REG_P (other) || SUBREG_P (other))
          && (REG_P (op) || SUBREG_P (op)))
        return true;
    }
  return false;
}

// Return true if the operands of the current instruction are likely to
// match OP_ALT.
static bool
likely_alternative_match_p (const operand_alternative *op_alt)
{
  for (int i = 0; i < recog_data.n_operands; ++i)
    if (!likely_operand_match_p (op_alt[i], recog_data.operand[i]))
      return false;
  return true;
}

// Return the sum of how disparaged OP_ALT is.
static int
count_rejects (const operand_alternative *op_alt)
{
  int reject = 0;
  for (int opno = 0; opno < recog_data.n_operands; ++opno)
    reject += op_alt[opno].reject;
  return reject;
}

// Allocate a T from the region obstack.
template<typename T, typename... Ts>
inline T *
early_ra::region_allocate (Ts... args)
{
  static_assert (std::is_trivially_destructible<T>::value,
                 "destructor won't be called");
  void *addr = obstack_alloc (&m_region_obstack, sizeof (T));
  return new (addr) T (std::forward<Ts> (args)...);
}
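
// For example, "region_allocate<color_info> ()" would carve a color_info
// out of m_region_obstack.  Nothing is freed individually; start_new_region
// and the destructor release the whole obstack at once, which is why only
// trivially destructible types are allowed here.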

early_ra::early_ra (function *fn) : m_fn (fn), m_live_fprs (0)
{
  gcc_obstack_init (&m_region_obstack);
  m_region_alloc_start = obstack_alloc (&m_region_obstack, 0);
  bitmap_tree_view (m_live_allocnos);
}

early_ra::~early_ra ()
{
  obstack_free (&m_region_obstack, nullptr);
}

// Return an array that, for each allocno A in the group, contains the index
// of the allocno at the head of A's chain (that is, the one with the highest
// START_POINT).  The index is INVALID_ALLOCNO if the chain is empty.
inline array_slice<unsigned int>
early_ra::allocno_group_info::chain_heads ()
{
  auto *start = reinterpret_cast<unsigned int *> (this + 1);
  return { start, size };
}

// Return the array of allocnos in the group.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_group_info::allocnos ()
{
  gcc_checking_assert (regno != INVALID_REGNUM);
  auto *chain_end = reinterpret_cast<unsigned int *> (this + 1) + size;
  auto *allocno_start = reinterpret_cast<allocno_info *> (chain_end);
  return { allocno_start, size };
}

// Return the group's color representative.
inline early_ra::allocno_group_info *
early_ra::allocno_group_info::color_rep ()
{
  gcc_checking_assert (m_color_rep->m_color_rep == m_color_rep);
  return m_color_rep;
}

// Return the group that contains the allocno.
inline early_ra::allocno_group_info *
early_ra::allocno_info::group ()
{
  auto *chain_end = reinterpret_cast<unsigned int *> (this - offset);
  return reinterpret_cast<allocno_group_info *> (chain_end - group_size) - 1;
}

// Return true if this allocno's live range is a subrange of related_allocno's
// and if we have committed to making this allocno share whatever register
// related_allocno uses.
inline bool
early_ra::allocno_info::is_shared ()
{
  return related_allocno != INVALID_ALLOCNO && !is_equiv;
}

// Return true if this allocno is known to be equivalent to ALLOCNO.
inline bool
early_ra::allocno_info::is_equiv_to (unsigned int allocno)
{
  return is_equiv && related_allocno == allocno;
}

// Return the allocnos in the subgroup.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_subgroup::allocnos ()
{
  if (!count)
    return {};
  return { &group->allocnos ()[start], count };
}

// Return allocno I in the subgroup, with 0 being the first.
inline early_ra::allocno_info *
early_ra::allocno_subgroup::allocno (unsigned int i)
{
  return &group->allocnos ()[start + i];
}

// Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_prev (allocno_info *allocno)
{
  if (allocno->chain_prev != INVALID_ALLOCNO)
    return m_allocnos[allocno->chain_prev];
  return nullptr;
}

// Return the next (later) allocno in ALLOCNO's chain, or null if none.
inline early_ra::allocno_info *
early_ra::chain_next (allocno_info *allocno)
{
  if (allocno->chain_next != INVALID_ALLOCNO)
    return m_allocnos[allocno->chain_next];
  return nullptr;
}

// Dump the information in m_pseudo_regs.
void
early_ra::dump_pseudo_regs ()
{
  fprintf (dump_file, "\nPseudos:\n");
  fprintf (dump_file, " %6s %6s %6s %6s %6s %6s %8s %s\n",
           "Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
           "FPRness", "Copies");
  pseudo_reg_info unused_reg = {};
  for (unsigned int regno = FIRST_PSEUDO_REGISTER;
       regno < m_pseudo_regs.length (); ++regno)
    {
      const auto &reg = m_pseudo_regs[regno];
      if (memcmp (&reg, &unused_reg, sizeof (reg)) == 0)
        continue;

      fprintf (dump_file, " %6d %6s %6s %6s %6s %6s %8d", regno,
               reg.flags & NEEDS_FPR8 ? "Req"
               : reg.flags & ALLOWS_FPR8 ? "OK" : "-",
               reg.flags & NEEDS_FPR16 ? "Req"
               : reg.flags & ALLOWS_FPR16 ? "OK" : "-",
               reg.flags & NEEDS_FPR32 ? "Req"
               : reg.flags & ALLOWS_FPR32 ? "OK" : "-",
               reg.flags & NEEDS_NONFPR ? "Req"
               : reg.flags & ALLOWS_NONFPR ? "OK" : "-",
               ~reg.flags & HAS_FLEXIBLE_STRIDE ? "-"
               : reg.flags & HAS_FIXED_STRIDE ? "Some" : "All",
               fpr_preference (regno));
      if (reg.flags & HAS_FPR_COPY)
        fprintf (dump_file, " FPR");
      if (reg.flags & HAS_NONFPR_COPY)
        fprintf (dump_file, " Non-FPR");
      unsigned int copyi = reg.first_copy;
      while (copyi)
        {
          const auto &copy = m_pseudo_reg_copies[copyi];
          if (copy.regnos[0] == regno)
            {
              fprintf (dump_file, " r%d", copy.regnos[1]);
              copyi = copy.next_copies[0];
            }
          else
            {
              fprintf (dump_file, " r%d", copy.regnos[0]);
              copyi = copy.next_copies[1];
            }
        }
      fprintf (dump_file, "\n");
    }
}

// Dump the information in m_fpr_ranges.
void
early_ra::dump_fpr_ranges ()
{
  fprintf (dump_file, "\nFPR live ranges:\n");
  for (unsigned int fpr = 0; fpr < 32; ++fpr)
    {
      auto &intervals = m_fpr_ranges[fpr];
      if (intervals.is_empty ())
        continue;

      fprintf (dump_file, " %2d", fpr);
      for (unsigned int i = 0; i < intervals.length (); ++i)
        {
          auto &interval = intervals[i];
          if (i && (i % 4) == 0)
            fprintf (dump_file, "\n ");
          fprintf (dump_file, " [ %6d %6d ]", interval.start_point,
                   interval.end_point);
        }
      fprintf (dump_file, "\n");
    }
}

// Dump the information in m_allocno_copies.
void
early_ra::dump_copies ()
{
  fprintf (dump_file, "\nCopies:\n");
  fprintf (dump_file, " %8s %3s %6s\n",
           "Allocno", "FPR", "Weight");
  for (const auto &copy : m_allocno_copies)
    fprintf (dump_file, " %8d %3d %6d\n", copy.allocno,
             copy.fpr, copy.weight);
}

// Dump the information in m_allocnos.
void
early_ra::dump_allocnos ()
{
  char buffer[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
  fprintf (dump_file, "\nAllocno groups:\n");
  fprintf (dump_file,
           " %12s %12s %4s %6s %8s %s\n",
           "Ids", "Regno", "Size", "Stride", "Cands", "Heads");
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      if (allocno->offset != 0)
        continue;
      auto *group = allocno->group ();
      snprintf (buffer, sizeof (buffer), "[%d:%d]", allocno->id,
                allocno->id + group->size - 1);
      fprintf (dump_file, " %12s", buffer);
      snprintf (buffer, sizeof (buffer), "r%d[0:%d]", group->regno,
                group->size - 1);
      fprintf (dump_file, " %12s %4s %6d %08x", buffer,
               group->fpr_size == FPR_D ? "D"
               : group->fpr_size == FPR_Q ? "Q" : "Z",
               group->stride,
               group->fpr_candidates);
      for (auto head : group->chain_heads ())
        if (head == INVALID_ALLOCNO)
          fprintf (dump_file, " -");
        else
          fprintf (dump_file, " %d", head);
      fprintf (dump_file, "\n");
    }

  fprintf (dump_file, "\nAllocno chains:\n");
  fprintf (dump_file, " %5s %12s %12s %6s %5s %5s %6s %5s\n",
           "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "Shared", "FPR");
  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
    {
      auto *allocno = m_allocnos[ai];
      if (allocno->chain_prev != INVALID_ALLOCNO)
        continue;
      const char *prefix = "=>";
      for (;;)
        {
          auto *group = allocno->group ();
          fprintf (dump_file, " %2s", prefix);
          fprintf (dump_file, " %5d", allocno->id);
          snprintf (buffer, sizeof (buffer), "r%d[%d]", group->regno,
                    allocno->offset);
          fprintf (dump_file, " %12s", buffer);
          snprintf (buffer, sizeof (buffer), "[%d,%d]",
                    allocno->start_point, allocno->end_point);
          fprintf (dump_file, " %11s%s %6s", buffer,
                   allocno->is_earlyclobbered ? "*" : " ",
                   allocno->is_strong_copy_dest ? "Strong"
                   : allocno->is_copy_dest ? "Yes" : "-");
          if (allocno->copy_dest == INVALID_ALLOCNO)
            fprintf (dump_file, " %5s", "-");
          else
            fprintf (dump_file, " %5d", allocno->copy_dest);
          if (allocno->is_equiv)
            fprintf (dump_file, " %5d", allocno->related_allocno);
          else
            fprintf (dump_file, " %5s", "-");
          if (allocno->is_shared ())
            fprintf (dump_file, " %6d", allocno->related_allocno);
          else
            fprintf (dump_file, " %6s", "-");
          if (allocno->hard_regno == FIRST_PSEUDO_REGISTER)
            fprintf (dump_file, " %5s", "-");
          else
            fprintf (dump_file, " %5s", reg_names[allocno->hard_regno]);
          fprintf (dump_file, "\n");
          if (allocno->chain_next == INVALID_ALLOCNO)
            break;
          allocno = m_allocnos[allocno->chain_next];
          prefix = "";
        }
    }
}

// Dump the information in m_colors.
void
early_ra::dump_colors ()
{
  fprintf (dump_file, "\nColors:\n");
  for (unsigned int i = 0; i < m_colors.length (); ++i)
    {
      auto *color = m_colors[i];
      if (!color->group)
        continue;

      fprintf (dump_file, " color %d:\n", i);
      fprintf (dump_file, " chains:\n");
      auto heads = color->group->chain_heads ();
      for (unsigned int i = 0; i < color->group->size; ++i)
        {
          fprintf (dump_file, " %2d:", i);
          auto ai = heads[i];
          while (ai != INVALID_ALLOCNO)
            {
              auto *allocno = m_allocnos[ai];
              fprintf (dump_file, " r%d[%d]", allocno->group ()->regno,
                       allocno->offset);
              ai = allocno->chain_next;
            }
          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, " FPR candidates:");
      for (unsigned int fpr = 0; fpr < 32; ++fpr)
        fprintf (dump_file, "%s%c", fpr % 8 ? "" : " ",
                 color->group->fpr_candidates & (1U << fpr) ? 'Y' : '-');
      fprintf (dump_file, "\n");
      fprintf (dump_file, " FPR preferences:");
      for (unsigned int fpr = 0; fpr < 32; ++fpr)
        if (color->fpr_preferences[fpr])
          fprintf (dump_file, " %d(%d)", fpr, color->fpr_preferences[fpr]);
      fprintf (dump_file, "\n");
    }
}

// Record any necessary information about a move from SRC to DEST.
void
early_ra::preprocess_move (rtx dest, rtx src)
{
  if (SUBREG_P (dest))
    dest = SUBREG_REG (dest);
  if (!REG_P (dest))
    return;

  if (SUBREG_P (src))
    src = SUBREG_REG (src);
  if (!REG_P (src))
    return;

  // Sort the registers by increasing REGNO.
  rtx regs[] = { dest, src };
  if (REGNO (dest) > REGNO (src))
    std::swap (regs[0], regs[1]);
  unsigned int regno0 = REGNO (regs[0]);
  unsigned int regno1 = REGNO (regs[1]);

  // Ignore moves between hard registers.
  if (HARD_REGISTER_NUM_P (regno1))
    return;

  // For moves between hard registers and pseudos, just record the type
  // of hard register involved.
  auto &reg1 = m_pseudo_regs[regno1];
  reg1.mode = GET_MODE (regs[1]);
  if (HARD_REGISTER_NUM_P (regno0))
    {
      reg1.flags |= (FP_REGNUM_P (regno0) ? HAS_FPR_COPY : HAS_NONFPR_COPY);
      return;
    }

  // Record a move between two pseudo registers.
  auto &reg0 = m_pseudo_regs[regno0];
  reg0.mode = GET_MODE (regs[0]);

  reg_copy_info copy;
  copy.regnos[0] = regno0;
  copy.regnos[1] = regno1;
  copy.next_copies[0] = reg0.first_copy;
  copy.next_copies[1] = reg1.first_copy;

  reg0.first_copy = reg1.first_copy = m_pseudo_reg_copies.length ();
  m_pseudo_reg_copies.safe_push (copy);
}

// Return true if INSN has a multi-vector operand and if that operand
// could be converted to strided form.
static bool
is_stride_candidate (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  auto stride_type = get_attr_stride_type (insn);
  return (TARGET_STREAMING_SME2
          && (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
              || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE));
}
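
// For example, an SME2 multi-vector LD1 whose current form is consecutive
// (STRIDE_TYPE_LD1_CONSECUTIVE) is a candidate for being rewritten to use
// the strided form of the instruction, but only when generating streaming
// SME2 code (TARGET_STREAMING_SME2).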

// Go through the constraints of INSN, which has already been extracted,
// and record any relevant information about pseudo registers.
void
early_ra::process_pseudo_reg_constraints (rtx_insn *insn)
{
  extract_insn (insn);
  preprocess_constraints (insn);

  // Flags that describe any multi-register vector operands.
  unsigned int insn_flags = (is_stride_candidate (insn)
                             ? HAS_FLEXIBLE_STRIDE
                             : HAS_FIXED_STRIDE);

  auto alts = get_preferred_alternatives (insn);

  int operand_matches[MAX_RECOG_OPERANDS];
  unsigned int operand_flags[MAX_RECOG_OPERANDS];
  for (int i = 0; i < recog_data.n_operands; ++i)
    {
      operand_matches[i] = -1;
      operand_flags[i] = 0;
    }

  // Extract information from the constraints, considering all plausible
  // alternatives.
  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
    {
      if (!(alts & ALTERNATIVE_BIT (altno)))
        continue;

      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
      if (!likely_alternative_match_p (op_alt))
        continue;

      // Use SRC_OPNO's constraints to derive information about DEST_OPNO.
      auto record_operand = [&](int src_opno, int dest_opno)
        {
          int matches = op_alt[src_opno].matches;
          if (matches >= 0)
            operand_matches[dest_opno] = matches;

          auto cl = alternative_class (op_alt, src_opno);
          if (cl != NO_REGS)
            {
              if (reg_class_subset_p (cl, FP_REGS))
                operand_flags[dest_opno] |= ALLOWS_FPR32;
              if (reg_class_subset_p (cl, FP_LO_REGS))
                operand_flags[dest_opno] |= ALLOWS_FPR16;
              if (reg_class_subset_p (cl, FP_LO8_REGS))
                operand_flags[dest_opno] |= ALLOWS_FPR8;
              if (!reg_classes_intersect_p (cl, FP_REGS))
                operand_flags[dest_opno] |= ALLOWS_NONFPR;
            }
        };

      for (int i = 0; i < recog_data.n_operands; ++i)
        {
          record_operand (i, i);
          if (recog_data.constraints[i][0] == '%')
            {
              record_operand (i, i + 1);
              record_operand (i + 1, i);
            }
        }
    }

  // Process the information we collected above.
  for (int i = 0; i < recog_data.n_operands; ++i)
    {
      rtx op = recog_data.operand[i];
      machine_mode orig_mode = GET_MODE (op);
      if (SUBREG_P (op))
        op = SUBREG_REG (op);

      // Record the accumulated information in m_pseudo_regs.
      if (REG_P (op) && !HARD_REGISTER_P (op))
        {
          // The flags so far just describe what at least one alternative
          // would accept.  Calculate the associated NEEDS_* information.
          auto flags = operand_flags[i];
          if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
            flags |= NEEDS_NONFPR;
          else if ((flags & ALLOWS_FPR32) && !(flags & ALLOWS_NONFPR))
            {
              if (flags & ALLOWS_FPR8)
                flags |= NEEDS_FPR8;
              if (flags & ALLOWS_FPR16)
                flags |= NEEDS_FPR16;
              flags |= NEEDS_FPR32;
            }

          // Look for multi-register vector operands.
          if (VECTOR_MODE_P (orig_mode)
              && targetm.hard_regno_mode_ok (V0_REGNUM, orig_mode)
              && hard_regno_nregs (V0_REGNUM, orig_mode) > 1)
            flags |= insn_flags;

          m_pseudo_regs[REGNO (op)].flags |= flags;
          m_pseudo_regs[REGNO (op)].mode = GET_MODE (op);
        }

      // Treat matching constraints as equivalent to moves.
      if (operand_matches[i] >= 0)
        preprocess_move (recog_data.operand[operand_matches[i]], op);
    }
}

// Make one pass through the instructions, collecting information that
// will be needed later.
void
early_ra::preprocess_insns ()
{
  m_pseudo_regs.safe_grow_cleared (max_reg_num ());
  m_pseudo_reg_copies.safe_push (reg_copy_info ());
  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
        continue;

      // Mark all registers that occur in addresses as needing a GPR.
      vec_rtx_properties properties;
      properties.add_insn (insn, true);
      for (rtx_obj_reference ref : properties.refs ())
        if (ref.is_reg ()
            && ref.in_address ()
            && !HARD_REGISTER_NUM_P (ref.regno))
          m_pseudo_regs[ref.regno].flags |= ALLOWS_NONFPR | NEEDS_NONFPR;

      if (GET_CODE (PATTERN (insn)) == USE
          || GET_CODE (PATTERN (insn)) == CLOBBER)
        continue;

      rtx set = single_set (insn);
      if (set && is_move_set (set))
        preprocess_move (SET_DEST (set), SET_SRC (set));
      else
        process_pseudo_reg_constraints (insn);
    }
}

// Return a signed integer that says (roughly) how strong an affinity
// pseudo register REGNO has with FPRs.  A positive value indicates
// that we should try to allocate an FPR, a negative value indicates
// that we shouldn't, and 0 indicates neutrality.
int
early_ra::fpr_preference (unsigned int regno)
{
  auto mode = m_pseudo_regs[regno].mode;
  auto flags = m_pseudo_regs[regno].flags;
  if (flags & IGNORE_REG)
    return -4;
  else if (mode == VOIDmode || !targetm.hard_regno_mode_ok (V0_REGNUM, mode))
    return -3;
  else if (flags & HAS_FLEXIBLE_STRIDE)
    return 3;
  else if (flags & NEEDS_FPR32)
    return 2;
  else if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
    return -2;
  else if ((flags & HAS_FPR_COPY) && !(flags & HAS_NONFPR_COPY))
    return 1;
  else if ((flags & HAS_NONFPR_COPY) && !(flags & HAS_FPR_COPY))
    return -1;
  else
    return 0;
}
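
// For example, a pseudo used as a multi-register operand of a
// stride-capable instruction scores 3, one that must live in FPRs
// (NEEDS_FPR32) scores 2, and one whose only hard-register copies
// involve non-FPRs scores -1.  choose_fpr_pseudos below treats any
// positive score as a reason to allocate an FPR.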

// Propagate information about pseudo-registers along copy edges,
// while doing so doesn't create conflicting FPR preferences.
void
early_ra::propagate_pseudo_reg_info ()
{
  struct stack_entry { unsigned int regno, copyi; };

  auto_vec<stack_entry, 32> stack;
  for (unsigned int i = FIRST_PSEUDO_REGISTER;
       i < m_pseudo_regs.length (); ++i)
    {
      auto start = m_pseudo_regs[i].first_copy;
      if (!start)
        continue;

      stack.quick_push ({ i, start });
      while (!stack.is_empty ())
        {
          auto entry = stack.pop ();
          auto &copy = m_pseudo_reg_copies[entry.copyi];
          auto src_regno = entry.regno;
          auto dest_regno = (src_regno == copy.regnos[1]
                             ? copy.regnos[0]
                             : copy.regnos[1]);
          auto next_copyi = (src_regno == copy.regnos[1]
                             ? copy.next_copies[1]
                             : copy.next_copies[0]);
          if (next_copyi)
            stack.safe_push ({ src_regno, next_copyi });

          auto &src_reg = m_pseudo_regs[src_regno];
          auto &dest_reg = m_pseudo_regs[dest_regno];

          if (src_reg.flags & ~dest_reg.flags & PSEUDO_COPY_FLAGS)
            {
              auto src_preference = fpr_preference (src_regno);
              auto dest_preference = fpr_preference (dest_regno);
              if ((src_preference >= 0 && dest_preference >= 0)
                  || (src_preference <= 0 && dest_preference <= 0))
                {
                  dest_reg.flags |= (src_reg.flags & PSEUDO_COPY_FLAGS);
                  stack.safe_push ({ dest_regno, dest_reg.first_copy });
                }
            }
        }
    }
}

// Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
// accordingly.
void
early_ra::choose_fpr_pseudos ()
{
  for (unsigned int i = FIRST_PSEUDO_REGISTER;
       i < m_pseudo_regs.length (); ++i)
    if (fpr_preference (i) > 0)
      bitmap_set_bit (m_fpr_pseudos, i);
}

// Clear out information about the previous CFG region (if any)
// and set up the data for a new region.
void
early_ra::start_new_region ()
{
  obstack_free (&m_region_obstack, m_region_alloc_start);
  m_regno_to_group.empty ();
  m_allocno_copies.truncate (0);
  m_allocnos.truncate (0);
  m_sorted_allocnos.truncate (0);
  m_shared_allocnos.truncate (0);
  m_colors.truncate (0);
  m_insn_ranges.truncate (0);
  for (auto &fpr_ranges : m_fpr_ranges)
    fpr_ranges.truncate (0);
  for (auto &call_points : m_call_points)
    call_points.truncate (0);
  gcc_assert (bitmap_empty_p (m_live_allocnos) && m_live_fprs == 0);
  m_dead_insns.truncate (0);
  m_allocated_fprs = 0;
  m_call_preserved_fprs = 0;
  m_accurate_live_ranges = true;
  m_allocation_successful = true;
  m_current_region += 1;
}

// Record that we can no longer track the liveness of individual allocnos.
// Call DUMP to dump the reason to a dump file.
template<typename T>
void
early_ra::record_live_range_failure (T dump)
{
  if (!m_accurate_live_ranges)
    return;

  m_accurate_live_ranges = false;
  m_allocation_successful = false;
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Unable to track live ranges further: ");
      dump ();
      fprintf (dump_file, "\n");
    }
}

// Record that the allocation of the current region has failed.  Call DUMP
// to dump the reason to a dump file.
template<typename T>
void
early_ra::record_allocation_failure (T dump)
{
  if (!m_allocation_successful)
    return;

  m_allocation_successful = false;
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Not allocating region: ");
      dump ();
      fprintf (dump_file, "\n");
    }
}

// Create and return an allocno group of size SIZE for register REGNO.
// REGNO can be INVALID_REGNUM if the group just exists to allow
// other groups to be chained together, and does not have any new
// allocnos of its own.
early_ra::allocno_group_info *
early_ra::create_allocno_group (unsigned int regno, unsigned int size)
{
  static_assert (alignof (unsigned int) == alignof (allocno_info),
                 "allocno_info alignment");
  unsigned int num_allocnos = (regno != INVALID_REGNUM ? size : 0);

  // Allocate an allocno_group_info, followed by an array of chain heads,
  // followed by the allocnos themselves.
  size_t alloc_size = (sizeof (allocno_group_info)
                       + size * sizeof (unsigned int)
                       + num_allocnos * sizeof (allocno_info));
  void *data = obstack_alloc (&m_region_obstack, alloc_size);

  // Initialize the group.
  auto *group = reinterpret_cast<allocno_group_info *> (data);
  memset (group, 0, sizeof (*group));
  group->m_color_rep = group;
  group->regno = regno;
  group->size = size;
  group->stride = 1;
  group->fpr_size = FPR_D;
  group->fpr_candidates = ~0U;

  // Initialize the chain heads.
  auto heads = group->chain_heads ();
  for (unsigned int i = 0; i < heads.size (); ++i)
    heads[i] = (i < num_allocnos ? m_allocnos.length () + i : INVALID_ALLOCNO);

  // Initialize the allocnos.
  if (num_allocnos > 0)
    {
      auto allocnos = group->allocnos ();
      memset (allocnos.begin (), 0, num_allocnos * sizeof (allocno_info));
      for (unsigned int i = 0; i < num_allocnos; ++i)
        {
          auto *allocno = &allocnos[i];
          allocno->id = m_allocnos.length ();
          allocno->offset = i;
          allocno->group_size = size;
          allocno->hard_regno = FIRST_PSEUDO_REGISTER;
          allocno->start_point = END_OF_REGION;
          allocno->end_point = START_OF_REGION;
          allocno->copy_dest = INVALID_ALLOCNO;
          allocno->related_allocno = INVALID_ALLOCNO;
          allocno->chain_next = INVALID_ALLOCNO;
          allocno->chain_prev = INVALID_ALLOCNO;
          m_allocnos.safe_push (allocno);
        }
    }
  return group;
}
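
// The resulting in-memory layout for a group of size 2 is therefore:
//
//   [ allocno_group_info | chain_heads[0..1] | allocnos[0..1] ]
//
// which is the layout that chain_heads (), allocnos () and
// allocno_info::group () above rely on.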

// If REG refers to a pseudo register that might be allocated to FPRs,
// return the associated range of allocnos, creating new ones if necessary.
// Return an empty range otherwise.
early_ra::allocno_subgroup
early_ra::get_allocno_subgroup (rtx reg)
{
  if (GET_CODE (reg) == SUBREG)
    {
      allocno_subgroup inner = get_allocno_subgroup (SUBREG_REG (reg));
      if (!inner)
        return {};

      if (!targetm.can_change_mode_class (GET_MODE (SUBREG_REG (reg)),
                                          GET_MODE (reg), FP_REGS))
        {
          record_live_range_failure ([&](){
            fprintf (dump_file, "cannot refer to r%d:%s in mode %s",
                     REGNO (SUBREG_REG (reg)),
                     GET_MODE_NAME (GET_MODE (SUBREG_REG (reg))),
                     GET_MODE_NAME (GET_MODE (reg)));
          });
          return {};
        }

      if (!targetm.modes_tieable_p (GET_MODE (SUBREG_REG (reg)),
                                    GET_MODE (reg)))
        record_allocation_failure ([&](){
          fprintf (dump_file, "r%d's mode %s is not tieable to mode %s",
                   REGNO (SUBREG_REG (reg)),
                   GET_MODE_NAME (GET_MODE (SUBREG_REG (reg))),
                   GET_MODE_NAME (GET_MODE (reg)));
        });

      subreg_info info;
      subreg_get_info (V0_REGNUM, GET_MODE (SUBREG_REG (reg)),
                       SUBREG_BYTE (reg), GET_MODE (reg), &info);
      if (!info.representable_p)
        {
          record_live_range_failure ([&](){
            fprintf (dump_file, "subreg of r%d is invalid for V0",
                     REGNO (SUBREG_REG (reg)));
          });
          return {};
        }

      inner.start += info.offset;
      inner.count = info.nregs;
      return inner;
    }

  if (!REG_P (reg) || HARD_REGISTER_P (reg))
    return {};

  unsigned int regno = REGNO (reg);
  if (fpr_preference (regno) <= 0)
    return {};

  unsigned int count = hard_regno_nregs (V0_REGNUM, GET_MODE (reg));
  bool existed;
  auto &entry = m_regno_to_group.get_or_insert (regno, &existed);
  if (!existed)
    {
      auto *group = create_allocno_group (regno, count);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          auto allocnos = group->allocnos ();
          fprintf (dump_file, "Creating allocnos [%d:%d] for r%d\n",
                   allocnos.front ().id, allocnos.back ().id, regno);
        }

      auto reg_bits = GET_MODE_BITSIZE (GET_MODE (reg));
      auto fpr_bits = exact_div (reg_bits, count);
      auto flags = m_pseudo_regs[regno].flags;

      // Punt for now if there is a choice to be made between using an
      // FPR and a non-FPR.
      if ((flags & NEEDS_NONFPR)
          || ((flags & ALLOWS_NONFPR)
              && !FLOAT_MODE_P (GET_MODE (reg))
              && !VECTOR_MODE_P (GET_MODE (reg))))
        record_allocation_failure ([&](){
          fprintf (dump_file, "r%d has FPR and non-FPR references", regno);
        });

      if (flags & ALLOWS_FPR8)
        group->fpr_candidates &= 0xff;
      else if (flags & ALLOWS_FPR16)
        group->fpr_candidates &= 0xffff;
      group->fpr_candidates &= ~0U >> (count - 1);

      group->has_flexible_stride = ((flags & HAS_FLEXIBLE_STRIDE) != 0
                                    && (flags & HAS_FIXED_STRIDE) == 0);

      group->fpr_size = (maybe_gt (fpr_bits, 128) ? FPR_Z
                         : maybe_gt (fpr_bits, 64) ? FPR_Q : FPR_D);

      entry = group;
    }
  return { entry, 0, count };
}

// Record a use of FPR REGNO at the current program point, as part of
// a backwards walk over a block.
void
early_ra::record_fpr_use (unsigned int regno)
{
  gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
  unsigned int offset = regno - V0_REGNUM;
  if (!(m_live_fprs & (1U << offset)))
    {
      m_fpr_ranges[offset].safe_push ({ START_OF_REGION, m_current_point,
                                        INVALID_ALLOCNO });
      m_live_fprs |= 1U << offset;
    }
}

// Record a definition of FPR REGNO at the current program point, as part of
// a backwards walk over a block.
void
early_ra::record_fpr_def (unsigned int regno)
{
  gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
  unsigned int offset = regno - V0_REGNUM;

  // The definition completes the current live range.  If the result
  // of the definition is used, the live range extends to the last use.
  // Otherwise the live range is just a momentary blip at the current point.
  auto &ranges = m_fpr_ranges[offset];
  if (m_live_fprs & (1U << offset))
    {
      ranges.last ().start_point = m_current_point;
      m_live_fprs &= ~(1U << offset);
    }
  else
    ranges.safe_push ({ m_current_point, m_current_point, INVALID_ALLOCNO });
}

// Record a use of allocno ALLOCNO at the current program point, as part
// of a backwards walk over a block.
void
early_ra::record_allocno_use (allocno_info *allocno)
{
  if (allocno->start_point == m_current_point)
    return;

  gcc_checking_assert (!allocno->is_shared ());
  bitmap_set_bit (m_live_allocnos, allocno->id);
  if (allocno->end_point > m_current_point)
    {
      allocno->end_point = m_current_point;
      allocno->last_def_point = START_OF_REGION;
      allocno->last_use_point = END_OF_REGION;
    }
  else
    allocno->last_use_point = allocno->start_point;
  allocno->start_point = m_current_point;
  allocno->is_copy_dest = false;
  allocno->is_strong_copy_src = false;
  allocno->related_allocno = INVALID_ALLOCNO;
  allocno->is_equiv = false;
}

// Record a definition of allocno ALLOCNO at the current program point,
// as part of a backwards walk over a block.  The allocno is known
// to be live.
void
early_ra::record_allocno_def (allocno_info *allocno)
{
  gcc_checking_assert (!allocno->is_shared ());
  allocno->last_use_point = allocno->start_point;
  allocno->last_def_point = m_current_point;
  allocno->start_point = m_current_point;
  allocno->num_defs = MIN (allocno->num_defs + 1, 2);
  gcc_checking_assert (!allocno->is_copy_dest
                       && !allocno->is_strong_copy_src);
  if (!bitmap_clear_bit (m_live_allocnos, allocno->id))
    gcc_unreachable ();
}

// SRC_ALLOCNO is copied or tied to DEST_ALLOCNO; IS_EQUIV is true if the
// two allocnos are known to be equal.  See whether we can mark a chain of
// allocnos ending at DEST_ALLOCNO as related to SRC_ALLOCNO.  Return the
// start of the chain if so, otherwise return null.
//
// If IS_EQUIV, a chain that contains just DEST_ALLOCNO should be treated
// as an equivalence.  Otherwise the chain should be shared with SRC_ALLOCNO.
//
// Sharing chains are a rather hacky workaround for the fact that we
// don't collect segmented live ranges, and that in the end we want to do
// simple interval graph coloring.
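//
// For example, if the copy P2 = P1 provides the only definition of P2,
// and P1 is neither used nor redefined again during P2's live range,
// then P2 is equivalent to P1 and its allocnos can simply reuse the
// register chosen for P1.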
early_ra::allocno_info *
early_ra::find_related_start (allocno_info *dest_allocno,
                              allocno_info *src_allocno, bool is_equiv)
{
  allocno_info *res = nullptr;
  for (;;)
    {
      if (src_allocno->end_point > dest_allocno->end_point)
        // The src allocno dies first.
        return res;

      if (src_allocno->num_defs != 0)
        {
          if (dest_allocno->end_point < m_current_bb_point)
            // We don't currently track enough information to handle multiple
            // definitions across basic block boundaries.
            return res;

          if (src_allocno->last_def_point >= dest_allocno->end_point)
            // There is another definition during the destination's live range.
            return res;
        }
      if (is_equiv)
        {
          if (dest_allocno->num_defs == 1)
            // dest_allocno is equivalent to src_allocno for dest_allocno's
            // entire live range.  Fall back to that if we can't establish
            // a sharing chain.
            res = dest_allocno;
        }
      else
        {
          if (src_allocno->last_use_point >= dest_allocno->end_point)
            // src_allocno is live during dest_allocno's live range,
            // and the two allocnos do not necessarily have the same value.
            return res;
        }

      if (dest_allocno->group_size != 1
          // Account for definitions by shared registers.
          || dest_allocno->num_defs > 1
          || DF_REG_DEF_COUNT (dest_allocno->group ()->regno) != 1)
        // Currently only single allocnos that are defined once can
        // share registers with non-equivalent allocnos.  This could be
        // relaxed, but at the time of writing, aggregates are not valid
        // SSA names and so generally only use a single pseudo throughout
        // their lifetime.
        return res;

      if (dest_allocno->copy_dest == src_allocno->id)
        // We've found a complete and valid sharing chain.
        return dest_allocno;

      if (dest_allocno->copy_dest == INVALID_ALLOCNO)
        return res;

      auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
      if (!is_chain_candidate (dest_allocno, next_allocno, ALL_REASONS))
        return res;

      dest_allocno = next_allocno;
      is_equiv = false;
    }
}

// Add FROM_ALLOCNO's definition information to TO_ALLOCNO's.
void
early_ra::accumulate_defs (allocno_info *to_allocno,
                           allocno_info *from_allocno)
{
  if (from_allocno->num_defs > 0)
    {
      to_allocno->num_defs = MIN (from_allocno->num_defs
                                  + to_allocno->num_defs, 2);
      to_allocno->last_def_point = MAX (to_allocno->last_def_point,
                                        from_allocno->last_def_point);
    }
}

// Record any relevant allocno-related information for an actual or imagined
// copy from SRC to DEST.  FROM_MOVE_P is true if the copy was an explicit
// move instruction, false if it represents one way of satisfying the previous
// instruction's constraints.
void
early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
{
  auto dest_range = get_allocno_subgroup (dest);
  auto src_range = get_allocno_subgroup (src);
  if (from_move_p
      && dest_range
      && REG_P (src)
      && FP_REGNUM_P (REGNO (src)))
    {
      // A copy from an FPR to an allocno group.
      unsigned int fpr = REGNO (src) - V0_REGNUM;
      m_allocno_copies.safe_push ({ dest_range.allocno (0)->id, fpr,
                                    dest_range.count });

      // If the allocno at the other end of the chain of copies from DEST
      // has a copy to the same FPR, record that all intervening copy chains
      // could become "strong" ones.  This indicates that picking the FPR
      // avoids a copy at both ends.
      unsigned int hard_regno = REGNO (src);
      for (auto &dest_allocno : dest_range.allocnos ())
        if (dest_allocno.hard_regno == hard_regno++)
          dest_allocno.is_strong_copy_src = true;
    }
  else if (from_move_p
           && src_range
           && REG_P (dest)
           && FP_REGNUM_P (REGNO (dest)))
    {
      // A copy from an allocno group to an FPR.
      unsigned int fpr = REGNO (dest) - V0_REGNUM;
      m_allocno_copies.safe_push ({ src_range.allocno (0)->id, fpr,
                                    src_range.count });
      for (auto &src_allocno : src_range.allocnos ())
        {
          // If the copy comes from a move, see whether the destination
          // FPR is known to be equal to the source allocno for the FPR's
          // last live range.
          if (from_move_p && src_allocno.num_defs == 0)
            {
              auto &last_range = m_fpr_ranges[fpr].last ();
              if (last_range.end_point >= src_allocno.end_point)
                last_range.allocno = src_allocno.id;
            }
          src_allocno.hard_regno = V0_REGNUM + fpr;
          fpr += 1;
        }
    }
  else if (src_range && dest_range)
    {
      // A copy between two allocno groups.  We can only have a mismatched
      // number of FPRs for imaginary, non-move copies.  In that case
      // the matching happens on the common lowparts.
      gcc_assert (!from_move_p || src_range.count == dest_range.count);
      unsigned int count = std::min (src_range.count, dest_range.count);
      if (WORDS_BIG_ENDIAN)
        {
          src_range.start += src_range.count - count;
          dest_range.start += dest_range.count - count;
        }
      src_range.count = count;
      dest_range.count = count;

      // Ignore (imaginary non-move) copies if the destination is still live.
      for (auto &dest_allocno : dest_range.allocnos ())
        if (bitmap_bit_p (m_live_allocnos, dest_allocno.id))
          return;

      for (unsigned int i = 0; i < src_range.count; ++i)
        {
          auto *dest_allocno = dest_range.allocno (i);
          auto *src_allocno = src_range.allocno (i);
          if (src_allocno->end_point > dest_allocno->start_point)
            {
              gcc_assert (src_allocno->copy_dest == INVALID_ALLOCNO
                          || src_allocno->copy_dest == dest_allocno->id);
              src_allocno->copy_dest = dest_allocno->id;
              src_allocno->hard_regno = dest_allocno->hard_regno;
              dest_allocno->is_copy_dest = 1;
            }
          else if (auto *start_allocno = find_related_start (dest_allocno,
                                                             src_allocno,
                                                             from_move_p))
            {
              auto *next_allocno = dest_allocno;
              for (;;)
                {
                  next_allocno->related_allocno = src_allocno->id;
                  next_allocno->is_equiv = (start_allocno == dest_allocno
                                            && from_move_p);
                  // If we're sharing two allocnos that are not equivalent,
                  // carry across the definition information.  This is needed
                  // to prevent multiple incompatible attempts to share with
                  // the same register.
                  if (next_allocno->is_shared ())
                    accumulate_defs (src_allocno, next_allocno);
                  src_allocno->last_use_point
                    = MAX (src_allocno->last_use_point,
                           next_allocno->last_use_point);

                  if (next_allocno == start_allocno)
                    break;
                  next_allocno = m_allocnos[next_allocno->copy_dest];
                }
            }
        }
    }
}
1812 // Record any relevant allocno-related information about the constraints
1813 // on INSN, which has already been extracted.
1814 void
1815 early_ra::record_constraints (rtx_insn *insn)
1817 preprocess_constraints (insn);
1819 int operand_matches[MAX_RECOG_OPERANDS];
1820 for (int i = 0; i < recog_data.n_operands; ++i)
1821 operand_matches[i] = -1;
1823 auto alts = get_preferred_alternatives (insn);
1824 bool any_ok = recog_data.n_alternatives == 0;
1826 // The set of output operands that are earlyclobber in at least one
1827 // alternative.
1828 operand_mask earlyclobber_operands = 0;
1830 // The set of output operands that are matched to inputs in at least
1831 // one alternative.
1832 operand_mask matched_operands = 0;
1834 // The set of output operands that are not matched to inputs in at least
1835 // one alternative.
1836 operand_mask unmatched_operands = 0;
1838 // The set of input operands that are matched to outputs in at least one
1839 // alternative, or that overlap with such an input if the output is not
1840 // earlyclobber. The latter part of the condition copes with things
1841 // like y = x * x, where the first x is tied to the destination, and where
1842 // y is not earlyclobber.
1843 operand_mask matches_operands = 0;
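// For example, given y = x * x where the chosen alternative ties the
// first input (operand 1) to a non-earlyclobber output (operand 0),
// record_operand below sets operand_matches[1] = 0 and marks both uses
// of x, making matches_operands 0b110.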
1845 for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
1847 if (!(alts & ALTERNATIVE_BIT (altno)))
1848 continue;
1850 auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
1851 if (!likely_alternative_match_p (op_alt))
1852 continue;
1854 any_ok = true;
1856 // Update the information for operand DEST_OPNO based on the constraint
1857 // information for operand SRC_OPNO. The numbers can be different for
1858 // swapped commutative operands.
1859 auto record_operand = [&](int src_opno, int dest_opno)
1861 int matches = op_alt[src_opno].matches;
1862 // A matched earlyclobber cannot be used if the same operand value
1863 // occurs in an unmatched operand. E.g. for y = x * x, a matched
1864 // earlyclobber on the first input does not cover the second input.
1865 if (matches >= 0)
1867 rtx op = recog_data.operand[dest_opno];
1868 operand_mask overlaps = 0;
1869 for (int i = 0; i < recog_data.n_operands; ++i)
1870 if (i != dest_opno
1871 && !recog_data.is_operator[i]
1872 && recog_data.operand_type[i] != OP_OUT
1873 && reg_overlap_mentioned_p (op, recog_data.operand[i]))
1874 overlaps |= 1U << i;
1875 if (!op_alt[matches].earlyclobber || overlaps == 0)
1877 operand_matches[dest_opno] = matches;
1878 matches_operands |= (1U << dest_opno) | overlaps;
1883 auto reject = count_rejects (op_alt);
1884 for (int opno = 0; opno < recog_data.n_operands; ++opno)
1886 operand_mask op_mask = operand_mask (1) << opno;
1888 if (recog_data.operand_type[opno] != OP_IN)
1890 if (reject == 0 && op_alt[opno].matched >= 0)
1891 matched_operands |= op_mask;
1892 else
1893 unmatched_operands |= op_mask;
1896 if (op_alt[opno].earlyclobber)
1897 earlyclobber_operands |= op_mask;
1899 // Punt for now on scratches. If we wanted to handle them,
1900 // we'd need to create allocnos for them, like IRA does.
1901 rtx op = recog_data.operand[opno];
1902 if (GET_CODE (op) == SCRATCH
1903 && reg_classes_intersect_p (op_alt[opno].cl, FP_REGS))
1904 record_allocation_failure ([&](){
1905 fprintf (dump_file, "insn %d has FPR match_scratch",
1906 INSN_UID (insn));
1909 // Record filter information, which applies to the first register
1910 // in the operand.
1911 if (auto filters = alternative_register_filters (op_alt, opno))
1912 if (auto range = get_allocno_subgroup (recog_data.operand[opno]))
1913 for (unsigned int fpr = range.start; fpr < 32; ++fpr)
1914 if (!test_register_filters (filters, fpr))
1915 range.group->fpr_candidates &= ~(1U << (fpr - range.start));
1917 if (reject == 0)
1919 // Record possible matched operands.
1920 record_operand (opno, opno);
1921 if (recog_data.constraints[opno][0] == '%')
1923 record_operand (opno, opno + 1);
1924 record_operand (opno + 1, opno);
1930 if (!any_ok)
1932 if (dump_file && (dump_flags & TDF_DETAILS))
1933 fprintf (dump_file, " -- no match\n");
1934 record_allocation_failure ([&](){
1935 fprintf (dump_file, "no matching constraints for insn %d",
1936 INSN_UID (insn));
1940 rtx dest_op = NULL_RTX;
1941 for (int opno = 0; opno < recog_data.n_operands; ++opno)
1943 rtx op = recog_data.operand[opno];
1944 auto op_mask = operand_mask (1) << opno;
1945 // Record if there is an output operand that is "independent" of the
1946 // inputs, in the sense that it is never earlyclobber and is never
1947 // matched to an input. See the comment below for how this is used.
1948 if (recog_data.operand_type[opno] == OP_OUT
1949 && (earlyclobber_operands & op_mask) == 0
1950 && (matched_operands & op_mask) == 0)
1952 dest_op = op;
1953 break;
1955 // We sometimes decide not to allocate pseudos even if they meet
1956 // the normal FPR preference criteria; see the setters of IGNORE_REG
1957 // for details. However, the premise is that we should only ignore
1958 // definitions of such pseudos if they are independent of the other
1959 // operands in the instruction.
1960 else if (recog_data.operand_type[opno] != OP_IN
1961 && REG_P (op)
1962 && !HARD_REGISTER_P (op)
1963 && (m_pseudo_regs[REGNO (op)].flags & IGNORE_REG) != 0)
1964 record_allocation_failure ([&](){
1965 fprintf (dump_file, "ignored register r%d depends on input operands",
1966 REGNO (op));
1970 for (int opno = 0; opno < recog_data.n_operands; ++opno)
1972 auto op_mask = operand_mask (1) << opno;
1973 rtx op = recog_data.operand[opno];
1974 int matches = operand_matches[opno];
1976 // Punt for now on operands that already have a fixed choice of
1977 // register, since we don't have IRA's ability to find an alternative.
1978 // It's better if earlier passes don't create this kind of situation.
1979 if (REG_P (op) && FP_REGNUM_P (REGNO (op)))
1980 record_allocation_failure ([&](){
1981 fprintf (dump_file, "operand %d of insn %d refers directly to %s",
1982 opno, INSN_UID (insn), reg_names[REGNO (op)]);
1985 // Treat input operands as being earlyclobbered if an output is
1986 // sometimes earlyclobber and if the input never matches an output.
1987 // Do the same if there is an output that is always matched to an
1988 // input, and if this operand doesn't match that input. In both
1989 // cases, tying the input and the output would lead to an impossible
1990 // combination (or at least one that is difficult to reload).
1991 if (recog_data.operand_type[opno] != OP_OUT
1992 && ((earlyclobber_operands && matches < 0)
1993 || ((matched_operands & ~unmatched_operands)
1994 && !(matches_operands & op_mask))))
1995 for (auto &allocno : get_allocno_subgroup (op).allocnos ())
1996 if (allocno.end_point + 1 == m_current_point)
1997 allocno.is_earlyclobbered = true;
1999 // Create copies between operands that can be tied. This (deliberately)
2000 // might add several copies to the same destination register; later code
2001 // can then choose between them based on other criteria.
2003 // If there is an output operand that is never matched or earlyclobber,
2004 // and an input operand that never matches an output operand, create
2005 // a tentative copy between them. This allows hard register preferences
2006 // to be transmitted along the copy chains.
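// For example, an FMLA-style instruction whose accumulator input uses
// constraint "0" can tie that input to the output, so we would record
// a tentative copy from the accumulator to the result.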
2007 if (matches >= 0)
2008 record_copy (recog_data.operand[matches], op);
2009 else if (dest_op && recog_data.operand_type[opno] == OP_IN)
2010 record_copy (dest_op, op);
2014 // If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the
2015 // start of the current basic block, otherwise model the artificial uses
2016 // and defs at the end of the basic block. This is done as part of a
2017 // backwards walk, so defs should be processed before uses.
2018 void
2019 early_ra::record_artificial_refs (unsigned int flags)
2021 df_ref ref;
2023 FOR_EACH_ARTIFICIAL_DEF (ref, m_current_bb->index)
2024 if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
2025 && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
2026 record_fpr_def (DF_REF_REGNO (ref));
2027 m_current_point += 1;
2029 FOR_EACH_ARTIFICIAL_USE (ref, m_current_bb->index)
2030 if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
2031 && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
2032 record_fpr_use (DF_REF_REGNO (ref));
2033 m_current_point += 1;
2036 // Return true if:
2038 // - X is a SUBREG, in which case it is a SUBREG of some REG Y
2040 // - one 64-bit word of Y can be modified while preserving all other words
2042 // - X refers to no more than one 64-bit word of Y
2044 // - assigning FPRs to Y would put more than one 64-bit word in each FPR
2046 // For example, this is true of:
2048 // - (subreg:DI (reg:TI R) 0) and
2049 // - (subreg:DI (reg:TI R) 8)
2051 // but is not true of:
2053 // - (subreg:V2SI (reg:V2x2SI R) 0) or
2054 // - (subreg:V2SI (reg:V2x2SI R) 8).
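// (In the first two cases, both DI words of R share one 128-bit FPR,
// so writing one word must preserve the other. In the last two cases,
// each 64-bit word of R gets its own FPR, so a word write simply
// clobbers the whole register.)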
2055 static bool
2056 allocno_assignment_is_rmw (rtx x)
2058 if (partial_subreg_p (x))
2060 auto outer_mode = GET_MODE (x);
2061 auto inner_mode = GET_MODE (SUBREG_REG (x));
2062 if (known_eq (REGMODE_NATURAL_SIZE (inner_mode), 0U + UNITS_PER_WORD)
2063 && known_lt (GET_MODE_SIZE (outer_mode), UNITS_PER_VREG))
2065 auto nregs = targetm.hard_regno_nregs (V0_REGNUM, inner_mode);
2066 if (maybe_ne (nregs * UNITS_PER_WORD, GET_MODE_SIZE (inner_mode)))
2067 return true;
2070 return false;
2073 // Called as part of a backwards walk over a block. Model the definitions
2074 // in INSN, excluding partial call clobbers.
2075 void
2076 early_ra::record_insn_defs (rtx_insn *insn)
2078 df_ref ref;
2080 FOR_EACH_INSN_DEF (ref, insn)
2081 if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
2082 record_fpr_def (DF_REF_REGNO (ref));
2083 else
2085 rtx reg = DF_REF_REG (ref);
2086 auto range = get_allocno_subgroup (reg);
2087 for (auto &allocno : range.allocnos ())
2089 // Make sure that assigning to the DF_REF_REG clobbers the
2090 // whole of this allocno, not just some of it.
2091 if (allocno_assignment_is_rmw (reg))
2093 record_live_range_failure ([&](){
2094 fprintf (dump_file, "read-modify-write of allocno %d",
2095 allocno.id);
2097 break;
2100 // If the destination is unused, record a momentary blip
2101 // in its live range.
2102 if (!bitmap_bit_p (m_live_allocnos, allocno.id))
2103 record_allocno_use (&allocno);
2104 record_allocno_def (&allocno);
2107 m_current_point += 1;
2110 // Called as part of a backwards walk over a block. Model the call made
2111 // by INSN as a separate phase in the evaluation of the insn. Any partial
2112 // call clobbers happen at that point, rather than in the definition or use
2113 // phase of the insn.
2114 void
2115 early_ra::record_insn_call (rtx_call_insn *insn)
2117 function_abi abi = insn_callee_abi (insn);
2118 m_call_points[abi.id ()].safe_push (m_current_point);
2119 m_current_point += 1;
2122 // Called as part of a backwards walk over a block. Model the uses in INSN.
2123 // We can ignore READ_MODIFY_WRITE uses of plain subregs, since we track the
2124 // FPR-sized parts of them individually.
2125 void
2126 early_ra::record_insn_uses (rtx_insn *insn)
2128 df_ref ref;
2130 FOR_EACH_INSN_USE (ref, insn)
2131 if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
2132 record_fpr_use (DF_REF_REGNO (ref));
2133 else if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE)
2134 || DF_REF_FLAGS_IS_SET (ref, DF_REF_STRICT_LOW_PART)
2135 || DF_REF_FLAGS_IS_SET (ref, DF_REF_ZERO_EXTRACT))
2137 auto range = get_allocno_subgroup (DF_REF_REG (ref));
2138 for (auto &allocno : range.allocnos ())
2139 record_allocno_use (&allocno);
2141 m_current_point += 1;
2144 // ALLOCNO->is_strong_copy_src is true. See whether ALLOCNO heads a
2145 // natural chain that has an affinity with the same hard register at
2146 // both ends.
2147 bool
2148 early_ra::consider_strong_copy_src_chain (allocno_info *allocno)
2150 auto *src_allocno = allocno;
2151 while (src_allocno->copy_dest != INVALID_ALLOCNO)
2153 auto *dest_allocno = m_allocnos[src_allocno->copy_dest];
2154 if (dest_allocno->start_point > src_allocno->end_point
2155 || dest_allocno->hard_regno != src_allocno->hard_regno)
2156 return false;
2157 gcc_checking_assert (dest_allocno->is_copy_dest);
2158 src_allocno = dest_allocno;
2161 while (allocno->copy_dest != INVALID_ALLOCNO)
2163 allocno->is_strong_copy_src = 1;
2164 allocno = m_allocnos[allocno->copy_dest];
2165 allocno->is_strong_copy_dest = 1;
2167 return true;
2170 // ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being
2171 // chained together. See whether chaining them requires the containing
2172 // groups to have the same stride, or whether it requires them to have
2173 // different strides. Return 1 if they should have the same stride,
2174 // -1 if they should have different strides, or 0 if it doesn't matter.
2175 int
2176 early_ra::strided_polarity_pref (allocno_info *allocno1,
2177 allocno_info *allocno2)
2179 if (allocno1->offset + 1 < allocno1->group_size
2180 && allocno2->offset + 1 < allocno2->group_size)
2182 if (is_chain_candidate (allocno1 + 1, allocno2 + 1, ALL_REASONS))
2183 return 1;
2184 else
2185 return -1;
2188 if (allocno1->offset > 0 && allocno2->offset > 0)
2190 if (is_chain_candidate (allocno1 - 1, allocno2 - 1, ALL_REASONS))
2191 return 1;
2192 else
2193 return -1;
2196 return 0;
2199 // Decide which groups should be strided. Also complete "strong" copy chains.
2200 void
2201 early_ra::find_strided_accesses ()
2203 // This function forms a graph of allocnos, linked by equivalences and
2204 // natural copy chains. It temporarily uses chain_next to record the
2205 // reverse of equivalence edges (related_allocno) and chain_prev to record
2206 // the reverse of copy edges (copy_dest).
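// Edge J of allocno A is numbered A * ARRAY_SIZE (links) + J in the
// visited_edges bitmap below.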
2207 unsigned int allocno_info::*links[] = {
2208 &allocno_info::chain_next,
2209 &allocno_info::chain_prev,
2210 &allocno_info::copy_dest,
2211 &allocno_info::related_allocno
2214 // Set up the temporary reverse edges. Check for strong copy chains.
2215 for (unsigned int i = m_allocnos.length (); i-- > 0; )
2217 auto *allocno1 = m_allocnos[i];
2218 if (allocno1->copy_dest != INVALID_ALLOCNO)
2219 m_allocnos[allocno1->copy_dest]->chain_prev = allocno1->id;
2220 if (allocno1->related_allocno != INVALID_ALLOCNO)
2221 m_allocnos[allocno1->related_allocno]->chain_next = allocno1->id;
2223 if (allocno1->is_strong_copy_src
2224 && !allocno1->is_copy_dest
2225 && !consider_strong_copy_src_chain (allocno1))
2226 allocno1->is_strong_copy_src = false;
2229 // Partition the graph into cliques based on edges that have the following
2230 // properties:
2232 // - the edge joins two allocnos whose groups have a free choice between
2233 // consecutive and strided allocations.
2235 // - the two groups have a relative strided polarity preference (that is
2236 // they should make the same choice between consecutive and strided,
2237 // or they should make opposite choices).
2239 // Assign relative polarities to each group connected in this way.
2241 // The aim is to discover natural move-free striding choices, which will
2242 // often exist in carefully written ACLE code.
2243 unsigned int num_edges = m_allocnos.length () * ARRAY_SIZE (links);
2244 auto_sbitmap visited_edges (num_edges);
2245 bitmap_clear (visited_edges);
2247 auto_vec<unsigned int, 32> worklist;
2248 for (unsigned int i = 0; i < num_edges; ++i)
2250 if (!bitmap_set_bit (visited_edges, i))
2251 continue;
2252 worklist.quick_push (i);
2253 while (!worklist.is_empty ())
2255 auto ei = worklist.pop ();
2256 auto *allocno1 = m_allocnos[ei / ARRAY_SIZE (links)];
2257 auto ai2 = allocno1->*links[ei % ARRAY_SIZE (links)];
2258 if (ai2 == INVALID_ALLOCNO)
2259 continue;
2261 auto *allocno2 = m_allocnos[ai2];
2262 auto *group1 = allocno1->group ();
2263 auto *group2 = allocno2->group ();
2264 if (!group1->has_flexible_stride || !group2->has_flexible_stride)
2265 continue;
2267 int pref = strided_polarity_pref (allocno1, allocno2);
2268 if (pref == 0)
2269 continue;
2271 for (auto *group : { group1, group2 })
2272 for (auto &allocno : group->allocnos ())
2273 for (unsigned int j = 0; j < ARRAY_SIZE (links); ++j)
2274 if (bitmap_set_bit (visited_edges, allocno.id * 4 + j))
2275 worklist.safe_push (allocno.id * 4 + j);
2277 if (group1->strided_polarity)
2278 group2->strided_polarity = group1->strided_polarity * pref;
2279 else if (group2->strided_polarity)
2280 group1->strided_polarity = group2->strided_polarity * pref;
2281 else
2283 group1->strided_polarity = 1;
2284 group2->strided_polarity = pref;
2289 // Now look for edges between allocnos in multi-register groups where:
2291 // - the two groups have a relative strided polarity preference (as above).
2293 // - one group (G1) has a free choice between consecutive and strided
2294 // allocations.
2296 // - the other group (G2) must use consecutive allocations.
2298 // Update G1's individual preference for strided or consecutive allocations
2299 // based on G2. If the previous loop chose a polarity for G1, work out
2300 // whether it is better for polarity 1 or -1 to correspond to consecutive
2301 // allocation.
2302 int consecutive_pref = 0;
2303 for (unsigned int i = m_allocnos.length (); i-- > 0; )
2305 auto *allocno1 = m_allocnos[i];
2306 for (auto link : links)
2308 auto ai2 = allocno1->*link;
2309 if (ai2 == INVALID_ALLOCNO)
2310 continue;
2312 auto *allocno2 = m_allocnos[ai2];
2313 auto *group1 = allocno1->group ();
2314 auto *group2 = allocno2->group ();
2315 if (group1->has_flexible_stride == group2->has_flexible_stride)
2316 continue;
2318 int pref = strided_polarity_pref (allocno1, allocno2);
2319 if (pref == 0)
2320 continue;
2322 auto *group = (group1->has_flexible_stride ? group1 : group2);
2323 consecutive_pref += group->strided_polarity * pref;
2324 group->consecutive_pref += pref;
2328 // If it doesn't matter whether polarity 1 or -1 corresponds to consecutive
2329 // allocation, arbitrarily pick 1.
2330 if (consecutive_pref == 0)
2331 consecutive_pref = 1;
2333 // Record which multi-register groups should use strided allocations.
2334 // Clear out the temporary edges.
2335 for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
2337 auto *allocno = m_allocnos[ai];
2338 allocno->chain_prev = INVALID_ALLOCNO;
2339 allocno->chain_next = INVALID_ALLOCNO;
2341 if (allocno->offset != 0)
2342 continue;
2344 auto *group = allocno->group ();
2345 if (!group->has_flexible_stride)
2346 continue;
2348 bool make_strided = (group->strided_polarity
2349 ? (consecutive_pref * group->strided_polarity) < 0
2350 : group->consecutive_pref < 0);
2351 if (dump_file && (dump_flags & TDF_DETAILS))
2352 fprintf (dump_file, "Allocno [%d:%d]: strided polarity %d,"
2353 " consecutive pref %d, %s\n",
2354 allocno->id, allocno->id + group->size - 1,
2355 group->strided_polarity, group->consecutive_pref,
2356 make_strided ? "making strided" : "keeping consecutive");
2357 if (!make_strided)
2358 continue;
2360 // 2-register groups have a stride of 8 FPRs and must start in
2361 // registers matching the mask 0x17. 4-register groups have a stride
2362 // of 4 FPRs and must start in registers matching the mask 0x13.
2363 group->stride = group->size == 2 ? 8 : 4;
2364 gcc_checking_assert (group->fpr_candidates
2365 == (group->size == 2 ? 0x55555555 : 0x11111111));
2366 group->fpr_candidates = (group->size == 2 ? 0xff00ff : 0xf000f);
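// For example, 0xff00ff allows the start registers v0-v7 and v16-v23
// (numbers matching mask 0x17), and 0xf000f allows v0-v3 and v16-v19
// (numbers matching mask 0x13).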
2370 // Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=>
2371 // result that puts allocnos in order of increasing FIELD.
2372 template<unsigned int early_ra::allocno_info::*field>
2373 int
2374 early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
2376 auto *allocno1 = *(allocno_info *const *) allocno1_ptr;
2377 auto *allocno2 = *(allocno_info *const *) allocno2_ptr;
2379 if (allocno1->*field != allocno2->*field)
2380 return allocno1->*field < allocno2->*field ? -1 : 1;
2381 return (allocno1->id < allocno2->id ? -1
2382 : allocno1->id == allocno2->id ? 0 : 1);
2385 // Return true if we should consider chaining ALLOCNO1 onto the head
2386 // of ALLOCNO2. STRICTNESS says whether we should take copy-elision
2387 // heuristics into account, or whether we should just consider things
2388 // that matter for correctness.
2390 // This is just a local test of the two allocnos; it doesn't guarantee
2391 // that chaining them would give a self-consistent system.
2392 bool
2393 early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2,
2394 test_strictness strictness)
2396 if (allocno2->is_shared ())
2397 return false;
2399 while (allocno1->is_equiv)
2400 allocno1 = m_allocnos[allocno1->related_allocno];
2402 if (allocno2->start_point >= allocno1->end_point
2403 && !allocno2->is_equiv_to (allocno1->id))
2404 return false;
2406 if (allocno1->is_earlyclobbered
2407 && allocno1->end_point == allocno2->start_point + 1)
2408 return false;
2410 if (strictness == ALL_REASONS && allocno2->is_copy_dest)
2412 if (allocno1->copy_dest != allocno2->id)
2413 return false;
2414 if (allocno2->is_strong_copy_dest && !allocno1->is_strong_copy_src)
2415 return false;
2417 return true;
2420 // We're trying to chain allocno ALLOCNO1 to a later allocno.
2421 // Rate how good a choice ALLOCNO2 would be, with higher being better.
2422 int
2423 early_ra::rate_chain (allocno_info *allocno1, allocno_info *allocno2)
2425 int score = 0;
2426 if (allocno2->is_strong_copy_dest)
2427 score += 256;
2428 else if (allocno2->is_copy_dest)
2429 score += 128;
2431 // Prefer well-aligned matches.
2432 auto *group1 = allocno1->group ();
2433 auto *group2 = allocno2->group ();
2434 if (group1->stride == 1 && group2->stride == 1)
2436 unsigned int min_size = std::min (group1->color_rep ()->size,
2437 group2->color_rep ()->size);
2438 if ((group1->color_rep_offset + allocno1->offset) % min_size
2439 == (group2->color_rep_offset + allocno2->offset) % min_size)
2440 score += min_size;
2441 else
2442 score -= min_size;
2444 return score;
2447 // Sort the chain_candidate_infos at ARG1 and ARG2 in order of decreasing
2448 // score.
2449 int
2450 early_ra::cmp_chain_candidates (const void *arg1, const void *arg2)
2452 auto &candidate1 = *(const chain_candidate_info *) arg1;
2453 auto &candidate2 = *(const chain_candidate_info *) arg2;
2454 if (candidate1.score != candidate2.score)
2455 return candidate1.score > candidate2.score ? -1 : 1;
2457 // Prefer to increase the gap between uses of the allocated register,
2458 // to give the scheduler more freedom.
2459 auto *allocno1 = candidate1.allocno;
2460 auto *allocno2 = candidate2.allocno;
2461 if (allocno1->start_point != allocno2->start_point)
2462 return allocno1->start_point < allocno2->start_point ? -1 : 1;
2464 if (allocno1 != allocno2)
2465 return allocno1->id < allocno2->id ? -1 : 1;
2467 return 0;
2470 // Join the chains of allocnos that start at HEADI1 and HEADI2.
2471 // HEADI1 is either empty or a single allocno.
2472 void
2473 early_ra::chain_allocnos (unsigned int &headi1, unsigned int &headi2)
2475 if (headi1 == INVALID_ALLOCNO)
2476 headi1 = headi2;
2477 else if (headi2 == INVALID_ALLOCNO)
2478 headi2 = headi1;
2479 else
2481 auto *head1 = m_allocnos[headi1];
2482 auto *head2 = m_allocnos[headi2];
2483 gcc_checking_assert (head1->chain_next == INVALID_ALLOCNO
2484 && head1->chain_prev == INVALID_ALLOCNO
2485 && head2->chain_prev == INVALID_ALLOCNO);
2487 if (head1->is_equiv
2488 && m_allocnos[head1->related_allocno]->copy_dest == headi2)
2490 head1->is_copy_dest = head2->is_copy_dest;
2491 head1->is_strong_copy_dest = head2->is_strong_copy_dest;
2492 m_allocnos[head1->related_allocno]->copy_dest = headi1;
2494 head1->chain_next = headi2;
2495 head2->chain_prev = headi1;
2497 headi2 = headi1;
2501 // Add GROUP2's FPR information to GROUP1's, given that GROUP2 starts
2502 // OFFSET allocnos into GROUP1.
2503 void
2504 early_ra::merge_fpr_info (allocno_group_info *group1,
2505 allocno_group_info *group2,
2506 unsigned int offset)
2508 group1->fpr_size = std::max (group1->fpr_size, group2->fpr_size);
2509 group1->fpr_candidates &= (group2->fpr_candidates
2510 >> (offset * group1->stride));
2513 // Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
2514 // ends up being at allocno offset REP_OFFSET from the start of REP.
2515 void
2516 early_ra::set_single_color_rep (allocno_info *allocno, allocno_group_info *rep,
2517 unsigned int rep_offset)
2519 auto *group = allocno->group ();
2520 if (group->m_color_rep == rep)
2521 return;
2523 group->m_color_rep = rep;
2524 gcc_checking_assert (multiple_p (group->stride, rep->stride));
2525 unsigned int factor = group->stride / rep->stride;
2526 gcc_checking_assert (rep_offset >= allocno->offset * factor);
2527 group->color_rep_offset = rep_offset - allocno->offset * factor;
2528 merge_fpr_info (rep, group, group->color_rep_offset);
2531 // REP1 and REP2 are color representatives. Change REP1's color representative
2532 // to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2.
2533 void
2534 early_ra::set_color_rep (allocno_group_info *rep1, allocno_group_info *rep2,
2535 unsigned int rep2_offset)
2537 gcc_checking_assert (rep1 != rep2
2538 && rep2->m_color_rep == rep2
2539 && multiple_p (rep1->stride, rep2->stride));
2541 auto heads1 = rep1->chain_heads ();
2542 auto heads2 = rep2->chain_heads ();
2543 for (unsigned int i1 = 0; i1 < heads1.size (); ++i1)
2544 if (heads1[i1] != INVALID_ALLOCNO)
2546 unsigned int i2 = rep2_offset + i1 * rep1->stride / rep2->stride;
2547 if (heads2[i2] == INVALID_ALLOCNO)
2548 heads2[i2] = heads1[i1];
2549 else
2550 gcc_checking_assert (heads2[i2] == heads1[i1]);
2551 set_single_color_rep (m_allocnos[heads1[i1]], rep2, i2);
2555 // Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2.
2556 // Return true on success.
2557 bool
2558 early_ra::try_to_chain_allocnos (allocno_info *allocno1,
2559 allocno_info *allocno2)
2561 auto *group1 = allocno1->group ()->color_rep ();
2562 auto *group2 = allocno2->group ()->color_rep ();
2564 // Avoid trying to tie different subgroups of the same group. This can
2565 // happen if the parts of a register are defined and used piecemeal.
2566 if (group1 == group2)
2567 return false;
2569 // The stride (in FPRs) between allocnos of each color representative.
2570 auto fpr_stride1 = group1->stride;
2571 auto fpr_stride2 = group2->stride;
2573 // The offset (in FPRs) of each allocno group from its color representative.
2574 auto fpr_offset1 = allocno1->group ()->color_rep_offset * fpr_stride1;
2575 auto fpr_offset2 = allocno2->group ()->color_rep_offset * fpr_stride2;
2577 // The offset (in FPRs) of each allocno from its color representative.
2578 fpr_offset1 += allocno1->offset * allocno1->group ()->stride;
2579 fpr_offset2 += allocno2->offset * allocno2->group ()->stride;
2581 // The FPR overlap is in multiples of the larger stride.
2582 auto max_fpr_stride = std::max (fpr_stride1, fpr_stride2);
2583 auto min_fpr_offset = std::min (fpr_offset1, fpr_offset2);
2584 auto fpr_overlap_offset = ROUND_DOWN (min_fpr_offset, max_fpr_stride);
2586 // The offset (in FPRs) of the start of the overlapping region from
2587 // each color representative.
2588 fpr_offset1 -= fpr_overlap_offset;
2589 fpr_offset2 -= fpr_overlap_offset;
2591 // The number of FPRs in each color representative after the start
2592 // of the overlapping region.
2593 auto fpr_after1 = (group1->size - 1) * fpr_stride1 - fpr_offset1;
2594 auto fpr_after2 = (group2->size - 1) * fpr_stride2 - fpr_offset2;
2596 auto min_fpr_after = std::min (fpr_after1, fpr_after2);
2598 // The number of overlapping allocnos.
2599 auto allocno_overlap_size = min_fpr_after / max_fpr_stride + 1;
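// As a worked example (assuming both groups are their own color
// representatives): if ALLOCNO1 is at offset 1 of a 2-allocno group
// and ALLOCNO2 is at offset 2 of a 4-allocno group, both with an FPR
// stride of 1, then fpr_overlap_offset is 1 and the overlap spans 2
// allocnos, pairing (group1[0], group2[1]) and (group1[1], group2[2]).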
2601 // The offset (in allocnos) of the overlapping region from the start
2602 // of each color representative.
2603 auto allocno_offset1 = fpr_offset1 / fpr_stride1;
2604 auto allocno_offset2 = fpr_offset2 / fpr_stride2;
2606 // The stride (in allocnos) between overlapping allocnos.
2607 auto allocno_stride1 = max_fpr_stride / fpr_stride1;
2608 auto allocno_stride2 = max_fpr_stride / fpr_stride2;
2610 // Reject combinations that are impossible to allocate.
2611 auto fprs1 = group1->fpr_candidates;
2612 auto fprs2 = group2->fpr_candidates;
2613 if (fpr_offset1 > fpr_offset2)
2614 fprs2 >>= (fpr_offset1 - fpr_offset2);
2615 else
2616 fprs1 >>= (fpr_offset2 - fpr_offset1);
2617 if ((fprs1 & fprs2) == 0)
2619 if (dump_file && (dump_flags & TDF_DETAILS))
2620 fprintf (dump_file, " - cannot chain %d->%d, no FPRs in common"
2621 " (%08x@%d and %08x@%d)\n", allocno1->id, allocno2->id,
2622 group1->fpr_candidates, fpr_offset1,
2623 group2->fpr_candidates, fpr_offset2);
2624 return false;
2627 // Check whether the chain can be formed.
2628 auto heads1 = group1->chain_heads ();
2629 auto heads2 = group2->chain_heads ();
2630 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2632 auto headi1 = heads1[allocno_offset1 + i * allocno_stride1];
2633 auto headi2 = heads2[allocno_offset2 + i * allocno_stride2];
2634 if (headi1 != INVALID_ALLOCNO && headi2 != INVALID_ALLOCNO)
2636 auto *head1 = m_allocnos[headi1];
2637 auto *head2 = m_allocnos[headi2];
2638 if (head1->chain_next != INVALID_ALLOCNO)
2639 return false;
2640 if (!is_chain_candidate (head1, head2, CORRECTNESS_ONLY))
2641 return false;
2645 if (dump_file && (dump_flags & TDF_DETAILS))
2647 fprintf (dump_file, " - chaining allocnos [");
2648 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2649 fprintf (dump_file, "%s%d", i ? "," : "",
2650 heads1[allocno_offset1 + i * allocno_stride1]);
2651 fprintf (dump_file, "] and [");
2652 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2653 fprintf (dump_file, "%s%d", i ? "," : "",
2654 heads2[allocno_offset2 + i * allocno_stride2]);
2655 fprintf (dump_file, "]\n");
2658 // Chain the allocnos, updating the chain heads.
2659 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2660 chain_allocnos (heads1[allocno_offset1 + i * allocno_stride1],
2661 heads2[allocno_offset2 + i * allocno_stride2]);
2663 // Pick a color representative for the merged groups.
2664 allocno_group_info *new_rep;
2665 if (allocno_offset1 == 0
2666 && group1->size == allocno_overlap_size * allocno_stride1
2667 && multiple_p (fpr_stride1, fpr_stride2))
2669 // The first group fits within the second.
2670 set_color_rep (group1, group2, allocno_offset2);
2671 new_rep = group2;
2673 else if (allocno_offset2 == 0
2674 && group2->size == allocno_overlap_size * allocno_stride2
2675 && multiple_p (fpr_stride2, fpr_stride1))
2677 // The second group fits within the first.
2678 set_color_rep (group2, group1, allocno_offset1);
2679 new_rep = group1;
2681 else
2683 // We need a new group that is big enough to span both groups.
2684 // The new group always has an FPR stride of 1.
2685 auto max_fpr_offset = std::max (fpr_offset1, fpr_offset2);
2686 auto max_fpr_after = std::max (fpr_after1, fpr_after2);
2687 auto new_size = max_fpr_offset + max_fpr_after + 1;
2688 new_rep = create_allocno_group (INVALID_REGNUM, new_size);
2690 set_color_rep (group1, new_rep, max_fpr_offset - fpr_offset1);
2691 set_color_rep (group2, new_rep, max_fpr_offset - fpr_offset2);
2694 if (dump_file && (dump_flags & TDF_DETAILS))
2696 fprintf (dump_file, " - new frontier [");
2697 auto new_heads = new_rep->chain_heads ();
2698 for (unsigned int i = 0; i < new_heads.size (); ++i)
2700 if (i)
2701 fprintf (dump_file, ",");
2702 if (new_heads[i] == INVALID_ALLOCNO)
2703 fprintf (dump_file, "-");
2704 else
2705 fprintf (dump_file, "%d", new_heads[i]);
2707 fprintf (dump_file, "]\n");
2710 return true;
2713 // Create a color_info for color representative GROUP.
2714 void
2715 early_ra::create_color (allocno_group_info *group)
2717 auto *color = region_allocate<color_info> ();
2718 color->id = m_colors.length ();
2719 color->hard_regno = FIRST_PSEUDO_REGISTER;
2720 color->group = group;
2722 gcc_checking_assert (group->m_color_rep == group);
2723 group->has_color = true;
2724 group->color = m_colors.length ();
2726 m_colors.safe_push (color);
2729 // Form allocnos into chains. Create colors for each resulting clique.
2730 void
2731 early_ra::form_chains ()
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2734 fprintf (dump_file, "\nChaining allocnos:\n");
2736 // Perform (modified) interval graph coloring. First sort by
2737 // increasing start point.
2738 m_sorted_allocnos.reserve (m_allocnos.length ());
2739 m_sorted_allocnos.splice (m_allocnos);
2740 m_sorted_allocnos.qsort (cmp_increasing<&allocno_info::start_point>);
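// Points are numbered during a backwards walk, so this visits allocnos
// whose live ranges start latest in the program first; each allocno
// processed later can then chain onto an already-processed allocno
// that starts after it ends.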
2742 // During this phase, color representatives are only correct for
2743 // unprocessed allocno groups (where the color representative is
2744 // the group itself) and for groups that contain a current chain head.
2745 unsigned int ti = 0;
2746 auto_vec<chain_candidate_info> candidates;
2747 for (unsigned int hi = 0; hi < m_sorted_allocnos.length (); ++hi)
2749 auto *allocno1 = m_sorted_allocnos[hi];
2750 if (allocno1->chain_next != INVALID_ALLOCNO)
2751 continue;
2753 // Record conflicts with direct uses for FPR hard registers.
2754 auto *group1 = allocno1->group ();
2755 for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
2756 if (fpr_conflicts_with_allocno_p (fpr, allocno1))
2757 group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
2759 // Record conflicts due to partially call-clobbered registers.
2760 // (Full clobbers are handled by the previous loop.)
2761 for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
2762 if (call_in_range_p (abi_id, allocno1->start_point,
2763 allocno1->end_point))
2765 auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
2766 group1->fpr_candidates &= ~fprs >> allocno1->offset;
2769 if (allocno1->is_shared ())
2771 if (dump_file && (dump_flags & TDF_DETAILS))
2772 fprintf (dump_file, " Allocno %d shares the same hard register"
2773 " as allocno %d\n", allocno1->id,
2774 allocno1->related_allocno);
2775 auto *allocno2 = m_allocnos[allocno1->related_allocno];
2776 merge_fpr_info (allocno2->group (), group1, allocno2->offset);
2777 m_shared_allocnos.safe_push (allocno1);
2778 continue;
2781 // Find earlier allocnos (in processing order) that could be chained
2782 // to this one.
2783 candidates.truncate (0);
2784 for (unsigned int sci = ti; sci < hi; ++sci)
2786 auto *allocno2 = m_sorted_allocnos[sci];
2787 if (allocno2->chain_prev == INVALID_ALLOCNO)
2789 if (!is_chain_candidate (allocno1, allocno2, ALL_REASONS))
2790 continue;
2791 chain_candidate_info candidate;
2792 candidate.allocno = allocno2;
2793 candidate.score = rate_chain (allocno1, allocno2);
2794 candidates.safe_push (candidate);
2796 else if (sci == ti)
2797 ++ti;
2800 // Sort the candidates by decreasing score.
2801 candidates.qsort (cmp_chain_candidates);
2802 if (dump_file && (dump_flags & TDF_DETAILS))
2804 fprintf (dump_file, " Chain candidates for %d:", allocno1->id);
2805 for (auto &candidate : candidates)
2806 fprintf (dump_file, " %d(%d)", candidate.allocno->id,
2807 candidate.score);
2808 fprintf (dump_file, "\n");
2811 // Pick the first candidate that works.
2812 for (auto &candidate : candidates)
2813 if (try_to_chain_allocnos (allocno1, candidate.allocno))
2814 break;
2817 // Create color_infos for each group. Make sure that each group's
2818 // color representative is up to date.
2819 for (unsigned int hi = m_sorted_allocnos.length (); hi-- > 0; )
2821 auto *allocno = m_sorted_allocnos[hi];
2822 if (allocno->is_shared ())
2823 continue;
2825 auto *rep = allocno->group ()->color_rep ();
2826 if (rep->has_color)
2827 continue;
2829 create_color (rep);
2830 auto heads = rep->chain_heads ();
2831 for (unsigned int i = 0; i < heads.size (); ++i)
2833 unsigned int ai = heads[i];
2834 while (ai != INVALID_ALLOCNO)
2836 allocno = m_allocnos[ai];
2837 set_single_color_rep (allocno, rep, i * rep->stride);
2838 ai = allocno->chain_next;
2844 // Return true if the given FPR (starting at 0) conflicts with allocno
2845 // ALLOCNO.
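// The ranges in m_fpr_ranges are stored in increasing program-point
// order (they were pushed during the backwards walk), which is what
// makes the binary search below valid.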
2846 bool
2847 early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr,
2848 allocno_info *allocno)
2850 auto &ranges = m_fpr_ranges[fpr];
2851 unsigned int start_i = 0;
2852 unsigned int end_i = ranges.length ();
2853 while (start_i < end_i)
2855 unsigned int mid_i = (start_i + end_i) / 2;
2856 auto &range = ranges[mid_i];
2857 if (allocno->end_point > range.start_point)
2858 start_i = mid_i + 1;
2859 else if (allocno->start_point < range.end_point)
2860 end_i = mid_i;
2861 else
2863 if (range.allocno != allocno->id)
2864 return true;
2865 // The FPR is equivalent to ALLOCNO for this particular range.
2866 // See whether ALLOCNO conflicts with a neighboring range.
2867 if (mid_i > 0
2868 && ranges[mid_i - 1].start_point >= allocno->end_point)
2869 return true;
2870 if (mid_i + 1 < ranges.length ()
2871 && ranges[mid_i + 1].end_point <= allocno->start_point)
2872 return true;
2873 return false;
2876 return false;
2879 // Return true if there is a call with ABI identifier ABI_ID in the inclusive
2880 // program point range [START_POINT, END_POINT].
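// Note that points are numbered during a backwards walk, so within a
// range START_POINT >= END_POINT, and m_call_points[ABI_ID] is sorted
// in increasing order; the binary search below relies on both.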
2881 bool
2882 early_ra::call_in_range_p (unsigned int abi_id, unsigned int start_point,
2883 unsigned int end_point)
2885 auto &points = m_call_points[abi_id];
2886 unsigned int start_i = 0;
2887 unsigned int end_i = points.length ();
2888 while (start_i < end_i)
2890 unsigned int mid_i = (start_i + end_i) / 2;
2891 auto point = points[mid_i];
2892 if (end_point > point)
2893 start_i = mid_i + 1;
2894 else if (start_point < point)
2895 end_i = mid_i;
2896 else
2897 return true;
2899 return false;
2902 // Return the set of FPRs for which a value of size SIZE will be clobbered
2903 // by a call to a function with ABI identifier ABI_ID, but would not be
2904 // for some smaller size. The set therefore excludes FPRs that are
2905 // fully-clobbered, like V0 in the base ABI.
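// For example, under the base ABI, v8-v15 preserve only their low
// 64 bits across calls, so for FPR_Q this returns 0xff00 (v8-v15),
// while for FPR_D it returns 0.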
2906 unsigned int
2907 early_ra::partial_fpr_clobbers (unsigned int abi_id, fpr_size_info size)
2909 auto &abi = function_abis[abi_id];
2910 unsigned int clobbers = 0;
2911 machine_mode mode = (size == FPR_D ? V8QImode
2912 : size == FPR_Q ? V16QImode : VNx16QImode);
2913 for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
2914 if (!abi.clobbers_full_reg_p (regno)
2915 && abi.clobbers_reg_p (mode, regno))
2916 clobbers |= 1U << (regno - V0_REGNUM);
2917 return clobbers;
2920 // Process copies between pseudo registers and hard registers and update
2921 // the FPR preferences for the associated colors.
2922 void
2923 early_ra::process_copies ()
2925 for (auto &copy : m_allocno_copies)
2927 auto *allocno = m_allocnos[copy.allocno];
2928 auto *group = allocno->group ();
2929 auto offset = group->color_rep_offset + allocno->offset;
2930 if (offset > copy.fpr)
2931 continue;
2933 unsigned int fpr = copy.fpr - offset;
2934 auto *color = m_colors[group->color_rep ()->color];
2935 color->fpr_preferences[fpr] = MIN (color->fpr_preferences[fpr]
2936 + copy.weight, 127);
2937 color->num_fpr_preferences += copy.weight;
2941 // Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
2942 // result that puts colors in allocation order.
2943 int
2944 early_ra::cmp_allocation_order (const void *color1_ptr, const void *color2_ptr)
2946 auto *color1 = *(color_info *const *) color1_ptr;
2947 auto *color2 = *(color_info *const *) color2_ptr;
2949 // Allocate bigger groups before smaller groups.
2950 if (color1->group->size != color2->group->size)
2951 return color1->group->size > color2->group->size ? -1 : 1;
2953 // Allocate groups with stronger FPR preferences before groups with weaker
2954 // FPR preferences.
2955 if (color1->num_fpr_preferences != color2->num_fpr_preferences)
2956 return color1->num_fpr_preferences > color2->num_fpr_preferences ? -1 : 1;
2958 return (color1->id < color2->id ? -1
2959 : color1->id == color2->id ? 0 : 1);
2962 // Allocate a register to each color. If we run out of registers,
2963 // give up on doing a full allocation of the FPR-based pseudos in the
2964 // region.
2965 void
2966 early_ra::allocate_colors ()
2968 if (dump_file && (dump_flags & TDF_DETAILS))
2969 fprintf (dump_file, "\nAllocating registers:\n");
2971 auto_vec<color_info *> sorted_colors;
2972 sorted_colors.safe_splice (m_colors);
2973 sorted_colors.qsort (cmp_allocation_order);
2975 for (unsigned int i = 0; i < 32; ++i)
2976 if (!crtl->abi->clobbers_full_reg_p (V0_REGNUM + i))
2977 m_call_preserved_fprs |= 1U << i;
2979 for (auto *color : sorted_colors)
2981 unsigned int candidates = color->group->fpr_candidates;
2982 for (unsigned int i = 0; i < color->group->size; ++i)
2983 candidates &= ~(m_allocated_fprs >> i);
2984 unsigned int best = INVALID_REGNUM;
2985 int best_weight = 0;
2986 unsigned int best_recency = 0;
2987 for (unsigned int fpr = 0; fpr <= 32U - color->group->size; ++fpr)
2989 if ((candidates & (1U << fpr)) == 0)
2990 continue;
2991 int weight = color->fpr_preferences[fpr];
2992 unsigned int recency = 0;
2993 // Account for registers that the current function must preserve.
2994 for (unsigned int i = 0; i < color->group->size; ++i)
2996 if (m_call_preserved_fprs & (1U << (fpr + i)))
2997 weight -= 1;
2998 recency = MAX (recency, m_fpr_recency[fpr + i]);
3000 // Prefer higher-numbered registers in the event of a tie.
3001 // This should tend to keep lower-numbered registers free
3002 // for allocnos that require V0-V7 or V0-V15.
3003 if (best == INVALID_REGNUM
3004 || best_weight < weight
3005 || (best_weight == weight && recency <= best_recency))
3007 best = fpr;
3008 best_weight = weight;
3009 best_recency = recency;
3013 if (best == INVALID_REGNUM)
3015 record_allocation_failure ([&](){
3016 fprintf (dump_file, "no free register for color %d", color->id);
3018 return;
3021 color->hard_regno = best + V0_REGNUM;
3022 if (dump_file && (dump_flags & TDF_DETAILS))
3023 fprintf (dump_file, " Allocating [v%d:v%d] to color %d\n",
3024 best, best + color->group->size - 1, color->id);
3025 m_allocated_fprs |= ((1U << color->group->size) - 1) << best;
3029 // See if ALLOCNO ends a subchain of single registers that can be split
3030 // off without affecting the rest of the chain, and without introducing
3031 // any moves. Return the start of the chain if so (which might be ALLOCNO
3032 // itself), otherwise return null.
3033 early_ra::allocno_info *
3034 early_ra::find_independent_subchain (allocno_info *allocno)
3036 // Make sure ALLOCNO ends a natural subchain.
3037 if (auto *next_allocno = chain_next (allocno))
3038 if (next_allocno->start_point + 1 >= allocno->end_point)
3039 return nullptr;
3041 // Check the allocnos in the purported subchain and find the other end.
3042 for (;;)
3044 auto *group = allocno->group ();
3045 if (group->m_color_rep == group)
3046 return nullptr;
3047 if (group->size != 1)
3048 return nullptr;
3050 auto *prev_allocno = chain_prev (allocno);
3051 if (!prev_allocno || allocno->start_point + 1 < prev_allocno->end_point)
3052 return allocno;
3054 allocno = prev_allocno;
3058 // Search the colors starting at index FIRST_COLOR whose FPRs do not belong
3059 // to FPR_CONFLICTS. Return the first such color that has no group. If all
3060 // such colors have groups, instead return the color with the latest
3061 // (smallest) start point.
3062 early_ra::color_info *
3063 early_ra::find_oldest_color (unsigned int first_color,
3064 unsigned int fpr_conflicts)
3066 color_info *best = nullptr;
3067 unsigned int best_start_point = ~0U;
3068 unsigned int best_recency = 0;
3069 for (unsigned int ci = first_color; ci < m_colors.length (); ++ci)
3071 auto *color = m_colors[ci];
3072 unsigned int fpr = color->hard_regno - V0_REGNUM;
3073 if (fpr_conflicts & (1U << fpr))
3074 continue;
3075 unsigned int start_point = 0;
3076 if (color->group)
3078 auto chain_head = color->group->chain_heads ()[0];
3079 start_point = m_allocnos[chain_head]->start_point;
3081 unsigned int recency = m_fpr_recency[fpr];
3082 if (!best
3083 || best_start_point > start_point
3084 || (best_start_point == start_point && recency < best_recency))
3086 best = color;
3087 best_start_point = start_point;
3088 best_recency = recency;
3091 return best;
3094 // If there are some spare FPRs that can be reused without introducing saves,
3095 // restores, or moves, use them to "broaden" the allocation, in order to give
3096 // the scheduler more freedom. This is particularly useful for forming LDPs
3097 // and STPs.
3098 void
3099 early_ra::broaden_colors ()
3101 // Create dummy colors for every leftover FPR that can be used cheaply.
3102 unsigned int first_color = m_colors.length ();
3103 for (unsigned int fpr = 0; fpr < 32; ++fpr)
3104 if (((m_allocated_fprs | m_call_preserved_fprs) & (1U << fpr)) == 0)
3106 auto *color = region_allocate<color_info> ();
3107 color->id = m_colors.length ();
3108 color->hard_regno = V0_REGNUM + fpr;
3109 color->group = nullptr;
3110 m_colors.safe_push (color);
3113 // Exit early if there are no spare FPRs.
3114 if (first_color == m_colors.length ())
3115 return;
3117 // Go through the allocnos in order, seeing if there is a subchain of
3118 // single-FPR allocnos that can be split off from the containing clique.
3119 // Allocate such subchains to the new colors on an oldest-first basis.
3120 for (auto *allocno : m_sorted_allocnos)
3121 if (auto *start_allocno = find_independent_subchain (allocno))
3123 unsigned int fpr_conflicts = 0;
3124 auto *member = allocno;
3125 for (;;)
3127 fpr_conflicts |= ~member->group ()->fpr_candidates;
3128 if (member == start_allocno)
3129 break;
3130 member = m_allocnos[member->chain_prev];
3133 auto *color = find_oldest_color (first_color, fpr_conflicts);
3134 if (!color)
3135 continue;
3137 if (!color->group)
3139 auto *group = allocno->group ();
3140 color->group = group;
3141 group->color = color->id;
3142 group->chain_heads ()[0] = INVALID_ALLOCNO;
3144 else
3146 auto chain_head = color->group->chain_heads ()[0];
3147 auto start_point = m_allocnos[chain_head]->start_point;
3148 if (start_point >= allocno->end_point)
3149 // Allocating to COLOR isn't viable, and it was the best
3150 // option available.
3151 continue;
3153 auto *next_allocno = chain_next (allocno);
3154 if (!next_allocno || next_allocno->start_point <= start_point)
3155 // The current allocation gives at least as much scheduling
3156 // freedom as COLOR would.
3157 continue;
3160 // Unlink the chain.
3161 if (auto *next_allocno = chain_next (allocno))
3162 next_allocno->chain_prev = start_allocno->chain_prev;
3163 if (auto *prev_allocno = chain_prev (start_allocno))
3164 prev_allocno->chain_next = allocno->chain_next;
3166 // Make the subchain use COLOR.
3167 allocno->chain_next = color->group->chain_heads ()[0];
3168 if (dump_file && (dump_flags & TDF_DETAILS))
3169 fprintf (dump_file, "Moving to optional color %d (register %s):",
3170 color->id, reg_names[color->hard_regno]);
3171 for (;;)
3173 auto *group = allocno->group ();
3174 if (dump_file && (dump_flags & TDF_DETAILS))
3175 fprintf (dump_file, " r%d", group->regno);
3176 group->m_color_rep = color->group;
3177 group->color_rep_offset = 0;
3178 if (allocno == start_allocno)
3179 break;
3180 allocno = m_allocnos[allocno->chain_prev];
3182 if (dump_file && (dump_flags & TDF_DETAILS))
3183 fprintf (dump_file, "\n");
3184 color->group->chain_heads ()[0] = start_allocno->id;
3188 // Record the final choice of hard register for each allocno.
3189 void
3190 early_ra::finalize_allocation ()
3192 for (auto *color : m_colors)
3193 if (color->group)
3195 unsigned int fpr = color->hard_regno - V0_REGNUM;
3196 for (unsigned int i = 0; i < color->group->size; ++i)
3197 m_fpr_recency[fpr + i] = m_current_region;
3199 for (auto *allocno : m_allocnos)
3201 if (allocno->is_shared ())
3202 continue;
3203 auto *group = allocno->group ();
3204 auto *rep = group->color_rep ();
3205 auto rep_regno = m_colors[rep->color]->hard_regno;
3206 auto group_regno = rep_regno + group->color_rep_offset;
3207 allocno->hard_regno = group_regno + allocno->offset * group->stride;
3209 for (auto *allocno : m_shared_allocnos)
3210 allocno->hard_regno = m_allocnos[allocno->related_allocno]->hard_regno;
3213 // Replace any allocno references in REFS with the allocated register.
3214 // INSN is the instruction that contains REFS.
3215 bool
3216 early_ra::replace_regs (rtx_insn *insn, df_ref refs)
3218 bool changed = false;
3219 for (df_ref ref = refs; ref; ref = DF_REF_NEXT_LOC (ref))
3221 auto range = get_allocno_subgroup (DF_REF_REG (ref));
3222 if (!range)
3223 continue;
3225 auto new_regno = range.allocno (0)->hard_regno;
3226 if (new_regno == FIRST_PSEUDO_REGISTER)
3228 // Reset a debug instruction if, after DCE, the only remaining
3229 // references to a register are in such instructions.
3230 gcc_assert (DEBUG_INSN_P (insn));
3231 INSN_VAR_LOCATION_LOC (insn) = gen_rtx_UNKNOWN_VAR_LOC ();
3232 return true;
3234 *DF_REF_LOC (ref) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref)), new_regno);
3235 changed = true;
3237 return changed;
3240 // Try to make INSN match its FPR-related constraints. If this needs
3241 // a source operand (SRC) to be copied to a destination operand (DEST)
3242 // before INSN, add the associated (DEST, SRC) pairs to MOVES.
3244 // Return -1 on failure, otherwise return a ?/!-style reject count.
3245 // The reject count doesn't model the moves, just the internal alternative
3246 // preferences.
3247 int
3248 early_ra::try_enforce_constraints (rtx_insn *insn,
3249 vec<std::pair<int, int>> &moves)
3251 if (!constrain_operands (0, get_preferred_alternatives (insn)))
3252 return -1;
3254 // Pick the alternative with the lowest cost.
3255 int best = -1;
3256 auto alts = get_preferred_alternatives (insn);
3257 for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
3259 if (!(alts & ALTERNATIVE_BIT (altno)))
3260 continue;
3262 auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
3263 if (!likely_alternative_match_p (op_alt))
3264 continue;
3266 auto_vec<std::pair<int, int>, 4> new_moves;
3267 for (int opno = 0; opno < recog_data.n_operands; ++opno)
3269 rtx op = recog_data.operand[opno];
3270 if (REG_P (op)
3271 && FP_REGNUM_P (REGNO (op))
3272 && op_alt[opno].matched >= 0)
3274 rtx old_src = recog_data.operand[op_alt[opno].matched];
3275 if (!operands_match_p (op, old_src))
3277 for (int i = 0; i < recog_data.n_operands; ++i)
3278 if (i != opno)
3280 rtx other = recog_data.operand[i];
3281 if (reg_overlap_mentioned_p (op, other))
3283 old_src = NULL_RTX;
3284 break;
3287 if (!old_src)
3288 continue;
3289 new_moves.safe_push ({ opno, op_alt[opno].matched });
3293 int cost = count_rejects (op_alt) + new_moves.length () * 7;
3294 if (best < 0 || cost < best)
3296 best = cost;
3297 moves.truncate (0);
3298 moves.safe_splice (new_moves);
3301 return best;
3304 // Make INSN match its FPR-related constraints.
3305 void
3306 early_ra::enforce_constraints (rtx_insn *insn)
3308 extract_insn (insn);
3309 preprocess_constraints (insn);
3311 // First try with the operands as they are.
3312 auto_vec<std::pair<int, int>, 4> moves;
3313 int cost = try_enforce_constraints (insn, moves);
3315 // Next try taking advantage of commutativity.
3316 for (int opno = 0; opno < recog_data.n_operands - 1; ++opno)
3317 if (recog_data.constraints[opno][0] == '%')
3319 std::swap (*recog_data.operand_loc[opno],
3320 *recog_data.operand_loc[opno + 1]);
3321 std::swap (recog_data.operand[opno],
3322 recog_data.operand[opno + 1]);
3323 auto_vec<std::pair<int, int>, 4> swapped_moves;
3324 int swapped_cost = try_enforce_constraints (insn, swapped_moves);
3325 if (swapped_cost >= 0 && (cost < 0 || swapped_cost < cost))
3327 cost = swapped_cost;
3328 moves.truncate (0);
3329 moves.safe_splice (swapped_moves);
3331 else
3333 std::swap (*recog_data.operand_loc[opno],
3334 *recog_data.operand_loc[opno + 1]);
3335 std::swap (recog_data.operand[opno],
3336 recog_data.operand[opno + 1]);
3340 // The allocation should ensure that there is at least one valid combination.
3341 // It's too late to back out now if not.
3342 gcc_assert (cost >= 0);
3343 for (int i = 0; i < recog_data.n_dups; ++i)
3345 int dup_of = recog_data.dup_num[i];
3346 rtx new_op = *recog_data.operand_loc[dup_of];
3347 if (new_op != recog_data.operand[dup_of])
3348 *recog_data.dup_loc[i] = copy_rtx (new_op);
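// Emit the copies requested by try_enforce_constraints. Each one
// turns, e.g., "x = y * z" with y tied to x into "x = y; x = x * z".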
3350 for (auto move : moves)
3352 int dest_opno = move.first;
3353 int src_opno = move.second;
3354 rtx dest = recog_data.operand[dest_opno];
3355 rtx old_src = recog_data.operand[src_opno];
3356 rtx new_src = lowpart_subreg (GET_MODE (old_src), dest, GET_MODE (dest));
3357 emit_insn_before (gen_move_insn (new_src, old_src), insn);
3358 *recog_data.operand_loc[src_opno] = new_src;
3362 // See whether INSN is an instruction that operates on multi-register vectors,
3363 // and if we have decided to make it use strided rather than consecutive
3364 // accesses. Update the pattern and return true if so.
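// For example, a consecutive SME2 load of { z0.s - z1.s } can become
// a strided load of { z0.s, z8.s } if the group was given a stride
// of 8.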
3365 bool
3366 early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
3368 if (!NONJUMP_INSN_P (insn) || recog_memoized (insn) < 0)
3369 return false;
3371 auto stride_type = get_attr_stride_type (insn);
3372 rtx pat = PATTERN (insn);
3373 rtx op;
3374 if (TARGET_STREAMING_SME2 && stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
3375 op = SET_DEST (pat);
3376 else if (TARGET_STREAMING_SME2 && stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
3377 op = XVECEXP (SET_SRC (pat), 0, 1);
3378 else
3379 return false;
3381 auto range = get_allocno_subgroup (op);
3382 if (!range || range.group->stride == 1)
3383 return false;
3385 gcc_assert (range.start == 0 && range.count == range.group->size);
3386 auto elt_mode = GET_MODE_INNER (GET_MODE (op));
3387 auto single_mode = aarch64_full_sve_mode (elt_mode).require ();
3388 auto_vec<rtx, 4> regs;
3389 for (unsigned int i = 0; i < range.count; ++i)
3390 regs.quick_push (gen_rtx_REG (single_mode, range.allocno (i)->hard_regno));
3392 extract_insn (insn);
3393 if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
3395 auto unspec = XINT (SET_SRC (pat), 1);
3396 if (range.count == 2)
3397 pat = gen_aarch64_strided2 (unspec, GET_MODE (op), regs[0], regs[1],
3398 recog_data.operand[1],
3399 recog_data.operand[2]);
3400 else
3401 pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
3402 regs[0], regs[1], regs[2], regs[3],
3403 recog_data.operand[1],
3404 recog_data.operand[2]);
3406 else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
3408 auto unspec = XINT (SET_SRC (pat), 1);
3409 if (range.count == 2)
3410 pat = gen_aarch64_strided2 (unspec, GET_MODE (op),
3411 recog_data.operand[0],
3412 recog_data.operand[2], regs[0], regs[1]);
3413 else
3414 pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
3415 recog_data.operand[0],
3416 recog_data.operand[2],
3417 regs[0], regs[1], regs[2], regs[3]);
3418 // Ensure correct sharing for the source memory.
3420 // ??? Why doesn't the generator get this right?
3421 XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1)
3422 = *recog_data.dup_loc[0];
3424 else
3425 gcc_unreachable ();
3426 PATTERN (insn) = pat;
3427 INSN_CODE (insn) = -1;
3428 df_insn_rescan (insn);
3429 return true;
3432 // We've successfully allocated the current region. Apply the allocation
3433 // to the instructions.
3434 void
3435 early_ra::apply_allocation ()
3437 for (auto *insn : m_dead_insns)
3438 set_insn_deleted (insn);
  rtx_insn *prev;
  for (auto insn_range : m_insn_ranges)
    for (rtx_insn *insn = insn_range.first;
         insn != insn_range.second;
         insn = prev)
      {
        prev = PREV_INSN (insn);
        if (!INSN_P (insn))
          continue;

        bool changed = maybe_convert_to_strided_access (insn);
        changed |= replace_regs (insn, DF_INSN_DEFS (insn));
        changed |= replace_regs (insn, DF_INSN_USES (insn));
        if (changed && NONDEBUG_INSN_P (insn))
          {
            if (GET_CODE (PATTERN (insn)) != USE
                && GET_CODE (PATTERN (insn)) != CLOBBER
                && !is_move_set (PATTERN (insn)))
              enforce_constraints (insn);

            // A REG_EQUIV note establishes an equivalence throughout
            // the function, but here we're reusing hard registers for
            // multiple pseudo registers.  We also no longer need REG_EQUIV
            // notes that record potential spill locations, since we've
            // allocated the pseudo register without spilling.
            rtx *ptr = &REG_NOTES (insn);
            while (*ptr)
              if (REG_NOTE_KIND (*ptr) == REG_EQUIV)
                *ptr = XEXP (*ptr, 1);
              else
                ptr = &XEXP (*ptr, 1);
          }
        changed |= replace_regs (insn, DF_INSN_EQ_USES (insn));
        if (changed)
          df_insn_rescan (insn);
      }

  for (auto *insn : m_dead_insns)
    delete_insn (insn);
}

// Try to allocate the current region.  Update the instructions if successful.
void
early_ra::process_region ()
{
  for (auto *allocno : m_allocnos)
    {
      allocno->chain_next = INVALID_ALLOCNO;
      allocno->chain_prev = INVALID_ALLOCNO;
    }
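
  // Allocation proceeds in stages: look for strided accesses, chain
  // together allocnos that can share an FPR, let the recorded copies
  // guide the choice of registers, and then assign and finalize the
  // "colors" (FPR numbers).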
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      dump_fpr_ranges ();
      dump_copies ();
      dump_allocnos ();
    }

  find_strided_accesses ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_allocnos ();

  form_chains ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_allocnos ();

  process_copies ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_colors ();

  allocate_colors ();
  if (!m_allocation_successful)
    return;

  broaden_colors ();
  finalize_allocation ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nAllocation successful\nFinal allocation:\n");
      dump_allocnos ();
      dump_colors ();
    }

  apply_allocation ();
}

// Return true if INSN would become dead if we successfully allocate the
// current region.
bool
early_ra::is_dead_insn (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return false;

  rtx dest = SET_DEST (set);
  auto dest_range = get_allocno_subgroup (dest);
  if (!dest_range)
    return false;

  for (auto &allocno : dest_range.allocnos ())
    if (bitmap_bit_p (m_live_allocnos, allocno.id))
      return false;

  if (side_effects_p (set))
    return false;

  // If we can't delete dead exceptions and the insn throws,
  // then the instruction is not dead.
  if (!cfun->can_delete_dead_exceptions
      && !insn_nothrow_p (insn))
    return false;

  return true;
}

// Return true if we could split a single-block region into two regions
// at the current program point.  This is true if the block up to this
// point is worth treating as an independent region and if no registers
// that would affect the allocation are currently live.
inline bool
early_ra::could_split_region_here ()
{
  return (m_accurate_live_ranges
          && bitmap_empty_p (m_live_allocnos)
          && m_live_fprs == 0
          && !m_allocnos.is_empty ());
}

// Return true if any of the pseudos defined by INSN are also defined
// elsewhere.
static bool
defines_multi_def_pseudo (rtx_insn *insn)
{
  df_ref ref;

  FOR_EACH_INSN_DEF (ref, insn)
    {
      unsigned int regno = DF_REF_REGNO (ref);
      if (!HARD_REGISTER_NUM_P (regno) && DF_REG_DEF_COUNT (regno) > 1)
        return true;
    }
  return false;
}

// Build up information about block BB.  IS_ISOLATED is true if the
// block is not part of a larger region.
void
early_ra::process_block (basic_block bb, bool is_isolated)
{
  m_current_bb = bb;
  m_current_point += 1;
  m_current_bb_point = m_current_point;

  // Process live-out FPRs.
  bitmap live_out = df_get_live_out (bb);
  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
    if (bitmap_bit_p (live_out, regno))
      record_fpr_use (regno);

  // Process live-out allocnos.  We don't track individual FPR liveness
  // across block boundaries, so we have to assume that the whole pseudo
  // register is live.
  bitmap_iterator bi;
  unsigned int regno;
  EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb), m_fpr_pseudos,
                            FIRST_PSEUDO_REGISTER, regno, bi)
    {
      auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
      for (auto &allocno : range.allocnos ())
        record_allocno_use (&allocno);
    }

  m_current_point += 1;

  record_artificial_refs (0);
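
  // The artificial references just recorded model the uses and
  // definitions that occur at the bottom of the block.  Now scan the
  // block's real instructions in reverse.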
  bool is_first = true;
  rtx_insn *start_insn = BB_END (bb);
  rtx_insn *insn;
  FOR_BB_INSNS_REVERSE (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
        continue;

      // CLOBBERs are used to prevent pseudos from being upwards exposed.
      // We can ignore them if allocation is successful.
      if (GET_CODE (PATTERN (insn)) == CLOBBER)
        {
          if (get_allocno_subgroup (XEXP (PATTERN (insn), 0)))
            m_dead_insns.safe_push (insn);
          continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (is_first)
            fprintf (dump_file, "\nBlock %d:\n", bb->index);
          fprintf (dump_file, "%6d:", m_current_point);
          pretty_printer rtl_slim_pp;
          rtl_slim_pp.set_output_stream (dump_file);
          print_insn (&rtl_slim_pp, insn, 1);
          pp_flush (&rtl_slim_pp);
          fprintf (dump_file, "\n");
        }
      is_first = false;

      if (is_dead_insn (insn))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "%14s -- dead\n", "");
          m_dead_insns.safe_push (insn);
        }
      else
        {
          record_insn_defs (insn);

          // If we've decided not to allocate the current region, and if
          // all relevant registers are now dead, consider splitting the
          // block into two regions at this point.
          //
          // This is useful when a sequence of vector code ends in a
          // vector-to-scalar-integer operation.  We wouldn't normally
          // want to allocate the scalar integer, since IRA is much better
          // at handling cross-file moves.  But if nothing of relevance is
          // live between the death of the final vector and the birth of the
          // scalar integer, we can try to split the region at that point.
          // We'd then allocate the vector input and leave the real RA
          // to handle the scalar output.
          if (is_isolated
              && !m_allocation_successful
              && could_split_region_here ()
              && !defines_multi_def_pseudo (insn))
            {
              // Mark that we've deliberately chosen to leave the output
              // pseudos to the real RA.
              df_ref ref;
              FOR_EACH_INSN_DEF (ref, insn)
                {
                  unsigned int regno = DF_REF_REGNO (ref);
                  if (!HARD_REGISTER_NUM_P (regno))
                    {
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        fprintf (dump_file, "Ignoring register r%d\n", regno);
                      m_pseudo_regs[regno].flags |= IGNORE_REG;
                      bitmap_clear_bit (m_fpr_pseudos, regno);
                    }
                }

              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "\nStarting new region at insn %d\n",
                         INSN_UID (insn));

              // Start a new region and replay the definitions.  The replay
              // is needed if the instruction sets a hard FP register.
              start_new_region ();
              record_insn_defs (insn);
              start_insn = insn;
            }

          if (auto *call_insn = dyn_cast<rtx_call_insn *> (insn))
            record_insn_call (call_insn);
          record_insn_uses (insn);
          rtx pat = PATTERN (insn);
          if (is_move_set (pat))
            record_copy (SET_DEST (pat), SET_SRC (pat), true);
          else
            {
              extract_insn (insn);
              record_constraints (insn);
            }
        }

      // See whether we have a complete region, with no remaining live
      // allocnos.
      if (is_isolated && could_split_region_here ())
        {
          rtx_insn *prev_insn = PREV_INSN (insn);
          if (m_allocation_successful)
            {
              m_insn_ranges.safe_push ({ start_insn, prev_insn });
              process_region ();
            }
          start_new_region ();
          is_first = true;
          start_insn = prev_insn;
        }
    }
  m_insn_ranges.safe_push ({ start_insn, BB_HEAD (bb) });

  record_artificial_refs (DF_REF_AT_TOP);

  // Process live-in FPRs.
  bitmap live_in = df_get_live_in (bb);
  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
    if (bitmap_bit_p (live_in, regno)
        && (m_live_fprs & (1U << (regno - V0_REGNUM))))
      record_fpr_def (regno);

  // Process live-in allocnos.
  EXECUTE_IF_AND_IN_BITMAP (live_in, m_fpr_pseudos,
                            FIRST_PSEUDO_REGISTER, regno, bi)
    {
      auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
      for (auto &allocno : range.allocnos ())
        if (bitmap_bit_p (m_live_allocnos, allocno.id))
          record_allocno_def (&allocno);
    }

  m_current_point += 1;
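
  // Finally, reset the per-block liveness state ready for the next block.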
  bitmap_clear (m_live_allocnos);
  m_live_fprs = 0;
}

// Divide the function into regions, such that no edge into or out
// of a region has live "FPR pseudos".
void
early_ra::process_blocks ()
{
  auto_sbitmap visited (last_basic_block_for_fn (m_fn));
  auto_sbitmap fpr_pseudos_live_out (last_basic_block_for_fn (m_fn));
  auto_sbitmap fpr_pseudos_live_in (last_basic_block_for_fn (m_fn));

  bitmap_clear (visited);
  bitmap_clear (fpr_pseudos_live_out);
  bitmap_clear (fpr_pseudos_live_in);

  // Record which blocks have live FPR pseudos on entry and exit.
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fn)
    {
      if (bitmap_intersect_p (df_get_live_out (bb), m_fpr_pseudos))
        bitmap_set_bit (fpr_pseudos_live_out, bb->index);
      if (bitmap_intersect_p (df_get_live_in (bb), m_fpr_pseudos))
        bitmap_set_bit (fpr_pseudos_live_in, bb->index);
    }

  // This is incremented by 1 at the start of each region.
  m_current_region = 0;
  memset (m_fpr_recency, 0, sizeof (m_fpr_recency));

  struct stack_node { edge_iterator ei; basic_block bb; };
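
  // stack drives an iterative depth-first search over the edges that
  // carry live FPR pseudos, while region collects the blocks that the
  // search visits.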
  auto_vec<stack_node, 32> stack;
  auto_vec<basic_block, 32> region;

  // Go through the function in reverse postorder and process the region
  // containing each block.
  unsigned int n_blocks = df_get_n_blocks (DF_FORWARD);
  int *order = df_get_postorder (DF_FORWARD);
  for (unsigned int bbi = 0; bbi < n_blocks; ++bbi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, order[bbi]);
      if (bb->index < NUM_FIXED_BLOCKS)
        continue;

      if (!bitmap_set_bit (visited, bb->index))
        continue;

      // Process forward edges before backward edges (so push backward
      // edges first).  Build the region in an approximation of reverse
      // program order.
      if (bitmap_bit_p (fpr_pseudos_live_in, bb->index))
        stack.quick_push ({ ei_start (bb->preds), nullptr });
      if (bitmap_bit_p (fpr_pseudos_live_out, bb->index))
        stack.quick_push ({ ei_start (bb->succs), bb });
      else
        region.safe_push (bb);
      while (!stack.is_empty ())
        {
          auto &node = stack.last ();
          if (ei_end_p (node.ei))
            {
              if (node.bb)
                region.safe_push (node.bb);
              stack.pop ();
              continue;
            }
          edge e = ei_edge (node.ei);
          if (node.bb)
            {
              // A forward edge from a node that has not yet been added
              // to the region.
              if (bitmap_bit_p (fpr_pseudos_live_in, e->dest->index)
                  && bitmap_set_bit (visited, e->dest->index))
                {
                  stack.safe_push ({ ei_start (e->dest->preds), nullptr });
                  if (bitmap_bit_p (fpr_pseudos_live_out, e->dest->index))
                    stack.safe_push ({ ei_start (e->dest->succs), e->dest });
                  else
                    region.safe_push (e->dest);
                }
              else
                ei_next (&node.ei);
            }
          else
            {
              // A backward edge from a node that has already been added
              // to the region.
              if (bitmap_bit_p (fpr_pseudos_live_out, e->src->index)
                  && bitmap_set_bit (visited, e->src->index))
                {
                  if (bitmap_bit_p (fpr_pseudos_live_in, e->src->index))
                    stack.safe_push ({ ei_start (e->src->preds), nullptr });
                  stack.safe_push ({ ei_start (e->src->succs), e->src });
                }
              else
                ei_next (&node.ei);
            }
        }
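
      // region now holds every block connected to bb by edges with live
      // FPR pseudos, in an approximation of reverse program order.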
      m_current_point = 2;
      start_new_region ();

      if (region.is_empty ())
        process_block (bb, true);
      else
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "\nRegion (from %d):", bb->index);
              for (unsigned int j = 0; j < region.length (); ++j)
                fprintf (dump_file, " %d", region[j]->index);
              fprintf (dump_file, "\n");
            }
          for (unsigned int j = 0; j < region.length (); ++j)
            {
              basic_block bb = region[j];
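              // Since region is in approximate reverse program order,
              // j == 0 is the latest block and j == region.length () - 1
              // the earliest.  A boundary block is isolated if no FPR
              // pseudos are live across its outward edge.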
              bool is_isolated
                = ((j == 0 && !bitmap_bit_p (fpr_pseudos_live_out, bb->index))
                   || (j == region.length () - 1
                       && !bitmap_bit_p (fpr_pseudos_live_in, bb->index)));
              process_block (bb, is_isolated);
            }
        }
      region.truncate (0);

      if (!m_allocnos.is_empty () && m_allocation_successful)
        process_region ();
    }
}

// Run the pass on the current function.
void
early_ra::execute ()
{
  df_analyze ();

  preprocess_insns ();
  propagate_pseudo_reg_info ();
  choose_fpr_pseudos ();
  if (bitmap_empty_p (m_fpr_pseudos))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_pseudo_regs ();

  process_blocks ();
  df_verify ();
}

class pass_early_ra : public rtl_opt_pass
{
public:
  pass_early_ra (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_early_ra, ctxt)
  {}

  // opt_pass methods:
  virtual bool gate (function *);
  virtual unsigned int execute (function *);
};

bool
pass_early_ra::gate (function *)
{
  // Require a vector ISA to be enabled.
  if (!TARGET_SIMD && !TARGET_SVE)
    return false;

  if (aarch64_early_ra == AARCH64_EARLY_RA_NONE)
    return false;

  if (aarch64_early_ra == AARCH64_EARLY_RA_STRIDED
      && !TARGET_STREAMING_SME2)
    return false;

  return true;
}

unsigned int
pass_early_ra::execute (function *fn)
{
  early_ra (fn).execute ();
  return 0;
}

} // end namespace

// Create a new instance of the pass.
rtl_opt_pass *
make_pass_aarch64_early_ra (gcc::context *ctxt)
{
  return new pass_early_ra (ctxt);
}