gcc/config/aarch64/aarch64-early-ra.cc
1 // Early register allocation pass.
2 // Copyright (C) 2023-2024 Free Software Foundation, Inc.
3 //
4 // This file is part of GCC.
5 //
6 // GCC is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation; either version 3, or (at your option) any later
9 // version.
11 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 // for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with GCC; see the file COPYING3. If not see
18 // <http://www.gnu.org/licenses/>.
20 // This pass implements a simple form of early register allocation.
21 // It is restricted to FP/SIMD registers, and it only allocates
22 // a region of FP/SIMD usage if it can do so without any spilling.
23 // It punts on anything too complicated, leaving it to the real
24 // register allocator.
26 // There are two main purposes:
28 // (1) The pass runs before scheduling. It therefore has a chance to
29 // bag a spill-free allocation, if there is one, before scheduling
30 // moves things around.
32 // (2) The pass can make use of strided register operations, such as the
33 // strided forms of LD1 and ST1 in SME2.
35 // The allocator works at the level of individual FPRs, rather than whole
36 // pseudo registers. It is mostly intended to help optimize ACLE code.
38 // The pass is very simplistic. There are many things that could be improved.
39 #define IN_TARGET_CODE 1
41 #define INCLUDE_ALGORITHM
42 #define INCLUDE_FUNCTIONAL
43 #define INCLUDE_ARRAY
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "backend.h"
48 #include "rtl.h"
49 #include "df.h"
50 #include "rtl-ssa.h"
51 #include "tree-pass.h"
52 #include "target.h"
53 #include "expr.h"
54 #include "cfgrtl.h"
55 #include "print-rtl.h"
56 #include "insn-attr.h"
57 #include "insn-opinit.h"
58 #include "reload.h"
60 template<typename T>
61 class simple_iterator : public wrapper_iterator<T>
63 public:
64 using wrapper_iterator<T>::wrapper_iterator;
66 simple_iterator &operator-- () { --this->m_contents; return *this; }
67 simple_iterator operator-- (int) { return this->m_contents--; }
68 simple_iterator &operator++ () { ++this->m_contents; return *this; }
69 simple_iterator operator++ (int) { return this->m_contents++; }
72 using namespace rtl_ssa;
74 namespace {
75 const pass_data pass_data_early_ra =
77 RTL_PASS, // type
78 "early_ra", // name
79 OPTGROUP_NONE, // optinfo_flags
80 TV_NONE, // tv_id
81 0, // properties_required
82 0, // properties_provided
83 0, // properties_destroyed
84 0, // todo_flags_start
85 TODO_df_finish, // todo_flags_finish
88 using allocno_iterator = simple_iterator<unsigned int>;
90 // Class that represents one run of the pass.
91 class early_ra
93 public:
94 early_ra (function *fn);
95 ~early_ra ();
96 void execute ();
98 private:
99 // Whether to test only things that are required for correctness,
100 // or whether to take optimization heuristics into account as well.
101 enum test_strictness { CORRECTNESS_ONLY, ALL_REASONS };
103 static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
104 using operand_mask = uint32_t;
106 // Points in the function are represented using "program points".
107 // The program points are allocated in reverse order, with smaller
108 // numbers indicating later points. These special values indicate
109 // the start and end of a region.
110 static constexpr unsigned int START_OF_REGION = ~0U;
111 static constexpr unsigned int END_OF_REGION = 0U;
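// For example, with this reversed numbering a program point P lies
// within an allocno's inclusive live range [START_POINT, END_POINT]
// (where START_POINT >= END_POINT as integers) when:
//
//   allocno->end_point <= P && P <= allocno->start_point
//
// This is just a sketch of the comparison convention; the real tests
// appear in the routines below.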
113 // An invalid allocno index, used to represent no allocno.
114 static constexpr unsigned int INVALID_ALLOCNO = ~0U;
116 // Enumerates the single FPR sizes that matter for register allocation.
117 // Anything smaller than 64 bits is treated as FPR_D.
118 enum fpr_size_info
120 FPR_D,
121 FPR_Q,
122 FPR_Z
125 // A live range for an FPR, containing program points [START_POINT,
126 // END_POINT]. If ALLOCNO is not INVALID_ALLOCNO, the FPR is known
127 // to be equal to ALLOCNO for the duration of the live range.
128 struct fpr_range_info
130 unsigned int start_point;
131 unsigned int end_point;
132 unsigned int allocno;
135 // Flags used in pseudo_reg_info.
137 // Whether the pseudo register occurs in one instruction alternative that
138 // matches (respectively) V0-V7, V0-V15, V0-V31 or a non-FP register.
139 static constexpr unsigned int ALLOWS_FPR8 = 1U << 0;
140 static constexpr unsigned int ALLOWS_FPR16 = 1U << 1;
141 static constexpr unsigned int ALLOWS_FPR32 = 1U << 2;
142 static constexpr unsigned int ALLOWS_NONFPR = 1U << 3;
144 // Likewise whether the register occurs in an instruction that requires
145 // the associated register type.
146 static constexpr unsigned int NEEDS_FPR8 = 1U << 4;
147 static constexpr unsigned int NEEDS_FPR16 = 1U << 5;
148 static constexpr unsigned int NEEDS_FPR32 = 1U << 6;
149 static constexpr unsigned int NEEDS_NONFPR = 1U << 7;
151 // Whether the pseudo register is copied to or from a hard FP register.
152 static constexpr unsigned int HAS_FPR_COPY = 1U << 8;
154 // Whether the pseudo register is copied to or from a hard non-FP register.
155 static constexpr unsigned int HAS_NONFPR_COPY = 1U << 9;
157 // Whether the pseudo register is used as a multi-register vector operand
158 // to an instruction that supports strided accesses, and whether it is used
159 // as a multi-register vector operand in some other non-move instruction.
160 static constexpr unsigned int HAS_FLEXIBLE_STRIDE = 1U << 10;
161 static constexpr unsigned int HAS_FIXED_STRIDE = 1U << 11;
163 // Flags that should be propagated across moves between pseudo registers.
164 static constexpr unsigned int PSEUDO_COPY_FLAGS = ~(HAS_FLEXIBLE_STRIDE
165 | HAS_FIXED_STRIDE);
167 // Information about a copy between two registers.
168 struct reg_copy_info
170 // The two registers, in order.
171 unsigned int regnos[2];
173 // Index I gives the index of the next reg_copy_info involving REGNOS[I],
174 // or 0 if none.
175 unsigned int next_copies[2];
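// A sketch of how such a list can be walked for register REGNO,
// using only the fields above (index 0 is the list terminator):
//
//   unsigned int copyi = m_pseudo_regs[regno].first_copy;
//   while (copyi)
//     {
//       const reg_copy_info &copy = m_pseudo_reg_copies[copyi];
//       unsigned int i = (copy.regnos[1] == regno ? 1 : 0);
//       unsigned int other_regno = copy.regnos[1 - i];
//       // ...use OTHER_REGNO...
//       copyi = copy.next_copies[i];
//     }
//
// dump_pseudo_regs below walks the lists in the same way.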
178 // Information about a pseudo register.
179 struct pseudo_reg_info
181 // Flags describing how the register is used, defined above.
182 unsigned int flags : 16;
184 // The mode of the pseudo register, cached for convenience.
185 machine_mode mode : 16;
187 // The index of the first copy, or 0 if none.
188 unsigned int first_copy;
191 // Information about a group of allocnos that have a fixed offset
192 // relative to each other. The allocnos in each group must be allocated
193 // together.
195 // Allocnos that can share the same hard register are eventually
196 // chained together. These chains represent edges on a graph of
197 // allocnos, such that two allocnos joined by an edge use the same FPR.
198 // These chains are formed between individual allocnos rather than
199 // whole groups, although the system is required to be self-consistent.
200 // Each clique in the graph has at least one "full-width" allocno group
201 // that has one allocno for every FPR that needs to be allocated to
202 // the clique.
204 // One group of allocnos is chosen as the "color representative" of
205 // each clique in the graph. This group will be a full-width group.
206 struct allocno_info;
207 struct allocno_group_info
209 array_slice<unsigned int> chain_heads ();
210 array_slice<allocno_info> allocnos ();
211 allocno_group_info *color_rep ();
212 allocno_info *allocno (unsigned int);
214 // The color representative of the containing clique.
215 allocno_group_info *m_color_rep;
217 // The pseudo register associated with this group, or INVALID_REGNUM
218 // if none.
219 unsigned int regno;
221 // The offset of the first allocno (and thus this group) from the start
222 // of color_rep.
223 unsigned int color_rep_offset : 8;
225 // The number of allocnos in the group, and thus the number of FPRs
226 // that need to be allocated.
227 unsigned int size : 8;
229 // The gap between FPRs in the group. This is normally 1, but can be
230 // higher if we've decided to use strided multi-register accesses.
231 unsigned int stride : 4;
233 // Used temporarily while deciding which allocnos should have non-unit
234 // strides; see find_strided_accesses for details.
235 int consecutive_pref : 4;
236 int strided_polarity : 2;
238 // The largest size of FPR needed by references to the allocno group.
239 fpr_size_info fpr_size : 2;
241 // True if all non-move accesses can be converted to strided form.
242 unsigned int has_flexible_stride : 1;
244 // True if we've assigned a color index to this group.
245 unsigned int has_color : 1;
247 // The mask of FPRs that would make valid choices for the first allocno,
248 // taking the requirements of all the allocnos in the group into account.
249 unsigned int fpr_candidates;
251 // The index of the color that has been assigned to the containing clique.
252 unsigned int color;
255 // Represents a single FPR-sized quantity that needs to be allocated.
256 // Each allocno is identified by index (for compactness).
258 // Quantities that span multiple FPRs are assigned groups of consecutive
259 // allocnos. Quantities that occupy a single FPR are assigned their own
260 // group.
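// For example, a pseudo register whose mode occupies two FPRs (such as
// the tuple mode V2x16QI) gets a group of two allocnos at offsets 0
// and 1, whereas a V16QI pseudo gets a group of size 1.  (The modes
// are illustrative; the group size simply follows hard_regno_nregs
// for V0_REGNUM.)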
261 struct allocno_info
263 allocno_group_info *group ();
264 bool is_shared ();
265 bool is_equiv_to (unsigned int);
267 // The allocno's unique identifier.
268 unsigned int id;
270 // The offset of this allocno into the containing group.
271 unsigned int offset : 8;
273 // The number of allocnos in the containing group.
274 unsigned int group_size : 8;
276 // If the allocno has an affinity with at least one hard register
277 // (so that choosing that hard register would avoid a copy), this is
278 // the number of one such hard register, otherwise it is
279 // FIRST_PSEUDO_REGISTER.
280 unsigned int hard_regno : 8;
282 // Set to 1 if the allocno has a single definition or 2 if it has more.
283 unsigned int num_defs : 2;
285 // True if, at START_POINT, another allocno is copied to this one.
286 // See callers of record_copy for what counts as a copy.
287 unsigned int is_copy_dest : 1;
289 // True if, at START_POINT, another allocno is copied to this one,
290 // and if the allocnos at both ends of the copy chain have an affinity
291 // with the same hard register.
292 unsigned int is_strong_copy_dest : 1;
294 // True if, at END_POINT, this allocno is copied to another one,
295 // and both allocnos have an affinity with the same hard register.
296 unsigned int is_strong_copy_src : 1;
298 // True if the allocno is subject to an earlyclobber at END_POINT,
299 // so that it cannot be tied to the destination of the instruction.
300 unsigned int is_earlyclobbered : 1;
302 // True if this allocno is known to be equivalent to related_allocno
303 // for the whole of this allocno's lifetime.
304 unsigned int is_equiv : 1;
306 // The inclusive range of program points spanned by the allocno.
307 // START_POINT >= END_POINT.
308 unsigned int start_point;
309 unsigned int end_point;
311 // If, at END_POINT, this allocno is copied to another allocno, this
312 // is the index of that allocno, otherwise it is INVALID_ALLOCNO.
313 // See callers of record_copy for what counts as a copy.
314 unsigned int copy_dest;
316 // If this field is not INVALID_ALLOCNO, it indicates one of two things:
318 // - if is_equiv, this allocno is equivalent to related_allocno for
319 // the whole of this allocno's lifetime.
321 // - if !is_equiv, this allocno's live range is a subrange of
322 // related_allocno's and we have committed to making this allocno
323 // share whatever register related_allocno uses.
324 unsigned int related_allocno;
326 union
328 // The program point at which the allocno was last defined,
329 // or START_OF_REGION if none. This is only used temporarily
330 // while recording allocnos; after that, chain_next below is
331 // used instead.
332 unsigned int last_def_point;
334 // The next chained allocno in program order (i.e. at lower program
335 // points), or INVALID_ALLOCNO if none.
336 unsigned int chain_next;
339 union
341 // The program point before start_point at which the allocno was
342 // last used, or END_OF_REGION if none. This is only used temporarily
343 // while recording allocnos; after that, chain_prev below is used
344 // instead.
345 unsigned int last_use_point;
347 // The previous chained allocno in program order (i.e. at higher
348 // program points), or INVALID_ALLOCNO if none.
349 unsigned int chain_prev;
353 // Information about a full allocno group or a subgroup of it.
354 // The subgroup can be empty to indicate "none".
355 struct allocno_subgroup
357 array_slice<allocno_info> allocnos ();
358 allocno_info *allocno (unsigned int);
360 // True if a subgroup is present.
361 operator bool () const { return count; }
363 // The containing group.
364 allocno_group_info *group;
366 // The offset of the subgroup from the start of GROUP.
367 unsigned int start;
369 // The number of allocnos in the subgroup.
370 unsigned int count;
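// For example, { group, 1, 2 } describes the second and third allocnos
// of GROUP, while a value with COUNT == 0 converts to false and means
// "no subgroup".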
373 // Represents information about a copy between an allocno and an FPR.
374 // This establishes an affinity between the allocno and the FPR.
375 struct allocno_copy_info
377 // The allocno involved in the copy.
378 unsigned int allocno;
380 // The FPR involved in the copy, relative to V0_REGNUM.
381 unsigned int fpr : 16;
383 // A measure of how strong the affinity between the allocno and FPR is.
384 unsigned int weight : 16;
387 // Information about a possible allocno chain.
388 struct chain_candidate_info
390 // The candidate target allocno.
391 allocno_info *allocno;
393 // A rating of the candidate (higher is better).
394 int score;
397 // Information about an allocno color.
398 struct color_info
400 // The color's unique identifier.
401 int id;
403 // The allocated hard register, when known.
404 unsigned int hard_regno;
406 // The clique's representative group.
407 allocno_group_info *group;
409 // The number of FPR preferences recorded in fpr_preferences.
410 unsigned int num_fpr_preferences;
412 // Weights in favor of choosing each FPR as the first register for GROUP.
413 int8_t fpr_preferences[32];
416 template<typename T, typename... Ts>
417 T *region_allocate (Ts...);
419 allocno_info *chain_prev (allocno_info *);
420 allocno_info *chain_next (allocno_info *);
422 void dump_pseudo_regs ();
423 void dump_fpr_ranges ();
424 void dump_copies ();
425 void dump_allocnos ();
426 void dump_colors ();
428 iterator_range<allocno_iterator> get_group_allocnos (unsigned int);
430 void preprocess_move (rtx, rtx);
431 void process_pseudo_reg_constraints (rtx_insn *);
432 void preprocess_insns ();
434 int fpr_preference (unsigned int);
435 void propagate_pseudo_reg_info ();
437 void choose_fpr_pseudos ();
439 void start_new_region ();
441 allocno_group_info *create_allocno_group (unsigned int, unsigned int);
442 allocno_subgroup get_allocno_subgroup (rtx);
443 void record_fpr_use (unsigned int);
444 void record_fpr_def (unsigned int);
445 void record_allocno_use (allocno_info *);
446 void record_allocno_def (allocno_info *);
447 allocno_info *find_related_start (allocno_info *, allocno_info *, bool);
448 void accumulate_defs (allocno_info *, allocno_info *);
449 void record_copy (rtx, rtx, bool = false);
450 void record_constraints (rtx_insn *);
451 void record_artificial_refs (unsigned int);
452 void record_insn_refs (rtx_insn *);
454 bool consider_strong_copy_src_chain (allocno_info *);
455 int strided_polarity_pref (allocno_info *, allocno_info *);
456 void find_strided_accesses ();
458 template<unsigned int allocno_info::*field>
459 static int cmp_increasing (const void *, const void *);
460 bool is_chain_candidate (allocno_info *, allocno_info *, test_strictness);
461 int rate_chain (allocno_info *, allocno_info *);
462 static int cmp_chain_candidates (const void *, const void *);
463 void chain_allocnos (unsigned int &, unsigned int &);
464 void merge_fpr_info (allocno_group_info *, allocno_group_info *,
465 unsigned int);
466 void set_single_color_rep (allocno_info *, allocno_group_info *,
467 unsigned int);
468 void set_color_rep (allocno_group_info *, allocno_group_info *,
469 unsigned int);
470 bool try_to_chain_allocnos (allocno_info *, allocno_info *);
471 void create_color (allocno_group_info *);
472 void form_chains ();
474 bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info *);
475 bool call_in_range_p (unsigned int, unsigned int, unsigned int);
476 unsigned int partial_fpr_clobbers (unsigned int, fpr_size_info);
478 void process_copies ();
480 static int cmp_allocation_order (const void *, const void *);
481 void allocate_colors ();
482 allocno_info *find_independent_subchain (allocno_info *);
483 color_info *find_oldest_color (unsigned int, unsigned int);
484 void broaden_colors ();
485 void finalize_allocation ();
487 bool replace_regs (rtx_insn *, df_ref);
488 int try_enforce_constraints (rtx_insn *, vec<std::pair<int, int>> &);
489 void enforce_constraints (rtx_insn *);
490 bool maybe_convert_to_strided_access (rtx_insn *);
491 void apply_allocation ();
493 void process_region ();
494 bool is_dead_insn (rtx_insn *);
495 void process_block (basic_block, bool);
496 void process_blocks ();
498 // ----------------------------------------------------------------------
500 // The function we're operating on.
501 function *m_fn;
503 // Information about each pseudo register, indexed by REGNO.
504 auto_vec<pseudo_reg_info> m_pseudo_regs;
506 // All recorded register copies.
507 auto_vec<reg_copy_info> m_pseudo_reg_copies;
509 // The set of pseudos that we've decided to allocate an FPR to.
510 auto_bitmap m_fpr_pseudos;
512 // ----------------------------------------------------------------------
514 // An obstack for allocating information that is referenced by the member
515 // variables below.
516 obstack m_region_obstack;
517 void *m_region_alloc_start;
519 // ----------------------------------------------------------------------
521 // The basic block that we're currently processing.
522 basic_block m_current_bb;
524 // The lowest-numbered program point in the current basic block.
525 unsigned int m_current_bb_point;
527 // The program point that we're currently processing (described above).
528 unsigned int m_current_point;
530 // The set of allocnos that are currently live.
531 auto_bitmap m_live_allocnos;
533 // The set of FPRs that are currently live.
534 unsigned int m_live_fprs;
536 // A unique one-based identifier for the current region.
537 unsigned int m_current_region;
539 // The region in which each FPR was last used, or 0 if none.
540 unsigned int m_fpr_recency[32];
542 // ----------------------------------------------------------------------
544 // A mask of the FPRs that have already been allocated.
545 unsigned int m_allocated_fprs;
547 // A mask of the FPRs that must be at least partially preserved by the
548 // current function.
549 unsigned int m_call_preserved_fprs;
551 // True if we haven't yet failed to allocate the current region.
552 bool m_allocation_successful;
554 // A map from pseudo registers to the first allocno in their associated
555 // allocno groups.
556 hash_map<int_hash<unsigned int, INVALID_REGNUM>,
557 allocno_group_info *> m_regno_to_group;
559 // All recorded copies between allocnos and FPRs.
560 auto_vec<allocno_copy_info> m_allocno_copies;
562 // All allocnos, by index.
563 auto_vec<allocno_info *> m_allocnos;
565 // All allocnos, by increasing START_POINT.
566 auto_vec<allocno_info *> m_sorted_allocnos;
568 // Allocnos for which is_shared is true.
569 auto_vec<allocno_info *> m_shared_allocnos;
571 // All colors, by index.
572 auto_vec<color_info *> m_colors;
574 // The instruction ranges that make up the current region,
575 // as half-open ranges [LAST, FIRST).
576 auto_vec<std::pair<rtx_insn *, rtx_insn *>> m_insn_ranges;
578 // The live ranges of each FPR, in order of increasing program point.
579 auto_vec<fpr_range_info> m_fpr_ranges[32];
581 // For each call ABI id, a list of program points at which a call
582 // with that ABI is made.  Each list is in order of increasing
583 // program point.
584 auto_vec<unsigned int> m_call_points[NUM_ABI_IDS];
586 // A list of instructions that can be removed if allocation succeeds.
587 auto_vec<rtx_insn *> m_dead_insns;
590 // True if PAT is something that would typically be treated as a move.
591 static inline bool
592 is_move_set (rtx pat)
594 if (GET_CODE (pat) != SET)
595 return false;
597 rtx dest = SET_DEST (pat);
598 if (SUBREG_P (dest))
599 dest = SUBREG_REG (dest);
600 if (!OBJECT_P (dest))
601 return false;
603 rtx src = SET_SRC (pat);
604 if (SUBREG_P (src))
605 src = SUBREG_REG (src);
606 if (!OBJECT_P (src) && !CONSTANT_P (src))
607 return false;
609 return true;
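// For example, (set (reg:SI X) (reg:SI Y)) and
// (set (reg:SI X) (const_int 1)) count as moves here, whereas
// (set (reg:SI X) (plus:SI (reg:SI Y) (reg:SI Z))) does not, since
// a PLUS source is neither an object nor a constant.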
612 // Return true if operand OP is likely to match OP_ALT after register
613 // allocation.
614 static bool
615 likely_operand_match_p (const operand_alternative &op_alt, rtx op)
617 // Empty constraints match everything.
618 const char *constraint = op_alt.constraint;
619 if (constraint[0] == 0 || constraint[0] == ',')
620 return true;
622 for (;;)
624 char c = *constraint;
625 int len = CONSTRAINT_LEN (c, constraint);
626 if (c == 0 || c == ',')
627 break;
629 if (c == 'X')
630 return true;
632 auto cn = lookup_constraint (constraint);
633 switch (get_constraint_type (cn))
635 case CT_REGISTER:
636 if (REG_P (op) || SUBREG_P (op))
637 return true;
638 break;
640 case CT_MEMORY:
641 case CT_SPECIAL_MEMORY:
642 case CT_RELAXED_MEMORY:
643 if (MEM_P (op))
644 return true;
645 break;
647 case CT_CONST_INT:
648 case CT_ADDRESS:
649 case CT_FIXED_FORM:
650 if (constraint_satisfied_p (op, cn))
651 return true;
652 break;
655 constraint += len;
658 if (op_alt.matches >= 0)
660 rtx other = recog_data.operand[op_alt.matches];
661 if ((REG_P (other) || SUBREG_P (other))
662 && (REG_P (op) || SUBREG_P (op)))
663 return true;
665 return false;
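// For example, any REG or SUBREG operand is taken to match a register
// alternative such as "r" or "w", even though the eventual hard
// register is not yet known.  The test is deliberately optimistic:
// it asks whether a match is plausible, not guaranteed.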
668 // Return true if the operands of the current instruction are likely to
669 // match OP_ALT.
670 static bool
671 likely_alternative_match_p (const operand_alternative *op_alt)
673 for (int i = 0; i < recog_data.n_operands; ++i)
674 if (!likely_operand_match_p (op_alt[i], recog_data.operand[i]))
675 return false;
676 return true;
679 // Return the sum of the operands' reject weights for OP_ALT,
// i.e. how heavily the alternative is disparaged.
680 static int
681 count_rejects (const operand_alternative *op_alt)
683 int reject = 0;
684 for (int opno = 0; opno < recog_data.n_operands; ++opno)
685 reject += op_alt[opno].reject;
686 return reject;
689 // Allocate a T from the region obstack.
690 template<typename T, typename... Ts>
691 inline T *
692 early_ra::region_allocate (Ts... args)
694 static_assert (std::is_trivially_destructible<T>::value,
695 "destructor won't be called");
696 void *addr = obstack_alloc (&m_region_obstack, sizeof (T));
697 return new (addr) T (std::forward<Ts> (args)...);
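// A typical (hypothetical) use, creating a zero-initialized color_info
// whose storage lives until the next start_new_region:
//
//   color_info *color = region_allocate<color_info> ();
//
// No destructor will run when the obstack is freed, hence the
// static_assert above.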
700 early_ra::early_ra (function *fn) : m_fn (fn), m_live_fprs (0)
702 gcc_obstack_init (&m_region_obstack);
703 m_region_alloc_start = obstack_alloc (&m_region_obstack, 0);
704 bitmap_tree_view (m_live_allocnos);
707 early_ra::~early_ra ()
709 obstack_free (&m_region_obstack, nullptr);
712 // Return an array that, for each allocno A in the group, contains the index
713 // of the allocno at the head of A's chain (that is, the one with the highest
714 // START_POINT). The index is INVALID_ALLOCNO if the chain is empty.
715 inline array_slice<unsigned int>
716 early_ra::allocno_group_info::chain_heads ()
718 auto *start = reinterpret_cast<unsigned int *> (this + 1);
719 return { start, size };
722 // Return the array of allocnos in the group.
723 inline array_slice<early_ra::allocno_info>
724 early_ra::allocno_group_info::allocnos ()
726 gcc_checking_assert (regno != INVALID_REGNUM);
727 auto *chain_end = reinterpret_cast<unsigned int *> (this + 1) + size;
728 auto *allocno_start = reinterpret_cast<allocno_info *> (chain_end);
729 return { allocno_start, size };
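// These accessors rely on each group being a single contiguous
// allocation (see create_allocno_group):
//
//   [ allocno_group_info | SIZE chain heads | SIZE allocno_infos ]
//
// where the trailing allocno_info array is omitted for groups whose
// REGNO is INVALID_REGNUM.  Hence (this + 1) is the start of the
// chain-head array and the allocnos follow immediately after it.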
732 // Return the group's color representative.
733 inline early_ra::allocno_group_info *
734 early_ra::allocno_group_info::color_rep ()
736 gcc_checking_assert (m_color_rep->m_color_rep == m_color_rep);
737 return m_color_rep;
740 // Return the group that contains the allocno.
741 inline early_ra::allocno_group_info *
742 early_ra::allocno_info::group ()
744 auto *chain_end = reinterpret_cast<unsigned int *> (this - offset);
745 return reinterpret_cast<allocno_group_info *> (chain_end - group_size) - 1;
748 // Return true if this allocno's live range is a subrange of related_allocno's
749 // and if we have committed to making this allocno share whatever register
750 // related_allocno uses.
751 inline bool
752 early_ra::allocno_info::is_shared ()
754 return related_allocno != INVALID_ALLOCNO && !is_equiv;
757 // Return true if this allocno is known to be equivalent to ALLOCNO.
758 inline bool
759 early_ra::allocno_info::is_equiv_to (unsigned int allocno)
761 return is_equiv && related_allocno == allocno;
764 // Return the allocnos in the subgroup.
765 inline array_slice<early_ra::allocno_info>
766 early_ra::allocno_subgroup::allocnos ()
768 if (!count)
769 return {};
770 return { &group->allocnos ()[start], count };
773 // Return allocno I in the subgroup, with 0 being the first.
774 inline early_ra::allocno_info *
775 early_ra::allocno_subgroup::allocno (unsigned int i)
777 return &group->allocnos ()[start + i];
780 // Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
781 inline early_ra::allocno_info *
782 early_ra::chain_prev (allocno_info *allocno)
784 if (allocno->chain_prev != INVALID_ALLOCNO)
785 return m_allocnos[allocno->chain_prev];
786 return nullptr;
789 // Return the next (later) allocno in ALLOCNO's chain, or null if none.
790 inline early_ra::allocno_info *
791 early_ra::chain_next (allocno_info *allocno)
793 if (allocno->chain_next != INVALID_ALLOCNO)
794 return m_allocnos[allocno->chain_next];
795 return nullptr;
798 // Dump the information in m_pseudo_regs.
799 void
800 early_ra::dump_pseudo_regs ()
802 fprintf (dump_file, "\nPseudos:\n");
803 fprintf (dump_file, " %6s %6s %6s %6s %6s %6s %8s %s\n",
804 "Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
805 "FPRness", "Copies");
806 pseudo_reg_info unused_reg = {};
807 for (unsigned int regno = FIRST_PSEUDO_REGISTER;
808 regno < m_pseudo_regs.length (); ++regno)
810 const auto &reg = m_pseudo_regs[regno];
811 if (memcmp (&reg, &unused_reg, sizeof (reg)) == 0)
812 continue;
814 fprintf (dump_file, " %6d %6s %6s %6s %6s %6s %8d", regno,
815 reg.flags & NEEDS_FPR8 ? "Req"
816 : reg.flags & ALLOWS_FPR8 ? "OK" : "-",
817 reg.flags & NEEDS_FPR16 ? "Req"
818 : reg.flags & ALLOWS_FPR16 ? "OK" : "-",
819 reg.flags & NEEDS_FPR32 ? "Req"
820 : reg.flags & ALLOWS_FPR32 ? "OK" : "-",
821 reg.flags & NEEDS_NONFPR ? "Req"
822 : reg.flags & ALLOWS_NONFPR ? "OK" : "-",
823 ~reg.flags & HAS_FLEXIBLE_STRIDE ? "-"
824 : reg.flags & HAS_FIXED_STRIDE ? "Some" : "All",
825 fpr_preference (regno));
826 if (reg.flags & HAS_FPR_COPY)
827 fprintf (dump_file, " FPR");
828 if (reg.flags & HAS_NONFPR_COPY)
829 fprintf (dump_file, " Non-FPR");
830 unsigned int copyi = reg.first_copy;
831 while (copyi)
833 const auto &copy = m_pseudo_reg_copies[copyi];
834 if (copy.regnos[0] == regno)
836 fprintf (dump_file, " r%d", copy.regnos[1]);
837 copyi = copy.next_copies[0];
839 else
841 fprintf (dump_file, " r%d", copy.regnos[0]);
842 copyi = copy.next_copies[1];
845 fprintf (dump_file, "\n");
849 // Dump the information in m_fpr_ranges.
850 void
851 early_ra::dump_fpr_ranges ()
853 fprintf (dump_file, "\nFPR live ranges:\n");
854 for (unsigned int fpr = 0; fpr < 32; ++fpr)
856 auto &intervals = m_fpr_ranges[fpr];
857 if (intervals.is_empty ())
858 continue;
860 fprintf (dump_file, " %2d", fpr);
861 for (unsigned int i = 0; i < intervals.length (); ++i)
863 auto &interval = intervals[i];
864 if (i && (i % 4) == 0)
865 fprintf (dump_file, "\n ");
866 fprintf (dump_file, " [ %6d %6d ]", interval.start_point,
867 interval.end_point);
869 fprintf (dump_file, "\n");
873 // Dump the information in m_allocno_copies.
874 void
875 early_ra::dump_copies ()
877 fprintf (dump_file, "\nCopies:\n");
878 fprintf (dump_file, " %8s %3s %6s\n",
879 "Allocno", "FPR", "Weight");
880 for (const auto &copy : m_allocno_copies)
881 fprintf (dump_file, " %8d %3d %6d\n", copy.allocno,
882 copy.fpr, copy.weight);
885 // Dump the information in m_allocnos.
886 void
887 early_ra::dump_allocnos ()
889 char buffer[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
890 fprintf (dump_file, "\nAllocno groups:\n");
891 fprintf (dump_file,
892 " %12s %12s %4s %6s %8s %s\n",
893 "Ids", "Regno", "Size", "Stride", "Cands", "Heads");
894 for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
896 auto *allocno = m_allocnos[ai];
897 if (allocno->offset != 0)
898 continue;
899 auto *group = allocno->group ();
900 snprintf (buffer, sizeof (buffer), "[%d:%d]", allocno->id,
901 allocno->id + group->size - 1);
902 fprintf (dump_file, " %12s", buffer);
903 snprintf (buffer, sizeof (buffer), "r%d[0:%d]", group->regno,
904 group->size - 1);
905 fprintf (dump_file, " %12s %4s %6d %08x", buffer,
906 group->fpr_size == FPR_D ? "D"
907 : group->fpr_size == FPR_Q ? "Q" : "Z",
908 group->stride,
909 group->fpr_candidates);
910 for (auto head : group->chain_heads ())
911 if (head == INVALID_ALLOCNO)
912 fprintf (dump_file, " -");
913 else
914 fprintf (dump_file, " %d", head);
915 fprintf (dump_file, "\n");
918 fprintf (dump_file, "\nAllocno chains:\n");
919 fprintf (dump_file, " %5s %12s %12s %6s %5s %5s %6s %5s\n",
920 "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "Shared", "FPR");
921 for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
923 auto *allocno = m_allocnos[ai];
924 if (allocno->chain_prev != INVALID_ALLOCNO)
925 continue;
926 const char *prefix = "=>";
927 for (;;)
929 auto *group = allocno->group ();
930 fprintf (dump_file, " %2s", prefix);
931 fprintf (dump_file, " %5d", allocno->id);
932 snprintf (buffer, sizeof (buffer), "r%d[%d]", group->regno,
933 allocno->offset);
934 fprintf (dump_file, " %12s", buffer);
935 snprintf (buffer, sizeof (buffer), "[%d,%d]",
936 allocno->start_point, allocno->end_point);
937 fprintf (dump_file, " %11s%s %6s", buffer,
938 allocno->is_earlyclobbered ? "*" : " ",
939 allocno->is_strong_copy_dest ? "Strong"
940 : allocno->is_copy_dest ? "Yes" : "-");
941 if (allocno->copy_dest == INVALID_ALLOCNO)
942 fprintf (dump_file, " %5s", "-");
943 else
944 fprintf (dump_file, " %5d", allocno->copy_dest);
945 if (allocno->is_equiv)
946 fprintf (dump_file, " %5d", allocno->related_allocno);
947 else
948 fprintf (dump_file, " %5s", "-");
949 if (allocno->is_shared ())
950 fprintf (dump_file, " %6d", allocno->related_allocno);
951 else
952 fprintf (dump_file, " %6s", "-");
953 if (allocno->hard_regno == FIRST_PSEUDO_REGISTER)
954 fprintf (dump_file, " %5s", "-");
955 else
956 fprintf (dump_file, " %5s", reg_names[allocno->hard_regno]);
957 fprintf (dump_file, "\n");
958 if (allocno->chain_next == INVALID_ALLOCNO)
959 break;
960 allocno = m_allocnos[allocno->chain_next];
961 prefix = "";
966 // Dump the information in m_colors.
967 void
968 early_ra::dump_colors ()
970 fprintf (dump_file, "\nColors:\n");
971 for (unsigned int i = 0; i < m_colors.length (); ++i)
973 auto *color = m_colors[i];
974 if (!color->group)
975 continue;
977 fprintf (dump_file, " color %d:\n", i);
978 fprintf (dump_file, " chains:\n");
979 auto heads = color->group->chain_heads ();
980 for (unsigned int i = 0; i < color->group->size; ++i)
982 fprintf (dump_file, " %2d:", i);
983 auto ai = heads[i];
984 while (ai != INVALID_ALLOCNO)
986 auto *allocno = m_allocnos[ai];
987 fprintf (dump_file, " r%d[%d]", allocno->group ()->regno,
988 allocno->offset);
989 ai = allocno->chain_next;
991 fprintf (dump_file, "\n");
993 fprintf (dump_file, " FPR candidates:");
994 for (unsigned int fpr = 0; fpr < 32; ++fpr)
995 fprintf (dump_file, "%s%c", fpr % 8 ? "" : " ",
996 color->group->fpr_candidates & (1U << fpr) ? 'Y' : '-');
997 fprintf (dump_file, "\n");
998 fprintf (dump_file, " FPR preferences:");
999 for (unsigned int fpr = 0; fpr < 32; ++fpr)
1000 if (color->fpr_preferences[fpr])
1001 fprintf (dump_file, " %d(%d)", fpr, color->fpr_preferences[fpr]);
1002 fprintf (dump_file, "\n");
1006 // Record any necessary information about a move from SRC to DEST.
1007 void
1008 early_ra::preprocess_move (rtx dest, rtx src)
1010 if (SUBREG_P (dest))
1011 dest = SUBREG_REG (dest);
1012 if (!REG_P (dest))
1013 return;
1015 if (SUBREG_P (src))
1016 src = SUBREG_REG (src);
1017 if (!REG_P (src))
1018 return;
1020 // Sort the registers by increasing REGNO.
1021 rtx regs[] = { dest, src };
1022 if (REGNO (dest) > REGNO (src))
1023 std::swap (regs[0], regs[1]);
1024 unsigned int regno0 = REGNO (regs[0]);
1025 unsigned int regno1 = REGNO (regs[1]);
1027 // Ignore moves between hard registers.
1028 if (HARD_REGISTER_NUM_P (regno1))
1029 return;
1031 // For moves between hard registers and pseudos, just record the type
1032 // of hard register involved.
1033 auto &reg1 = m_pseudo_regs[regno1];
1034 reg1.mode = GET_MODE (regs[1]);
1035 if (HARD_REGISTER_NUM_P (regno0))
1037 reg1.flags |= (FP_REGNUM_P (regno0) ? HAS_FPR_COPY : HAS_NONFPR_COPY);
1038 return;
1041 // Record a move between two pseudo registers.
1042 auto &reg0 = m_pseudo_regs[regno0];
1043 reg0.mode = GET_MODE (regs[0]);
1045 reg_copy_info copy;
1046 copy.regnos[0] = regno0;
1047 copy.regnos[1] = regno1;
1048 copy.next_copies[0] = reg0.first_copy;
1049 copy.next_copies[1] = reg1.first_copy;
1051 reg0.first_copy = reg1.first_copy = m_pseudo_reg_copies.length ();
1052 m_pseudo_reg_copies.safe_push (copy);
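// For example, given the dummy entry that preprocess_insns pushes at
// index 0, recording a move between r100 and r101 and then one between
// r100 and r102 produces:
//
//   m_pseudo_reg_copies[1] = { { 100, 101 }, { 0, 0 } }
//   m_pseudo_reg_copies[2] = { { 100, 102 }, { 1, 0 } }
//
// so r100's copy list is 2 -> 1, while r101 and r102 each have a
// single-entry list.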
1055 // Return true if INSN has a multi-vector operand and if that operand
1056 // could be converted to strided form.
1057 static bool
1058 is_stride_candidate (rtx_insn *insn)
1060 if (recog_memoized (insn) < 0)
1061 return false;
1063 auto stride_type = get_attr_stride_type (insn);
1064 return (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
1065 || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
1068 // Go through the constraints of INSN, which has already been extracted,
1069 // and record any relevant information about pseudo registers.
1070 void
1071 early_ra::process_pseudo_reg_constraints (rtx_insn *insn)
1073 extract_insn (insn);
1074 preprocess_constraints (insn);
1076 // Flags that describe any multi-register vector operands.
1077 unsigned int insn_flags = (is_stride_candidate (insn)
1078 ? HAS_FLEXIBLE_STRIDE
1079 : HAS_FIXED_STRIDE);
1081 auto alts = get_preferred_alternatives (insn);
1083 int operand_matches[MAX_RECOG_OPERANDS];
1084 unsigned int operand_flags[MAX_RECOG_OPERANDS];
1085 for (int i = 0; i < recog_data.n_operands; ++i)
1087 operand_matches[i] = -1;
1088 operand_flags[i] = 0;
1091 // Extract information from the constraints, considering all plausible
1092 // alternatives.
1093 for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
1095 if (!(alts & ALTERNATIVE_BIT (altno)))
1096 continue;
1098 auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
1099 if (!likely_alternative_match_p (op_alt))
1100 continue;
1102 // Use SRC_OPNO's constraints to derive information about DEST_OPNO.
1103 auto record_operand = [&](int src_opno, int dest_opno)
1105 int matches = op_alt[src_opno].matches;
1106 if (matches >= 0)
1107 operand_matches[dest_opno] = matches;
1109 auto cl = alternative_class (op_alt, src_opno);
1110 if (cl != NO_REGS)
1112 if (reg_class_subset_p (cl, FP_REGS))
1113 operand_flags[dest_opno] |= ALLOWS_FPR32;
1114 if (reg_class_subset_p (cl, FP_LO_REGS))
1115 operand_flags[dest_opno] |= ALLOWS_FPR16;
1116 if (reg_class_subset_p (cl, FP_LO8_REGS))
1117 operand_flags[dest_opno] |= ALLOWS_FPR8;
1118 if (!reg_classes_intersect_p (cl, FP_REGS))
1119 operand_flags[dest_opno] |= ALLOWS_NONFPR;
1123 for (int i = 0; i < recog_data.n_operands; ++i)
1125 record_operand (i, i);
1126 if (recog_data.constraints[i][0] == '%')
1128 record_operand (i, i + 1);
1129 record_operand (i + 1, i);
1134 // Process the information we collected above.
1135 for (int i = 0; i < recog_data.n_operands; ++i)
1137 rtx op = recog_data.operand[i];
1138 machine_mode orig_mode = GET_MODE (op);
1139 if (SUBREG_P (op))
1140 op = SUBREG_REG (op);
1142 // Record the accumulated information in m_pseudo_regs.
1143 if (REG_P (op) && !HARD_REGISTER_P (op))
1145 // The flags so far just describe what at least one alternative
1146 // would accept. Calculate the associated NEEDS_* information.
1147 auto flags = operand_flags[i];
1148 if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
1149 flags |= NEEDS_NONFPR;
1150 else if ((flags & ALLOWS_FPR32) && !(flags & ALLOWS_NONFPR))
1152 if (flags & ALLOWS_FPR8)
1153 flags |= NEEDS_FPR8;
1154 if (flags & ALLOWS_FPR16)
1155 flags |= NEEDS_FPR16;
1156 flags |= NEEDS_FPR32;
1159 // Look for multi-register vector operands.
1160 if (VECTOR_MODE_P (orig_mode)
1161 && targetm.hard_regno_mode_ok (V0_REGNUM, orig_mode)
1162 && hard_regno_nregs (V0_REGNUM, orig_mode) > 1)
1163 flags |= insn_flags;
1165 m_pseudo_regs[REGNO (op)].flags |= flags;
1166 m_pseudo_regs[REGNO (op)].mode = GET_MODE (op);
1169 // Treat matching constraints as equivalent to moves.
1170 if (operand_matches[i] >= 0)
1171 preprocess_move (recog_data.operand[operand_matches[i]], op);
1175 // Make one pass through the instructions, collecting information that
1176 // will be needed later.
1177 void
1178 early_ra::preprocess_insns ()
1180 m_pseudo_regs.safe_grow_cleared (max_reg_num ());
1181 m_pseudo_reg_copies.safe_push (reg_copy_info ());
1182 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
1184 if (!NONDEBUG_INSN_P (insn))
1185 continue;
1187 // Mark all registers that occur in addresses as needing a GPR.
1188 vec_rtx_properties properties;
1189 properties.add_insn (insn, true);
1190 for (rtx_obj_reference ref : properties.refs ())
1191 if (ref.is_reg ()
1192 && ref.in_address ()
1193 && !HARD_REGISTER_NUM_P (ref.regno))
1194 m_pseudo_regs[ref.regno].flags |= ALLOWS_NONFPR | NEEDS_NONFPR;
1196 if (GET_CODE (PATTERN (insn)) == USE
1197 || GET_CODE (PATTERN (insn)) == CLOBBER)
1198 continue;
1200 rtx set = single_set (insn);
1201 if (set && is_move_set (set))
1202 preprocess_move (SET_DEST (set), SET_SRC (set));
1203 else
1204 process_pseudo_reg_constraints (insn);
1208 // Return a signed integer that says (roughly) how strong an affinity
1209 // pseudo register REGNO has with FPRs. A positive value indicates
1210 // that we should try to allocate an FPR, a negative value indicates
1211 // that we shouldn't, and 0 indicates neutrality.
1212 int
1213 early_ra::fpr_preference (unsigned int regno)
1215 auto mode = m_pseudo_regs[regno].mode;
1216 auto flags = m_pseudo_regs[regno].flags;
1217 if (mode == VOIDmode || !targetm.hard_regno_mode_ok (V0_REGNUM, mode))
1218 return -3;
1219 else if (flags & HAS_FLEXIBLE_STRIDE)
1220 return 3;
1221 else if (flags & NEEDS_FPR32)
1222 return 2;
1223 else if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
1224 return -2;
1225 else if ((flags & HAS_FPR_COPY) && !(flags & HAS_NONFPR_COPY))
1226 return 1;
1227 else if ((flags & HAS_NONFPR_COPY) && !(flags & HAS_FPR_COPY))
1228 return -1;
1229 else
1230 return 0;
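// For example, a DImode pseudo that is copied only from a GPR has
// HAS_NONFPR_COPY set and scores -1; the same pseudo copied only from
// an FPR scores 1; and a pseudo whose mode cannot be held in an FPR
// at all scores -3.  (Illustrative cases; the ordering of the tests
// above decides ties.)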
1233 // Propagate information about pseudo-registers along copy edges,
1234 // where doing so doesn't create conflicting FPR preferences.
1235 void
1236 early_ra::propagate_pseudo_reg_info ()
1238 struct stack_entry { unsigned int regno, copyi; };
1240 auto_vec<stack_entry, 32> stack;
1241 for (unsigned int i = FIRST_PSEUDO_REGISTER;
1242 i < m_pseudo_regs.length (); ++i)
1244 auto start = m_pseudo_regs[i].first_copy;
1245 if (!start)
1246 continue;
1248 stack.quick_push ({ i, start });
1249 while (!stack.is_empty ())
1251 auto entry = stack.pop ();
1252 auto &copy = m_pseudo_reg_copies[entry.copyi];
1253 auto src_regno = entry.regno;
1254 auto dest_regno = (src_regno == copy.regnos[1]
1255 ? copy.regnos[0]
1256 : copy.regnos[1]);
1257 auto next_copyi = (src_regno == copy.regnos[1]
1258 ? copy.next_copies[1]
1259 : copy.next_copies[0]);
1260 if (next_copyi)
1261 stack.safe_push ({ src_regno, next_copyi });
1263 auto &src_reg = m_pseudo_regs[src_regno];
1264 auto &dest_reg = m_pseudo_regs[dest_regno];
1266 if (src_reg.flags & ~dest_reg.flags & PSEUDO_COPY_FLAGS)
1268 auto src_preference = fpr_preference (src_regno);
1269 auto dest_preference = fpr_preference (dest_regno);
1270 if ((src_preference >= 0 && dest_preference >= 0)
1271 || (src_preference <= 0 && dest_preference <= 0))
1273 dest_reg.flags |= (src_reg.flags & PSEUDO_COPY_FLAGS);
1274 stack.safe_push ({ dest_regno, dest_reg.first_copy });
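// For example, if r100 has HAS_FPR_COPY set and is copied to r101,
// which is in turn copied to r102, the flag flows to r101 and then to
// r102, but only while the two ends of each copy do not have
// opposite-signed fpr_preference values.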
1281 // Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
1282 // accordingly.
1283 void
1284 early_ra::choose_fpr_pseudos ()
1286 for (unsigned int i = FIRST_PSEUDO_REGISTER;
1287 i < m_pseudo_regs.length (); ++i)
1288 if (fpr_preference (i) > 0)
1289 bitmap_set_bit (m_fpr_pseudos, i);
1292 // Clear out information about the previous CFG region (if any)
1293 // and set up the data for a new region.
1294 void
1295 early_ra::start_new_region ()
1297 obstack_free (&m_region_obstack, m_region_alloc_start);
1298 m_regno_to_group.empty ();
1299 m_allocno_copies.truncate (0);
1300 m_allocnos.truncate (0);
1301 m_sorted_allocnos.truncate (0);
1302 m_shared_allocnos.truncate (0);
1303 m_colors.truncate (0);
1304 m_insn_ranges.truncate (0);
1305 for (auto &fpr_ranges : m_fpr_ranges)
1306 fpr_ranges.truncate (0);
1307 for (auto &call_points : m_call_points)
1308 call_points.truncate (0);
1309 gcc_assert (bitmap_empty_p (m_live_allocnos) && m_live_fprs == 0);
1310 m_dead_insns.truncate (0);
1311 m_allocated_fprs = 0;
1312 m_call_preserved_fprs = 0;
1313 m_allocation_successful = true;
1314 m_current_region += 1;
1317 // Create and return an allocno group of size SIZE for register REGNO.
1318 // REGNO can be INVALID_REGNUM if the group just exists to allow
1319 // other groups to be chained together, and does not have any new
1320 // allocnos of its own.
1321 early_ra::allocno_group_info *
1322 early_ra::create_allocno_group (unsigned int regno, unsigned int size)
1324 static_assert (alignof (unsigned int) == alignof (allocno_info),
1325 "allocno_info alignment");
1326 unsigned int num_allocnos = (regno != INVALID_REGNUM ? size : 0);
1328 // Allocate an allocno_group_info, followed by an array of chain heads,
1329 // followed by the allocnos themselves.
1330 size_t alloc_size = (sizeof (allocno_group_info)
1331 + size * sizeof (unsigned int)
1332 + num_allocnos * sizeof (allocno_info));
1333 void *data = obstack_alloc (&m_region_obstack, alloc_size);
1335 // Initialize the group.
1336 auto *group = reinterpret_cast<allocno_group_info *> (data);
1337 memset (group, 0, sizeof (*group));
1338 group->m_color_rep = group;
1339 group->regno = regno;
1340 group->size = size;
1341 group->stride = 1;
1342 group->fpr_size = FPR_D;
1343 group->fpr_candidates = ~0U;
1345 // Initialize the chain heads.
1346 auto heads = group->chain_heads ();
1347 for (unsigned int i = 0; i < heads.size (); ++i)
1348 heads[i] = (i < num_allocnos ? m_allocnos.length () + i : INVALID_ALLOCNO);
1350 // Initialize the allocnos.
1351 if (num_allocnos > 0)
1353 auto allocnos = group->allocnos ();
1354 memset (allocnos.begin (), 0, num_allocnos * sizeof (allocno_info));
1355 for (unsigned int i = 0; i < num_allocnos; ++i)
1357 auto *allocno = &allocnos[i];
1358 allocno->id = m_allocnos.length ();
1359 allocno->offset = i;
1360 allocno->group_size = size;
1361 allocno->hard_regno = FIRST_PSEUDO_REGISTER;
1362 allocno->start_point = END_OF_REGION;
1363 allocno->end_point = START_OF_REGION;
1364 allocno->copy_dest = INVALID_ALLOCNO;
1365 allocno->related_allocno = INVALID_ALLOCNO;
1366 allocno->chain_next = INVALID_ALLOCNO;
1367 allocno->chain_prev = INVALID_ALLOCNO;
1368 m_allocnos.safe_push (allocno);
1371 return group;
1374 // If REG refers to a pseudo register that might be allocated to FPRs,
1375 // return the associated range of allocnos, creating new ones if necessary.
1376 // Return an empty range otherwise.
1377 early_ra::allocno_subgroup
1378 early_ra::get_allocno_subgroup (rtx reg)
1380 if (GET_CODE (reg) == SUBREG)
1382 allocno_subgroup inner = get_allocno_subgroup (SUBREG_REG (reg));
1383 if (!inner)
1384 return {};
1386 if (!targetm.modes_tieable_p (GET_MODE (SUBREG_REG (reg)),
1387 GET_MODE (reg)))
1389 m_allocation_successful = false;
1390 return {};
1393 subreg_info info;
1394 subreg_get_info (V0_REGNUM, GET_MODE (SUBREG_REG (reg)),
1395 SUBREG_BYTE (reg), GET_MODE (reg), &info);
1396 if (!info.representable_p)
1398 m_allocation_successful = false;
1399 return {};
1402 inner.start += info.offset;
1403 inner.count = info.nregs;
1404 return inner;
1407 if (!REG_P (reg) || HARD_REGISTER_P (reg))
1408 return {};
1410 unsigned int regno = REGNO (reg);
1411 if (fpr_preference (regno) <= 0)
1412 return {};
1414 unsigned int count = hard_regno_nregs (V0_REGNUM, GET_MODE (reg));
1415 bool existed;
1416 auto &entry = m_regno_to_group.get_or_insert (regno, &existed);
1417 if (!existed)
1419 auto *group = create_allocno_group (regno, count);
1420 if (dump_file && (dump_flags & TDF_DETAILS))
1422 auto allocnos = group->allocnos ();
1423 fprintf (dump_file, "Creating allocnos [%d:%d] for r%d\n",
1424 allocnos.front ().id, allocnos.back ().id, regno);
1427 auto reg_bits = GET_MODE_BITSIZE (GET_MODE (reg));
1428 auto fpr_bits = exact_div (reg_bits, count);
1429 auto flags = m_pseudo_regs[regno].flags;
1431 // Punt for now if there is a choice to be made between using an
1432 // FPR and a non-FPR.
1433 if ((flags & NEEDS_NONFPR)
1434 || ((flags & ALLOWS_NONFPR)
1435 && !FLOAT_MODE_P (GET_MODE (reg))
1436 && !VECTOR_MODE_P (GET_MODE (reg))))
1437 m_allocation_successful = false;
1439 if (flags & ALLOWS_FPR8)
1440 group->fpr_candidates &= 0xff;
1441 else if (flags & ALLOWS_FPR16)
1442 group->fpr_candidates &= 0xffff;
1443 group->fpr_candidates &= ~0U >> (count - 1);
1445 group->has_flexible_stride = ((flags & HAS_FLEXIBLE_STRIDE) != 0
1446 && (flags & HAS_FIXED_STRIDE) == 0);
1448 group->fpr_size = (maybe_gt (fpr_bits, 128) ? FPR_Z
1449 : maybe_gt (fpr_bits, 64) ? FPR_Q : FPR_D);
1451 entry = group;
1453 return { entry, 0, count };
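// For example, for a pseudo r100 of mode V4x16QI (four FPRs),
// (subreg:V2x16QI (reg:V4x16QI r100) 32) starts 32 bytes in, i.e. at
// FPR 2 of the group, so the result would be { group, 2, 2 }.  (A
// little-endian sketch; subreg_get_info performs the real mapping.)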
1456 // Record a use of FPR REGNO at the current program point, as part of
1457 // a backwards walk over a block.
1458 void
1459 early_ra::record_fpr_use (unsigned int regno)
1461 gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
1462 unsigned int offset = regno - V0_REGNUM;
1463 if (!(m_live_fprs & (1U << offset)))
1465 m_fpr_ranges[offset].safe_push ({ START_OF_REGION, m_current_point,
1466 INVALID_ALLOCNO });
1467 m_live_fprs |= 1U << offset;
1471 // Record a definition of FPR REGNO at the current program point, as part of
1472 // a backwards walk over a block.
1473 void
1474 early_ra::record_fpr_def (unsigned int regno)
1476 gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
1477 unsigned int offset = regno - V0_REGNUM;
1479 // The definition completes the current live range. If the result
1480 // of the definition is used, the live range extends to the last use.
1481 // Otherwise the live range is just a momentary blip at the current point.
1482 auto &ranges = m_fpr_ranges[offset];
1483 if (m_live_fprs & (1U << offset))
1485 ranges.last ().start_point = m_current_point;
1486 m_live_fprs &= ~(1U << offset);
1488 else
1489 ranges.safe_push ({ m_current_point, m_current_point, INVALID_ALLOCNO });
1492 // Record a use of allocno ALLOCNO at the current program point, as part
1493 // of a backwards walk over a block.
1494 void
1495 early_ra::record_allocno_use (allocno_info *allocno)
1497 if (allocno->start_point == m_current_point)
1498 return;
1500 gcc_checking_assert (!allocno->is_shared ());
1501 bitmap_set_bit (m_live_allocnos, allocno->id);
1502 if (allocno->end_point > m_current_point)
1504 allocno->end_point = m_current_point;
1505 allocno->last_def_point = START_OF_REGION;
1506 allocno->last_use_point = END_OF_REGION;
1508 else
1509 allocno->last_use_point = allocno->start_point;
1510 allocno->start_point = m_current_point;
1511 allocno->is_copy_dest = false;
1512 allocno->is_strong_copy_src = false;
1513 allocno->related_allocno = INVALID_ALLOCNO;
1514 allocno->is_equiv = false;
1517 // Record a definition of ALLOCNO at the current program point,
1518 // as part of a backwards walk over a block.  The allocno is known
1519 // to be live.
1520 void
1521 early_ra::record_allocno_def (allocno_info *allocno)
1523 gcc_checking_assert (!allocno->is_shared ());
1524 allocno->last_use_point = allocno->start_point;
1525 allocno->last_def_point = m_current_point;
1526 allocno->start_point = m_current_point;
1527 allocno->num_defs = MIN (allocno->num_defs + 1, 2);
1528 gcc_checking_assert (!allocno->is_copy_dest
1529 && !allocno->is_strong_copy_src);
1530 if (!bitmap_clear_bit (m_live_allocnos, allocno->id))
1531 gcc_unreachable ();
1534 // SRC_ALLOCNO is copied or tied to DEST_ALLOCNO; IS_EQUIV is true if the
1535 // two allocnos are known to be equal. See whether we can mark a chain of
1536 // allocnos ending at DEST_ALLOCNO as related to SRC_ALLOCNO. Return the
1537 // start of the chain if so, otherwise return null.
1539 // If IS_EQUIV, a chain that contains just DEST_ALLOCNO should be treated
1540 // as an equivalence. Otherwise the chain should be shared with SRC_ALLOCNO.
1542 // Sharing chains are a rather hacky workaround for the fact that we
1543 // don't collect segmented live ranges, and that in the end we want to do
1544 // simple interval graph coloring.
1545 early_ra::allocno_info *
1546 early_ra::find_related_start (allocno_info *dest_allocno,
1547 allocno_info *src_allocno, bool is_equiv)
1549 allocno_info *res = nullptr;
1550 for (;;)
1552 if (src_allocno->end_point > dest_allocno->end_point)
1553 // The src allocno dies first.
1554 return res;
1556 if (src_allocno->num_defs != 0)
1558 if (dest_allocno->end_point < m_current_bb_point)
1559 // We don't currently track enough information to handle multiple
1560 // definitions across basic block boundaries.
1561 return res;
1563 if (src_allocno->last_def_point >= dest_allocno->end_point)
1564 // There is another definition during the destination's live range.
1565 return res;
1567 if (is_equiv)
1569 if (dest_allocno->num_defs == 1)
1570 // dest_allocno is equivalent to src_allocno for dest_allocno's
1571 // entire live range. Fall back to that if we can't establish
1572 // a sharing chain.
1573 res = dest_allocno;
1575 else
1577 if (src_allocno->last_use_point >= dest_allocno->end_point)
1578 // src_allocno is live during dest_allocno's live range,
1579 // and the two allocnos do not necessarily have the same value.
1580 return res;
1583 if (dest_allocno->group_size != 1
1584 // Account for definitions by shared registers.
1585 || dest_allocno->num_defs > 1
1586 || DF_REG_DEF_COUNT (dest_allocno->group ()->regno) != 1)
1587 // Currently only single allocnos that are defined once can
1588 // share registers with non-equivalent allocnos. This could be
1589 // relaxed, but at the time of writing, aggregates are not valid
1590 // SSA names and so generally only use a single pseudo throughout
1591 // their lifetime.
1592 return res;
1594 if (dest_allocno->copy_dest == src_allocno->id)
1595 // We've found a complete and valid sharing chain.
1596 return dest_allocno;
1598 if (dest_allocno->copy_dest == INVALID_ALLOCNO)
1599 return res;
1601 auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
1602 if (!is_chain_candidate (dest_allocno, next_allocno, ALL_REASONS))
1603 return res;
1605 dest_allocno = next_allocno;
1606 is_equiv = false;
1610 // Add FROM_ALLOCNO's definition information to TO_ALLOCNO's.
1611 void
1612 early_ra::accumulate_defs (allocno_info *to_allocno,
1613 allocno_info *from_allocno)
1615 if (from_allocno->num_defs > 0)
1617 to_allocno->num_defs = MIN (from_allocno->num_defs
1618 + to_allocno->num_defs, 2);
1619 to_allocno->last_def_point = MAX (to_allocno->last_def_point,
1620 from_allocno->last_def_point);
1624 // Record any relevant allocno-related information for an actual or imagined
1625 // copy from SRC to DEST. FROM_MOVE_P is true if the copy was an explicit
1626 // move instruction, false if it represents one way of satisfying the previous
1627 // instruction's constraints.
1628 void
1629 early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
1631 auto dest_range = get_allocno_subgroup (dest);
1632 auto src_range = get_allocno_subgroup (src);
1633 if (from_move_p
1634 && dest_range
1635 && REG_P (src)
1636 && FP_REGNUM_P (REGNO (src)))
1638 // A copy from an FPR to an allocno group.
1639 unsigned int fpr = REGNO (src) - V0_REGNUM;
1640 m_allocno_copies.safe_push ({ dest_range.allocno (0)->id, fpr,
1641 dest_range.count });
1643 // If the allocno at the other end of the chain of copies from DEST
1644 // has a copy to the same FPR, record that all intervening copy chains
1645 // could become "strong" ones. This indicates that picking the FPR
1646 // avoids a copy at both ends.
1647 unsigned int hard_regno = REGNO (src);
1648 for (auto &dest_allocno : dest_range.allocnos ())
1649 if (dest_allocno.hard_regno == hard_regno++)
1650 dest_allocno.is_strong_copy_src = true;
1652 else if (from_move_p
1653 && src_range
1654 && REG_P (dest)
1655 && FP_REGNUM_P (REGNO (dest)))
1657 // A copy from an allocno group to an FPR.
1658 unsigned int fpr = REGNO (dest) - V0_REGNUM;
1659 m_allocno_copies.safe_push ({ src_range.allocno (0)->id, fpr,
1660 src_range.count });
1661 for (auto &src_allocno : src_range.allocnos ())
1663 // If the copy comes from a move, see whether the destination
1664 // FPR is known to be equal to the source allocno for the FPR's
1665 // last live range.
1666 if (from_move_p && src_allocno.num_defs == 0)
1668 auto &last_range = m_fpr_ranges[fpr].last ();
1669 if (last_range.end_point >= src_allocno.end_point)
1670 last_range.allocno = src_allocno.id;
1672 src_allocno.hard_regno = V0_REGNUM + fpr;
1673 fpr += 1;
1676 else if (src_range && dest_range)
1678 // A copy between two allocno groups. We can only have a mismatched
1679 // number of FPRs for imaginary, non-move copies. In that case
1680 // the matching happens on the common lowparts.
1681 gcc_assert (!from_move_p || src_range.count == dest_range.count);
1682 unsigned int count = std::min (src_range.count, dest_range.count);
1683 if (WORDS_BIG_ENDIAN)
1685 src_range.start += src_range.count - count;
1686 dest_range.start += dest_range.count - count;
1688 src_range.count = count;
1689 dest_range.count = count;
1691 // Ignore (imaginary non-move) copies if the destination is still live.
1692 for (auto &dest_allocno : dest_range.allocnos ())
1693 if (bitmap_bit_p (m_live_allocnos, dest_allocno.id))
1694 return;
1696 for (unsigned int i = 0; i < src_range.count; ++i)
1698 auto *dest_allocno = dest_range.allocno (i);
1699 auto *src_allocno = src_range.allocno (i);
1700 if (src_allocno->end_point > dest_allocno->start_point)
1702 gcc_assert (src_allocno->copy_dest == INVALID_ALLOCNO
1703 || src_allocno->copy_dest == dest_allocno->id);
1704 src_allocno->copy_dest = dest_allocno->id;
1705 src_allocno->hard_regno = dest_allocno->hard_regno;
1706 dest_allocno->is_copy_dest = 1;
1708 else if (auto *start_allocno = find_related_start (dest_allocno,
1709 src_allocno,
1710 from_move_p))
1712 auto *next_allocno = dest_allocno;
1713 for (;;)
1715 next_allocno->related_allocno = src_allocno->id;
1716 next_allocno->is_equiv = (start_allocno == dest_allocno
1717 && from_move_p);
1718 // If we're sharing two allocnos that are not equivalent,
1719 // carry across the definition information. This is needed
1720 // to prevent multiple incompatible attempts to share with
1721 // the same register.
1722 if (next_allocno->is_shared ())
1723 accumulate_defs (src_allocno, next_allocno);
1724 src_allocno->last_use_point
1725 = MAX (src_allocno->last_use_point,
1726 next_allocno->last_use_point);
1728 if (next_allocno == start_allocno)
1729 break;
1730 next_allocno = m_allocnos[next_allocno->copy_dest];
1737 // Record any relevant allocno-related information about the constraints
1738 // on INSN, which has already been extracted.
1739 void
1740 early_ra::record_constraints (rtx_insn *insn)
1742 preprocess_constraints (insn);
1744 int operand_matches[MAX_RECOG_OPERANDS];
1745 for (int i = 0; i < recog_data.n_operands; ++i)
1746 operand_matches[i] = -1;
1748 auto alts = get_preferred_alternatives (insn);
1749 bool any_ok = recog_data.n_alternatives == 0;
1751 // The set of output operands that are earlyclobber in at least one
1752 // alternative.
1753 operand_mask earlyclobber_operands = 0;
1755 // The set of output operands that are matched to inputs in at least
1756 // one alternative.
1757 operand_mask matched_operands = 0;
1759 // The set of output operands that are not matched to inputs in at least
1760 // one alternative.
1761 operand_mask unmatched_operands = 0;
1763 // The set of input operands that are matched to outputs in at least one
1764 // alternative, or that overlap with such an input if the output is not
1765 // earlyclobber. The latter part of the condition copes with things
1766 // like y = x * x, where the first x is tied to the destination, and where
1767 // y is not earlyclobber.
1768 operand_mask matches_operands = 0;
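// An illustrative example (hypothetical pattern): for y = x * x with
// operands {0 = y (output), 1 = x, 2 = x} and one likely alternative in
// which operand 1 is tied to operand 0 without earlyclobber, the loop
// below computes:
//
// earlyclobber_operands = 0b000
// matched_operands = 0b001 (the output is matched to an input)
// unmatched_operands = 0b000
// matches_operands = 0b110 (operand 1 matches; operand 2 overlaps it)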
1770 for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
1772 if (!(alts & ALTERNATIVE_BIT (altno)))
1773 continue;
1775 auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
1776 if (!likely_alternative_match_p (op_alt))
1777 continue;
1779 any_ok = true;
1781 // Update the information for operand DEST_OPNO based on the constraint
1782 // information for operand SRC_OPNO. The numbers can be different for
1783 // swapped commutative operands.
1784 auto record_operand = [&](int src_opno, int dest_opno)
1786 int matches = op_alt[src_opno].matches;
1787 // A matched earlyclobber cannot be used if the same operand value
1788 // occurs in an unmatched operand. E.g. for y = x * x, a matched
1789 // earlyclobber on the first input does not cover the second input.
1790 if (matches >= 0)
1792 rtx op = recog_data.operand[dest_opno];
1793 operand_mask overlaps = 0;
1794 for (int i = 0; i < recog_data.n_operands; ++i)
1795 if (i != dest_opno
1796 && !recog_data.is_operator[i]
1797 && recog_data.operand_type[i] != OP_OUT
1798 && reg_overlap_mentioned_p (op, recog_data.operand[i]))
1799 overlaps |= 1U << i;
1800 if (!op_alt[matches].earlyclobber || overlaps == 0)
1802 operand_matches[dest_opno] = matches;
1803 matches_operands |= (1U << dest_opno) | overlaps;
1808 auto reject = count_rejects (op_alt);
1809 for (int opno = 0; opno < recog_data.n_operands; ++opno)
1811 operand_mask op_mask = operand_mask (1) << opno;
1813 if (recog_data.operand_type[opno] != OP_IN)
1815 if (reject == 0 && op_alt[opno].matched >= 0)
1816 matched_operands |= op_mask;
1817 else
1818 unmatched_operands |= op_mask;
1821 if (op_alt[opno].earlyclobber)
1822 earlyclobber_operands |= op_mask;
1824 // Punt for now on scratches. If we wanted to handle them,
1825 // we'd need to create allocnos for them, like IRA does.
1826 rtx op = recog_data.operand[opno];
1827 if (GET_CODE (op) == SCRATCH
1828 && reg_classes_intersect_p (op_alt[opno].cl, FP_REGS))
1829 m_allocation_successful = false;
1831 // Record filter information, which applies to the first register
1832 // in the operand.
1833 if (auto filters = alternative_register_filters (op_alt, opno))
1834 if (auto range = get_allocno_subgroup (recog_data.operand[opno]))
1835 for (unsigned int fpr = range.start; fpr < 32; ++fpr)
1836 if (!test_register_filters (filters, fpr))
1837 range.group->fpr_candidates &= ~(1U << (fpr - range.start));
1839 if (reject == 0)
1841 // Record possible matched operands.
1842 record_operand (opno, opno);
1843 if (recog_data.constraints[opno][0] == '%')
1845 record_operand (opno, opno + 1);
1846 record_operand (opno + 1, opno);
1852 if (!any_ok)
1854 if (dump_file && (dump_flags & TDF_DETAILS))
1855 fprintf (dump_file, " -- no match\n");
1856 m_allocation_successful = false;
1859 // Record if there is an output operand that is never earlyclobber and never
1860 // matched to an input. See the comment below for how this is used.
1861 rtx dest_op = NULL_RTX;
1862 for (int opno = 0; opno < recog_data.n_operands; ++opno)
1864 auto op_mask = operand_mask (1) << opno;
1865 if (recog_data.operand_type[opno] == OP_OUT
1866 && (earlyclobber_operands & op_mask) == 0
1867 && (matched_operands & op_mask) == 0)
1869 dest_op = recog_data.operand[opno];
1870 break;
1874 for (int opno = 0; opno < recog_data.n_operands; ++opno)
1876 auto op_mask = operand_mask (1) << opno;
1877 rtx op = recog_data.operand[opno];
1878 int matches = operand_matches[opno];
1880 // Punt for now on operands that already have a fixed choice of
1881 // register, since we don't have IRA's ability to find an alternative.
1882 // It's better if earlier passes don't create this kind of situation.
1883 if (REG_P (op) && FP_REGNUM_P (REGNO (op)))
1884 m_allocation_successful = false;
1886 // Treat input operands as being earlyclobbered if an output is
1887 // sometimes earlyclobber and if the input never matches an output.
1888 // Do the same if there is an output that is always matched to an
1889 // input, and if this operand doesn't match that input. In both
1890 // cases, tying the input and the output would lead to an impossible
1891 // combination (or at least one that is difficult to reload).
1892 if (recog_data.operand_type[opno] != OP_OUT
1893 && ((earlyclobber_operands && matches < 0)
1894 || ((matched_operands & ~unmatched_operands)
1895 && !(matches_operands & op_mask))))
1896 for (auto &allocno : get_allocno_subgroup (op).allocnos ())
1897 if (allocno.end_point + 1 == m_current_point)
1898 allocno.is_earlyclobbered = true;
1900 // Create copies between operands that can be tied. This (deliberately)
1901 // might add several copies to the same destination register; later code
1902 // can then choose between them based on other criteria.
1904 // If there is an output operand that is never matched or earlyclobber,
1905 // and an input operand that never matches an output operand, create
1906 // a tentative copy between them. This allows hard register preferences
1907 // to be transmitted along the copy chains.
1908 if (matches >= 0)
1909 record_copy (recog_data.operand[matches], op);
1910 else if (dest_op && recog_data.operand_type[opno] == OP_IN)
1911 record_copy (dest_op, op);
1915 // If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the
1916 // start of the current basic block, otherwise model the artificial uses
1917 // and defs at the end of the basic block. This is done as part of a
1918 // backwards walk, so defs should be processed before uses.
1919 void
1920 early_ra::record_artificial_refs (unsigned int flags)
1922 df_ref ref;
1924 FOR_EACH_ARTIFICIAL_DEF (ref, m_current_bb->index)
1925 if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
1926 && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1927 record_fpr_def (DF_REF_REGNO (ref));
1928 m_current_point += 1;
1930 FOR_EACH_ARTIFICIAL_USE (ref, m_current_bb->index)
1931 if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
1932 && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1933 record_fpr_use (DF_REF_REGNO (ref));
1934 m_current_point += 1;
1937 // Model the register references in INSN as part of a backwards walk.
1938 void
1939 early_ra::record_insn_refs (rtx_insn *insn)
1941 df_ref ref;
1943 // Record all definitions, excluding partial call clobbers.
1944 FOR_EACH_INSN_DEF (ref, insn)
1945 if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1946 record_fpr_def (DF_REF_REGNO (ref));
1947 else
1949 auto range = get_allocno_subgroup (DF_REF_REG (ref));
1950 for (auto &allocno : range.allocnos ())
1952 // If the destination is unused, record a momentary blip
1953 // in its live range.
1954 if (!bitmap_bit_p (m_live_allocnos, allocno.id))
1955 record_allocno_use (&allocno);
1956 record_allocno_def (&allocno);
1959 m_current_point += 1;
1961 // Model the call made by a call insn as a separate phase in the
1962 // evaluation of the insn. Any partial call clobbers happen at that
1963 // point, rather than in the definition or use phase of the insn.
1964 if (auto *call_insn = dyn_cast<rtx_call_insn *> (insn))
1966 function_abi abi = insn_callee_abi (call_insn);
1967 m_call_points[abi.id ()].safe_push (m_current_point);
1968 m_current_point += 1;
1971 // Record all uses. We can ignore READ_MODIFY_WRITE uses of plain subregs,
1972 // since we track the FPR-sized parts of them individually.
1973 FOR_EACH_INSN_USE (ref, insn)
1974 if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
1975 record_fpr_use (DF_REF_REGNO (ref));
1976 else if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE)
1977 || DF_REF_FLAGS_IS_SET (ref, DF_REF_STRICT_LOW_PART)
1978 || DF_REF_FLAGS_IS_SET (ref, DF_REF_ZERO_EXTRACT))
1980 auto range = get_allocno_subgroup (DF_REF_REG (ref));
1981 for (auto &allocno : range.allocnos ())
1982 record_allocno_use (&allocno);
1984 m_current_point += 1;
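// Note that the walk over each block is backwards and m_current_point only
// ever increases, so larger point numbers correspond to earlier positions
// in the program. The binary searches in fpr_conflicts_with_allocno_p and
// call_in_range_p below rely on the monotonic point ordering that this
// gives m_fpr_ranges and m_call_points.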
1987 // ALLOCNO->is_strong_copy_src is true. See whether ALLOCNO heads a
1988 // natural chain that has an affinity with the same hard register at
1989 // both ends.
1990 bool
1991 early_ra::consider_strong_copy_src_chain (allocno_info *allocno)
1993 auto *src_allocno = allocno;
1994 while (src_allocno->copy_dest != INVALID_ALLOCNO)
1996 auto *dest_allocno = m_allocnos[src_allocno->copy_dest];
1997 if (dest_allocno->start_point > src_allocno->end_point
1998 || dest_allocno->hard_regno != src_allocno->hard_regno)
1999 return false;
2000 gcc_checking_assert (dest_allocno->is_copy_dest);
2001 src_allocno = dest_allocno;
2004 while (allocno->copy_dest != INVALID_ALLOCNO)
2006 allocno->is_strong_copy_src = 1;
2007 allocno = m_allocnos[allocno->copy_dest];
2008 allocno->is_strong_copy_dest = 1;
2010 return true;
2013 // ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being
2014 // chained together. See whether chaining them requires the containing
2015 // groups to have the same stride, or whether it requires them to have
2016 // different strides. Return 1 if they should have the same stride,
2017 // -1 if they should have different strides, or 0 if it doesn't matter.
2018 int
2019 early_ra::strided_polarity_pref (allocno_info *allocno1,
2020 allocno_info *allocno2)
2022 if (allocno1->offset + 1 < allocno1->group_size
2023 && allocno2->offset + 1 < allocno2->group_size)
2025 if (is_chain_candidate (allocno1 + 1, allocno2 + 1, ALL_REASONS))
2026 return 1;
2027 else
2028 return -1;
2031 if (allocno1->offset > 0 && allocno2->offset > 0)
2033 if (is_chain_candidate (allocno1 - 1, allocno2 - 1, ALL_REASONS))
2034 return 1;
2035 else
2036 return -1;
2039 return 0;
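// For example (hypothetical): if ALLOCNO1 and ALLOCNO2 are both allocno 0
// of their own 2-allocno groups, the groups prefer the same stride (1)
// when allocno 1 of each group could also be chained together, and
// opposite strides (-1) when they could not.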
2042 // Decide which groups should be strided. Also complete "strong" copy chains.
2043 void
2044 early_ra::find_strided_accesses ()
2046 // This function forms a graph of allocnos, linked by equivalences and
2047 // natural copy chains. It temporarily uses chain_next to record the
2048 // reverse of equivalence edges (related_allocno) and chain_prev to record
2049 // the reverse of copy edges (copy_dest).
2050 unsigned int allocno_info::*links[] = {
2051 &allocno_info::chain_next,
2052 &allocno_info::chain_prev,
2053 &allocno_info::copy_dest,
2054 &allocno_info::related_allocno
2057 // Set up the temporary reverse edges. Check for strong copy chains.
2058 for (unsigned int i = m_allocnos.length (); i-- > 0; )
2060 auto *allocno1 = m_allocnos[i];
2061 if (allocno1->copy_dest != INVALID_ALLOCNO)
2062 m_allocnos[allocno1->copy_dest]->chain_prev = allocno1->id;
2063 if (allocno1->related_allocno != INVALID_ALLOCNO)
2064 m_allocnos[allocno1->related_allocno]->chain_next = allocno1->id;
2066 if (allocno1->is_strong_copy_src
2067 && !allocno1->is_copy_dest
2068 && !consider_strong_copy_src_chain (allocno1))
2069 allocno1->is_strong_copy_src = false;
2072 // Partition the graph into cliques based on edges that have the following
2073 // properties:
2075 // - the edge joins two allocnos whose groups have a free choice between
2076 // consecutive and strided allocations.
2078 // - the two groups have a relative strided polarity preference (that is
2079 // they should make the same choice between consecutive and strided,
2080 // or they should make opposite choices).
2082 // Assign relative polarities to each group connected in this way.
2084 // The aim is to discover natural move-free striding choices, which will
2085 // often exist in carefully written ACLE code.
2086 unsigned int num_edges = m_allocnos.length () * ARRAY_SIZE (links);
2087 auto_sbitmap visited_edges (num_edges);
2088 bitmap_clear (visited_edges);
2090 auto_vec<unsigned int, 32> worklist;
2091 for (unsigned int i = 0; i < num_edges; ++i)
2093 if (!bitmap_set_bit (visited_edges, i))
2094 continue;
2095 worklist.quick_push (i);
2096 while (!worklist.is_empty ())
2098 auto ei = worklist.pop ();
2099 auto *allocno1 = m_allocnos[ei / ARRAY_SIZE (links)];
2100 auto ai2 = allocno1->*links[ei % ARRAY_SIZE (links)];
2101 if (ai2 == INVALID_ALLOCNO)
2102 continue;
2104 auto *allocno2 = m_allocnos[ai2];
2105 auto *group1 = allocno1->group ();
2106 auto *group2 = allocno2->group ();
2107 if (!group1->has_flexible_stride || !group2->has_flexible_stride)
2108 continue;
2110 int pref = strided_polarity_pref (allocno1, allocno2);
2111 if (pref == 0)
2112 continue;
2114 for (auto *group : { group1, group2 })
2115 for (auto &allocno : group->allocnos ())
2116 for (unsigned int j = 0; j < ARRAY_SIZE (links); ++j)
2117 if (bitmap_set_bit (visited_edges, allocno.id * 4 + j))
2118 worklist.safe_push (allocno.id * 4 + j);
2120 if (group1->strided_polarity)
2121 group2->strided_polarity = group1->strided_polarity * pref;
2122 else if (group2->strided_polarity)
2123 group1->strided_polarity = group2->strided_polarity * pref;
2124 else
2126 group1->strided_polarity = 1;
2127 group2->strided_polarity = pref;
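// A small worked example (hypothetical): if group1 already has polarity 1
// and the edge wants opposite strides (pref == -1), group2 is given
// polarity 1 * -1 == -1. A later edge from group2 to a third group with
// pref == -1 would then give that group polarity 1, keeping the clique
// self-consistent.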
2132 // Now look for edges between allocnos in multi-register groups where:
2134 // - the two groups have a relative strided polarity preference (as above).
2136 // - one group (G1) has a free choice between consecutive and strided
2137 // allocations.
2139 // - the other group (G2) must use consecutive allocations.
2141 // Update G1's individual preference for strided or consecutive allocations
2142 // based on G2. If the previous loop chose a polarity for G1, work out
2143 // whether it is better for polarity 1 or -1 to correspond to consecutive
2144 // allocation.
2145 int consecutive_pref = 0;
2146 for (unsigned int i = m_allocnos.length (); i-- > 0; )
2148 auto *allocno1 = m_allocnos[i];
2149 for (auto link : links)
2151 auto ai2 = allocno1->*link;
2152 if (ai2 == INVALID_ALLOCNO)
2153 continue;
2155 auto *allocno2 = m_allocnos[ai2];
2156 auto *group1 = allocno1->group ();
2157 auto *group2 = allocno2->group ();
2158 if (group1->has_flexible_stride == group2->has_flexible_stride)
2159 continue;
2161 int pref = strided_polarity_pref (allocno1, allocno2);
2162 if (pref == 0)
2163 continue;
2165 auto *group = (group1->has_flexible_stride ? group1 : group2);
2166 consecutive_pref += group->strided_polarity * pref;
2167 group->consecutive_pref += pref;
2171 // If it doesn't matter whether polarity 1 or -1 corresponds to consecutive
2172 // allocation, arbitrarily pick 1.
2173 if (consecutive_pref == 0)
2174 consecutive_pref = 1;
2176 // Record which multi-register groups should use strided allocations.
2177 // Clear out the temporary edges.
2178 for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
2180 auto *allocno = m_allocnos[ai];
2181 allocno->chain_prev = INVALID_ALLOCNO;
2182 allocno->chain_next = INVALID_ALLOCNO;
2184 if (allocno->offset != 0)
2185 continue;
2187 auto *group = allocno->group ();
2188 if (!group->has_flexible_stride)
2189 continue;
2191 bool make_strided = (group->strided_polarity
2192 ? (consecutive_pref * group->strided_polarity) < 0
2193 : group->consecutive_pref < 0);
2194 if (dump_file && (dump_flags & TDF_DETAILS))
2195 fprintf (dump_file, "Allocno [%d:%d]: strided polarity %d,"
2196 " consecutive pref %d, %s\n",
2197 allocno->id, allocno->id + group->size - 1,
2198 group->strided_polarity, group->consecutive_pref,
2199 make_strided ? "making strided" : "keeping consecutive");
2200 if (!make_strided)
2201 continue;
2203 // 2-register groups have a stride of 8 FPRs and must start in
2204 // registers matching the mask 0x17. 4-register groups have a stride
2205 // of 4 FPRs and must start in registers matching the mask 0x13.
2206 group->stride = group->size == 2 ? 8 : 4;
2207 gcc_checking_assert (group->fpr_candidates
2208 == (group->size == 2 ? 0x55555555 : 0x11111111));
2209 group->fpr_candidates = (group->size == 2 ? 0xff00ff : 0xf000f);
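// To make the masks concrete: "matching the mask 0x17" means the start
// register can have only bits 0-2 and 4 set, i.e. V0-V7 or V16-V23,
// which as a bitmap of start registers is 0xff00ff. Likewise 0x13
// allows only bits 0-1 and 4, i.e. V0-V3 or V16-V19, giving 0xf000f.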
2213 // Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=>
2214 // result that puts allocnos in order of increasing FIELD.
2215 template<unsigned int early_ra::allocno_info::*field>
2216 int
2217 early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
2219 auto *allocno1 = *(allocno_info *const *) allocno1_ptr;
2220 auto *allocno2 = *(allocno_info *const *) allocno2_ptr;
2222 if (allocno1->*field != allocno2->*field)
2223 return allocno1->*field < allocno2->*field ? -1 : 1;
2224 return (allocno1->id < allocno2->id ? -1
2225 : allocno1->id == allocno2->id ? 0 : 1);
2228 // Return true if we should consider chaining ALLOCNO1 onto the head
2229 // of ALLOCNO2. STRICTNESS says whether we should take copy-elision
2230 // heuristics into account, or whether we should just consider things
2231 // that matter for correctness.
2233 // This is just a local test of the two allocnos; it doesn't guarantee
2234 // that chaining them would give a self-consistent system.
2235 bool
2236 early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2,
2237 test_strictness strictness)
2239 if (allocno2->is_shared ())
2240 return false;
2242 while (allocno1->is_equiv)
2243 allocno1 = m_allocnos[allocno1->related_allocno];
2245 if (allocno2->start_point >= allocno1->end_point
2246 && !allocno2->is_equiv_to (allocno1->id))
2247 return false;
2249 if (allocno1->is_earlyclobbered
2250 && allocno1->end_point == allocno2->start_point + 1)
2251 return false;
2253 if (strictness == ALL_REASONS && allocno2->is_copy_dest)
2255 if (allocno1->copy_dest != allocno2->id)
2256 return false;
2257 if (allocno2->is_strong_copy_dest && !allocno1->is_strong_copy_src)
2258 return false;
2260 return true;
2263 // We're trying to chain allocno ALLOCNO1 to a later allocno.
2264 // Rate how good a choice ALLOCNO2 would be, with higher being better.
2265 int
2266 early_ra::rate_chain (allocno_info *allocno1, allocno_info *allocno2)
2268 int score = 0;
2269 if (allocno2->is_strong_copy_dest)
2270 score += 256;
2271 else if (allocno2->is_copy_dest)
2272 score += 128;
2274 // Prefer well-aligned matches.
2275 auto *group1 = allocno1->group ();
2276 auto *group2 = allocno2->group ();
2277 if (group1->stride == 1 && group2->stride == 1)
2279 unsigned int min_size = std::min (group1->color_rep ()->size,
2280 group2->color_rep ()->size);
2281 if ((group1->color_rep_offset + allocno1->offset) % min_size
2282 == (group2->color_rep_offset + allocno2->offset) % min_size)
2283 score += min_size;
2284 else
2285 score -= min_size;
2287 return score;
2290 // Sort the chain_candidate_infos at ARG1 and ARG2 in order of decreasing
2291 // score.
2292 int
2293 early_ra::cmp_chain_candidates (const void *arg1, const void *arg2)
2295 auto &candidate1 = *(const chain_candidate_info *) arg1;
2296 auto &candidate2 = *(const chain_candidate_info *) arg2;
2297 if (candidate1.score != candidate2.score)
2298 return candidate1.score > candidate2.score ? -1 : 1;
2300 // Prefer to increase the gap between uses of the allocated register,
2301 // to give the scheduler more freedom.
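// (Program points are numbered backwards, so a smaller start point means
// that the candidate starts later in program order, further from the end
// of the allocno that is being chained onto it.)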
2302 auto *allocno1 = candidate1.allocno;
2303 auto *allocno2 = candidate2.allocno;
2304 if (allocno1->start_point != allocno2->start_point)
2305 return allocno1->start_point < allocno2->start_point ? -1 : 1;
2307 if (allocno1 != allocno2)
2308 return allocno1->id < allocno2->id ? -1 : 1;
2310 return 0;
2313 // Join the chains of allocnos that start at HEADI1 and HEADI2.
2314 // HEADI1 is either empty or a single allocno.
2315 void
2316 early_ra::chain_allocnos (unsigned int &headi1, unsigned int &headi2)
2318 if (headi1 == INVALID_ALLOCNO)
2319 headi1 = headi2;
2320 else if (headi2 == INVALID_ALLOCNO)
2321 headi2 = headi1;
2322 else
2324 auto *head1 = m_allocnos[headi1];
2325 auto *head2 = m_allocnos[headi2];
2326 gcc_checking_assert (head1->chain_next == INVALID_ALLOCNO
2327 && head1->chain_prev == INVALID_ALLOCNO
2328 && head2->chain_prev == INVALID_ALLOCNO);
2330 if (head1->is_equiv
2331 && m_allocnos[head1->related_allocno]->copy_dest == headi2)
2333 head1->is_copy_dest = head2->is_copy_dest;
2334 head1->is_strong_copy_dest = head2->is_strong_copy_dest;
2335 m_allocnos[head1->related_allocno]->copy_dest = headi1;
2337 head1->chain_next = headi2;
2338 head2->chain_prev = headi1;
2340 headi2 = headi1;
2344 // Add GROUP2's FPR information to GROUP1's, given that GROUP2 starts
2345 // OFFSET allocnos into GROUP1.
2346 void
2347 early_ra::merge_fpr_info (allocno_group_info *group1,
2348 allocno_group_info *group2,
2349 unsigned int offset)
2351 group1->fpr_size = std::max (group1->fpr_size, group2->fpr_size);
2352 group1->fpr_candidates &= (group2->fpr_candidates
2353 >> (offset * group1->stride));
2356 // Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
2357 // ends up being at allocno offset REP_OFFSET from the start of REP.
2358 void
2359 early_ra::set_single_color_rep (allocno_info *allocno, allocno_group_info *rep,
2360 unsigned int rep_offset)
2362 auto *group = allocno->group ();
2363 if (group->m_color_rep == rep)
2364 return;
2366 group->m_color_rep = rep;
2367 gcc_checking_assert (multiple_p (group->stride, rep->stride));
2368 unsigned int factor = group->stride / rep->stride;
2369 gcc_checking_assert (rep_offset >= allocno->offset * factor);
2370 group->color_rep_offset = rep_offset - allocno->offset * factor;
2371 merge_fpr_info (rep, group, group->color_rep_offset);
2374 // REP1 and REP2 are color representatives. Change REP1's color representative
2375 // to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2.
2376 void
2377 early_ra::set_color_rep (allocno_group_info *rep1, allocno_group_info *rep2,
2378 unsigned int rep2_offset)
2380 gcc_checking_assert (rep1 != rep2
2381 && rep2->m_color_rep == rep2
2382 && multiple_p (rep1->stride, rep2->stride));
2384 auto heads1 = rep1->chain_heads ();
2385 auto heads2 = rep2->chain_heads ();
2386 for (unsigned int i1 = 0; i1 < heads1.size (); ++i1)
2387 if (heads1[i1] != INVALID_ALLOCNO)
2389 unsigned int i2 = rep2_offset + i1 * rep1->stride / rep2->stride;
2390 if (heads2[i2] == INVALID_ALLOCNO)
2391 heads2[i2] = heads1[i1];
2392 else
2393 gcc_checking_assert (heads2[i2] == heads1[i1]);
2394 set_single_color_rep (m_allocnos[heads1[i1]], rep2, i2);
2398 // Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2.
2399 // Return true on success.
2400 bool
2401 early_ra::try_to_chain_allocnos (allocno_info *allocno1,
2402 allocno_info *allocno2)
2404 auto *group1 = allocno1->group ()->color_rep ();
2405 auto *group2 = allocno2->group ()->color_rep ();
2407 // Avoid trying to tie different subgroups of the same group. This can
2408 // happen if the parts of a register are defined and used piecemeal.
2409 if (group1 == group2)
2410 return false;
2412 // The stride (in FPRs) between allocnos of each color representative.
2413 auto fpr_stride1 = group1->stride;
2414 auto fpr_stride2 = group2->stride;
2416 // The offset (in FPRs) of each allocno group from its color representative.
2417 auto fpr_offset1 = allocno1->group ()->color_rep_offset * fpr_stride1;
2418 auto fpr_offset2 = allocno2->group ()->color_rep_offset * fpr_stride2;
2420 // The offset (in FPRs) of each allocno from its color representative.
2421 fpr_offset1 += allocno1->offset * allocno1->group ()->stride;
2422 fpr_offset2 += allocno2->offset * allocno2->group ()->stride;
2424 // The FPR overlap is in multiples of the larger stride.
2425 auto max_fpr_stride = std::max (fpr_stride1, fpr_stride2);
2426 auto min_fpr_offset = std::min (fpr_offset1, fpr_offset2);
2427 auto fpr_overlap_offset = ROUND_DOWN (min_fpr_offset, max_fpr_stride);
2429 // The offset (in FPRs) of the start of the overlapping region from
2430 // each color representative.
2431 fpr_offset1 -= fpr_overlap_offset;
2432 fpr_offset2 -= fpr_overlap_offset;
2434 // The number of FPRs in each color representative after the start
2435 // of the overlapping region.
2436 auto fpr_after1 = (group1->size - 1) * fpr_stride1 - fpr_offset1;
2437 auto fpr_after2 = (group2->size - 1) * fpr_stride2 - fpr_offset2;
2439 auto min_fpr_after = std::min (fpr_after1, fpr_after2);
2441 // The number of overlapping allocnos.
2442 auto allocno_overlap_size = min_fpr_after / max_fpr_stride + 1;
2444 // The offset (in allocnos) of the overlapping region from the start
2445 // of each color representative.
2446 auto allocno_offset1 = fpr_offset1 / fpr_stride1;
2447 auto allocno_offset2 = fpr_offset2 / fpr_stride2;
2449 // The stride (in allocnos) between overlapping allocnos.
2450 auto allocno_stride1 = max_fpr_stride / fpr_stride1;
2451 auto allocno_stride2 = max_fpr_stride / fpr_stride2;
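// A rough worked example (hypothetical numbers): suppose both color
// representatives have stride 1, group1 has 4 allocnos, group2 has 2,
// and we are chaining group1's allocno at offset 2 to group2's allocno
// at offset 0. Then fpr_offset1 == 2, fpr_offset2 == 0, fpr_after1 ==
// fpr_after2 == 1 and allocno_overlap_size == 2: allocnos 2-3 of group1
// line up with allocnos 0-1 of group2, with an allocno stride of 1 in
// each group.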
2453 // Reject combinations that are impossible to allocate.
2454 auto fprs1 = group1->fpr_candidates;
2455 auto fprs2 = group2->fpr_candidates;
2456 if (fpr_offset1 > fpr_offset2)
2457 fprs2 >>= (fpr_offset1 - fpr_offset2);
2458 else
2459 fprs1 >>= (fpr_offset2 - fpr_offset1);
2460 if ((fprs1 & fprs2) == 0)
2462 if (dump_file && (dump_flags & TDF_DETAILS))
2463 fprintf (dump_file, " - cannot chain %d->%d, no FPRs in common"
2464 " (%08x@%d and %08x@%d)\n", allocno1->id, allocno2->id,
2465 group1->fpr_candidates, fpr_offset1,
2466 group2->fpr_candidates, fpr_offset2);
2467 return false;
2470 // Check whether the chain can be formed.
2471 auto heads1 = group1->chain_heads ();
2472 auto heads2 = group2->chain_heads ();
2473 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2475 auto headi1 = heads1[allocno_offset1 + i * allocno_stride1];
2476 auto headi2 = heads2[allocno_offset2 + i * allocno_stride2];
2477 if (headi1 != INVALID_ALLOCNO && headi2 != INVALID_ALLOCNO)
2479 auto *head1 = m_allocnos[headi1];
2480 auto *head2 = m_allocnos[headi2];
2481 if (head1->chain_next != INVALID_ALLOCNO)
2482 return false;
2483 if (!is_chain_candidate (head1, head2, CORRECTNESS_ONLY))
2484 return false;
2488 if (dump_file && (dump_flags & TDF_DETAILS))
2490 fprintf (dump_file, " - chaining allocnos [");
2491 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2492 fprintf (dump_file, "%s%d", i ? "," : "",
2493 heads1[allocno_offset1 + i * allocno_stride1]);
2494 fprintf (dump_file, "] and [");
2495 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2496 fprintf (dump_file, "%s%d", i ? "," : "",
2497 heads2[allocno_offset2 + i * allocno_stride2]);
2498 fprintf (dump_file, "]\n");
2501 // Chain the allocnos, updating the chain heads.
2502 for (unsigned int i = 0; i < allocno_overlap_size; ++i)
2503 chain_allocnos (heads1[allocno_offset1 + i * allocno_stride1],
2504 heads2[allocno_offset2 + i * allocno_stride2]);
2506 // Pick a color representative for the merged groups.
2507 allocno_group_info *new_rep;
2508 if (allocno_offset1 == 0
2509 && group1->size == allocno_overlap_size * allocno_stride1
2510 && multiple_p (fpr_stride1, fpr_stride2))
2512 // The first group fits within the second.
2513 set_color_rep (group1, group2, allocno_offset2);
2514 new_rep = group2;
2516 else if (allocno_offset2 == 0
2517 && group2->size == allocno_overlap_size * allocno_stride2
2518 && multiple_p (fpr_stride2, fpr_stride1))
2520 // The second group fits within the first.
2521 set_color_rep (group2, group1, allocno_offset1);
2522 new_rep = group1;
2524 else
2526 // We need a new group that is big enough to span both groups.
2527 // The new group always has an FPR stride of 1.
2528 auto max_fpr_offset = std::max (fpr_offset1, fpr_offset2);
2529 auto max_fpr_after = std::max (fpr_after1, fpr_after2);
2530 auto new_size = max_fpr_offset + max_fpr_after + 1;
2531 new_rep = create_allocno_group (INVALID_REGNUM, new_size);
2533 set_color_rep (group1, new_rep, max_fpr_offset - fpr_offset1);
2534 set_color_rep (group2, new_rep, max_fpr_offset - fpr_offset2);
2537 if (dump_file && (dump_flags & TDF_DETAILS))
2539 fprintf (dump_file, " - new frontier [");
2540 auto new_heads = new_rep->chain_heads ();
2541 for (unsigned int i = 0; i < new_heads.size (); ++i)
2543 if (i)
2544 fprintf (dump_file, ",");
2545 if (new_heads[i] == INVALID_ALLOCNO)
2546 fprintf (dump_file, "-");
2547 else
2548 fprintf (dump_file, "%d", new_heads[i]);
2550 fprintf (dump_file, "]\n");
2553 return true;
2556 // Create a color_info for color representative GROUP.
2557 void
2558 early_ra::create_color (allocno_group_info *group)
2560 auto *color = region_allocate<color_info> ();
2561 color->id = m_colors.length ();
2562 color->hard_regno = FIRST_PSEUDO_REGISTER;
2563 color->group = group;
2565 gcc_checking_assert (group->m_color_rep == group);
2566 group->has_color = true;
2567 group->color = m_colors.length ();
2569 m_colors.safe_push (color);
2572 // Form allocnos into chains. Create colors for each resulting clique.
2573 void
2574 early_ra::form_chains ()
2576 if (dump_file && (dump_flags & TDF_DETAILS))
2577 fprintf (dump_file, "\nChaining allocnos:\n");
2579 // Perform (modified) interval graph coloring. First sort by
2580 // increasing start point.
2581 m_sorted_allocnos.reserve (m_allocnos.length ());
2582 m_sorted_allocnos.splice (m_allocnos);
2583 m_sorted_allocnos.qsort (cmp_increasing<&allocno_info::start_point>);
2585 // During this phase, color representatives are only correct for
2586 // unprocessed allocno groups (where the color representative is
2587 // the group itself) and for groups that contain a current chain head.
2588 unsigned int ti = 0;
2589 auto_vec<chain_candidate_info> candidates;
2590 for (unsigned int hi = 0; hi < m_sorted_allocnos.length (); ++hi)
2592 auto *allocno1 = m_sorted_allocnos[hi];
2593 if (allocno1->chain_next != INVALID_ALLOCNO)
2594 continue;
2596 // Record conflicts with direct uses for FPR hard registers.
2597 auto *group1 = allocno1->group ();
2598 for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
2599 if (fpr_conflicts_with_allocno_p (fpr, allocno1))
2600 group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
2602 // Record conflicts due to partially call-clobbered registers.
2603 // (Full clobbers are handled by the previous loop.)
2604 for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
2605 if (call_in_range_p (abi_id, allocno1->start_point,
2606 allocno1->end_point))
2608 auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
2609 group1->fpr_candidates &= ~fprs >> allocno1->offset;
2612 if (allocno1->is_shared ())
2614 if (dump_file && (dump_flags & TDF_DETAILS))
2615 fprintf (dump_file, " Allocno %d shares the same hard register"
2616 " as allocno %d\n", allocno1->id,
2617 allocno1->related_allocno);
2618 auto *allocno2 = m_allocnos[allocno1->related_allocno];
2619 merge_fpr_info (allocno2->group (), group1, allocno2->offset);
2620 m_shared_allocnos.safe_push (allocno1);
2621 continue;
2624 // Find earlier allocnos (in processing order) that could be chained
2625 // to this one.
2626 candidates.truncate (0);
2627 for (unsigned int sci = ti; sci < hi; ++sci)
2629 auto *allocno2 = m_sorted_allocnos[sci];
2630 if (allocno2->chain_prev == INVALID_ALLOCNO)
2632 if (!is_chain_candidate (allocno1, allocno2, ALL_REASONS))
2633 continue;
2634 chain_candidate_info candidate;
2635 candidate.allocno = allocno2;
2636 candidate.score = rate_chain (allocno1, allocno2);
2637 candidates.safe_push (candidate);
2639 else if (sci == ti)
2640 ++ti;
2643 // Sort the candidates by decreasing score.
2644 candidates.qsort (cmp_chain_candidates);
2645 if (dump_file && (dump_flags & TDF_DETAILS))
2647 fprintf (dump_file, " Chain candidates for %d:", allocno1->id);
2648 for (auto &candidate : candidates)
2649 fprintf (dump_file, " %d(%d)", candidate.allocno->id,
2650 candidate.score);
2651 fprintf (dump_file, "\n");
2654 // Pick the first candidate that works.
2655 for (auto &candidate : candidates)
2656 if (try_to_chain_allocnos (allocno1, candidate.allocno))
2657 break;
2660 // Create color_infos for each group. Make sure that each group's
2661 // color representative is up to date.
2662 for (unsigned int hi = m_sorted_allocnos.length (); hi-- > 0; )
2664 auto *allocno = m_sorted_allocnos[hi];
2665 if (allocno->is_shared ())
2666 continue;
2668 auto *rep = allocno->group ()->color_rep ();
2669 if (rep->has_color)
2670 continue;
2672 create_color (rep);
2673 auto heads = rep->chain_heads ();
2674 for (unsigned int i = 0; i < heads.size (); ++i)
2676 unsigned int ai = heads[i];
2677 while (ai != INVALID_ALLOCNO)
2679 allocno = m_allocnos[ai];
2680 set_single_color_rep (allocno, rep, i * rep->stride);
2681 ai = allocno->chain_next;
2687 // Return true if the given FPR (starting at 0) conflicts with allocno
2688 // ALLOCNO.
2689 bool
2690 early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr,
2691 allocno_info *allocno)
2693 auto &ranges = m_fpr_ranges[fpr];
2694 unsigned int start_i = 0;
2695 unsigned int end_i = ranges.length ();
2696 while (start_i < end_i)
2698 unsigned int mid_i = (start_i + end_i) / 2;
2699 auto &range = ranges[mid_i];
2700 if (allocno->end_point > range.start_point)
2701 start_i = mid_i + 1;
2702 else if (allocno->start_point < range.end_point)
2703 end_i = mid_i;
2704 else
2706 if (range.allocno != allocno->id)
2707 return true;
2708 // The FPR is equivalent to ALLOCNO for this particular range.
2709 // See whether ALLOCNO conflicts with a neighboring range.
2710 if (mid_i > 0
2711 && ranges[mid_i - 1].start_point >= allocno->end_point)
2712 return true;
2713 if (mid_i + 1 < ranges.length ()
2714 && ranges[mid_i + 1].end_point <= allocno->start_point)
2715 return true;
2716 return false;
2719 return false;
2722 // Return true if there is a call with ABI identifier ABI_ID in the inclusive
2723 // program point range [START_POINT, END_POINT].
2724 bool
2725 early_ra::call_in_range_p (unsigned int abi_id, unsigned int start_point,
2726 unsigned int end_point)
2728 auto &points = m_call_points[abi_id];
2729 unsigned int start_i = 0;
2730 unsigned int end_i = points.length ();
2731 while (start_i < end_i)
2733 unsigned int mid_i = (start_i + end_i) / 2;
2734 auto point = points[mid_i];
2735 if (end_point > point)
2736 start_i = mid_i + 1;
2737 else if (start_point < point)
2738 end_i = mid_i;
2739 else
2740 return true;
2742 return false;
2745 // Return the set of FPRs for which a value of size SIZE will be clobbered
2746 // by a call to a function with ABI identifier ABI_ID, but would not be
2747 // for some smaller size. The set therefore excludes FPRs that are
2748 // fully-clobbered, like V0 in the base ABI.
2749 unsigned int
2750 early_ra::partial_fpr_clobbers (unsigned int abi_id, fpr_size_info size)
2752 auto &abi = function_abis[abi_id];
2753 unsigned int clobbers = 0;
2754 machine_mode mode = (size == FPR_D ? V8QImode
2755 : size == FPR_Q ? V16QImode : VNx16QImode);
2756 for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
2757 if (!abi.clobbers_full_reg_p (regno)
2758 && abi.clobbers_reg_p (mode, regno))
2759 clobbers |= 1U << (regno - V0_REGNUM);
2760 return clobbers;
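// For example, under the base PCS only the low 64 bits of V8-V15 are
// preserved across calls, so for the base ABI this returns 0 for FPR_D
// but 0xff00 (V8-V15) for the larger sizes; V0-V7 and V16-V31 are
// excluded because they are fully clobbered.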
2763 // Process copies between pseudo registers and hard registers and update
2764 // the FPR preferences for the associated colors.
2765 void
2766 early_ra::process_copies ()
2768 for (auto &copy : m_allocno_copies)
2770 auto *allocno = m_allocnos[copy.allocno];
2771 auto *group = allocno->group ();
2772 auto offset = group->color_rep_offset + allocno->offset;
2773 if (offset > copy.fpr)
2774 continue;
2776 unsigned int fpr = copy.fpr - offset;
2777 auto *color = m_colors[group->color_rep ()->color];
2778 color->fpr_preferences[fpr] = MIN (color->fpr_preferences[fpr]
2779 + copy.weight, 127);
2780 color->num_fpr_preferences += copy.weight;
2784 // Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
2785 // result that puts colors in allocation order.
2786 int
2787 early_ra::cmp_allocation_order (const void *color1_ptr, const void *color2_ptr)
2789 auto *color1 = *(color_info *const *) color1_ptr;
2790 auto *color2 = *(color_info *const *) color2_ptr;
2792 // Allocate bigger groups before smaller groups.
2793 if (color1->group->size != color2->group->size)
2794 return color1->group->size > color2->group->size ? -1 : 1;
2796 // Allocate groups with stronger FPR preferences before groups with weaker
2797 // FPR preferences.
2798 if (color1->num_fpr_preferences != color2->num_fpr_preferences)
2799 return color1->num_fpr_preferences > color2->num_fpr_preferences ? -1 : 1;
2801 return (color1->id < color2->id ? -1
2802 : color1->id == color2->id ? 0 : 1);
2805 // Allocate a register to each color. If we run out of registers,
2806 // give up on doing a full allocation of the FPR-based pseudos in the
2807 // region.
2808 void
2809 early_ra::allocate_colors ()
2811 if (dump_file && (dump_flags & TDF_DETAILS))
2812 fprintf (dump_file, "\nAllocating registers:\n");
2814 auto_vec<color_info *> sorted_colors;
2815 sorted_colors.safe_splice (m_colors);
2816 sorted_colors.qsort (cmp_allocation_order);
2818 for (unsigned int i = 0; i < 32; ++i)
2819 if (!crtl->abi->clobbers_full_reg_p (V0_REGNUM + i))
2820 m_call_preserved_fprs |= 1U << i;
2822 for (auto *color : sorted_colors)
2824 unsigned int candidates = color->group->fpr_candidates;
2825 for (unsigned int i = 0; i < color->group->size; ++i)
2826 candidates &= ~(m_allocated_fprs >> i);
2827 unsigned int best = INVALID_REGNUM;
2828 int best_weight = 0;
2829 unsigned int best_recency = 0;
2830 for (unsigned int fpr = 0; fpr <= 32U - color->group->size; ++fpr)
2832 if ((candidates & (1U << fpr)) == 0)
2833 continue;
2834 int weight = color->fpr_preferences[fpr];
2835 unsigned int recency = 0;
2836 // Account for registers that the current function must preserve.
2837 for (unsigned int i = 0; i < color->group->size; ++i)
2839 if (m_call_preserved_fprs & (1U << (fpr + i)))
2840 weight -= 1;
2841 recency = MAX (recency, m_fpr_recency[fpr + i]);
2843 // Prefer higher-numbered registers in the event of a tie.
2844 // This should tend to keep lower-numbered registers free
2845 // for allocnos that require V0-V7 or V0-V15.
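// Among FPRs of equal weight, the recency test below also favours
// registers that were not used by recent regions, falling back to the
// highest-numbered register on a full tie.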
2846 if (best == INVALID_REGNUM
2847 || best_weight < weight
2848 || (best_weight == weight && recency <= best_recency))
2850 best = fpr;
2851 best_weight = weight;
2852 best_recency = recency;
2856 if (best == INVALID_REGNUM)
2858 m_allocation_successful = false;
2859 return;
2862 color->hard_regno = best + V0_REGNUM;
2863 if (dump_file && (dump_flags & TDF_DETAILS))
2864 fprintf (dump_file, " Allocating [v%d:v%d] to color %d\n",
2865 best, best + color->group->size - 1, color->id);
2866 m_allocated_fprs |= ((1U << color->group->size) - 1) << best;
2870 // See if ALLOCNO ends a subchain of single registers that can be split
2871 // off without affecting the rest of the chain, and without introducing
2872 // any moves. Return the start of the chain if so (which might be ALLOCNO
2873 // itself), otherwise return null.
2874 early_ra::allocno_info *
2875 early_ra::find_independent_subchain (allocno_info *allocno)
2877 // Make sure ALLOCNO ends a natural subchain.
2878 if (auto *next_allocno = chain_next (allocno))
2879 if (next_allocno->start_point + 1 >= allocno->end_point)
2880 return nullptr;
2882 // Check the allocnos in the purported subchain and find the other end.
2883 for (;;)
2885 auto *group = allocno->group ();
2886 if (group->m_color_rep == group)
2887 return nullptr;
2888 if (group->size != 1)
2889 return nullptr;
2891 auto *prev_allocno = chain_prev (allocno);
2892 if (!prev_allocno || allocno->start_point + 1 < prev_allocno->end_point)
2893 return allocno;
2895 allocno = prev_allocno;
2899 // Search the colors starting at index FIRST_COLOR whose FPRs do not belong
2900 // to FPR_CONFLICTS. Return the first such color that has no group. If all
2901 // such colors have groups, instead return the color with the latest
2902 // (smallest) start point.
2903 early_ra::color_info *
2904 early_ra::find_oldest_color (unsigned int first_color,
2905 unsigned int fpr_conflicts)
2907 color_info *best = nullptr;
2908 unsigned int best_start_point = ~0U;
2909 unsigned int best_recency = 0;
2910 for (unsigned int ci = first_color; ci < m_colors.length (); ++ci)
2912 auto *color = m_colors[ci];
2913 unsigned int fpr = color->hard_regno - V0_REGNUM;
2914 if (fpr_conflicts & (1U << fpr))
2915 continue;
2916 unsigned int start_point = 0;
2917 if (color->group)
2919 auto chain_head = color->group->chain_heads ()[0];
2920 start_point = m_allocnos[chain_head]->start_point;
2922 unsigned int recency = m_fpr_recency[fpr];
2923 if (!best
2924 || best_start_point > start_point
2925 || (best_start_point == start_point && recency < best_recency))
2927 best = color;
2928 best_start_point = start_point;
2929 best_recency = recency;
2932 return best;
2935 // If there are some spare FPRs that can be reused without introducing saves,
2936 // restores, or moves, use them to "broaden" the allocation, in order to give
2937 // the scheduler more freedom. This is particularly useful for forming LDPs
2938 // and STPs.
2939 void
2940 early_ra::broaden_colors ()
2942 // Create dummy colors for every leftover FPR that can be used cheaply.
2943 unsigned int first_color = m_colors.length ();
2944 for (unsigned int fpr = 0; fpr < 32; ++fpr)
2945 if (((m_allocated_fprs | m_call_preserved_fprs) & (1U << fpr)) == 0)
2947 auto *color = region_allocate<color_info> ();
2948 color->id = m_colors.length ();
2949 color->hard_regno = V0_REGNUM + fpr;
2950 color->group = nullptr;
2951 m_colors.safe_push (color);
2954 // Exit early if there are no spare FPRs.
2955 if (first_color == m_colors.length ())
2956 return;
2958 // Go through the allocnos in order, seeing if there is a subchain of
2959 // single-FPR allocnos that can be split off from the containing clique.
2960 // Allocate such subchains to the new colors on an oldest-first basis.
2961 for (auto *allocno : m_sorted_allocnos)
2962 if (auto *start_allocno = find_independent_subchain (allocno))
2964 unsigned int fpr_conflicts = 0;
2965 auto *member = allocno;
2966 for (;;)
2968 fpr_conflicts |= ~member->group ()->fpr_candidates;
2969 if (member == start_allocno)
2970 break;
2971 member = m_allocnos[member->chain_prev];
2974 auto *color = find_oldest_color (first_color, fpr_conflicts);
2975 if (!color)
2976 continue;
2978 if (!color->group)
2980 auto *group = allocno->group ();
2981 color->group = group;
2982 group->color = color->id;
2983 group->chain_heads ()[0] = INVALID_ALLOCNO;
2985 else
2987 auto chain_head = color->group->chain_heads ()[0];
2988 auto start_point = m_allocnos[chain_head]->start_point;
2989 if (start_point >= allocno->end_point)
2990 // Allocating to COLOR isn't viable, and it was the best
2991 // option available.
2992 continue;
2994 auto *next_allocno = chain_next (allocno);
2995 if (!next_allocno || next_allocno->start_point <= start_point)
2996 // The current allocation gives at least as much scheduling
2997 // freedom as COLOR would.
2998 continue;
3001 // Unlink the chain.
3002 if (auto *next_allocno = chain_next (allocno))
3003 next_allocno->chain_prev = start_allocno->chain_prev;
3004 if (auto *prev_allocno = chain_prev (start_allocno))
3005 prev_allocno->chain_next = allocno->chain_next;
3007 // Make the subchain use COLOR.
3008 allocno->chain_next = color->group->chain_heads ()[0];
3009 if (dump_file && (dump_flags & TDF_DETAILS))
3010 fprintf (dump_file, "Moving to optional color %d (register %s):",
3011 color->id, reg_names[color->hard_regno]);
3012 for (;;)
3014 auto *group = allocno->group ();
3015 if (dump_file && (dump_flags & TDF_DETAILS))
3016 fprintf (dump_file, " r%d", group->regno);
3017 group->m_color_rep = color->group;
3018 group->color_rep_offset = 0;
3019 if (allocno == start_allocno)
3020 break;
3021 allocno = m_allocnos[allocno->chain_prev];
3023 if (dump_file && (dump_flags & TDF_DETAILS))
3024 fprintf (dump_file, "\n");
3025 color->group->chain_heads ()[0] = start_allocno->id;
3029 // Record the final choice of hard register for each allocno.
3030 void
3031 early_ra::finalize_allocation ()
3033 for (auto *color : m_colors)
3034 if (color->group)
3036 unsigned int fpr = color->hard_regno - V0_REGNUM;
3037 for (unsigned int i = 0; i < color->group->size; ++i)
3038 m_fpr_recency[fpr + i] = m_current_region;
3040 for (auto *allocno : m_allocnos)
3042 if (allocno->is_shared ())
3043 continue;
3044 auto *group = allocno->group ();
3045 auto *rep = group->color_rep ();
3046 auto rep_regno = m_colors[rep->color]->hard_regno;
3047 auto group_regno = rep_regno + group->color_rep_offset;
3048 allocno->hard_regno = group_regno + allocno->offset * group->stride;
3050 for (auto *allocno : m_shared_allocnos)
3051 allocno->hard_regno = m_allocnos[allocno->related_allocno]->hard_regno;
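// A small worked example (hypothetical, with all strides equal to 1):
// if the group starts 2 allocnos into its color representative, the
// representative's color was allocated V8, and the allocno is at offset 1
// within its group, the allocno ends up in V8 + 2 + 1 == V11.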
3054 // Replace any allocno references in REFS with the allocated register.
3055 // INSN is the instruction that contains REFS.
3056 bool
3057 early_ra::replace_regs (rtx_insn *insn, df_ref refs)
3059 bool changed = false;
3060 for (df_ref ref = refs; ref; ref = DF_REF_NEXT_LOC (ref))
3062 auto range = get_allocno_subgroup (DF_REF_REG (ref));
3063 if (!range)
3064 continue;
3066 auto new_regno = range.allocno (0)->hard_regno;
3067 if (new_regno == FIRST_PSEUDO_REGISTER)
3069 // Reset a debug instruction if, after DCE, the only remaining
3070 // references to a register are in such instructions.
3071 gcc_assert (DEBUG_INSN_P (insn));
3072 INSN_VAR_LOCATION_LOC (insn) = gen_rtx_UNKNOWN_VAR_LOC ();
3073 return true;
3075 *DF_REF_LOC (ref) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref)), new_regno);
3076 changed = true;
3078 return changed;
3081 // Try to make INSN match its FPR-related constraints. If this needs
3082 // a source operand (SRC) to be copied to a destination operand (DEST)
3083 // before INSN, add the associated (DEST, SRC) pairs to MOVES.
3085 // Return -1 on failure, otherwise return a ?/!-style reject count.
3086 // The reject count doesn't model the moves, just the internal alternative
3087 // preferences.
3088 int
3089 early_ra::try_enforce_constraints (rtx_insn *insn,
3090 vec<std::pair<int, int>> &moves)
3092 if (!constrain_operands (0, get_preferred_alternatives (insn)))
3093 return -1;
3095 // Pick the alternative with the lowest cost.
3096 int best = -1;
3097 auto alts = get_preferred_alternatives (insn);
3098 for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
3100 if (!(alts & ALTERNATIVE_BIT (altno)))
3101 continue;
3103 auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
3104 if (!likely_alternative_match_p (op_alt))
3105 continue;
3107 auto_vec<std::pair<int, int>, 4> new_moves;
3108 for (int opno = 0; opno < recog_data.n_operands; ++opno)
3110 rtx op = recog_data.operand[opno];
3111 if (REG_P (op)
3112 && FP_REGNUM_P (REGNO (op))
3113 && op_alt[opno].matched >= 0)
3115 rtx old_src = recog_data.operand[op_alt[opno].matched];
3116 if (!operands_match_p (op, old_src))
3118 for (int i = 0; i < recog_data.n_operands; ++i)
3119 if (i != opno)
3121 rtx other = recog_data.operand[i];
3122 if (reg_overlap_mentioned_p (op, other))
3124 old_src = NULL_RTX;
3125 break;
3128 if (!old_src)
3129 continue;
3130 new_moves.safe_push ({ opno, op_alt[opno].matched });
3134 int cost = count_rejects (op_alt) + new_moves.length () * 7;
3135 if (best < 0 || cost < best)
3137 best = cost;
3138 moves.truncate (0);
3139 moves.safe_splice (new_moves);
3142 return best;
3145 // Make INSN match its FPR-related constraints.
3146 void
3147 early_ra::enforce_constraints (rtx_insn *insn)
3149 extract_insn (insn);
3150 preprocess_constraints (insn);
3152 // First try with the operands as they are.
3153 auto_vec<std::pair<int, int>, 4> moves;
3154 int cost = try_enforce_constraints (insn, moves);
3156 // Next try taking advantage of commutativity.
3157 for (int opno = 0; opno < recog_data.n_operands - 1; ++opno)
3158 if (recog_data.constraints[opno][0] == '%')
3160 std::swap (*recog_data.operand_loc[opno],
3161 *recog_data.operand_loc[opno + 1]);
3162 std::swap (recog_data.operand[opno],
3163 recog_data.operand[opno + 1]);
3164 auto_vec<std::pair<int, int>, 4> swapped_moves;
3165 int swapped_cost = try_enforce_constraints (insn, swapped_moves);
3166 if (swapped_cost >= 0 && (cost < 0 || swapped_cost < cost))
3168 cost = swapped_cost;
3169 moves.truncate (0);
3170 moves.safe_splice (swapped_moves);
3172 else
3174 std::swap (*recog_data.operand_loc[opno],
3175 *recog_data.operand_loc[opno + 1]);
3176 std::swap (recog_data.operand[opno],
3177 recog_data.operand[opno + 1]);
3181 // The allocation should ensure that there is at least one valid combination.
3182 // It's too late to back out now if not.
3183 gcc_assert (cost >= 0);
3184 for (int i = 0; i < recog_data.n_dups; ++i)
3186 int dup_of = recog_data.dup_num[i];
3187 rtx new_op = *recog_data.operand_loc[dup_of];
3188 if (new_op != recog_data.operand[dup_of])
3189 *recog_data.dup_loc[i] = copy_rtx (new_op);
3191 for (auto move : moves)
3193 int dest_opno = move.first;
3194 int src_opno = move.second;
3195 rtx dest = recog_data.operand[dest_opno];
3196 rtx old_src = recog_data.operand[src_opno];
3197 rtx new_src = lowpart_subreg (GET_MODE (old_src), dest, GET_MODE (dest));
3198 emit_insn_before (gen_move_insn (new_src, old_src), insn);
3199 *recog_data.operand_loc[src_opno] = new_src;
3203 // See whether INSN is an instruction that operates on multi-register vectors,
3204 // and if we have decided to make it use strided rather than consecutive
3205 // accesses. Update the pattern and return true if so.
3206 bool
3207 early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
3209 if (!NONJUMP_INSN_P (insn) || recog_memoized (insn) < 0)
3210 return false;
3212 auto stride_type = get_attr_stride_type (insn);
3213 rtx pat = PATTERN (insn);
3214 rtx op;
3215 if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
3216 op = SET_DEST (pat);
3217 else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
3218 op = XVECEXP (SET_SRC (pat), 0, 1);
3219 else
3220 return false;
3222 auto range = get_allocno_subgroup (op);
3223 if (!range || range.group->stride == 1)
3224 return false;
3226 gcc_assert (range.start == 0 && range.count == range.group->size);
3227 auto elt_mode = GET_MODE_INNER (GET_MODE (op));
3228 auto single_mode = aarch64_full_sve_mode (elt_mode).require ();
3229 auto_vec<rtx, 4> regs;
3230 for (unsigned int i = 0; i < range.count; ++i)
3231 regs.quick_push (gen_rtx_REG (single_mode, range.allocno (i)->hard_regno));
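// For example (hypothetical allocation): a consecutive LD1 of a
// 2-register tuple whose group was given a stride of 8 might have been
// allocated V3 and V11, in which case REGS holds the single-vector
// registers z3 and z11 for the strided form built below.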
3233 extract_insn (insn);
3234 if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
3236 auto unspec = XINT (SET_SRC (pat), 1);
3237 if (range.count == 2)
3238 pat = gen_aarch64_strided2 (unspec, GET_MODE (op), regs[0], regs[1],
3239 recog_data.operand[1],
3240 recog_data.operand[2]);
3241 else
3242 pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
3243 regs[0], regs[1], regs[2], regs[3],
3244 recog_data.operand[1],
3245 recog_data.operand[2]);
3247 else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
3249 auto unspec = XINT (SET_SRC (pat), 1);
3250 if (range.count == 2)
3251 pat = gen_aarch64_strided2 (unspec, GET_MODE (op),
3252 recog_data.operand[0],
3253 recog_data.operand[2], regs[0], regs[1]);
3254 else
3255 pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
3256 recog_data.operand[0],
3257 recog_data.operand[2],
3258 regs[0], regs[1], regs[2], regs[3]);
3259 // Ensure correct sharing for the source memory.
3261 // ??? Why doesn't the generator get this right?
3262 XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1)
3263 = *recog_data.dup_loc[0];
3265 else
3266 gcc_unreachable ();
3267 PATTERN (insn) = pat;
3268 INSN_CODE (insn) = -1;
3269 df_insn_rescan (insn);
3270 return true;
3273 // We've successfully allocated the current region. Apply the allocation
3274 // to the instructions.
3275 void
3276 early_ra::apply_allocation ()
3278 for (auto *insn : m_dead_insns)
3279 set_insn_deleted (insn);
3281 rtx_insn *prev;
3282 for (auto insn_range : m_insn_ranges)
3283 for (rtx_insn *insn = insn_range.first;
3284 insn != insn_range.second;
3285 insn = prev)
3287 prev = PREV_INSN (insn);
3288 if (!INSN_P (insn))
3289 continue;
3291 bool changed = maybe_convert_to_strided_access (insn);
3292 changed |= replace_regs (insn, DF_INSN_DEFS (insn));
3293 changed |= replace_regs (insn, DF_INSN_USES (insn));
3294 if (changed && NONDEBUG_INSN_P (insn))
3296 if (GET_CODE (PATTERN (insn)) != USE
3297 && GET_CODE (PATTERN (insn)) != CLOBBER
3298 && !is_move_set (PATTERN (insn)))
3299 enforce_constraints (insn);
3301 // A REG_EQUIV note establishes an equivalence throughout
3302 // the function, but here we're reusing hard registers for
3303 // multiple pseudo registers. We also no longer need REG_EQUIV
3304 // notes that record potential spill locations, since we've
3305 // allocated the pseudo register without spilling.
3306 rtx *ptr = &REG_NOTES (insn);
3307 while (*ptr)
3308 if (REG_NOTE_KIND (*ptr) == REG_EQUIV)
3309 *ptr = XEXP (*ptr, 1);
3310 else
3311 ptr = &XEXP (*ptr, 1);
3313 changed |= replace_regs (insn, DF_INSN_EQ_USES (insn));
3314 if (changed)
3315 df_insn_rescan (insn);
3318 for (auto *insn : m_dead_insns)
3319 delete_insn (insn);
3322 // Try to allocate the current region. Update the instructions if successful.
3323 void
3324 early_ra::process_region ()
3326 for (auto *allocno : m_allocnos)
3328 allocno->chain_next = INVALID_ALLOCNO;
3329 allocno->chain_prev = INVALID_ALLOCNO;
3332 if (dump_file && (dump_flags & TDF_DETAILS))
3334 dump_fpr_ranges ();
3335 dump_copies ();
3336 dump_allocnos ();
3339 find_strided_accesses ();
3341 if (dump_file && (dump_flags & TDF_DETAILS))
3342 dump_allocnos ();
3344 form_chains ();
3346 if (dump_file && (dump_flags & TDF_DETAILS))
3347 dump_allocnos ();
3349 process_copies ();
3351 if (dump_file && (dump_flags & TDF_DETAILS))
3352 dump_colors ();
3354 allocate_colors ();
3355 if (!m_allocation_successful)
3356 return;
3358 broaden_colors ();
3359 finalize_allocation ();
3361 if (dump_file && (dump_flags & TDF_DETAILS))
3363 fprintf (dump_file, "\nAllocation successful\nFinal allocation:\n");
3364 dump_allocnos ();
3365 dump_colors ();
3368 apply_allocation ();
3371 // Return true if INSN would become dead if we successfully allocate the
3372 // current region.
3373 bool
3374 early_ra::is_dead_insn (rtx_insn *insn)
3376 rtx set = single_set (insn);
3377 if (!set)
3378 return false;
3380 rtx dest = SET_DEST (set);
3381 auto dest_range = get_allocno_subgroup (dest);
3382 if (!dest_range)
3383 return false;
3385 for (auto &allocno : dest_range.allocnos ())
3386 if (bitmap_bit_p (m_live_allocnos, allocno.id))
3387 return false;
3389 if (side_effects_p (set))
3390 return false;
3392 /* If we can't delete dead exceptions and the insn throws,
3393 then the instruction is not dead. */
3394 if (!cfun->can_delete_dead_exceptions
3395 && !insn_nothrow_p (insn))
3396 return false;
3398 return true;
3401 // Build up information about block BB. IS_ISOLATED is true if the
3402 // block is not part of a larger region.
3403 void
3404 early_ra::process_block (basic_block bb, bool is_isolated)
3406 m_current_bb = bb;
3407 m_current_point += 1;
3408 m_current_bb_point = m_current_point;
3410 // Process live-out FPRs.
3411 bitmap live_out = df_get_live_out (bb);
3412 for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
3413 if (bitmap_bit_p (live_out, regno))
3414 record_fpr_use (regno);
3416 // Process live-out allocnos. We don't track individual FPR liveness
3417 // across block boundaries, so we have to assume that the whole pseudo
3418 // register is live.
3419 bitmap_iterator bi;
3420 unsigned int regno;
3421 EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb), m_fpr_pseudos,
3422 FIRST_PSEUDO_REGISTER, regno, bi)
3424 auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
3425 for (auto &allocno : range.allocnos ())
3426 record_allocno_use (&allocno);
3429 m_current_point += 1;
3431 record_artificial_refs (0);
3433 bool is_first = true;
3434 rtx_insn *start_insn = BB_END (bb);
3435 rtx_insn *insn;
3436 FOR_BB_INSNS_REVERSE (bb, insn)
3438 if (!NONDEBUG_INSN_P (insn))
3439 continue;
3441 // CLOBBERs are used to prevent pseudos from being upwards exposed.
3442 // We can ignore them if allocation is successful.
3443 if (GET_CODE (PATTERN (insn)) == CLOBBER)
3445 if (get_allocno_subgroup (XEXP (PATTERN (insn), 0)))
3446 m_dead_insns.safe_push (insn);
3447 continue;
3450 if (dump_file && (dump_flags & TDF_DETAILS))
3452 if (is_first)
3453 fprintf (dump_file, "\nBlock %d:\n", bb->index);
3454 fprintf (dump_file, "%6d:", m_current_point);
3455 pretty_printer rtl_slim_pp;
3456 rtl_slim_pp.set_output_stream (dump_file);
3457 print_insn (&rtl_slim_pp, insn, 1);
3458 pp_flush (&rtl_slim_pp);
3459 fprintf (dump_file, "\n");
      is_first = false;

      if (is_dead_insn (insn))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "%14s -- dead\n", "");
	  m_dead_insns.safe_push (insn);
	}
      else
	{
	  record_insn_refs (insn);
	  rtx pat = PATTERN (insn);
	  if (is_move_set (pat))
	    record_copy (SET_DEST (pat), SET_SRC (pat), true);
	  else
	    {
	      extract_insn (insn);
	      record_constraints (insn);
	    }
	}

      // See whether we have a complete region, with no remaining live
      // allocnos.
      if (is_isolated
	  && bitmap_empty_p (m_live_allocnos)
	  && m_live_fprs == 0
	  && m_allocation_successful
	  && !m_allocnos.is_empty ())
	{
	  rtx_insn *prev_insn = PREV_INSN (insn);
	  m_insn_ranges.safe_push ({ start_insn, prev_insn });
	  process_region ();
	  start_new_region ();
	  is_first = true;
	  start_insn = prev_insn;
	}
    }
  m_insn_ranges.safe_push ({ start_insn, BB_HEAD (bb) });

  record_artificial_refs (DF_REF_AT_TOP);

  // Process live-in FPRs.
  bitmap live_in = df_get_live_in (bb);
  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
    if (bitmap_bit_p (live_in, regno)
	&& (m_live_fprs & (1U << (regno - V0_REGNUM))))
      record_fpr_def (regno);

  // Process live-in allocnos.
  EXECUTE_IF_AND_IN_BITMAP (live_in, m_fpr_pseudos,
			    FIRST_PSEUDO_REGISTER, regno, bi)
    {
      auto range = get_allocno_subgroup (regno_reg_rtx[regno]);
      for (auto &allocno : range.allocnos ())
	if (bitmap_bit_p (m_live_allocnos, allocno.id))
	  record_allocno_def (&allocno);
    }

  m_current_point += 1;

  bitmap_clear (m_live_allocnos);
  m_live_fprs = 0;
}
// Divide the function into regions, such that no edges into or out
// of a region have live "FPR pseudos".
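//
// A hypothetical illustration: if the only FPR pseudos in a function are
// live out of bb2 and into bb3, then bb2 and bb3 are grouped into one
// region (collected in approximate reverse program order below), while
// every other block is processed on its own as an isolated block.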
void
early_ra::process_blocks ()
{
  auto_sbitmap visited (last_basic_block_for_fn (m_fn));
  auto_sbitmap fpr_pseudos_live_out (last_basic_block_for_fn (m_fn));
  auto_sbitmap fpr_pseudos_live_in (last_basic_block_for_fn (m_fn));

  bitmap_clear (visited);
  bitmap_clear (fpr_pseudos_live_out);
  bitmap_clear (fpr_pseudos_live_in);

  // Record which blocks have live FPR pseudos on entry and exit.
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fn)
    {
      if (bitmap_intersect_p (df_get_live_out (bb), m_fpr_pseudos))
	bitmap_set_bit (fpr_pseudos_live_out, bb->index);
      if (bitmap_intersect_p (df_get_live_in (bb), m_fpr_pseudos))
	bitmap_set_bit (fpr_pseudos_live_in, bb->index);
    }

  // This is incremented by 1 at the start of each region.
  m_current_region = 0;
  memset (m_fpr_recency, 0, sizeof (m_fpr_recency));

  struct stack_node { edge_iterator ei; basic_block bb; };

  auto_vec<stack_node, 32> stack;
  auto_vec<basic_block, 32> region;

  // Go through the function in reverse postorder and process the region
  // containing each block.
  unsigned int n_blocks = df_get_n_blocks (DF_FORWARD);
  int *order = df_get_postorder (DF_FORWARD);
  for (unsigned int bbi = 0; bbi < n_blocks; ++bbi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, order[bbi]);
      if (bb->index < NUM_FIXED_BLOCKS)
	continue;

      if (!bitmap_set_bit (visited, bb->index))
	continue;

      // Process forward edges before backward edges (so push backward
      // edges first).  Build the region in an approximation of reverse
      // program order.
      if (bitmap_bit_p (fpr_pseudos_live_in, bb->index))
	stack.quick_push ({ ei_start (bb->preds), nullptr });
      if (bitmap_bit_p (fpr_pseudos_live_out, bb->index))
	stack.quick_push ({ ei_start (bb->succs), bb });
      else
	region.safe_push (bb);
      while (!stack.is_empty ())
	{
	  auto &node = stack.last ();
	  if (ei_end_p (node.ei))
	    {
	      if (node.bb)
		region.safe_push (node.bb);
	      stack.pop ();
	      continue;
	    }
	  edge e = ei_edge (node.ei);
	  if (node.bb)
	    {
	      // A forward edge from a node that has not yet been added
	      // to the region.
	      if (bitmap_bit_p (fpr_pseudos_live_in, e->dest->index)
		  && bitmap_set_bit (visited, e->dest->index))
		{
		  stack.safe_push ({ ei_start (e->dest->preds), nullptr });
		  if (bitmap_bit_p (fpr_pseudos_live_out, e->dest->index))
		    stack.safe_push ({ ei_start (e->dest->succs), e->dest });
		  else
		    region.safe_push (e->dest);
		}
	      else
		ei_next (&node.ei);
	    }
	  else
	    {
	      // A backward edge from a node that has already been added
	      // to the region.
	      if (bitmap_bit_p (fpr_pseudos_live_out, e->src->index)
		  && bitmap_set_bit (visited, e->src->index))
		{
		  if (bitmap_bit_p (fpr_pseudos_live_in, e->src->index))
		    stack.safe_push ({ ei_start (e->src->preds), nullptr });
		  stack.safe_push ({ ei_start (e->src->succs), e->src });
		}
	      else
		ei_next (&node.ei);
	    }
	}

      m_current_point = 2;
      start_new_region ();

      if (region.is_empty ())
	process_block (bb, true);
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\nRegion (from %d):", bb->index);
	      for (unsigned int j = 0; j < region.length (); ++j)
		fprintf (dump_file, " %d", region[j]->index);
	      fprintf (dump_file, "\n");
	    }
	  for (unsigned int j = 0; j < region.length (); ++j)
	    {
	      basic_block bb = region[j];
	      bool is_isolated
		= ((j == 0 && !bitmap_bit_p (fpr_pseudos_live_out, bb->index))
		   || (j == region.length () - 1
		       && !bitmap_bit_p (fpr_pseudos_live_in, bb->index)));
	      process_block (bb, is_isolated);
	    }
	}
      region.truncate (0);

      if (!m_allocnos.is_empty () && m_allocation_successful)
	process_region ();
    }
}
// Run the pass on the current function.
void
early_ra::execute ()
{
  df_analyze ();

  preprocess_insns ();
  propagate_pseudo_reg_info ();
  choose_fpr_pseudos ();
  if (bitmap_empty_p (m_fpr_pseudos))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_pseudo_regs ();

  process_blocks ();
  df_verify ();
}
class pass_early_ra : public rtl_opt_pass
{
public:
  pass_early_ra (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_early_ra, ctxt)
  {}

  // opt_pass methods:
  virtual bool gate (function *);
  virtual unsigned int execute (function *);
};
bool
pass_early_ra::gate (function *)
{
  // Require a vector ISA to be enabled.
  if (!TARGET_SIMD && !TARGET_SVE)
    return false;

  if (aarch64_early_ra == AARCH64_EARLY_RA_NONE)
    return false;

  if (aarch64_early_ra == AARCH64_EARLY_RA_STRIDED
      && !TARGET_STREAMING_SME2)
    return false;

  return true;
}
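// A usage sketch, assuming aarch64_early_ra is backed by a -mearly-ra=
// command-line option: "all" would run the pass whenever SIMD or SVE is
// available, "strided" would restrict it to streaming SME2 functions
// (the only context in which strided multi-register accesses exist),
// and "none" would disable it entirely, exactly mirroring the three
// checks in the gate above.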
unsigned int
pass_early_ra::execute (function *fn)
{
  early_ra (fn).execute ();
  return 0;
}

} // end namespace
// Create a new instance of the pass.
rtl_opt_pass *
make_pass_aarch64_early_ra (gcc::context *ctxt)
{
  return new pass_early_ra (ctxt);
}
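// A registration sketch showing how this factory is typically consumed.
// The placement is hypothetical; the real entry belongs in the target's
// passes definition file (for this backend, aarch64-passes.def):
//   INSERT_PASS_BEFORE (pass_sched, 1, pass_aarch64_early_ra);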