/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
   Copyright (C) 2022-2025 Free Software Foundation, Inc.
   Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* The values of the vl and vtype registers will affect the behavior of RVV
   insns.  That is, when we need to execute an RVV instruction, we need to set
   the correct vl and vtype values by executing the vsetvl instruction before.
   Executing the fewest number of vsetvl instructions while keeping the
   behavior the same is the problem this pass is trying to solve.  This vsetvl
   pass is divided into 5 phases:

     - Phase 1 (fuse local vsetvl infos): traverses each basic block, parses
       each instruction in it that affects vl and vtype state and generates an
       array of vsetvl_info objects.  Then traverse the vsetvl_info array from
       front to back and perform fusion according to the fusion rules.  The
       fused vsetvl infos are stored in the vsetvl_block_info object's `infos`
       field.

     - Phase 2 (earliest fuse global vsetvl infos): The header_info and
       footer_info of vsetvl_block_info are used as expressions, and the
       earliest of each expression is computed.  Based on the earliest
       information, try to lift up the corresponding vsetvl info to the src
       basic block of the edge (mainly to reduce the total number of vsetvl
       instructions; this uplift will cause some execution paths to execute
       vsetvl instructions that shouldn't be there).

     - Phase 3 (pre global vsetvl info): The header_info and footer_info of
       vsetvl_block_info are used as expressions, and the LCM algorithm is used
       to compute the header_info that needs to be deleted and the one that
       needs to be inserted in some edges.

     - Phase 4 (emit vsetvl insns): Based on the fusion result of Phase 1 and
       the deletion and insertion information of Phase 3, the mandatory vsetvl
       instruction insertion, modification and deletion are performed.

     - Phase 5 (cleanup): Clean up the avl operand in the RVV operator
       instruction and clean up the unused dest operand of the vsetvl insn.

   After Phase 1 a virtual CFG of vsetvl_info is generated.  The virtual
   basic block is represented by vsetvl_block_info, and the virtual vsetvl
   statements inside are represented by vsetvl_info.  The later phases 2 and 3
   are constantly modifying and adjusting this virtual CFG.  Phase 4 performs
   insertion, modification and deletion of vsetvl instructions based on the
   optimized virtual CFG.  Phases 1, 2 and 3 do not involve modifications to
   the RTL IR.  */
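/* As a hedged illustration of the problem this pass solves (the instruction
   sequence below is an example, not taken from any particular test):

     vsetvli zero,a0,e32,m1,ta,ma
     vadd.vv v1,v2,v3
     vsetvli zero,a0,e32,m1,ta,ma   -- redundant, requests the current state
     vsub.vv v4,v1,v2

   The second vsetvli requests exactly the vl/vtype state that is already in
   effect, so fusing the two vsetvl_infos in Phase 1 lets Phase 4 emit a
   single vsetvli that serves both RVV instructions.  */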
#define IN_TARGET_CODE 1
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree-pass.h"
#include "df.h"
#include "rtl-ssa.h"
#include "cfgcleanup.h"
#include "cfganal.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "tm-constrs.h"
#include "lcm.h"
#include "predict.h"
#include "profile-count.h"
using namespace rtl_ssa;
using namespace riscv_vector;
/* Set the bitmap DST to the union of SRC of predecessors of
   basic block B.
   It's a bit different from bitmap_union_of_preds in cfganal.cc.  This
   function takes into account the case where pred is the ENTRY basic block.
   The main reason for this difference is to make it easier to insert some
   special value into the ENTRY basic block.  For example, vsetvl_info with a
   status of UNKNOWN.  */
static void
bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b)
{
  unsigned int set_size = dst->size;
  edge e;
  unsigned ix;

  for (ix = 0; ix < EDGE_COUNT (b->preds); ix++)
    {
      e = EDGE_PRED (b, ix);
      bitmap_copy (dst, src[e->src->index]);
      break;
    }

  if (ix == EDGE_COUNT (b->preds))
    bitmap_clear (dst);
  else
    for (ix++; ix < EDGE_COUNT (b->preds); ix++)
      {
        unsigned int i;
        SBITMAP_ELT_TYPE *p, *r;

        e = EDGE_PRED (b, ix);
        p = src[e->src->index]->elms;
        r = dst->elms;
        for (i = 0; i < set_size; i++)
          *r++ |= *p++;
      }
}
/* Compute the reaching definition in and out based on the GEN and KILL
   information in each basic block.
   This function references the compute_available implementation in lcm.cc.  */
static void
compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in,
                            sbitmap *out)
{
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of AVOUT above.  Use reverse postorder
     to make the forward dataflow problem require fewer iterations.  */
  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
  for (int i = 0; i < n; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
      *qin++ = bb;
      bb->aux = bb;
    }
  free (rpo);

  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
        qout = worklist;

      /* Do not clear the aux field for blocks which are successors of the
         ENTRY block.  That way we never add them to the worklist again.  */
      if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun))
        bb->aux = NULL;

      bitmap_union_of_preds_with_entry (in[bb->index], out, bb);

      if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index],
                                kill[bb->index]))
        /* If the out state of this block changed, then we need
           to add the successors of this block to the worklist
           if they are not already on the worklist.  */
        FOR_EACH_EDGE (e, ei, bb->succs)
          if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
            {
              *qin++ = e->dest;
              e->dest->aux = e;
              qlen++;

              if (qin >= qend)
                qin = worklist;
            }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}
/* Classification of vsetvl instruction.  */
enum vsetvl_type
{
  VSETVL_NORMAL,
  VSETVL_VTYPE_CHANGE_ONLY,
  VSETVL_DISCARD_RESULT,
  NUM_VSETVL_TYPE
};

enum emit_type
{
  /* emit_insn directly.  */
  EMIT_DIRECT,
  EMIT_BEFORE,
  EMIT_AFTER,
};

static const int MAX_LMUL = 8;
/* Dump helper functions.  */
static const char *
vlmul_to_str (vlmul_type vlmul)
{
  switch (vlmul)
    {
    case LMUL_1:
      return "m1";
    case LMUL_2:
      return "m2";
    case LMUL_4:
      return "m4";
    case LMUL_8:
      return "m8";
    case LMUL_F8:
      return "mf8";
    case LMUL_F4:
      return "mf4";
    case LMUL_F2:
      return "mf2";
    default:
      return "INVALID LMUL";
    }
}

static const char *
policy_to_str (bool agnostic_p)
{
  return agnostic_p ? "agnostic" : "undisturbed";
}
/* Return true if it is an RVV instruction that depends on the VTYPE global
   status register.  */
static bool
has_vtype_op (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
}

/* Return true if the instruction ignores the VLMUL field of VTYPE.  */
static bool
ignore_vlmul_insn_p (rtx_insn *rinsn)
{
  return get_attr_type (rinsn) == TYPE_VIMOVVX
         || get_attr_type (rinsn) == TYPE_VFMOVVF
         || get_attr_type (rinsn) == TYPE_VIMOVXV
         || get_attr_type (rinsn) == TYPE_VFMOVFV;
}

/* Return true if the instruction is a scalar move instruction.  */
static bool
scalar_move_insn_p (rtx_insn *rinsn)
{
  return get_attr_type (rinsn) == TYPE_VIMOVXV
         || get_attr_type (rinsn) == TYPE_VFMOVFV;
}

/* Return true if the instruction is a fault-first load instruction.  */
static bool
fault_first_load_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0
         && (get_attr_type (rinsn) == TYPE_VLDFF
             || get_attr_type (rinsn) == TYPE_VLSEGDFF);
}

/* Return true if the instruction is a read vl instruction.  */
static bool
read_vl_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
}

/* Return true if it is a vsetvl instruction.  */
static bool
vector_config_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
}

/* Return true if it is vsetvldi or vsetvlsi.  */
static bool
vsetvl_insn_p (rtx_insn *rinsn)
{
  if (!rinsn || !vector_config_insn_p (rinsn))
    return false;
  return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
          || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
}
/* Return true if it is the bogus vsetvl_pre instruction:

     (define_insn "@vlmax_avl<mode>"
       [(set (match_operand:P 0 "register_operand" "=r")
             (unspec:P [(match_operand:P 1 "const_int_operand" "i")] UNSPEC_VLMAX))]
       ...
       [(set_attr "type" "vsetvl_pre")])

   As described above, it's the bogus instruction which doesn't emit any
   assembler and should be removed eventually.  It's used for occupying a
   scalar register for the VLMAX avl of an RVV instruction before register
   allocation, e.g.:

     vsetvl_pre (set r136)
     vadd.vv (use r136 with VLMAX avl)
     ...
     vadd.vv (use r136 with VLMAX avl)

   and after register allocation and this pass:

     vsetvl_pre (set a5) ---> removed.
     vsetvl a5,zero,... ---> Inserted.  */
static bool
vsetvl_pre_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0
         && get_attr_type (rinsn) == TYPE_VSETVL_PRE;
}
/* Return true if it is vsetvl zero, rs1.  */
static bool
vsetvl_discard_result_insn_p (rtx_insn *rinsn)
{
  if (!vector_config_insn_p (rinsn))
    return false;
  return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
          || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
}

/* Return true if INSN is a real instruction located within BB.  */
static bool
real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
{
  return insn != nullptr && insn->is_real () && insn->bb () == bb;
}
/* Helper function to get VL operand for VLMAX insn.  */
static rtx
get_vl (rtx_insn *rinsn)
{
  if (has_vl_op (rinsn))
    {
      extract_insn_cached (rinsn);
      return recog_data.operand[get_attr_vl_op_idx (rinsn)];
    }
  return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
}

/* Helper function to get AVL operand.  */
static rtx
get_avl (rtx_insn *rinsn)
{
  if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
    return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);

  if (!has_vl_op (rinsn))
    return NULL_RTX;
  if (vlmax_avl_type_p (rinsn))
    return RVV_VLMAX;
  extract_insn_cached (rinsn);
  return recog_data.operand[get_attr_vl_op_idx (rinsn)];
}
/* Get default mask policy.  */
static bool
get_default_ma ()
{
  /* For the instruction that doesn't require MA, we still need a default
     value to emit vsetvl.  We pick up the default value according to the
     prefer policy.  */
  return (bool) (get_prefer_mask_policy () & 0x1
                 || (get_prefer_mask_policy () >> 1 & 0x1));
}

/* Helper function to get MA operand.  */
static bool
mask_agnostic_p (rtx_insn *rinsn)
{
  /* If it doesn't have MA, we return agnostic by default.  */
  extract_insn_cached (rinsn);
  int ma = get_attr_ma (rinsn);
  return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
}
/* Return true if FN has a vector instruction that uses VL/VTYPE.  */
static bool
has_vector_insn (function *fn)
{
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, fn)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
        return true;
  return false;
}
/* Return the vlmul that, together with SEW, yields the requested SEW/LMUL
   RATIO, or LMUL_RESERVED if no such vlmul exists.  */
static vlmul_type
calculate_vlmul (unsigned int sew, unsigned int ratio)
{
  const vlmul_type ALL_LMUL[]
    = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
  for (const vlmul_type vlmul : ALL_LMUL)
    if (calculate_ratio (sew, vlmul) == ratio)
      return vlmul;
  return LMUL_RESERVED;
}
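/* For instance (illustrative numbers only): with SEW = 32 the ratio is
   32/LMUL, so calculate_vlmul (32, 16) yields LMUL_2 (32 / 2 = 16) and
   calculate_vlmul (32, 64) yields LMUL_F2 (32 / (1/2) = 64).  A ratio that
   no LMUL in the table can produce, e.g. calculate_vlmul (8, 128), yields
   LMUL_RESERVED.  */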
/* Get the currently supported maximum sew used in the int rvv
   instructions.  */
static uint8_t
get_max_int_sew ()
{
  if (TARGET_VECTOR_ELEN_64)
    return 64;
  else if (TARGET_VECTOR_ELEN_32)
    return 32;
  gcc_unreachable ();
}

/* Get the currently supported maximum sew used in the float rvv
   instructions.  */
static uint8_t
get_max_float_sew ()
{
  if (TARGET_VECTOR_ELEN_FP_64)
    return 64;
  else if (TARGET_VECTOR_ELEN_FP_32)
    return 32;
  else if (TARGET_VECTOR_ELEN_FP_16)
    return 16;
  gcc_unreachable ();
}
enum def_type
{
  REAL_SET = 1 << 0,
  PHI_SET = 1 << 1,
  BB_HEAD_SET = 1 << 2,
  BB_END_SET = 1 << 3,
  /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
     PHI_SET, BB_HEAD_SET, BB_END_SET and
     CLOBBER_DEF def_info types.  Currently,
     we conservatively do not optimize clobber
     def since we don't see the case that we
     need to optimize it.  */
  CLOBBER_DEF = 1 << 4,
};

/* Return true if INSN is a def of the kind selected by TYPES.  */
static bool
insn_should_be_added_p (const insn_info *insn, unsigned int types)
{
  if (insn->is_real () && (types & REAL_SET))
    return true;
  if (insn->is_phi () && (types & PHI_SET))
    return true;
  if (insn->is_bb_head () && (types & BB_HEAD_SET))
    return true;
  if (insn->is_bb_end () && (types & BB_END_SET))
    return true;
  return false;
}
/* Return all real uses of register REGNO defined by INSN, following phi
   nodes transitively.  */
static const hash_set<use_info *>
get_all_real_uses (insn_info *insn, unsigned regno)
{
  gcc_assert (insn->is_real ());

  hash_set<use_info *> uses;
  auto_vec<phi_info *> work_list;
  hash_set<phi_info *> visited_list;

  for (def_info *def : insn->defs ())
    {
      if (!def->is_reg () || def->regno () != regno)
        continue;
      set_info *set = safe_dyn_cast<set_info *> (def);
      if (!set)
        continue;
      for (use_info *use : set->nondebug_insn_uses ())
        if (use->insn ()->is_real ())
          uses.add (use);
      for (use_info *use : set->phi_uses ())
        work_list.safe_push (use->phi ());
    }

  while (!work_list.is_empty ())
    {
      phi_info *phi = work_list.pop ();
      visited_list.add (phi);

      for (use_info *use : phi->nondebug_insn_uses ())
        if (use->insn ()->is_real ())
          uses.add (use);
      for (use_info *use : phi->phi_uses ())
        if (!visited_list.contains (use->phi ()))
          work_list.safe_push (use->phi ());
    }

  return uses;
}
/* Recursively find all define instructions.  The kind of instruction is
   specified by the DEF_TYPE.  */
static hash_set<set_info *>
get_all_sets (phi_info *phi, unsigned int types)
{
  hash_set<set_info *> insns;
  auto_vec<phi_info *> work_list;
  hash_set<phi_info *> visited_list;
  if (!phi)
    return hash_set<set_info *> ();
  work_list.safe_push (phi);

  while (!work_list.is_empty ())
    {
      phi_info *phi = work_list.pop ();
      visited_list.add (phi);
      for (use_info *use : phi->inputs ())
        {
          def_info *def = use->def ();
          set_info *set = safe_dyn_cast<set_info *> (def);
          if (!set)
            return hash_set<set_info *> ();

          gcc_assert (!set->insn ()->is_debug_insn ());

          if (insn_should_be_added_p (set->insn (), types))
            insns.add (set);
          if (set->insn ()->is_phi ())
            {
              phi_info *new_phi = as_a<phi_info *> (set);
              if (!visited_list.contains (new_phi))
                work_list.safe_push (new_phi);
            }
        }
    }
  return insns;
}
static hash_set<set_info *>
get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
              bool /* get_phi */ phi_p, bool /* get_function_parameter */ param_p)
{
  if (real_p && phi_p && param_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set),
                         REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);

  else if (real_p && param_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set),
                         REAL_SET | BB_HEAD_SET | BB_END_SET);

  else if (real_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
  return hash_set<set_info *> ();
}
static bool
source_equal_p (insn_info *insn1, insn_info *insn2)
{
  if (!insn1 || !insn2)
    return false;
  rtx_insn *rinsn1 = insn1->rtl ();
  rtx_insn *rinsn2 = insn2->rtl ();
  if (!rinsn1 || !rinsn2)
    return false;

  rtx note1 = find_reg_equal_equiv_note (rinsn1);
  rtx note2 = find_reg_equal_equiv_note (rinsn2);
  /* We could handle the case of similar-looking REG_EQUALs as well but
     would need to verify that no insn in between modifies any of the source
     operands.  */
  if (note1 && note2 && rtx_equal_p (note1, note2)
      && REG_NOTE_KIND (note1) == REG_EQUIV)
    return true;
  return false;
}
/* Return the single real source instruction of SET, or null if there is
   more than one source or the source is unknown.  */
static insn_info *
extract_single_source (set_info *set)
{
  if (!set)
    return nullptr;
  if (set->insn ()->is_real ())
    return set->insn ();
  if (!set->insn ()->is_phi ())
    return nullptr;
  hash_set<set_info *> sets = get_all_sets (set, true, false, true);
  if (sets.is_empty ())
    return nullptr;

  insn_info *first_insn = (*sets.begin ())->insn ();
  if (first_insn->is_artificial ())
    return nullptr;
  for (const set_info *set : sets)
    {
      /* If there is a head or end insn, we conservatively return
         NULL so that VSETVL PASS will insert vsetvl directly.  */
      if (set->insn ()->is_artificial ())
        return nullptr;
      if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn))
        return nullptr;
    }

  return first_insn;
}
/* Return true if SET1 and SET2 trace back to the same single source.  */
static bool
same_equiv_note_p (set_info *set1, set_info *set2)
{
  insn_info *insn1 = extract_single_source (set1);
  insn_info *insn2 = extract_single_source (set2);
  if (!insn1 || !insn2)
    return false;
  return source_equal_p (insn1, insn2);
}
/* Return true if the SET result is not used by any instructions.  */
static bool
has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
{
  if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
    return false;

  rtx_insn *iter;
  for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb));
       iter = NEXT_INSN (iter))
    if (df_find_use (iter, regno_reg_rtx[regno]))
      return false;

  return true;
}
/* Return true for the special block that we can't apply LCM optimization.  */
static bool
invalid_opt_bb_p (basic_block cfg_bb)
{
  edge e;
  edge_iterator ei;

  /* We don't do LCM optimizations on complex edges.  */
  FOR_EACH_EDGE (e, ei, cfg_bb->preds)
    if (e->flags & EDGE_COMPLEX)
      return true;

  /* We only do LCM optimizations on blocks that are post dominated by
     EXIT block, that is, we don't do LCM optimizations on an infinite
     loop.  */
  FOR_EACH_EDGE (e, ei, cfg_bb->succs)
    if (e->flags & EDGE_FAKE)
      return true;

  return false;
}
/* Get all predecessors of BB.  */
static hash_set<basic_block>
get_all_predecessors (basic_block bb)
{
  hash_set<basic_block> blocks;
  auto_vec<basic_block> work_list;
  hash_set<basic_block> visited_list;
  work_list.safe_push (bb);

  while (!work_list.is_empty ())
    {
      basic_block new_bb = work_list.pop ();
      visited_list.add (new_bb);
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, new_bb->preds)
        {
          if (!visited_list.contains (e->src))
            work_list.safe_push (e->src);
          blocks.add (e->src);
        }
    }
  return blocks;
}
/* These flags indicate the minimum demand of the vl and vtype values by the
   RVV instruction.  For example, DEMAND_RATIO_P indicates that this RVV
   instruction only needs the SEW/LMUL ratio to remain the same, and does not
   require SEW and LMUL to be fixed.
   Therefore, if the former RVV instruction needs DEMAND_RATIO_P and the
   latter instruction needs DEMAND_SEW_LMUL_P and its SEW/LMUL is the same as
   that of the former instruction, then we can make the minimum demand of the
   former instruction strict to DEMAND_SEW_LMUL_P, and its required SEW and
   LMUL are the SEW and LMUL of the latter instruction, and the vsetvl
   instruction generated according to the new demand can also be used for the
   latter instruction, so there is no need to insert a separate vsetvl
   instruction for the latter instruction.  An illustrative example follows
   the enum below.  */
enum demand_flags : unsigned
{
  DEMAND_EMPTY_P = 0,
  DEMAND_SEW_P = 1 << 0,
  DEMAND_LMUL_P = 1 << 1,
  DEMAND_RATIO_P = 1 << 2,
  DEMAND_GE_SEW_P = 1 << 3,
  DEMAND_TAIL_POLICY_P = 1 << 4,
  DEMAND_MASK_POLICY_P = 1 << 5,
  DEMAND_AVL_P = 1 << 6,
  DEMAND_NON_ZERO_AVL_P = 1 << 7,
};
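/* A hedged example of the demand fusion described above (the instructions
   and vtype settings are illustrative, not taken from any particular test):

     vlm.v v0,(a2)      -- former: only demands the SEW/LMUL ratio
     vle32.v v1,(a1)    -- latter: demands SEW = 32, LMUL = M1 (ratio 32)

   Both demands agree on a SEW/LMUL ratio of 32, so the weaker ratio-only
   demand can be strictened to SEW = 32, LMUL = M1, and a single

     vsetvli zero,a0,e32,m1,ta,ma

   emitted before the former instruction serves both of them.  */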
/* We split the demand information into three parts.  They are sew and lmul
   related (sew_lmul_demand_type), tail and mask policy related
   (policy_demand_type) and avl related (avl_demand_type).  Then we define
   three interfaces: available_p, compatible_p and merge.  available_p is
   used to determine whether the two vsetvl infos prev_info and next_info are
   available or not.  If prev_info is available for next_info, it means that
   the RVV insn corresponding to next_info on the path from prev_info to
   next_info can be used without inserting a separate vsetvl instruction.
   compatible_p is used to determine whether prev_info is compatible with
   next_info, and if so, merge can be used to merge the stricter demand
   information from next_info into prev_info so that prev_info becomes
   available to next_info.  */

enum class sew_lmul_demand_type : unsigned
{
  sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P,
  ratio_only = demand_flags::DEMAND_RATIO_P,
  sew_only = demand_flags::DEMAND_SEW_P,
  ge_sew = demand_flags::DEMAND_GE_SEW_P,
  ratio_and_ge_sew
    = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P,
};

enum class policy_demand_type : unsigned
{
  tail_mask_policy
    = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P,
  tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P,
  mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P,
  ignore_policy = demand_flags::DEMAND_EMPTY_P,
};

enum class avl_demand_type : unsigned
{
  avl = demand_flags::DEMAND_AVL_P,
  non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P,
  ignore_avl = demand_flags::DEMAND_EMPTY_P,
};
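/* For example (a hedged illustration), a plain vadd.vv carries the demand
   tuple (sew_lmul, tail_mask_policy, avl): it needs the exact SEW/LMUL, both
   policies and the exact AVL.  A tail-agnostic vmv.s.x only carries
   (ge_sew, tail_mask_policy, non_zero_avl): any SEW no smaller than its own
   and any non-zero AVL will do, which is what makes it easy to fuse with
   neighboring vsetvl infos.  */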
class vsetvl_info
{
private:
  insn_info *m_insn;
  bb_info *m_bb;
  rtx m_avl;
  rtx m_vl;
  set_info *m_avl_def;
  uint8_t m_sew;
  uint8_t m_max_sew;
  vlmul_type m_vlmul;
  uint8_t m_ratio;
  bool m_ta;
  bool m_ma;

  sew_lmul_demand_type m_sew_lmul_demand;
  policy_demand_type m_policy_demand;
  avl_demand_type m_avl_demand;

  enum class state_type
  {
    UNINITIALIZED,
    VALID,
    UNKNOWN,
    EMPTY,
  };
  state_type m_state;

  bool m_delete;
  bool m_change_vtype_only;
  insn_info *m_read_vl_insn;
  bool m_vl_used_by_non_rvv_insn;

public:
  vsetvl_info ()
    : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX),
      m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED),
      m_ratio (0), m_ta (false), m_ma (false),
      m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul),
      m_policy_demand (policy_demand_type::tail_mask_policy),
      m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED),
      m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr),
      m_vl_used_by_non_rvv_insn (false)
  {}

  vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); }

  vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); }
  void set_avl (rtx avl) { m_avl = avl; }
  void set_vl (rtx vl) { m_vl = vl; }
  void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; }
  void set_sew (uint8_t sew) { m_sew = sew; }
  void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; }
  void set_ratio (uint8_t ratio) { m_ratio = ratio; }
  void set_ta (bool ta) { m_ta = ta; }
  void set_ma (bool ma) { m_ma = ma; }
  void set_delete () { m_delete = true; }
  void set_bb (bb_info *bb) { m_bb = bb; }
  void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; }
  void set_change_vtype_only () { m_change_vtype_only = true; }
  void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; }

  rtx get_avl () const { return m_avl; }
  rtx get_vl () const { return m_vl; }
  set_info *get_avl_def () const { return m_avl_def; }
  uint8_t get_sew () const { return m_sew; }
  vlmul_type get_vlmul () const { return m_vlmul; }
  uint8_t get_ratio () const { return m_ratio; }
  bool get_ta () const { return m_ta; }
  bool get_ma () const { return m_ma; }
  insn_info *get_insn () const { return m_insn; }
  bool delete_p () const { return m_delete; }
  bb_info *get_bb () const { return m_bb; }
  uint8_t get_max_sew () const { return m_max_sew; }
  insn_info *get_read_vl_insn () const { return m_read_vl_insn; }
  bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; }

  bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); }
  bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); }
  bool has_nonvlmax_reg_avl () const
  {
    return m_avl && REG_P (m_avl) && !has_vlmax_avl ();
  }
  bool has_non_zero_avl () const
  {
    if (has_imm_avl ())
      return INTVAL (m_avl) > 0;
    return has_vlmax_avl ();
  }
  bool has_vl () const
  {
    /* The VL operand can only be either a NULL_RTX or a register.  */
    gcc_assert (!m_vl || REG_P (m_vl));
    return m_vl != NULL_RTX;
  }
  bool has_same_ratio (const vsetvl_info &other) const
  {
    return get_ratio () == other.get_ratio ();
  }
  /* The block of INSN isn't always the same as the block of the VSETVL_INFO,
     meaning we may have 'get_insn ()->bb () != get_bb ()'.

       E.g. BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1)

     BB 2 has an empty VSETVL_INFO, whereas BB 3 has a VSETVL_INFO that
     satisfies get_insn ()->bb () == get_bb ().  In earliest fusion, we may
     fuse bb 3 and bb 2 so that the 'get_bb ()' of BB2 VSETVL_INFO will be
     BB2, whereas the 'get_insn ()' of BB2 VSETVL INFO will be the rvv insn 1
     (which is located in BB3).  */
  bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); }
  void update_avl (const vsetvl_info &other)
  {
    m_avl = other.get_avl ();
    m_vl = other.get_vl ();
    m_avl_def = other.get_avl_def ();
  }
  bool uninit_p () const { return m_state == state_type::UNINITIALIZED; }
  bool valid_p () const { return m_state == state_type::VALID; }
  bool unknown_p () const { return m_state == state_type::UNKNOWN; }
  bool empty_p () const { return m_state == state_type::EMPTY; }
  bool change_vtype_only_p () const
  {
    return m_change_vtype_only && !TARGET_XTHEADVECTOR;
  }

  void set_valid () { m_state = state_type::VALID; }
  void set_unknown () { m_state = state_type::UNKNOWN; }
  void set_empty () { m_state = state_type::EMPTY; }
  void set_sew_lmul_demand (sew_lmul_demand_type demand)
  {
    m_sew_lmul_demand = demand;
  }
  void set_policy_demand (policy_demand_type demand)
  {
    m_policy_demand = demand;
  }
  void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; }

  sew_lmul_demand_type get_sew_lmul_demand () const
  {
    return m_sew_lmul_demand;
  }
  policy_demand_type get_policy_demand () const { return m_policy_demand; }
  avl_demand_type get_avl_demand () const { return m_avl_demand; }
  void normalize_demand (unsigned demand_flags)
  {
    switch (demand_flags
            & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P | DEMAND_GE_SEW_P))
      {
      case (unsigned) sew_lmul_demand_type::sew_lmul:
        m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul;
        break;
      case (unsigned) sew_lmul_demand_type::ratio_only:
        m_sew_lmul_demand = sew_lmul_demand_type::ratio_only;
        break;
      case (unsigned) sew_lmul_demand_type::sew_only:
        m_sew_lmul_demand = sew_lmul_demand_type::sew_only;
        break;
      case (unsigned) sew_lmul_demand_type::ge_sew:
        m_sew_lmul_demand = sew_lmul_demand_type::ge_sew;
        break;
      case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew:
        m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew;
        break;
      default:
        gcc_unreachable ();
      }

    switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P))
      {
      case (unsigned) policy_demand_type::tail_mask_policy:
        m_policy_demand = policy_demand_type::tail_mask_policy;
        break;
      case (unsigned) policy_demand_type::tail_policy_only:
        m_policy_demand = policy_demand_type::tail_policy_only;
        break;
      case (unsigned) policy_demand_type::mask_policy_only:
        m_policy_demand = policy_demand_type::mask_policy_only;
        break;
      case (unsigned) policy_demand_type::ignore_policy:
        m_policy_demand = policy_demand_type::ignore_policy;
        break;
      default:
        gcc_unreachable ();
      }

    switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P))
      {
      case (unsigned) avl_demand_type::avl:
        m_avl_demand = avl_demand_type::avl;
        break;
      case (unsigned) avl_demand_type::non_zero_avl:
        m_avl_demand = avl_demand_type::non_zero_avl;
        break;
      case (unsigned) avl_demand_type::ignore_avl:
        m_avl_demand = avl_demand_type::ignore_avl;
        break;
      default:
        gcc_unreachable ();
      }
  }
  void parse_insn (rtx_insn *rinsn)
  {
    if (!NONDEBUG_INSN_P (rinsn))
      return;
    if (optimize == 0 && !has_vtype_op (rinsn))
      return;
    gcc_assert (!vsetvl_discard_result_insn_p (rinsn));
    set_valid ();
    extract_insn_cached (rinsn);
    m_avl = ::get_avl (rinsn);
    if (has_vlmax_avl () || vsetvl_insn_p (rinsn))
      m_vl = ::get_vl (rinsn);
    m_sew = ::get_sew (rinsn);
    m_vlmul = ::get_vlmul (rinsn);
    m_ta = tail_agnostic_p (rinsn);
    m_ma = mask_agnostic_p (rinsn);
  }
  void parse_insn (insn_info *insn)
  {
    /* The VL dest of the insn.  */
    rtx dest_vl = NULL_RTX;

    m_insn = insn;
    m_bb = insn->bb ();

    /* Return if it is a debug insn, for consistency with optimize == 0.  */
    if (insn->is_debug_insn ())
      return;

    /* We set it as unknown since we don't know what will happen in CALL or
       ASM.  */
    if (insn->is_call () || insn->is_asm ())
      {
        set_unknown ();
        return;
      }

    /* If this is something that updates VL/VTYPE that we don't know about,
       set the state to unknown.  */
    if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
        && (find_access (insn->defs (), VL_REGNUM)
            || find_access (insn->defs (), VTYPE_REGNUM)))
      {
        set_unknown ();
        return;
      }

    if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
      return;

    set_valid ();

    m_avl = ::get_avl (insn->rtl ());
    if (m_avl)
      {
        if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
          {
            m_vl = ::get_vl (insn->rtl ());
            dest_vl = m_vl;
          }

        if (has_nonvlmax_reg_avl ())
          m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
      }

    m_sew = ::get_sew (insn->rtl ());
    m_vlmul = ::get_vlmul (insn->rtl ());
    m_ratio = get_attr_ratio (insn->rtl ());
    /* When get_attr_ratio is invalid, this kind of instruction
       doesn't care about ratio.  However, we still need this value
       in demand info backward analysis.  */
    if (m_ratio == INVALID_ATTRIBUTE)
      m_ratio = calculate_ratio (m_sew, m_vlmul);
    m_ta = tail_agnostic_p (insn->rtl ());
    m_ma = mask_agnostic_p (insn->rtl ());

    /* If merge operand is undef value, we prefer agnostic.  */
    int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
    if (merge_op_idx != INVALID_ATTRIBUTE
        && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
      {
        m_ta = true;
        m_ma = true;
      }

    /* Determine the demand info of the RVV insn.  */
    m_max_sew = get_max_int_sew ();
    unsigned dflags = 0;
    if (vector_config_insn_p (insn->rtl ()))
      {
        dflags |= demand_flags::DEMAND_AVL_P;
        dflags |= demand_flags::DEMAND_RATIO_P;
      }
    else
      {
        if (has_vl_op (insn->rtl ()))
          {
            if (scalar_move_insn_p (insn->rtl ()))
              {
                /* If the avl for vmv.s.x comes from the vsetvl instruction,
                   we don't know if the avl is non-zero, so it is set to
                   DEMAND_AVL_P for now.  It may be corrected to
                   DEMAND_NON_ZERO_AVL_P later when more information is
                   available.  */
                if (has_non_zero_avl ())
                  dflags |= demand_flags::DEMAND_NON_ZERO_AVL_P;
                else
                  dflags |= demand_flags::DEMAND_AVL_P;
              }
            else
              dflags |= demand_flags::DEMAND_AVL_P;
          }

        if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
          dflags |= demand_flags::DEMAND_RATIO_P;
        else
          {
            if (scalar_move_insn_p (insn->rtl ()) && m_ta)
              {
                dflags |= demand_flags::DEMAND_GE_SEW_P;
                m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV
                              ? get_max_float_sew ()
                              : get_max_int_sew ();
              }
            else
              dflags |= demand_flags::DEMAND_SEW_P;

            if (!ignore_vlmul_insn_p (insn->rtl ()))
              dflags |= demand_flags::DEMAND_LMUL_P;
          }

        dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
        dflags |= demand_flags::DEMAND_MASK_POLICY_P;
      }

    normalize_demand (dflags);

    /* Optimize AVL from the vsetvl instruction.  */
    insn_info *def_insn = extract_single_source (get_avl_def ());
    if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
      {
        vsetvl_info def_info = vsetvl_info (def_insn);
        if ((scalar_move_insn_p (insn->rtl ())
             || def_info.get_ratio () == get_ratio ())
            && (def_info.has_vlmax_avl () || def_info.has_imm_avl ()))
          {
            update_avl (def_info);
            if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ())
              m_avl_demand = avl_demand_type::non_zero_avl;
          }
      }

    /* Determine if the dest operand (vl) has been used by non-RVV
       instructions.  */
    if (dest_vl && REG_P (dest_vl))
      {
        const hash_set<use_info *> vl_uses
          = get_all_real_uses (get_insn (), REGNO (dest_vl));
        for (use_info *use : vl_uses)
          {
            gcc_assert (use->insn ()->is_real ());
            rtx_insn *rinsn = use->insn ()->rtl ();
            if (!has_vl_op (rinsn)
                || count_regno_occurrences (rinsn, REGNO (dest_vl)) != 1)
              {
                m_vl_used_by_non_rvv_insn = true;
                break;
              }
            rtx avl = ::get_avl (rinsn);
            if (!avl || !REG_P (avl) || REGNO (dest_vl) != REGNO (avl))
              {
                m_vl_used_by_non_rvv_insn = true;
                break;
              }
          }
      }

    /* Collect the read vl insn for the fault-only-first rvv loads.  */
    if (fault_first_load_p (insn->rtl ()))
      {
        for (insn_info *i = insn->next_nondebug_insn ();
             i->bb () == insn->bb (); i = i->next_nondebug_insn ())
          {
            if (find_access (i->defs (), VL_REGNUM))
              break;
            if (i->rtl () && read_vl_insn_p (i->rtl ()))
              {
                m_read_vl_insn = i;
                break;
              }
          }
      }
  }
  /* Returns the corresponding vsetvl rtx pat.  */
  rtx get_vsetvl_pat (bool ignore_vl = false) const
  {
    rtx avl = get_avl ();
    /* If optimize == 0 and the instruction is vmv.x.s/vfmv.f.s,
       set the value of avl to (const_int 0) so that VSETVL PASS will
       insert vsetvl correctly.  */
    if (!get_avl ())
      avl = GEN_INT (0);
    rtx sew = gen_int_mode (get_sew (), Pmode);
    rtx vlmul = gen_int_mode (get_vlmul (), Pmode);
    rtx ta = gen_int_mode (get_ta (), Pmode);
    rtx ma = gen_int_mode (get_ma (), Pmode);

    if (change_vtype_only_p ())
      return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
    else if (has_vl () && !ignore_vl)
      return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma);
    else
      return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
  }
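  /* As a hedged illustration, the three generated patterns roughly
     correspond to the following assembler forms (vtype operands
     abbreviated as e32,m1,ta,ma):

       gen_vsetvl                   -> vsetvli rd,rs1,e32,m1,ta,ma
       gen_vsetvl_discard_result    -> vsetvli zero,rs1,e32,m1,ta,ma
       gen_vsetvl_vtype_change_only -> vsetvli zero,zero,e32,m1,ta,ma  */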
  /* Return true if the non-AVL operands of THIS will be modified
     if we fuse the VL modification from OTHER into THIS.  */
  bool vl_modify_non_avl_op_p (const vsetvl_info &other) const
  {
    /* We don't need to worry about any operands from THIS being
       modified by OTHER vsetvl, since OTHER vsetvl doesn't
       modify any operand.  */
    if (!other.has_vl ())
      return false;

    /* THIS VL operand always preempts OTHER VL operand.  */
    if (this->has_vl ())
      return false;

    /* If THIS has non IMM AVL and THIS is AVL compatible with
       OTHER, the AVL value of THIS is the same as the VL value of OTHER.  */
    if (!this->has_imm_avl ())
      return false;
    return find_access (this->get_insn ()->uses (), REGNO (other.get_vl ()));
  }
  bool operator== (const vsetvl_info &other) const
  {
    gcc_assert (!uninit_p () && !other.uninit_p ()
                && "Uninitialization should not happen");

    if (empty_p ())
      return other.empty_p ();
    if (unknown_p ())
      return other.unknown_p ();

    return get_insn () == other.get_insn () && get_bb () == other.get_bb ()
           && get_avl () == other.get_avl () && get_vl () == other.get_vl ()
           && get_avl_def () == other.get_avl_def ()
           && get_sew () == other.get_sew ()
           && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta ()
           && get_ma () == other.get_ma ()
           && get_avl_demand () == other.get_avl_demand ()
           && get_sew_lmul_demand () == other.get_sew_lmul_demand ()
           && get_policy_demand () == other.get_policy_demand ();
  }
  void dump (FILE *file, const char *indent = "") const
  {
    if (uninit_p ())
      {
        fprintf (file, "UNINITIALIZED.\n");
        return;
      }
    else if (unknown_p ())
      {
        fprintf (file, "UNKNOWN.\n");
        return;
      }
    else if (empty_p ())
      {
        fprintf (file, "EMPTY.\n");
        return;
      }
    else if (valid_p ())
      fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (),
               get_bb ()->index (), delete_p () ? " (deleted)" : "");
    else
      gcc_unreachable ();

    fprintf (file, "%sDemand fields:", indent);
    if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul)
      fprintf (file, " demand_sew_lmul");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only)
      fprintf (file, " demand_ratio_only");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only)
      fprintf (file, " demand_sew_only");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew)
      fprintf (file, " demand_ge_sew");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew)
      fprintf (file, " demand_ratio_and_ge_sew");

    if (m_policy_demand == policy_demand_type::tail_mask_policy)
      fprintf (file, " demand_tail_mask_policy");
    else if (m_policy_demand == policy_demand_type::tail_policy_only)
      fprintf (file, " demand_tail_policy_only");
    else if (m_policy_demand == policy_demand_type::mask_policy_only)
      fprintf (file, " demand_mask_policy_only");

    if (m_avl_demand == avl_demand_type::avl)
      fprintf (file, " demand_avl");
    else if (m_avl_demand == avl_demand_type::non_zero_avl)
      fprintf (file, " demand_non_zero_avl");
    fprintf (file, "\n");

    fprintf (file, "%sSEW=%d, ", indent, get_sew ());
    fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ()));
    fprintf (file, "RATIO=%d, ", get_ratio ());
    fprintf (file, "MAX_SEW=%d\n", get_max_sew ());

    fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ()));
    fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ()));

    fprintf (file, "%sAVL=", indent);
    print_rtl_single (file, get_avl ());
    fprintf (file, "%sVL=", indent);
    print_rtl_single (file, get_vl ());
    if (change_vtype_only_p ())
      fprintf (file, "%schange vtype only\n", indent);
    if (get_read_vl_insn ())
      fprintf (file, "%sread_vl_insn: insn %u\n", indent,
               get_read_vl_insn ()->uid ());
    if (vl_used_by_non_rvv_insn_p ())
      fprintf (file, "%suse_by_non_rvv_insn=true\n", indent);
  }
};
class vsetvl_block_info
{
public:
  /* The static execute probability of the demand info.  */
  profile_probability probability;

  auto_vec<vsetvl_info> local_infos;
  vsetvl_info global_info;
  bb_info *bb;

  vsetvl_block_info () : bb (nullptr)
  {
    local_infos.safe_grow_cleared (0);
    global_info.set_empty ();
  }
  vsetvl_block_info (const vsetvl_block_info &other)
    : probability (other.probability), local_infos (other.local_infos.copy ()),
      global_info (other.global_info), bb (other.bb)
  {}

  vsetvl_info &get_entry_info ()
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info : local_infos[0];
  }
  vsetvl_info &get_exit_info ()
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info
                                   : local_infos[local_infos.length () - 1];
  }
  const vsetvl_info &get_entry_info () const
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info : local_infos[0];
  }
  const vsetvl_info &get_exit_info () const
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info
                                   : local_infos[local_infos.length () - 1];
  }

  bool empty_p () const { return local_infos.is_empty () && !has_info (); }
  bool has_info () const { return !global_info.empty_p (); }
  void set_info (const vsetvl_info &info)
  {
    gcc_assert (local_infos.is_empty ());
    global_info = info;
    global_info.set_bb (bb);
  }
  void set_empty_info () { global_info.set_empty (); }
};
/* Demand system is the RVV-based VSETVL info analysis tools wrapper.
   It defines compatible rules for SEW/LMUL, POLICY and AVL.
   Also, it provides 3 interfaces available_p, compatible_p and
   merge for the VSETVL PASS analysis and optimization.

     - available_p: Determine whether the next info can get the
       available VSETVL status from the previous info.
       e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16).
       Since bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3
       demand, the VSETVL instruction in bb 3 can be elided.
       available_p (previous, next) is true in such a situation.
     - compatible_p: Determine whether prev_info is compatible with next_info
       so that we can have a new merged info that is available to both of them.
     - merge: Merge the stricter demand information from
       next_info into prev_info so that prev_info becomes available to
       next_info.  */
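/* A hedged sketch of how the three interfaces fit together in the Phase 1
   fusion loop (PREV and NEXT are vsetvl_info objects, DSYS a demand_system;
   this mirrors the intended use rather than any specific function below):

     if (dsys.available_p (prev, next))
       ;  // NEXT needs no extra vsetvl at all.
     else if (dsys.compatible_p (prev, next))
       dsys.merge (prev, next);  // PREV now also covers NEXT.
     else
       ;  // Keep both infos; a vsetvl stays between them.  */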
class demand_system
{
private:
  inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED,
                           const vsetvl_info &next ATTRIBUTE_UNUSED)
  {
    return true;
  }
  inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED,
                            const vsetvl_info &next ATTRIBUTE_UNUSED)
  {
    return false;
  }
  /* predictors for sew and lmul.  */

  inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_vlmul () == next.get_vlmul ();
  }
  inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_sew () == next.get_sew ();
  }
  inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return lmul_eq_p (prev, next) && sew_eq_p (prev, next);
  }
  inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_sew () == next.get_sew ()
           || (next.get_ta () && prev.get_sew () > next.get_sew ());
  }
  inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_sew () == next.get_sew ()
           || (prev.get_ta () && prev.get_sew () < next.get_sew ());
  }
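  /* For instance (a hedged illustration): a tail-agnostic vmv.s.x with
     SEW = 16 only writes element 0 and doesn't care what happens to the
     tail, so a configuration with SEW = 32 also works for it; sew_ge_p
     (prev = e32 config, next = e16 tail-agnostic insn) is therefore true,
     while the same pair with a tail-undisturbed NEXT would be rejected.  */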
  inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
                                          const vsetvl_info &next)
  {
    return prev.get_sew () <= next.get_max_sew ();
  }
  inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
                                          const vsetvl_info &next)
  {
    return next.get_sew () <= prev.get_max_sew ();
  }
  inline bool max_sew_overlap_p (const vsetvl_info &prev,
                                 const vsetvl_info &next)
  {
    return !(prev.get_sew () > next.get_max_sew ()
             || next.get_sew () > prev.get_max_sew ());
  }
  inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.has_same_ratio (next);
  }
  inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
                                               const vsetvl_info &next)
  {
    return prev.get_ratio () >= (next.get_sew () / MAX_LMUL);
  }
  inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
                                               const vsetvl_info &next)
  {
    return next.get_ratio () >= (prev.get_sew () / MAX_LMUL);
  }
  inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev,
                                     const vsetvl_info &next)
  {
    return sew_ge_p (prev, next) && ratio_eq_p (prev, next);
  }
  inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
                                                     const vsetvl_info &next)
  {
    return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next);
  }
  inline bool
  sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p (
    const vsetvl_info &prev, const vsetvl_info &next)
  {
    return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
           && next_ratio_valid_for_prev_sew_p (prev, next);
  }
  inline bool
  sew_ge_and_prev_sew_le_next_max_sew_and_ratio_eq_p (
    const vsetvl_info &prev, const vsetvl_info &next)
  {
    return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
           && ratio_eq_p (prev, next);
  }
  inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
                                                     const vsetvl_info &next)
  {
    return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next);
  }
  inline bool
  max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
                                                       const vsetvl_info &next)
  {
    if (next_ratio_valid_for_prev_sew_p (prev, next)
        && max_sew_overlap_p (prev, next))
      {
        if (next.get_sew () < prev.get_sew ()
            && (!next.get_ta () || !next.get_ma ()))
          return false;
        return true;
      }
    return false;
  }
  inline bool
  sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev,
                                                      const vsetvl_info &next)
  {
    return sew_le_p (prev, next) && ratio_eq_p (prev, next)
           && next_sew_le_prev_max_sew_p (prev, next);
  }
  inline bool
  max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
                                                       const vsetvl_info &next)
  {
    return prev_ratio_valid_for_next_sew_p (prev, next)
           && max_sew_overlap_p (prev, next);
  }
  inline bool
  sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p (
    const vsetvl_info &prev, const vsetvl_info &next)
  {
    return sew_le_p (prev, next) && prev_ratio_valid_for_next_sew_p (prev, next)
           && next_sew_le_prev_max_sew_p (prev, next);
  }
  inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev,
                                              const vsetvl_info &next)
  {
    return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next);
  }
  /* predictors for tail and mask policy.  */

  inline bool tail_policy_eq_p (const vsetvl_info &prev,
                                const vsetvl_info &next)
  {
    return prev.get_ta () == next.get_ta ();
  }
  inline bool mask_policy_eq_p (const vsetvl_info &prev,
                                const vsetvl_info &next)
  {
    return prev.get_ma () == next.get_ma ();
  }
  inline bool tail_mask_policy_eq_p (const vsetvl_info &prev,
                                     const vsetvl_info &next)
  {
    return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next);
  }
  /* predictors for avl.  */

  inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
  {
    if (info.has_vl ())
      {
        if (find_access (i->defs (), REGNO (info.get_vl ())))
          return true;
        if (find_access (i->uses (), REGNO (info.get_vl ())))
          {
            resource_info resource = full_register (REGNO (info.get_vl ()));
            def_lookup dl1 = crtl->ssa->find_def (resource, i);
            def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
            if (dl1.matching_set () || dl2.matching_set ())
              return true;
            /* If their VLs are coming from the same def, we still want to
               fuse their VSETVL demand info to gain better performance.  */
            return dl1.prev_def (i) != dl2.prev_def (i);
          }
      }
    return false;
  }
  inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
  {
    return info.has_nonvlmax_reg_avl ()
           && find_access (i->defs (), REGNO (info.get_avl ()));
  }
  inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn,
                                    unsigned regno)
  {
    gcc_assert (prev_insn->compare_with (curr_insn) < 0);
    for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn;
         i = i->prev_nondebug_insn ())
      if (find_access (i->defs (), regno))
        return true;
    return false;
  }
  inline bool reg_avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ())
      return false;

    if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ()))
      return true;

    if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ()))
      return false;

    insn_info *prev_insn = prev.get_insn ();
    if (prev.get_bb () != prev_insn->bb ())
      prev_insn = prev.get_bb ()->end_insn ();

    insn_info *next_insn = next.get_insn ();
    if (next.get_bb () != next_insn->bb ())
      next_insn = next.get_bb ()->end_insn ();

    return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
  }
  inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());

    if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
      return false;

    if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def ()
        && next.get_avl_def ()->insn () == prev.get_insn ())
      return true;

    if (prev.get_read_vl_insn ())
      {
        if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ())
          return false;
        insn_info *avl_def_insn = extract_single_source (next.get_avl_def ());
        return avl_def_insn == prev.get_read_vl_insn ();
      }

    if (prev == next && prev.has_nonvlmax_reg_avl ())
      {
        insn_info *insn = prev.get_insn ();
        bb_info *bb = insn->bb ();
        for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
             i = i->next_nondebug_insn ())
          if (find_access (i->defs (), REGNO (prev.get_avl ())))
            return false;
        return true;
      }

    if (prev.has_vlmax_avl () && next.has_vlmax_avl ())
      return true;
    else if (prev.has_imm_avl () && next.has_imm_avl ())
      return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ());
    else if (prev.has_vl () && next.has_nonvlmax_reg_avl ()
             && REGNO (prev.get_vl ()) == REGNO (next.get_avl ()))
      {
        insn_info *prev_insn = prev.insn_inside_bb_p ()
                                 ? prev.get_insn ()
                                 : prev.get_bb ()->end_insn ();

        insn_info *next_insn = next.insn_inside_bb_p ()
                                 ? next.get_insn ()
                                 : next.get_bb ()->end_insn ();
        return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
      }
    else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ())
      return reg_avl_equal_p (prev, next);

    return false;
  }
  inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev,
                                                const vsetvl_info &next)
  {
    return avl_equal_p (prev, next) || prev.has_non_zero_avl ();
  }
  inline bool can_use_next_avl_p (const vsetvl_info &prev,
                                  const vsetvl_info &next)
  {
    /* Forbid the AVL/VL propagation if VL of NEXT is used
       by non-RVV instructions.  This is because:

         ...
         PREV: scalar move (no AVL)
         ...
         NEXT: vsetvl a5(VL), a4(AVL) ...
         ...

       Since a user vsetvl instruction is a no-side-effect instruction
       that should have been placed in the correct and optimal location
       of the program by the previous passes, it is unreasonable for
       VSETVL PASS to try to move it to other places if its VL is used by
       non-RVV instructions.

       Note: We only forbid the cases where VL is used by the following
       non-RVV instructions, which would cause issues.  We don't forbid
       other cases since they won't cause correctness issues and we still
       want more demand info to be fused backward.  The later LCM algorithm
       should know the optimal location of the vsetvl.  */
    if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
      return false;

    if (!next.has_nonvlmax_reg_avl () && !next.has_vl ())
      return true;

    insn_info *prev_insn = prev.get_insn ();
    if (prev.get_bb () != prev_insn->bb ())
      prev_insn = prev.get_bb ()->end_insn ();

    insn_info *next_insn = next.get_insn ();
    if (next.get_bb () != next_insn->bb ())
      next_insn = next.get_bb ()->end_insn ();

    return avl_vl_unmodified_between_p (prev_insn, next_insn, next);
  }
  inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p (
    const vsetvl_info &prev, const vsetvl_info &next)
  {
    return avl_equal_p (prev, next)
           || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next));
  }
  /* modifiers.  */

  inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED,
                   const vsetvl_info &next ATTRIBUTE_UNUSED)
  {}

  /* modifiers for sew and lmul.  */

  inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
    prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ()));
  }
  inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
    prev.set_sew (next.get_sew ());
    use_min_of_max_sew (prev, next);
  }
  inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
    int max_sew = MAX (prev.get_sew (), next.get_sew ());
    prev.set_sew (max_sew);
    prev.set_ratio (calculate_ratio (prev.get_sew (), prev.get_vlmul ()));
    use_min_of_max_sew (prev, next);
  }
  inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
  {
    use_next_sew (prev, next);
    prev.set_vlmul (next.get_vlmul ());
    prev.set_ratio (next.get_ratio ());
  }
  inline void use_next_sew_with_prev_ratio (vsetvl_info &prev,
                                            const vsetvl_info &next)
  {
    use_next_sew (prev, next);
    prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ()));
  }
  inline void modify_lmul_with_next_ratio (vsetvl_info &prev,
                                           const vsetvl_info &next)
  {
    prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
    prev.set_ratio (next.get_ratio ());
  }

  inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
                                                    const vsetvl_info &next)
  {
    prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
    use_max_sew (prev, next);
    prev.set_ratio (next.get_ratio ());
  }

  inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev,
                                                    const vsetvl_info &next)
  {
    int max_sew = MAX (prev.get_sew (), next.get_sew ());
    prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ()));
    prev.set_sew (max_sew);
  }
  /* modifiers for tail and mask policy.  */

  inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next)
  {
    if (!next.get_ta ())
      prev.set_ta (next.get_ta ());
  }
  inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
  {
    if (!next.get_ma ())
      prev.set_ma (next.get_ma ());
  }
  inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
  {
    use_tail_policy (prev, next);
    use_mask_policy (prev, next);
  }
  /* modifiers for avl.  */

  inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (can_use_next_avl_p (prev, next));
    prev.update_avl (next);
  }

  inline void use_next_avl_when_not_equal (vsetvl_info &prev,
                                           const vsetvl_info &next)
  {
    if (avl_equal_p (prev, next))
      return;
    gcc_assert (next.has_non_zero_avl ());
    use_next_avl (prev, next);
  }

public:
  /* Can we move the vsetvl info between prev_insn and next_insn safely?  */
  bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn,
                                    const vsetvl_info &info,
                                    bool ignore_vl = false)
  {
    gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ())
                || (info.has_nonvlmax_reg_avl () || info.has_vl ()));

    gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ());
    if (prev_insn->bb () == next_insn->bb ()
        && prev_insn->compare_with (next_insn) < 0)
      {
        for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
             i = i->prev_nondebug_insn ())
          {
            // no def and use of vl
            if (!ignore_vl && modify_or_use_vl_p (i, info))
              return false;

            // no def of avl
            if (modify_avl_p (i, info))
              return false;
          }
        return true;
      }

    basic_block prev_cfg_bb = prev_insn->bb ()->cfg_bb ();
    if (!ignore_vl && info.has_vl ())
      {
        bitmap live_out = df_get_live_out (prev_cfg_bb);
        if (bitmap_bit_p (live_out, REGNO (info.get_vl ())))
          return false;
      }

    /* Find the set_info at the locations of PREV_INSN and NEXT_INSN.  Return
       false if those two set_info are different.

         PREV_INSN --- multiple nested blocks --- NEXT_INSN.

       Return false if there is any modification of AVL inside those
       multiple nested blocks.  */
    if (info.has_nonvlmax_reg_avl ())
      {
        resource_info resource = full_register (REGNO (info.get_avl ()));
        def_lookup dl1 = crtl->ssa->find_def (resource, prev_insn);
        def_lookup dl2 = crtl->ssa->find_def (resource, next_insn);
        if (dl2.matching_set ())
          return false;

        auto is_phi_or_real
          = [&] (insn_info *h) { return h->is_real () || h->is_phi (); };

        def_info *def1 = dl1.matching_set_or_last_def_of_prev_group ();
        def_info *def2 = dl2.prev_def (next_insn);
        set_info *set1 = safe_dyn_cast<set_info *> (def1);
        set_info *set2 = safe_dyn_cast<set_info *> (def2);
        if (!set1 || !set2)
          return false;

        auto is_same_ultimate_def = [&] (set_info *s1, set_info *s2) {
          return s1->insn ()->is_phi () && s2->insn ()->is_phi ()
                 && look_through_degenerate_phi (s1)
                      == look_through_degenerate_phi (s2);
        };

        if (set1 != set2 && !is_same_ultimate_def (set1, set2))
          {
            if (!is_phi_or_real (set1->insn ())
                || !is_phi_or_real (set2->insn ()))
              return false;

            if (set1->insn ()->is_real () && set2->insn ()->is_phi ())
              {
                hash_set<set_info *> sets
                  = get_all_sets (set2, true, false, true);
                if (!sets.contains (set1))
                  return false;
              }
            else
              {
                insn_info *def_insn1 = extract_single_source (set1);
                insn_info *def_insn2 = extract_single_source (set2);
                if (!def_insn1 || !def_insn2 || def_insn1 != def_insn2)
                  return false;
              }
          }
      }

    for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn ();
         i = i->prev_nondebug_insn ())
      {
        // no def and use of vl
        if (!ignore_vl && modify_or_use_vl_p (i, info))
          return false;

        // no def of avl
        if (modify_avl_p (i, info))
          return false;
      }

    for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn;
         i = i->prev_nondebug_insn ())
      {
        // no def and use of vl
        if (!ignore_vl && modify_or_use_vl_p (i, info))
          return false;

        // no def of avl
        if (modify_avl_p (i, info))
          return false;
      }

    return true;
  }
  bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());
    sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
    sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P,     \
			  AVAILABLE_P, FUSE)                                   \
  if (prev_flags == sew_lmul_demand_type::PREV_FLAGS                           \
      && next_flags == sew_lmul_demand_type::NEXT_FLAGS)                       \
    return COMPATIBLE_P (prev, next);

#include "riscv-vsetvl.def"

    gcc_unreachable ();
  }

  bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());
    sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
    sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P,     \
			  AVAILABLE_P, FUSE)                                   \
  if (prev_flags == sew_lmul_demand_type::PREV_FLAGS                           \
      && next_flags == sew_lmul_demand_type::NEXT_FLAGS)                       \
    return AVAILABLE_P (prev, next);

#include "riscv-vsetvl.def"

    gcc_unreachable ();
  }

  void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());
    sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
    sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P,     \
			  AVAILABLE_P, FUSE)                                   \
  if (prev_flags == sew_lmul_demand_type::PREV_FLAGS                           \
      && next_flags == sew_lmul_demand_type::NEXT_FLAGS)                       \
    {                                                                          \
      gcc_assert (COMPATIBLE_P (prev, next));                                  \
      FUSE (prev, next);                                                       \
      prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS);              \
      return;                                                                  \
    }

#include "riscv-vsetvl.def"

    gcc_unreachable ();
  }
1977 bool policy_compatible_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1979 gcc_assert (prev
.valid_p () && next
.valid_p ());
1980 policy_demand_type prev_flags
= prev
.get_policy_demand ();
1981 policy_demand_type next_flags
= next
.get_policy_demand ();
1982 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1983 AVAILABLE_P, FUSE) \
1984 if (prev_flags == policy_demand_type::PREV_FLAGS \
1985 && next_flags == policy_demand_type::NEXT_FLAGS) \
1986 return COMPATIBLE_P (prev, next);
1988 #include "riscv-vsetvl.def"
  bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());
    policy_demand_type prev_flags = prev.get_policy_demand ();
    policy_demand_type next_flags = next.get_policy_demand ();
#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P,       \
			AVAILABLE_P, FUSE)                                     \
  if (prev_flags == policy_demand_type::PREV_FLAGS                            \
      && next_flags == policy_demand_type::NEXT_FLAGS)                        \
    return AVAILABLE_P (prev, next);

#include "riscv-vsetvl.def"
#undef DEF_POLICY_RULE

    gcc_unreachable ();
  }
  void merge_policy (vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());
    policy_demand_type prev_flags = prev.get_policy_demand ();
    policy_demand_type next_flags = next.get_policy_demand ();
#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P,       \
			AVAILABLE_P, FUSE)                                     \
  if (prev_flags == policy_demand_type::PREV_FLAGS                            \
      && next_flags == policy_demand_type::NEXT_FLAGS)                        \
    {                                                                         \
      gcc_assert (COMPATIBLE_P (prev, next));                                 \
      FUSE (prev, next);                                                      \
      prev.set_policy_demand (policy_demand_type::NEW_FLAGS);                 \
      return;                                                                 \
    }

#include "riscv-vsetvl.def"
#undef DEF_POLICY_RULE

    gcc_unreachable ();
  }
  bool vl_not_in_conflict_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    /* We don't fuse the following case:

	 vmv.s.x v0, a5    -- PREV
	 vsetvli a5, ...   -- NEXT

       Don't fuse NEXT into PREV.  */
    return !prev.vl_modify_non_avl_op_p (next)
	   && !next.vl_modify_non_avl_op_p (prev);
  }
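
  /* Illustrative expansion of the case above (register values are made
     up): NEXT's vsetvli writes its VL result into a5, but a5 is also a
     plain scalar operand of PREV's vmv.s.x.  Fusing NEXT into PREV would
     move the write of a5 before the vmv.s.x and change the value that
     the vmv.s.x consumes, so the guard rejects fusion whenever either
     info's VL output overwrites a non-AVL operand of the other.  */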
  bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());
    avl_demand_type prev_flags = prev.get_avl_demand ();
    avl_demand_type next_flags = next.get_avl_demand ();
#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P,          \
		     AVAILABLE_P, FUSE)                                        \
  if (prev_flags == avl_demand_type::PREV_FLAGS                               \
      && next_flags == avl_demand_type::NEXT_FLAGS)                           \
    return COMPATIBLE_P (prev, next);

#include "riscv-vsetvl.def"
#undef DEF_AVL_RULE

    gcc_unreachable ();
  }
  bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());
    avl_demand_type prev_flags = prev.get_avl_demand ();
    avl_demand_type next_flags = next.get_avl_demand ();
#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P,          \
		     AVAILABLE_P, FUSE)                                        \
  if (prev_flags == avl_demand_type::PREV_FLAGS                               \
      && next_flags == avl_demand_type::NEXT_FLAGS)                           \
    return AVAILABLE_P (prev, next);

#include "riscv-vsetvl.def"
#undef DEF_AVL_RULE

    gcc_unreachable ();
  }
  void merge_avl (vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());
    avl_demand_type prev_flags = prev.get_avl_demand ();
    avl_demand_type next_flags = next.get_avl_demand ();
#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P,          \
		     AVAILABLE_P, FUSE)                                        \
  if (prev_flags == avl_demand_type::PREV_FLAGS                               \
      && next_flags == avl_demand_type::NEXT_FLAGS)                           \
    {                                                                         \
      gcc_assert (COMPATIBLE_P (prev, next));                                 \
      FUSE (prev, next);                                                      \
      prev.set_avl_demand (avl_demand_type::NEW_FLAGS);                       \
      return;                                                                 \
    }

#include "riscv-vsetvl.def"
#undef DEF_AVL_RULE

    gcc_unreachable ();
  }
  bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    bool compatible_p = sew_lmul_compatible_p (prev, next)
			&& policy_compatible_p (prev, next)
			&& avl_compatible_p (prev, next)
			&& vl_not_in_conflict_p (prev, next);
    return compatible_p;
  }
  bool available_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    bool available_p = sew_lmul_available_p (prev, next)
		       && policy_available_p (prev, next)
		       && avl_available_p (prev, next)
		       && vl_not_in_conflict_p (prev, next);
    gcc_assert (!available_p || compatible_p (prev, next));
    return available_p;
  }
  void merge (vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (compatible_p (prev, next));
    merge_sew_lmul (prev, next);
    merge_policy (prev, next);
    merge_avl (prev, next);
    gcc_assert (available_p (prev, next));
  }
};
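
/* Note on the three operations above (a summary, not new semantics):

   - compatible_p (prev, next): the two demands can be merged into one
     vsetvl demand.
   - available_p (prev, next): prev already satisfies everything next
     demands, so next can simply be dropped.
   - merge (prev, next): folds next's demand into prev; it is only legal
     when compatible_p holds, and afterwards prev must be available to
     next.

   available_p implies compatible_p, which the assertion inside
   available_p checks.  */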

class pre_vsetvl
{
private:
  demand_system m_dem;
  auto_vec<vsetvl_block_info> m_vector_block_infos;

  /* data for avl reaching definition.  */
  sbitmap *m_reg_def_loc;

  /* data for vsetvl info reaching definition.  */
  vsetvl_info m_unknown_info;
  auto_vec<vsetvl_info *> m_vsetvl_def_exprs;
  sbitmap *m_vsetvl_def_in;
  sbitmap *m_vsetvl_def_out;

  /* data for lcm */
  auto_vec<vsetvl_info *> m_exprs;
  sbitmap *m_avloc;
  sbitmap *m_avin;
  sbitmap *m_avout;
  sbitmap *m_kill;
  sbitmap *m_antloc;
  sbitmap *m_transp;
  sbitmap *m_insert;
  sbitmap *m_del;
  struct edge_list *m_edges;

  auto_vec<vsetvl_info> m_delete_list;
  vsetvl_block_info &get_block_info (const bb_info *bb)
  {
    return m_vector_block_infos[bb->index ()];
  }

  const vsetvl_block_info &get_block_info (const basic_block bb) const
  {
    return m_vector_block_infos[bb->index];
  }

  vsetvl_block_info &get_block_info (const basic_block bb)
  {
    return m_vector_block_infos[bb->index];
  }
  void add_expr (auto_vec<vsetvl_info *> &m_exprs, vsetvl_info &info)
  {
    for (vsetvl_info *item : m_exprs)
      {
	if (*item == info)
	  return;
      }
    m_exprs.safe_push (&info);
  }
  unsigned get_expr_index (auto_vec<vsetvl_info *> &m_exprs,
			   const vsetvl_info &info)
  {
    for (size_t i = 0; i < m_exprs.length (); i += 1)
      {
	if (*m_exprs[i] == info)
	  return i;
      }
    gcc_unreachable ();
  }
  bool anticipated_exp_p (const vsetvl_info &header_info)
  {
    if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ())
      return true;

    bb_info *bb = header_info.get_bb ();
    insn_info *prev_insn = bb->head_insn ();
    insn_info *next_insn = header_info.insn_inside_bb_p ()
			     ? header_info.get_insn ()
			     : header_info.get_bb ()->end_insn ();

    return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn,
					      header_info);
  }

  bool available_exp_p (const vsetvl_info &prev_info,
			const vsetvl_info &next_info)
  {
    return m_dem.available_p (prev_info, next_info);
  }
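
  /* Note: anticipated_exp_p asks whether a block's header vsetvl info
     could be evaluated at the block entry with the same result, i.e.
     neither its AVL register nor its VL output is touched between the
     block head and the vsetvl's position.  An info that demands neither a
     register AVL nor a VL result is trivially anticipatable.  */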
  void compute_probabilities ()
  {
    edge e;
    edge_iterator ei;

    for (const bb_info *bb : crtl->ssa->bbs ())
      {
	basic_block cfg_bb = bb->cfg_bb ();
	auto &curr_prob = get_block_info (cfg_bb).probability;

	/* GCC assumes the entry block (bb 0) is always executed, so set
	   its probability to "always".  */
	if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
	  curr_prob = profile_probability::always ();
	/* Exit block (bb 1) is the block we don't need to process.  */
	if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
	  continue;

	gcc_assert (curr_prob.initialized_p ());
	FOR_EACH_EDGE (e, ei, cfg_bb->succs)
	  {
	    auto &new_prob = get_block_info (e->dest).probability;
	    /* Normally, the edge probability should be initialized.
	       However, some special testing code which is written in
	       GIMPLE IR style forces the edge probability to be
	       uninitialized; we conservatively set it as never so that
	       it will not affect PRE (Phase 3 && Phase 4).  */
	    if (!e->probability.initialized_p ())
	      new_prob = profile_probability::never ();
	    else if (!new_prob.initialized_p ())
	      new_prob = curr_prob * e->probability;
	    else if (new_prob == profile_probability::always ())
	      continue;
	    else
	      new_prob += curr_prob * e->probability;
	  }
      }
  }
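
  /* Worked example (illustrative numbers): if bb 2 has probability 0.8
     and branches to bb 4 with edge probability 0.5, bb 4 first receives
     0.8 * 0.5 = 0.4; if bb 3 (probability 0.2) also falls through to
     bb 4, the loop above accumulates 0.4 + 0.2 * 1.0 = 0.6 as bb 4's
     probability.  These per-block probabilities only rank candidate
     vsetvl placements in Phase 2; they do not modify the function's own
     profile.  */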
  void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info)
  {
    rtx pat = info.get_vsetvl_pat ();
    rtx_insn *rinsn = info.get_insn ()->rtl ();

    if (emit_type == EMIT_DIRECT)
      {
	emit_insn (pat);
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "  Insert vsetvl insn %d:\n",
		     INSN_UID (get_last_insn ()));
	    print_rtl_single (dump_file, get_last_insn ());
	  }
      }
    else if (emit_type == EMIT_BEFORE)
      {
	emit_insn_before (pat, rinsn);
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "  Insert vsetvl insn before insn %d:\n",
		     INSN_UID (rinsn));
	    print_rtl_single (dump_file, PREV_INSN (rinsn));
	  }
      }
    else
      {
	emit_insn_after (pat, rinsn);
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "  Insert vsetvl insn after insn %d:\n",
		     INSN_UID (rinsn));
	    print_rtl_single (dump_file, NEXT_INSN (rinsn));
	  }
      }
  }
  void change_vsetvl_insn (const vsetvl_info &info)
  {
    rtx_insn *rinsn = info.get_insn ()->rtl ();
    rtx new_pat = info.get_vsetvl_pat ();

    if (dump_file && (dump_flags & TDF_DETAILS))
      {
	fprintf (dump_file, "  Change insn %d from:\n", INSN_UID (rinsn));
	print_rtl_single (dump_file, rinsn);
      }

    validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);

    if (dump_file && (dump_flags & TDF_DETAILS))
      {
	fprintf (dump_file, "\n  to:\n");
	print_rtl_single (dump_file, rinsn);
      }
  }
  void remove_vsetvl_insn (rtx_insn *rinsn)
  {
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
	fprintf (dump_file, "  Eliminate insn %d:\n", INSN_UID (rinsn));
	print_rtl_single (dump_file, rinsn);
      }
    if (in_sequence_p ())
      remove_insn (rinsn);
    else
      delete_insn (rinsn);
  }
  bool successors_probability_equal_p (const basic_block cfg_bb) const
  {
    edge e;
    edge_iterator ei;
    profile_probability prob = profile_probability::uninitialized ();
    FOR_EACH_EDGE (e, ei, cfg_bb->succs)
      {
	if (prob == profile_probability::uninitialized ())
	  prob = m_vector_block_infos[e->dest->index].probability;
	else if (prob == m_vector_block_infos[e->dest->index].probability)
	  continue;
	else
	  /* We pick the highest probability among those incompatible
	     VSETVL infos.  When all incompatible VSETVL infos have the
	     same probability, we don't pick any of them.  */
	  return false;
      }
    return true;
  }
  bool has_compatible_reaching_vsetvl_p (vsetvl_info info)
  {
    unsigned int index;
    sbitmap_iterator sbi;
    EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[info.get_bb ()->index ()], 0,
			      index, sbi)
      {
	const auto prev_info = *m_vsetvl_def_exprs[index];
	if (!prev_info.valid_p ())
	  continue;
	if (m_dem.compatible_p (prev_info, info))
	  return true;
      }
    return false;
  }
  bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
  {
    gcc_assert (
      !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));

    unsigned expr_index;
    sbitmap_iterator sbi;
    EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()],
			      0, expr_index, sbi)
      {
	const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
	if (!prev_info.valid_p ()
	    || !m_dem.avl_available_p (prev_info, curr_info)
	    || prev_info.get_ratio () != curr_info.get_ratio ())
	  return false;
      }

    return true;
  }
public:
  pre_vsetvl ()
    : m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr),
      m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr),
      m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr)
  {
    /* Initialization of RTL_SSA.  */
    calculate_dominance_info (CDI_DOMINATORS);
    loop_optimizer_init (LOOPS_NORMAL);
    /* Create FAKE edges for infinite loops.  */
    connect_infinite_loops_to_exit ();

    crtl->ssa = new function_info (cfun);
    m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
    compute_probabilities ();
    m_unknown_info.set_unknown ();
  }
  void finish ()
  {
    free_dominance_info (CDI_DOMINATORS);
    loop_optimizer_finalize ();
    if (crtl->ssa->perform_pending_updates ())
      cleanup_cfg (0);
    delete crtl->ssa;
    crtl->ssa = nullptr;

    if (m_reg_def_loc)
      sbitmap_vector_free (m_reg_def_loc);

    if (m_vsetvl_def_in)
      sbitmap_vector_free (m_vsetvl_def_in);
    if (m_vsetvl_def_out)
      sbitmap_vector_free (m_vsetvl_def_out);

    if (m_avloc)
      sbitmap_vector_free (m_avloc);
    if (m_kill)
      sbitmap_vector_free (m_kill);
    if (m_antloc)
      sbitmap_vector_free (m_antloc);
    if (m_transp)
      sbitmap_vector_free (m_transp);
    if (m_insert)
      sbitmap_vector_free (m_insert);
    if (m_del)
      sbitmap_vector_free (m_del);
    if (m_avin)
      sbitmap_vector_free (m_avin);
    if (m_avout)
      sbitmap_vector_free (m_avout);

    if (m_edges)
      free_edge_list (m_edges);
  }
  void compute_vsetvl_def_data ();
  void compute_transparent (const bb_info *);
  void compute_lcm_local_properties ();

  void fuse_local_vsetvl_info ();
  bool earliest_fuse_vsetvl_info (int iter);
  void pre_global_vsetvl_info ();
  void emit_vsetvl ();
  void cleanup ();
  void remove_avl_operand ();
  void remove_unused_dest_operand ();
  void remove_vsetvl_pre_insns ();
  void dump (FILE *file, const char *title) const
  {
    fprintf (file, "\nVSETVL infos after %s\n\n", title);
    for (const bb_info *bb : crtl->ssa->bbs ())
      {
	const auto &block_info = m_vector_block_infos[bb->index ()];
	fprintf (file, "  bb %d:\n", bb->index ());
	fprintf (file, "    probability: ");
	block_info.probability.dump (file);
	fprintf (file, "\n");
	if (!block_info.empty_p ())
	  {
	    fprintf (file, "    Header vsetvl info:");
	    block_info.get_entry_info ().dump (file, "      ");
	    fprintf (file, "    Footer vsetvl info:");
	    block_info.get_exit_info ().dump (file, "      ");
	    for (const auto &info : block_info.local_infos)
	      {
		fprintf (file,
			 "      insn %d vsetvl info:", info.get_insn ()->uid ());
		info.dump (file, "        ");
	      }
	  }
      }
  }
};
void
pre_vsetvl::compute_vsetvl_def_data ()
{
  m_vsetvl_def_exprs.truncate (0);
  add_expr (m_vsetvl_def_exprs, m_unknown_info);
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      vsetvl_block_info &block_info = get_block_info (bb);
      if (block_info.empty_p ())
	continue;
      vsetvl_info &footer_info = block_info.get_exit_info ();
      gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
      add_expr (m_vsetvl_def_exprs, footer_info);
    }

  if (m_vsetvl_def_in)
    sbitmap_vector_free (m_vsetvl_def_in);
  if (m_vsetvl_def_out)
    sbitmap_vector_free (m_vsetvl_def_out);

  sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
					   m_vsetvl_def_exprs.length ());
  sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
					  m_vsetvl_def_exprs.length ());

  m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
					  m_vsetvl_def_exprs.length ());
  m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
					   m_vsetvl_def_exprs.length ());

  bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun));

  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      vsetvl_block_info &block_info = get_block_info (bb);
      if (block_info.empty_p ())
	{
	  for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1)
	    {
	      auto *info = m_vsetvl_def_exprs[i];
	      if (info->has_nonvlmax_reg_avl ()
		  && bitmap_bit_p (m_reg_def_loc[bb->index ()],
				   REGNO (info->get_avl ())))
		{
		  bitmap_set_bit (m_kill[bb->index ()], i);
		  bitmap_set_bit (def_loc[bb->index ()],
				  get_expr_index (m_vsetvl_def_exprs,
						  m_unknown_info));
		}
	    }
	  continue;
	}

      vsetvl_info &footer_info = block_info.get_exit_info ();
      bitmap_ones (m_kill[bb->index ()]);
      bitmap_set_bit (def_loc[bb->index ()],
		      get_expr_index (m_vsetvl_def_exprs, footer_info));
    }

  /* Set the def_out of the ENTRY basic block to m_unknown_info expr.  */
  basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
  bitmap_set_bit (m_vsetvl_def_out[entry->index],
		  get_expr_index (m_vsetvl_def_exprs, m_unknown_info));

  compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in,
			      m_vsetvl_def_out);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "\n  Compute vsetvl info reaching definition data:\n\n");
      fprintf (dump_file, "    Expression List (%d):\n",
	       m_vsetvl_def_exprs.length ());
      for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++)
	{
	  const auto &info = *m_vsetvl_def_exprs[i];
	  fprintf (dump_file, "      Expr[%u]: ", i);
	  info.dump (dump_file, "        ");
	}
      fprintf (dump_file, "\n    bitmap data:\n");
      for (const bb_info *bb : crtl->ssa->bbs ())
	{
	  unsigned int i = bb->index ();
	  fprintf (dump_file, "      BB %u:\n", i);
	  fprintf (dump_file, "        def_loc: ");
	  dump_bitmap_file (dump_file, def_loc[i]);
	  fprintf (dump_file, "        kill: ");
	  dump_bitmap_file (dump_file, m_kill[i]);
	  fprintf (dump_file, "        vsetvl_def_in: ");
	  dump_bitmap_file (dump_file, m_vsetvl_def_in[i]);
	  fprintf (dump_file, "        vsetvl_def_out: ");
	  dump_bitmap_file (dump_file, m_vsetvl_def_out[i]);
	}
    }

  sbitmap_vector_free (def_loc);
  sbitmap_vector_free (m_kill);
}
/* Subroutine of compute_lcm_local_properties which computes the local
   transparency of a BB.  Note that the compile time is very sensitive to
   compute_transparent and compute_lcm_local_properties; any change to
   these two functions should be aware of the compile-time impact on
   programs with a large number of blocks, e.g. SPEC 2017 wrf.

   Current compile time profile of SPEC 2017 wrf:

     1. scheduling
     2. machine dep reorg (VSETVL PASS) - 18%

   The VSETVL pass should not spend more time than scheduling in
   compilation.  */
void
pre_vsetvl::compute_transparent (const bb_info *bb)
{
  int num_exprs = m_exprs.length ();
  unsigned bb_index = bb->index ();
  for (int i = 0; i < num_exprs; i++)
    {
      auto *info = m_exprs[i];
      if (info->has_nonvlmax_reg_avl ()
	  && bitmap_bit_p (m_reg_def_loc[bb_index], REGNO (info->get_avl ())))
	bitmap_clear_bit (m_transp[bb_index], i);
      else if (info->has_vl ()
	       && bitmap_bit_p (m_reg_def_loc[bb_index],
				REGNO (info->get_vl ())))
	bitmap_clear_bit (m_transp[bb_index], i);
    }
}
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective
   of other blocks.

   An expression is transparent in a block if its operands are not
   modified in the block.

   An expression is computed (locally available) in a block if it is
   computed at least once and the expression would contain the same value
   if the computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.  */
void
pre_vsetvl::compute_lcm_local_properties ()
{
  m_exprs.truncate (0);
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      vsetvl_block_info &block_info = get_block_info (bb);
      if (block_info.empty_p ())
	continue;
      vsetvl_info &header_info = block_info.get_entry_info ();
      vsetvl_info &footer_info = block_info.get_exit_info ();
      gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
      if (header_info.valid_p ())
	add_expr (m_exprs, header_info);
      if (footer_info.valid_p ())
	add_expr (m_exprs, footer_info);
    }

  int num_exprs = m_exprs.length ();
  if (m_avloc)
    sbitmap_vector_free (m_avloc);
  if (m_kill)
    sbitmap_vector_free (m_kill);
  if (m_antloc)
    sbitmap_vector_free (m_antloc);
  if (m_transp)
    sbitmap_vector_free (m_transp);
  if (m_avin)
    sbitmap_vector_free (m_avin);
  if (m_avout)
    sbitmap_vector_free (m_avout);

  m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
  m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
  m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
  m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
  m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
  m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);

  bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
  bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun));

  /* - If T is locally available at the end of a block, then T' must be
       available at the end of the same block.  Since some optimization has
       occurred earlier, T' might not be locally available, however, it
       must have been previously computed on all paths.  As a formula, T at
       AVLOC(B) implies that T' at AVOUT(B).
       An "available occurrence" is one that is the last occurrence in the
       basic block and the operands are not modified by following
       statements in the basic block [including this insn].

     - If T is locally anticipated at the beginning of a block, then either
       T' is locally anticipated or it is already available from previous
       blocks.  As a formula, this means that T at ANTLOC(B) implies T' at
       ANTLOC(B) or at AVIN(B).
       An "anticipatable occurrence" is one that is the first occurrence in
       the basic block, the operands are not modified in the basic block
       prior to the occurrence and the output is not used between the start
       of the block and the occurrence.  */
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      unsigned bb_index = bb->index ();
      vsetvl_block_info &block_info = get_block_info (bb);

      /* Compute m_transp */
      if (block_info.empty_p ())
	compute_transparent (bb);
      else
	{
	  bitmap_clear (m_transp[bb_index]);
	  vsetvl_info &header_info = block_info.get_entry_info ();
	  vsetvl_info &footer_info = block_info.get_exit_info ();

	  if (header_info.valid_p () && anticipated_exp_p (header_info))
	    bitmap_set_bit (m_antloc[bb_index],
			    get_expr_index (m_exprs, header_info));

	  if (footer_info.valid_p ())
	    for (int i = 0; i < num_exprs; i += 1)
	      {
		const vsetvl_info &info = *m_exprs[i];
		if (!info.valid_p ())
		  continue;
		if (available_exp_p (footer_info, info))
		  bitmap_set_bit (m_avloc[bb_index], i);
	      }
	}

      if (invalid_opt_bb_p (bb->cfg_bb ()))
	{
	  bitmap_clear (m_antloc[bb_index]);
	  bitmap_clear (m_transp[bb_index]);
	}

      /* Compute ae_kill for each basic block using:

	 ~(TRANSP | COMP)
      */
      bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
      bitmap_not (m_kill[bb_index], m_kill[bb_index]);
    }
}
void
pre_vsetvl::fuse_local_vsetvl_info ()
{
  m_reg_def_loc
    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1);
  bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun));
  bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]);

  for (bb_info *bb : crtl->ssa->bbs ())
    {
      auto &block_info = get_block_info (bb);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Try fuse basic block %d\n", bb->index ());
	}
      auto_vec<vsetvl_info> infos;
      for (insn_info *insn : bb->real_nondebug_insns ())
	{
	  vsetvl_info curr_info = vsetvl_info (insn);
	  if (curr_info.valid_p () || curr_info.unknown_p ())
	    infos.safe_push (curr_info);

	  /* Collect GP registers modified by the current bb.  */
	  if (insn->is_real ())
	    for (def_info *def : insn->defs ())
	      if (def->is_reg () && GP_REG_P (def->regno ()))
		bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ());
	}

      vsetvl_info prev_info = vsetvl_info ();
      prev_info.set_empty ();
      for (auto &curr_info : infos)
	{
	  if (prev_info.empty_p ())
	    prev_info = curr_info;
	  else if ((curr_info.unknown_p () && prev_info.valid_p ())
		   || (curr_info.valid_p () && prev_info.unknown_p ()))
	    {
	      block_info.local_infos.safe_push (prev_info);
	      prev_info = curr_info;
	    }
	  else if (curr_info.valid_p () && prev_info.valid_p ())
	    {
	      if (m_dem.available_p (prev_info, curr_info))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file,
			       "    Ignore curr info since prev info "
			       "available with it:\n");
		      fprintf (dump_file, "      prev_info: ");
		      prev_info.dump (dump_file, "        ");
		      fprintf (dump_file, "      curr_info: ");
		      curr_info.dump (dump_file, "        ");
		      fprintf (dump_file, "\n");
		    }
		  /* Even though prev_info is available with curr_info,
		     we need to update the MAX_SEW of prev_info since
		     we don't check MAX_SEW in the available_p check.

		     prev_info:
		     Demand fields: demand_ratio_and_ge_sew demand_avl
		     SEW=16, VLMUL=mf4, RATIO=64, MAX_SEW=64

		     curr_info:
		     Demand fields: demand_ge_sew demand_non_zero_avl
		     SEW=16, VLMUL=m1, RATIO=16, MAX_SEW=32

		     In the example above, prev_info is available with
		     curr_info, and we need to update prev_info's MAX_SEW
		     from 64 to 32.  */
		  prev_info.set_max_sew (
		    MIN (prev_info.get_max_sew (), curr_info.get_max_sew ()));
		  if (!curr_info.vl_used_by_non_rvv_insn_p ()
		      && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
		    m_delete_list.safe_push (curr_info);

		  if (curr_info.get_read_vl_insn ())
		    prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
		}
	      else if (m_dem.compatible_p (prev_info, curr_info))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, "    Fuse curr info since prev info "
					  "compatible with it:\n");
		      fprintf (dump_file, "      prev_info: ");
		      prev_info.dump (dump_file, "        ");
		      fprintf (dump_file, "      curr_info: ");
		      curr_info.dump (dump_file, "        ");
		    }
		  m_dem.merge (prev_info, curr_info);
		  if (!curr_info.vl_used_by_non_rvv_insn_p ()
		      && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
		    m_delete_list.safe_push (curr_info);
		  if (curr_info.get_read_vl_insn ())
		    prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, "      prev_info after fused: ");
		      prev_info.dump (dump_file, "        ");
		      fprintf (dump_file, "\n");
		    }
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file,
			       "    Cannot fuse incompatible infos:\n");
		      fprintf (dump_file, "      prev_info: ");
		      prev_info.dump (dump_file, "       ");
		      fprintf (dump_file, "      curr_info: ");
		      curr_info.dump (dump_file, "       ");
		    }
		  block_info.local_infos.safe_push (prev_info);
		  prev_info = curr_info;
		}
	    }
	}

      if (prev_info.valid_p () || prev_info.unknown_p ())
	block_info.local_infos.safe_push (prev_info);
    }
}
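
/* Illustrative local-fusion example (instruction operands are made up):
   within one block,

	vsetvli zero, a3, e8, mf4, ta, ma   -- prev_info, RATIO = 32
	vsetvli zero, a3, e16, mf2, ta, ma  -- curr_info, RATIO = 32

   both demand the same AVL (a3) and the same SEW/LMUL ratio, so
   compatible_p holds and merge leaves a single fused demand; the second
   vsetvl becomes a candidate for m_delete_list.  An incompatible
   neighbour instead flushes prev_info into block_info.local_infos and
   restarts the fusion window from curr_info.  */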
bool
pre_vsetvl::earliest_fuse_vsetvl_info (int iter)
{
  compute_vsetvl_def_data ();
  compute_lcm_local_properties ();

  unsigned num_exprs = m_exprs.length ();
  struct edge_list *m_edges = create_edge_list ();
  unsigned num_edges = NUM_EDGES (m_edges);
  sbitmap *antin
    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
  sbitmap *antout
    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);

  sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs);

  compute_available (m_avloc, m_kill, m_avout, m_avin);
  compute_antinout_edge (m_antloc, m_transp, antin, antout);
  compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill,
		    earliest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "\n  Compute LCM earliest insert data (lift %d):\n\n", iter);
      fprintf (dump_file, "    Expression List (%u):\n", num_exprs);
      for (unsigned i = 0; i < num_exprs; i++)
	{
	  const auto &info = *m_exprs[i];
	  fprintf (dump_file, "      Expr[%u]: ", i);
	  info.dump (dump_file, "        ");
	}
      fprintf (dump_file, "\n    bitmap data:\n");
      for (const bb_info *bb : crtl->ssa->bbs ())
	{
	  unsigned int i = bb->index ();
	  fprintf (dump_file, "      BB %u:\n", i);
	  fprintf (dump_file, "        avloc: ");
	  dump_bitmap_file (dump_file, m_avloc[i]);
	  fprintf (dump_file, "        kill: ");
	  dump_bitmap_file (dump_file, m_kill[i]);
	  fprintf (dump_file, "        antloc: ");
	  dump_bitmap_file (dump_file, m_antloc[i]);
	  fprintf (dump_file, "        transp: ");
	  dump_bitmap_file (dump_file, m_transp[i]);

	  fprintf (dump_file, "        avin: ");
	  dump_bitmap_file (dump_file, m_avin[i]);
	  fprintf (dump_file, "        avout: ");
	  dump_bitmap_file (dump_file, m_avout[i]);
	  fprintf (dump_file, "        antin: ");
	  dump_bitmap_file (dump_file, antin[i]);
	  fprintf (dump_file, "        antout: ");
	  dump_bitmap_file (dump_file, antout[i]);
	}
      fprintf (dump_file, "\n");
      fprintf (dump_file, "    earliest:\n");
      for (unsigned ed = 0; ed < num_edges; ed++)
	{
	  edge eg = INDEX_EDGE (m_edges, ed);

	  if (bitmap_empty_p (earliest[ed]))
	    continue;
	  fprintf (dump_file, "      Edge(bb %u -> bb %u): ", eg->src->index,
		   eg->dest->index);
	  dump_bitmap_file (dump_file, earliest[ed]);
	}
      fprintf (dump_file, "\n");
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "    Fused global info result (lift %d):\n", iter);
    }

  bool changed = false;
  for (unsigned ed = 0; ed < num_edges; ed++)
    {
      sbitmap e = earliest[ed];
      if (bitmap_empty_p (e))
	continue;

      unsigned int expr_index;
      sbitmap_iterator sbi;
      EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi)
	{
	  vsetvl_info &curr_info = *m_exprs[expr_index];
	  edge eg = INDEX_EDGE (m_edges, ed);
	  vsetvl_block_info &src_block_info = get_block_info (eg->src);
	  vsetvl_block_info &dest_block_info = get_block_info (eg->dest);

	  if (!curr_info.valid_p ()
	      || eg->probability == profile_probability::never ()
	      || src_block_info.probability
		   == profile_probability::uninitialized ()
	      /* When there are multiple set bits in an earliest edge, the
		 edge may have an infinite loop in its preds or succs, or
		 multiple conflicting vsetvl expressions, which makes the
		 edge unrelated.  We don't perform fusion in that
		 situation.  */
	      || bitmap_count_bits (e) != 1)
	    continue;

	  if (src_block_info.empty_p ())
	    {
	      vsetvl_info new_curr_info = curr_info;
	      new_curr_info.set_bb (crtl->ssa->bb (eg->dest));
	      bool has_compatible_p
		= has_compatible_reaching_vsetvl_p (new_curr_info);
	      if (!has_compatible_p)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file,
			       "      Forbidden lift up vsetvl info into bb %u "
			       "since there is no vsetvl info that reaching in "
			       "is compatible with it:",
			       eg->src->index);
		      curr_info.dump (dump_file, "        ");
		    }
		  continue;
		}

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file,
			   "      Set empty bb %u to info:", eg->src->index);
		  curr_info.dump (dump_file, "        ");
		}
	      src_block_info.set_info (curr_info);
	      src_block_info.probability = dest_block_info.probability;
	      changed = true;
	    }
	  else if (src_block_info.has_info ())
	    {
	      vsetvl_info &prev_info = src_block_info.get_exit_info ();
	      gcc_assert (prev_info.valid_p ());

	      if (m_dem.compatible_p (prev_info, curr_info))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, "      Fuse curr info since prev info "
					  "compatible with it:\n");
		      fprintf (dump_file, "        prev_info: ");
		      prev_info.dump (dump_file, "          ");
		      fprintf (dump_file, "        curr_info: ");
		      curr_info.dump (dump_file, "          ");
		    }
		  m_dem.merge (prev_info, curr_info);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, "        prev_info after fused: ");
		      prev_info.dump (dump_file, "          ");
		      fprintf (dump_file, "\n");
		    }
		  if (src_block_info.has_info ())
		    src_block_info.probability += dest_block_info.probability;
		  changed = true;
		}
	      else
		{
		  /* Cancel lift up if probabilities are equal.  */
		  if (successors_probability_equal_p (eg->src)
		      || (dest_block_info.probability
			    > src_block_info.probability
			  && !has_compatible_reaching_vsetvl_p (curr_info)))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			{
			  fprintf (dump_file,
				   "      Reset bb %u:", eg->src->index);
			  prev_info.dump (dump_file, "        ");
			  fprintf (dump_file,
				   "      due to (same probability or no "
				   "compatible reaching):");
			  curr_info.dump (dump_file, "        ");
			}
		      src_block_info.set_empty_info ();
		      src_block_info.probability
			= profile_probability::uninitialized ();
		      /* See PR113696: we should reset the immediate
			 dominator to empty since we may uplift an
			 ineffective vsetvl which is located in a low
			 probability block.  */
		      basic_block dom
			= get_immediate_dominator (CDI_DOMINATORS, eg->src);
		      auto &dom_block_info = get_block_info (dom);
		      if (dom_block_info.has_info ()
			  && !m_dem.compatible_p (
			    dom_block_info.get_exit_info (), curr_info))
			{
			  dom_block_info.set_empty_info ();
			  dom_block_info.probability
			    = profile_probability::uninitialized ();
			  if (dump_file && (dump_flags & TDF_DETAILS))
			    {
			      fprintf (dump_file,
				       "      Reset dominator bb %u:",
				       dom->index);
			      prev_info.dump (dump_file, "        ");
			      fprintf (dump_file,
				       "      due to (same probability or no "
				       "compatible reaching):");
			      curr_info.dump (dump_file, "        ");
			    }
			}
		      changed = true;
		    }
		  /* Choose the one with higher probability.  */
		  else if (dest_block_info.probability
			   > src_block_info.probability)
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			{
			  fprintf (dump_file,
				   "      Change bb %u from:", eg->src->index);
			  prev_info.dump (dump_file, "        ");
			  fprintf (dump_file,
				   "      to (higher probability):");
			  curr_info.dump (dump_file, "        ");
			}
		      src_block_info.set_info (curr_info);
		      src_block_info.probability = dest_block_info.probability;
		      changed = true;
		    }
		}
	    }
	  else
	    {
	      vsetvl_info &prev_info = src_block_info.get_exit_info ();
	      if (!prev_info.valid_p ()
		  || m_dem.available_p (prev_info, curr_info)
		  || !m_dem.compatible_p (prev_info, curr_info))
		continue;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "      Fuse curr info since prev info "
				      "compatible with it:\n");
		  fprintf (dump_file, "        prev_info: ");
		  prev_info.dump (dump_file, "          ");
		  fprintf (dump_file, "        curr_info: ");
		  curr_info.dump (dump_file, "          ");
		}
	      m_dem.merge (prev_info, curr_info);
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "        prev_info after fused: ");
		  prev_info.dump (dump_file, "          ");
		  fprintf (dump_file, "\n");
		}
	      changed = true;
	    }
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n");
    }

  sbitmap_vector_free (antin);
  sbitmap_vector_free (antout);
  sbitmap_vector_free (earliest);
  free_edge_list (m_edges);

  return changed;
}
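
/* Illustrative lift-up (the CFG is made up): if bb 5 carries no vsetvl
   info and its successor bb 6 starts with a vsetvl expression whose
   earliest placement is the edge 5->6, the loop above copies that info
   into bb 5's exit and inherits bb 6's probability.  lazy_vsetvl calls
   this function repeatedly, so hoisting continues until no earliest edge
   produces a change, i.e. the returned CHANGED flag becomes false.  */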
void
pre_vsetvl::pre_global_vsetvl_info ()
{
  compute_vsetvl_def_data ();
  compute_lcm_local_properties ();

  unsigned num_exprs = m_exprs.length ();
  m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill,
			      m_avin, m_avout, &m_insert, &m_del);
  unsigned num_edges = NUM_EDGES (m_edges);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n  Compute LCM insert and delete data:\n\n");
      fprintf (dump_file, "    Expression List (%u):\n", num_exprs);
      for (unsigned i = 0; i < num_exprs; i++)
	{
	  const auto &info = *m_exprs[i];
	  fprintf (dump_file, "      Expr[%u]: ", i);
	  info.dump (dump_file, "        ");
	}
      fprintf (dump_file, "\n    bitmap data:\n");
      for (const bb_info *bb : crtl->ssa->bbs ())
	{
	  unsigned i = bb->index ();
	  fprintf (dump_file, "      BB %u:\n", i);
	  fprintf (dump_file, "        avloc: ");
	  dump_bitmap_file (dump_file, m_avloc[i]);
	  fprintf (dump_file, "        kill: ");
	  dump_bitmap_file (dump_file, m_kill[i]);
	  fprintf (dump_file, "        antloc: ");
	  dump_bitmap_file (dump_file, m_antloc[i]);
	  fprintf (dump_file, "        transp: ");
	  dump_bitmap_file (dump_file, m_transp[i]);

	  fprintf (dump_file, "        avin: ");
	  dump_bitmap_file (dump_file, m_avin[i]);
	  fprintf (dump_file, "        avout: ");
	  dump_bitmap_file (dump_file, m_avout[i]);
	  fprintf (dump_file, "        del: ");
	  dump_bitmap_file (dump_file, m_del[i]);
	}
      fprintf (dump_file, "\n");
      fprintf (dump_file, "    insert:\n");
      for (unsigned ed = 0; ed < num_edges; ed++)
	{
	  edge eg = INDEX_EDGE (m_edges, ed);

	  if (bitmap_empty_p (m_insert[ed]))
	    continue;
	  fprintf (dump_file, "      Edge(bb %u -> bb %u): ", eg->src->index,
		   eg->dest->index);
	  dump_bitmap_file (dump_file, m_insert[ed]);
	}
    }

  /* Remove vsetvl infos as LCM suggests.  */
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      sbitmap d = m_del[bb->index ()];
      if (bitmap_count_bits (d) == 0)
	continue;
      gcc_assert (bitmap_count_bits (d) == 1);
      unsigned expr_index = bitmap_first_set_bit (d);
      vsetvl_info &info = *m_exprs[expr_index];
      gcc_assert (info.valid_p ());
      gcc_assert (info.get_bb () == bb);
      const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
      gcc_assert (block_info.get_entry_info () == info);
      info.set_delete ();

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "\nLCM deleting vsetvl of block %d, it has predecessors: \n",
		   bb->index ());
	  hash_set<basic_block> all_preds
	    = get_all_predecessors (bb->cfg_bb ());
	  int i = 0;
	  for (const auto pred : all_preds)
	    {
	      fprintf (dump_file, "%d ", pred->index);
	      i++;
	      if (i % 32 == 0)
		fprintf (dump_file, "\n");
	    }
	  fprintf (dump_file, "\n");
	}
    }

  /* Remove vsetvl infos if all predecessors are available to the block.  */
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      vsetvl_block_info &block_info = get_block_info (bb);
      if (block_info.empty_p ())
	continue;
      vsetvl_info &curr_info = block_info.get_entry_info ();
      if (!curr_info.valid_p ())
	continue;

      unsigned int expr_index;
      sbitmap_iterator sbi;
      gcc_assert (
	!bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
      bool full_available = true;
      EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index,
				sbi)
	{
	  vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
	  if (!prev_info.valid_p ()
	      || !m_dem.available_p (prev_info, curr_info))
	    {
	      full_available = false;
	      break;
	    }
	}
      if (full_available)
	curr_info.set_delete ();
    }

  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      vsetvl_block_info &block_info = get_block_info (bb);
      if (block_info.empty_p ())
	continue;
      vsetvl_info &curr_info = block_info.get_entry_info ();
      if (curr_info.delete_p ())
	{
	  if (block_info.local_infos.is_empty ())
	    continue;
	  curr_info = block_info.local_infos[0];
	}
      if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
	  && preds_all_same_avl_and_ratio_p (curr_info))
	curr_info.set_change_vtype_only ();

      vsetvl_info prev_info = vsetvl_info ();
      prev_info.set_empty ();
      for (auto &curr_info : block_info.local_infos)
	{
	  if (prev_info.valid_p () && curr_info.valid_p ()
	      && m_dem.avl_available_p (prev_info, curr_info)
	      && prev_info.get_ratio () == curr_info.get_ratio ())
	    curr_info.set_change_vtype_only ();
	  prev_info = curr_info;
	}
    }
}
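
/* Note: after pre_edge_lcm_avs, m_del[bb] marks a block-header vsetvl
   that is redundant on every incoming path, and m_insert[edge] marks
   edges that need a compensating copy.  The follow-up loops above
   strengthen the pure LCM result: a header is also deleted when every
   reaching vsetvl definition is available to it, and a vsetvl whose
   predecessors all agree on AVL and ratio is demoted to a vtype-only
   change, which leaves the VL CSR untouched.  */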
void
pre_vsetvl::emit_vsetvl ()
{
  bool need_commit = false;

  /* Fake edges are created by connect_infinite_loops_to_exit.  We must
     commit the vsetvl edge insertions after the fake edges are removed;
     otherwise it will cause an ICE.  */
  remove_fake_exit_edges ();
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      for (const auto &curr_info : get_block_info (bb).local_infos)
	{
	  insn_info *insn = curr_info.get_insn ();
	  if (curr_info.delete_p ())
	    {
	      if (vsetvl_insn_p (insn->rtl ()))
		remove_vsetvl_insn (curr_info.get_insn ()->rtl ());
	      continue;
	    }
	  else if (curr_info.valid_p ())
	    {
	      if (vsetvl_insn_p (insn->rtl ()))
		{
		  const vsetvl_info temp = vsetvl_info (insn);
		  if (!(curr_info == temp))
		    {
		      if (dump_file)
			{
			  fprintf (dump_file, "\n  Change vsetvl info from: ");
			  temp.dump (dump_file, "    ");
			  fprintf (dump_file, "  to: ");
			  curr_info.dump (dump_file, "    ");
			}
		      change_vsetvl_insn (curr_info);
		    }
		}
	      else
		{
		  if (dump_file)
		    {
		      fprintf (dump_file,
			       "\n  Insert vsetvl info before insn %d: ",
			       insn->uid ());
		      curr_info.dump (dump_file, "    ");
		    }
		  insert_vsetvl_insn (EMIT_BEFORE, curr_info);
		}
	    }
	}
    }

  for (const vsetvl_info &item : m_delete_list)
    {
      gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ()));
      remove_vsetvl_insn (item.get_insn ()->rtl ());
    }

  /* Insert vsetvl info that was not deleted after lift up.  */
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      const vsetvl_block_info &block_info = get_block_info (bb);
      if (!block_info.has_info ())
	continue;

      const vsetvl_info &footer_info = block_info.get_exit_info ();

      if (footer_info.delete_p ())
	continue;

      edge eg;
      edge_iterator eg_iterator;
      FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs)
	{
	  gcc_assert (!(eg->flags & EDGE_ABNORMAL));
	  if (dump_file)
	    {
	      fprintf (
		dump_file,
		"\n  Insert missed vsetvl info at edge(bb %u -> bb %u): ",
		eg->src->index, eg->dest->index);
	      footer_info.dump (dump_file, "    ");
	    }
	  start_sequence ();
	  insert_vsetvl_insn (EMIT_DIRECT, footer_info);
	  rtx_insn *rinsn = get_insns ();
	  end_sequence ();
	  default_rtl_profile ();
	  insert_insn_on_edge (rinsn, eg);
	  need_commit = true;
	}
    }

  /* m_insert vsetvl as LCM suggests.  */
  for (int ed = 0; ed < NUM_EDGES (m_edges); ed++)
    {
      edge eg = INDEX_EDGE (m_edges, ed);
      sbitmap i = m_insert[ed];
      if (bitmap_count_bits (i) != 1)
	/* For code with an infinite loop (e.g. pr61634.c), the data flow is
	   completely wrong.  */
	continue;

      unsigned expr_index = bitmap_first_set_bit (i);
      const vsetvl_info &info = *m_exprs[expr_index];
      gcc_assert (info.valid_p ());
      if (dump_file)
	{
	  fprintf (dump_file,
		   "\n  Insert vsetvl info at edge(bb %u -> bb %u): ",
		   eg->src->index, eg->dest->index);
	  info.dump (dump_file, "    ");
	}
      rtl_profile_for_edge (eg);
      start_sequence ();

      insert_vsetvl_insn (EMIT_DIRECT, info);
      rtx_insn *rinsn = get_insns ();
      end_sequence ();
      default_rtl_profile ();

      /* We should not get an abnormal edge here.  */
      gcc_assert (!(eg->flags & EDGE_ABNORMAL));
      need_commit = true;
      insert_insn_on_edge (rinsn, eg);
    }

  if (need_commit)
    commit_edge_insertions ();
}
void
pre_vsetvl::cleanup ()
{
  remove_avl_operand ();
  remove_unused_dest_operand ();
  remove_vsetvl_pre_insns ();
}
void
pre_vsetvl::remove_avl_operand ()
{
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, cfun)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn)
	  && REG_P (get_vl (rinsn)))
	{
	  rtx avl = get_vl (rinsn);
	  if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
	    {
	      rtx new_pat;
	      if (fault_first_load_p (rinsn))
		new_pat
		  = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
	      else
		{
		  rtx set = single_set (rinsn);
		  rtx src
		    = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
		  new_pat = gen_rtx_SET (SET_DEST (set), src);
		}
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  Cleanup insn %u's avl operand:\n",
			   INSN_UID (rinsn));
		  print_rtl_single (dump_file, rinsn);
		}
	      validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
				       false);
	    }
	}
}
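
/* Note: rewriting the AVL use to const0_rtx is safe at this point
   because, after Phase 4, every RVV instruction is preceded by a vsetvl
   that has already established VL; the operand only existed to carry the
   AVL dependency for this pass.  The single-occurrence check above avoids
   clobbering patterns that still use the register for another purpose.  */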
void
pre_vsetvl::remove_unused_dest_operand ()
{
  df_analyze ();
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, cfun)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
	{
	  rtx vl = get_vl (rinsn);
	  vsetvl_info info = vsetvl_info (rinsn);
	  if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
	    if (!info.has_vlmax_avl ())
	      {
		rtx new_pat = info.get_vsetvl_pat (true);
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file,
			     "  Remove vsetvl insn %u's dest(vl) operand since "
			     "it is unused:\n",
			     INSN_UID (rinsn));
		    print_rtl_single (dump_file, rinsn);
		  }
		validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
					 false);
	      }
	}
}
/* Remove all bogus vsetvl_pre instructions.  */
void
pre_vsetvl::remove_vsetvl_pre_insns ()
{
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, cfun)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && vsetvl_pre_insn_p (rinsn))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  Eliminate vsetvl_pre insn %d:\n",
		       INSN_UID (rinsn));
	      print_rtl_single (dump_file, rinsn);
	    }
	  remove_vsetvl_insn (rinsn);
	}
}
const pass_data pass_data_vsetvl = {
  RTL_PASS,	 /* type */
  "vsetvl",	 /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP,	 /* tv_id */
  0,		 /* properties_required */
  0,		 /* properties_provided */
  0,		 /* properties_destroyed */
  0,		 /* todo_flags_start */
  0,		 /* todo_flags_finish */
};
class pass_vsetvl : public rtl_opt_pass
{
private:
  void simple_vsetvl ();
  void lazy_vsetvl ();

public:
  pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}

  /* opt_pass methods: */
  virtual bool gate (function *) final override { return TARGET_VECTOR; }
  virtual unsigned int execute (function *) final override;
}; // class pass_vsetvl
void
pass_vsetvl::simple_vsetvl ()
{
  if (dump_file)
    fprintf (dump_file, "\nEntering Simple VSETVL PASS\n");

  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, cfun)
    {
      FOR_BB_INSNS (cfg_bb, rinsn)
	{
	  if (!NONDEBUG_INSN_P (rinsn))
	    continue;
	  if (has_vtype_op (rinsn))
	    {
	      const auto &info = vsetvl_info (rinsn);
	      rtx pat = info.get_vsetvl_pat ();
	      emit_insn_before (pat, rinsn);
	      if (dump_file)
		{
		  fprintf (dump_file, "  Insert vsetvl insn before insn %d:\n",
			   INSN_UID (rinsn));
		  print_rtl_single (dump_file, PREV_INSN (rinsn));
		}
	    }
	}
    }
}
/* Lazy vsetvl insertion for optimize > 0.  */
void
pass_vsetvl::lazy_vsetvl ()
{
  if (dump_file)
    fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n");

  pre_vsetvl pre = pre_vsetvl ();

  if (dump_file)
    fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n");
  pre.fuse_local_vsetvl_info ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    pre.dump (dump_file, "phase 1");

  /* Phase 2: Fuse header and footer vsetvl infos between basic blocks.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n");
  if (vsetvl_strategy != VSETVL_OPT_NO_FUSION)
    {
      bool changed = true;
      int fused_count = 0;
      do
	{
	  if (dump_file)
	    fprintf (dump_file, "  Try lift up %d.\n\n", fused_count);
	  changed = pre.earliest_fuse_vsetvl_info (fused_count);
	  fused_count += 1;
	}
      while (changed);
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    pre.dump (dump_file, "phase 2");

  /* Phase 3: Reducing redundant vsetvl infos using LCM.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n");
  pre.pre_global_vsetvl_info ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    pre.dump (dump_file, "phase 3");

  /* Phase 4: Insert, modify and remove vsetvl insns.  */
  if (dump_file)
    fprintf (dump_file,
	     "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n");
  pre.emit_vsetvl ();

  /* Phase 5: Cleanup  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 5: Cleanup\n\n");
  pre.cleanup ();

  pre.finish ();
}
/* Main entry point for this pass.  */
unsigned int
pass_vsetvl::execute (function *)
{
  if (n_basic_blocks_for_fn (cfun) <= 0)
    return 0;

  /* The RVV instruction may change after split, which is not a stable
     instruction.  We need to split it here to avoid potential issues,
     since the VSETVL PASS is inserted before the split PASS.  */
  split_all_insns ();

  /* Return early if there are no vector instructions.  */
  if (!has_vector_insn (cfun))
    return 0;

  if (!optimize || vsetvl_strategy == VSETVL_SIMPLE)
    simple_vsetvl ();
  else
    lazy_vsetvl ();

  return 0;
}

rtl_opt_pass *
make_pass_vsetvl (gcc::context *ctxt)
{
  return new pass_vsetvl (ctxt);
}