[PR testsuite/116860] Testsuite adjustment for recently added tests
[official-gcc.git] / gcc / config / riscv / riscv-vsetvl.cc
blob72c4c59514e5217f2e42d7b25eb0f57249395ea9
1 /* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
2 Copyright (C) 2022-2025 Free Software Foundation, Inc.
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or(at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The values of the vl and vtype registers will affect the behavior of RVV
22 insns. That is, when we need to execute an RVV instruction, we need to set
23 the correct vl and vtype values by executing the vsetvl instruction before.
24 Executing the fewest number of vsetvl instructions while keeping the behavior
25 the same is the problem this pass is trying to solve. This vsetvl pass is
26 divided into 5 phases:
28 - Phase 1 (fuse local vsetvl infos): traverses each Basic Block, parses
29 each instruction in it that affects vl and vtype state and generates an
30 array of vsetvl_info objects. Then traverse the vsetvl_info array from
31 front to back and perform fusion according to the fusion rules. The fused
32 vsetvl infos are stored in the vsetvl_block_info object's `infos` field.
34 - Phase 2 (earliest fuse global vsetvl infos): The header_info and
35 footer_info of vsetvl_block_info are used as expressions, and the
36 earliest of each expression is computed. Based on the earliest
37 information, try to lift up the corresponding vsetvl info to the src
38 basic block of the edge (mainly to reduce the total number of vsetvl
39 instructions, this uplift will cause some execution paths to execute
40 vsetvl instructions that shouldn't be there).
42 - Phase 3 (pre global vsetvl info): The header_info and footer_info of
43 vsetvl_block_info are used as expressions, and the LCM algorithm is used
44 to compute the header_info that needs to be deleted and the one that
45 needs to be inserted in some edges.
47 - Phase 4 (emit vsetvl insns) : Based on the fusion result of Phase 1 and
48 the deletion and insertion information of Phase 3, the mandatory vsetvl
49 instruction insertion, modification and deletion are performed.
51 - Phase 5 (cleanup): Clean up the avl operand in the RVV operator
52 instruction and cleanup the unused dest operand of the vsetvl insn.
54 After the Phase 1 a virtual CFG of vsetvl_info is generated. The virtual
55 basic block is represented by vsetvl_block_info, and the virtual vsetvl
56 statements inside are represented by vsetvl_info. The later phases 2 and 3
57 are constantly modifying and adjusting this virtual CFG. Phase 4 performs
58 insertion, modification and deletion of vsetvl instructions based on the
59 optimized virtual CFG. The Phase 1, 2 and 3 do not involve modifications to
60 the RTL.
63 #define IN_TARGET_CODE 1
64 #define INCLUDE_ALGORITHM
65 #define INCLUDE_FUNCTIONAL
66 #define INCLUDE_ARRAY
68 #include "config.h"
69 #include "system.h"
70 #include "coretypes.h"
71 #include "tm.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "target.h"
75 #include "tree-pass.h"
76 #include "df.h"
77 #include "rtl-ssa.h"
78 #include "cfgcleanup.h"
79 #include "insn-config.h"
80 #include "insn-attr.h"
81 #include "insn-opinit.h"
82 #include "tm-constrs.h"
83 #include "cfgrtl.h"
84 #include "cfganal.h"
85 #include "lcm.h"
86 #include "predict.h"
87 #include "profile-count.h"
88 #include "gcse.h"
89 #include "cfgloop.h"
91 using namespace rtl_ssa;
92 using namespace riscv_vector;
94 /* Set the bitmap DST to the union of SRC of predecessors of
95 basic block B.
96 It's a bit different from bitmap_union_of_preds in cfganal.cc. This function
97 takes into account the case where pred is ENTRY basic block. The main reason
98 for this difference is to make it easier to insert some special value into
99 the ENTRY base block. For example, vsetvl_info with a status of UNKNOWN. */
100 static void
101 bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b)
103 unsigned int set_size = dst->size;
104 edge e;
105 unsigned ix;
107 for (ix = 0; ix < EDGE_COUNT (b->preds); ix++)
109 e = EDGE_PRED (b, ix);
110 bitmap_copy (dst, src[e->src->index]);
111 break;
114 if (ix == EDGE_COUNT (b->preds))
115 bitmap_clear (dst);
116 else
117 for (ix++; ix < EDGE_COUNT (b->preds); ix++)
119 unsigned int i;
120 SBITMAP_ELT_TYPE *p, *r;
122 e = EDGE_PRED (b, ix);
123 p = src[e->src->index]->elms;
124 r = dst->elms;
125 for (i = 0; i < set_size; i++)
126 *r++ |= *p++;
130 /* Compute the reaching definition in and out based on the gen and KILL
131 information's in each Base Blocks.
132 This function references the compute_available implementation in lcm.cc */
133 static void
134 compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in,
135 sbitmap *out)
137 edge e;
138 basic_block *worklist, *qin, *qout, *qend, bb;
139 unsigned int qlen;
140 edge_iterator ei;
142 /* Allocate a worklist array/queue. Entries are only added to the
143 list if they were not already on the list. So the size is
144 bounded by the number of basic blocks. */
145 qin = qout = worklist
146 = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
148 /* Put every block on the worklist; this is necessary because of the
149 optimistic initialization of AVOUT above. Use reverse postorder
150 to make the forward dataflow problem require less iterations. */
151 int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
152 int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
153 for (int i = 0; i < n; ++i)
155 bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
156 *qin++ = bb;
157 bb->aux = bb;
159 free (rpo);
161 qin = worklist;
162 qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
163 qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;
165 /* Mark blocks which are successors of the entry block so that we
166 can easily identify them below. */
167 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
168 e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun);
170 /* Iterate until the worklist is empty. */
171 while (qlen)
173 /* Take the first entry off the worklist. */
174 bb = *qout++;
175 qlen--;
177 if (qout >= qend)
178 qout = worklist;
180 /* Do not clear the aux field for blocks which are successors of the
181 ENTRY block. That way we never add then to the worklist again. */
182 if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun))
183 bb->aux = NULL;
185 bitmap_union_of_preds_with_entry (in[bb->index], out, bb);
187 if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index],
188 kill[bb->index]))
189 /* If the out state of this block changed, then we need
190 to add the successors of this block to the worklist
191 if they are not already on the worklist. */
192 FOR_EACH_EDGE (e, ei, bb->succs)
193 if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
195 *qin++ = e->dest;
196 e->dest->aux = e;
197 qlen++;
199 if (qin >= qend)
200 qin = worklist;
204 clear_aux_for_edges ();
205 clear_aux_for_blocks ();
206 free (worklist);
209 /* Classification of vsetvl instruction. */
210 enum vsetvl_type
212 VSETVL_NORMAL,
213 VSETVL_VTYPE_CHANGE_ONLY,
214 VSETVL_DISCARD_RESULT,
215 NUM_VSETVL_TYPE
218 enum emit_type
220 /* emit_insn directly. */
221 EMIT_DIRECT,
222 EMIT_BEFORE,
223 EMIT_AFTER,
226 static const int MAX_LMUL = 8;
228 /* dump helper functions */
229 static const char *
230 vlmul_to_str (vlmul_type vlmul)
232 switch (vlmul)
234 case LMUL_1:
235 return "m1";
236 case LMUL_2:
237 return "m2";
238 case LMUL_4:
239 return "m4";
240 case LMUL_8:
241 return "m8";
242 case LMUL_RESERVED:
243 return "INVALID LMUL";
244 case LMUL_F8:
245 return "mf8";
246 case LMUL_F4:
247 return "mf4";
248 case LMUL_F2:
249 return "mf2";
251 default:
252 gcc_unreachable ();
256 static const char *
257 policy_to_str (bool agnostic_p)
259 return agnostic_p ? "agnostic" : "undisturbed";
262 /* Return true if it is an RVV instruction depends on VTYPE global
263 status register. */
264 static bool
265 has_vtype_op (rtx_insn *rinsn)
267 return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
270 /* Return true if the instruction ignores VLMUL field of VTYPE. */
271 static bool
272 ignore_vlmul_insn_p (rtx_insn *rinsn)
274 return get_attr_type (rinsn) == TYPE_VIMOVVX
275 || get_attr_type (rinsn) == TYPE_VFMOVVF
276 || get_attr_type (rinsn) == TYPE_VIMOVXV
277 || get_attr_type (rinsn) == TYPE_VFMOVFV;
280 /* Return true if the instruction is scalar move instruction. */
281 static bool
282 scalar_move_insn_p (rtx_insn *rinsn)
284 return get_attr_type (rinsn) == TYPE_VIMOVXV
285 || get_attr_type (rinsn) == TYPE_VFMOVFV;
288 /* Return true if the instruction is fault first load instruction. */
289 static bool
290 fault_first_load_p (rtx_insn *rinsn)
292 return recog_memoized (rinsn) >= 0
293 && (get_attr_type (rinsn) == TYPE_VLDFF
294 || get_attr_type (rinsn) == TYPE_VLSEGDFF);
297 /* Return true if the instruction is read vl instruction. */
298 static bool
299 read_vl_insn_p (rtx_insn *rinsn)
301 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
304 /* Return true if it is a vsetvl instruction. */
305 static bool
306 vector_config_insn_p (rtx_insn *rinsn)
308 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
311 /* Return true if it is vsetvldi or vsetvlsi. */
312 static bool
313 vsetvl_insn_p (rtx_insn *rinsn)
315 if (!rinsn || !vector_config_insn_p (rinsn))
316 return false;
317 return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
318 || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
321 /* Return true if it is the bogus vsetvl_pre instruction:
323 (define_insn "@vlmax_avl<mode>"
324 [(set (match_operand:P 0 "register_operand" "=r")
325 (unspec:P [(match_operand:P 1 "const_int_operand" "i")] UNSPEC_VLMAX))]
326 "TARGET_VECTOR"
328 [(set_attr "type" "vsetvl_pre")])
330 As described above, it's the bogus instruction which doesn't any assembler
331 and should be removed eventually. It's used for occupying a scalar register
332 for VLMAX avl RVV instruction before register allocation.
334 Before RA:
337 vsetvl_pre (set r136)
338 vadd.vv (use r136 with VLMAX avl)
341 After RA:
344 vsetvl_pre (set a5)
345 vadd.vv (use r136 with VLMAX avl)
348 VSETVL PASS:
351 vsetvl_pre (set a5) ---> removed.
352 vsetvl a5,zero,... ---> Inserted.
353 vadd.vv
356 static bool
357 vsetvl_pre_insn_p (rtx_insn *rinsn)
359 return recog_memoized (rinsn) >= 0
360 && get_attr_type (rinsn) == TYPE_VSETVL_PRE;
363 /* Return true if it is vsetvl zero, rs1. */
364 static bool
365 vsetvl_discard_result_insn_p (rtx_insn *rinsn)
367 if (!vector_config_insn_p (rinsn))
368 return false;
369 return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
370 || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
373 static bool
374 real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
376 return insn != nullptr && insn->is_real () && insn->bb () == bb;
379 /* Helper function to get VL operand for VLMAX insn. */
380 static rtx
381 get_vl (rtx_insn *rinsn)
383 if (has_vl_op (rinsn))
385 extract_insn_cached (rinsn);
386 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
388 return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
391 /* Helper function to get AVL operand. */
392 static rtx
393 get_avl (rtx_insn *rinsn)
395 if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
396 return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);
398 if (!has_vl_op (rinsn))
399 return NULL_RTX;
400 if (vlmax_avl_type_p (rinsn))
401 return RVV_VLMAX;
402 extract_insn_cached (rinsn);
403 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
406 /* Get default mask policy. */
407 static bool
408 get_default_ma ()
410 /* For the instruction that doesn't require MA, we still need a default value
411 to emit vsetvl. We pick up the default value according to prefer policy. */
412 return (bool) (get_prefer_mask_policy () & 0x1
413 || (get_prefer_mask_policy () >> 1 & 0x1));
416 /* Helper function to get MA operand. */
417 static bool
418 mask_agnostic_p (rtx_insn *rinsn)
420 /* If it doesn't have MA, we return agnostic by default. */
421 extract_insn_cached (rinsn);
422 int ma = get_attr_ma (rinsn);
423 return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
426 /* Return true if FN has a vector instruction that use VL/VTYPE. */
427 static bool
428 has_vector_insn (function *fn)
430 basic_block cfg_bb;
431 rtx_insn *rinsn;
432 FOR_ALL_BB_FN (cfg_bb, fn)
433 FOR_BB_INSNS (cfg_bb, rinsn)
434 if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
435 return true;
436 return false;
439 static vlmul_type
440 calculate_vlmul (unsigned int sew, unsigned int ratio)
442 const vlmul_type ALL_LMUL[]
443 = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
444 for (const vlmul_type vlmul : ALL_LMUL)
445 if (calculate_ratio (sew, vlmul) == ratio)
446 return vlmul;
447 return LMUL_RESERVED;
450 /* Get the currently supported maximum sew used in the int rvv instructions. */
451 static uint8_t
452 get_max_int_sew ()
454 if (TARGET_VECTOR_ELEN_64)
455 return 64;
456 else if (TARGET_VECTOR_ELEN_32)
457 return 32;
458 gcc_unreachable ();
461 /* Get the currently supported maximum sew used in the float rvv instructions.
463 static uint8_t
464 get_max_float_sew ()
466 if (TARGET_VECTOR_ELEN_FP_64)
467 return 64;
468 else if (TARGET_VECTOR_ELEN_FP_32)
469 return 32;
470 else if (TARGET_VECTOR_ELEN_FP_16)
471 return 16;
472 gcc_unreachable ();
475 enum def_type
477 REAL_SET = 1 << 0,
478 PHI_SET = 1 << 1,
479 BB_HEAD_SET = 1 << 2,
480 BB_END_SET = 1 << 3,
481 /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
482 PHI_SET, BB_HEAD_SET, BB_END_SET and
483 CLOBBER_DEF def_info types. Currently,
484 we conservatively do not optimize clobber
485 def since we don't see the case that we
486 need to optimize it. */
487 CLOBBER_DEF = 1 << 4
490 static bool
491 insn_should_be_added_p (const insn_info *insn, unsigned int types)
493 if (insn->is_real () && (types & REAL_SET))
494 return true;
495 if (insn->is_phi () && (types & PHI_SET))
496 return true;
497 if (insn->is_bb_head () && (types & BB_HEAD_SET))
498 return true;
499 if (insn->is_bb_end () && (types & BB_END_SET))
500 return true;
501 return false;
504 static const hash_set<use_info *>
505 get_all_real_uses (insn_info *insn, unsigned regno)
507 gcc_assert (insn->is_real ());
509 hash_set<use_info *> uses;
510 auto_vec<phi_info *> work_list;
511 hash_set<phi_info *> visited_list;
513 for (def_info *def : insn->defs ())
515 if (!def->is_reg () || def->regno () != regno)
516 continue;
517 set_info *set = safe_dyn_cast<set_info *> (def);
518 if (!set)
519 continue;
520 for (use_info *use : set->nondebug_insn_uses ())
521 if (use->insn ()->is_real ())
522 uses.add (use);
523 for (use_info *use : set->phi_uses ())
524 work_list.safe_push (use->phi ());
527 while (!work_list.is_empty ())
529 phi_info *phi = work_list.pop ();
530 visited_list.add (phi);
532 for (use_info *use : phi->nondebug_insn_uses ())
533 if (use->insn ()->is_real ())
534 uses.add (use);
535 for (use_info *use : phi->phi_uses ())
536 if (!visited_list.contains (use->phi ()))
537 work_list.safe_push (use->phi ());
539 return uses;
542 /* Recursively find all define instructions. The kind of instruction is
543 specified by the DEF_TYPE. */
544 static hash_set<set_info *>
545 get_all_sets (phi_info *phi, unsigned int types)
547 hash_set<set_info *> insns;
548 auto_vec<phi_info *> work_list;
549 hash_set<phi_info *> visited_list;
550 if (!phi)
551 return hash_set<set_info *> ();
552 work_list.safe_push (phi);
554 while (!work_list.is_empty ())
556 phi_info *phi = work_list.pop ();
557 visited_list.add (phi);
558 for (use_info *use : phi->inputs ())
560 def_info *def = use->def ();
561 set_info *set = safe_dyn_cast<set_info *> (def);
562 if (!set)
563 return hash_set<set_info *> ();
565 gcc_assert (!set->insn ()->is_debug_insn ());
567 if (insn_should_be_added_p (set->insn (), types))
568 insns.add (set);
569 if (set->insn ()->is_phi ())
571 phi_info *new_phi = as_a<phi_info *> (set);
572 if (!visited_list.contains (new_phi))
573 work_list.safe_push (new_phi);
577 return insns;
580 static hash_set<set_info *>
581 get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
582 bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
584 if (real_p && phi_p && param_p)
585 return get_all_sets (safe_dyn_cast<phi_info *> (set),
586 REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
588 else if (real_p && param_p)
589 return get_all_sets (safe_dyn_cast<phi_info *> (set),
590 REAL_SET | BB_HEAD_SET | BB_END_SET);
592 else if (real_p)
593 return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
594 return hash_set<set_info *> ();
597 static bool
598 source_equal_p (insn_info *insn1, insn_info *insn2)
600 if (!insn1 || !insn2)
601 return false;
602 rtx_insn *rinsn1 = insn1->rtl ();
603 rtx_insn *rinsn2 = insn2->rtl ();
604 if (!rinsn1 || !rinsn2)
605 return false;
607 rtx note1 = find_reg_equal_equiv_note (rinsn1);
608 rtx note2 = find_reg_equal_equiv_note (rinsn2);
609 /* We could handle the case of similar-looking REG_EQUALs as well but
610 would need to verify that no insn in between modifies any of the source
611 operands. */
612 if (note1 && note2 && rtx_equal_p (note1, note2)
613 && REG_NOTE_KIND (note1) == REG_EQUIV)
614 return true;
615 return false;
618 static insn_info *
619 extract_single_source (set_info *set)
621 if (!set)
622 return nullptr;
623 if (set->insn ()->is_real ())
624 return set->insn ();
625 if (!set->insn ()->is_phi ())
626 return nullptr;
627 hash_set<set_info *> sets = get_all_sets (set, true, false, true);
628 if (sets.is_empty ())
629 return nullptr;
631 insn_info *first_insn = (*sets.begin ())->insn ();
632 if (first_insn->is_artificial ())
633 return nullptr;
634 for (const set_info *set : sets)
636 /* If there is a head or end insn, we conservative return
637 NULL so that VSETVL PASS will insert vsetvl directly. */
638 if (set->insn ()->is_artificial ())
639 return nullptr;
640 if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn))
641 return nullptr;
644 return first_insn;
647 static bool
648 same_equiv_note_p (set_info *set1, set_info *set2)
650 insn_info *insn1 = extract_single_source (set1);
651 insn_info *insn2 = extract_single_source (set2);
652 if (!insn1 || !insn2)
653 return false;
654 return source_equal_p (insn1, insn2);
657 /* Return true if the SET result is not used by any instructions. */
658 static bool
659 has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
661 if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
662 return false;
664 rtx_insn *iter;
665 for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb));
666 iter = NEXT_INSN (iter))
667 if (df_find_use (iter, regno_reg_rtx[regno]))
668 return false;
670 return true;
673 /* Return true for the special block that we can't apply LCM optimization. */
674 static bool
675 invalid_opt_bb_p (basic_block cfg_bb)
677 edge e;
678 edge_iterator ei;
680 /* We don't do LCM optimizations on complex edges. */
681 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
682 if (e->flags & EDGE_COMPLEX)
683 return true;
685 /* We only do LCM optimizations on blocks that are post dominated by
686 EXIT block, that is, we don't do LCM optimizations on infinite loop. */
687 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
688 if (e->flags & EDGE_FAKE)
689 return true;
691 return false;
694 /* Get all predecessors of BB. */
695 static hash_set<basic_block>
696 get_all_predecessors (basic_block bb)
698 hash_set<basic_block> blocks;
699 auto_vec<basic_block> work_list;
700 hash_set<basic_block> visited_list;
701 work_list.safe_push (bb);
703 while (!work_list.is_empty ())
705 basic_block new_bb = work_list.pop ();
706 visited_list.add (new_bb);
707 edge e;
708 edge_iterator ei;
709 FOR_EACH_EDGE (e, ei, new_bb->preds)
711 if (!visited_list.contains (e->src))
712 work_list.safe_push (e->src);
713 blocks.add (e->src);
716 return blocks;
719 /* This flags indicates the minimum demand of the vl and vtype values by the
720 RVV instruction. For example, DEMAND_RATIO_P indicates that this RVV
721 instruction only needs the SEW/LMUL ratio to remain the same, and does not
722 require SEW and LMUL to be fixed.
723 Therefore, if the former RVV instruction needs DEMAND_RATIO_P and the latter
724 instruction needs DEMAND_SEW_LMUL_P and its SEW/LMUL is the same as that of
725 the former instruction, then we can make the minimum demand of the former
726 instruction strict to DEMAND_SEW_LMUL_P, and its required SEW and LMUL are
727 the SEW and LMUL of the latter instruction, and the vsetvl instruction
728 generated according to the new demand can also be used for the latter
729 instruction, so there is no need to insert a separate vsetvl instruction for
730 the latter instruction. */
731 enum demand_flags : unsigned
733 DEMAND_EMPTY_P = 0,
734 DEMAND_SEW_P = 1 << 0,
735 DEMAND_LMUL_P = 1 << 1,
736 DEMAND_RATIO_P = 1 << 2,
737 DEMAND_GE_SEW_P = 1 << 3,
738 DEMAND_TAIL_POLICY_P = 1 << 4,
739 DEMAND_MASK_POLICY_P = 1 << 5,
740 DEMAND_AVL_P = 1 << 6,
741 DEMAND_NON_ZERO_AVL_P = 1 << 7,
744 /* We split the demand information into three parts. They are sew and lmul
745 related (sew_lmul_demand_type), tail and mask policy related
746 (policy_demand_type) and avl related (avl_demand_type). Then we define three
747 interfaces available_p, compatible_p and merge. available_p is
748 used to determine whether the two vsetvl infos prev_info and next_info are
749 available or not. If prev_info is available for next_info, it means that the
750 RVV insn corresponding to next_info on the path from prev_info to next_info
751 can be used without inserting a separate vsetvl instruction. compatible_p
752 is used to determine whether prev_info is compatible with next_info, and if
753 so, merge can be used to merge the stricter demand information from
754 next_info into prev_info so that prev_info becomes available to next_info.
757 enum class sew_lmul_demand_type : unsigned
759 sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P,
760 ratio_only = demand_flags::DEMAND_RATIO_P,
761 sew_only = demand_flags::DEMAND_SEW_P,
762 ge_sew = demand_flags::DEMAND_GE_SEW_P,
763 ratio_and_ge_sew
764 = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P,
767 enum class policy_demand_type : unsigned
769 tail_mask_policy
770 = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P,
771 tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P,
772 mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P,
773 ignore_policy = demand_flags::DEMAND_EMPTY_P,
776 enum class avl_demand_type : unsigned
778 avl = demand_flags::DEMAND_AVL_P,
779 non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P,
780 ignore_avl = demand_flags::DEMAND_EMPTY_P,
783 class vsetvl_info
785 private:
786 insn_info *m_insn;
787 bb_info *m_bb;
788 rtx m_avl;
789 rtx m_vl;
790 set_info *m_avl_def;
791 uint8_t m_sew;
792 uint8_t m_max_sew;
793 vlmul_type m_vlmul;
794 uint8_t m_ratio;
795 bool m_ta;
796 bool m_ma;
798 sew_lmul_demand_type m_sew_lmul_demand;
799 policy_demand_type m_policy_demand;
800 avl_demand_type m_avl_demand;
802 enum class state_type
804 UNINITIALIZED,
805 VALID,
806 UNKNOWN,
807 EMPTY,
809 state_type m_state;
811 bool m_delete;
812 bool m_change_vtype_only;
813 insn_info *m_read_vl_insn;
814 bool m_vl_used_by_non_rvv_insn;
816 public:
817 vsetvl_info ()
818 : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX),
819 m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED),
820 m_ratio (0), m_ta (false), m_ma (false),
821 m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul),
822 m_policy_demand (policy_demand_type::tail_mask_policy),
823 m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED),
824 m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr),
825 m_vl_used_by_non_rvv_insn (false)
828 vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); }
830 vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); }
832 void set_avl (rtx avl) { m_avl = avl; }
833 void set_vl (rtx vl) { m_vl = vl; }
834 void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; }
835 void set_sew (uint8_t sew) { m_sew = sew; }
836 void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; }
837 void set_ratio (uint8_t ratio) { m_ratio = ratio; }
838 void set_ta (bool ta) { m_ta = ta; }
839 void set_ma (bool ma) { m_ma = ma; }
840 void set_delete () { m_delete = true; }
841 void set_bb (bb_info *bb) { m_bb = bb; }
842 void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; }
843 void set_change_vtype_only () { m_change_vtype_only = true; }
844 void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; }
846 rtx get_avl () const { return m_avl; }
847 rtx get_vl () const { return m_vl; }
848 set_info *get_avl_def () const { return m_avl_def; }
849 uint8_t get_sew () const { return m_sew; }
850 vlmul_type get_vlmul () const { return m_vlmul; }
851 uint8_t get_ratio () const { return m_ratio; }
852 bool get_ta () const { return m_ta; }
853 bool get_ma () const { return m_ma; }
854 insn_info *get_insn () const { return m_insn; }
855 bool delete_p () const { return m_delete; }
856 bb_info *get_bb () const { return m_bb; }
857 uint8_t get_max_sew () const { return m_max_sew; }
858 insn_info *get_read_vl_insn () const { return m_read_vl_insn; }
859 bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; }
861 bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); }
862 bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); }
863 bool has_nonvlmax_reg_avl () const
865 return m_avl && REG_P (m_avl) && !has_vlmax_avl ();
867 bool has_non_zero_avl () const
869 if (has_imm_avl ())
870 return INTVAL (m_avl) > 0;
871 return has_vlmax_avl ();
873 bool has_vl () const
875 /* The VL operand can only be either a NULL_RTX or a register. */
876 gcc_assert (!m_vl || REG_P (m_vl));
877 return m_vl != NULL_RTX;
879 bool has_same_ratio (const vsetvl_info &other) const
881 return get_ratio () == other.get_ratio ();
884 /* The block of INSN isn't always same as the block of the VSETVL_INFO,
885 meaning we may have 'get_insn ()->bb () != get_bb ()'.
887 E.g. BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1)
889 BB 2 has empty VSETVL_INFO, wheras BB 3 has VSETVL_INFO that satisfies
890 get_insn ()->bb () == get_bb (). In earliest fusion, we may fuse bb 3 and
891 bb 2 so that the 'get_bb ()' of BB2 VSETVL_INFO will be BB2 wheras the
892 'get_insn ()' of BB2 VSETVL INFO will be the rvv insn 1 (which is located
893 at BB3). */
894 bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); }
895 void update_avl (const vsetvl_info &other)
897 m_avl = other.get_avl ();
898 m_vl = other.get_vl ();
899 m_avl_def = other.get_avl_def ();
902 bool uninit_p () const { return m_state == state_type::UNINITIALIZED; }
903 bool valid_p () const { return m_state == state_type::VALID; }
904 bool unknown_p () const { return m_state == state_type::UNKNOWN; }
905 bool empty_p () const { return m_state == state_type::EMPTY; }
906 bool change_vtype_only_p () const { return m_change_vtype_only
907 && !TARGET_XTHEADVECTOR; }
909 void set_valid () { m_state = state_type::VALID; }
910 void set_unknown () { m_state = state_type::UNKNOWN; }
911 void set_empty () { m_state = state_type::EMPTY; }
913 void set_sew_lmul_demand (sew_lmul_demand_type demand)
915 m_sew_lmul_demand = demand;
917 void set_policy_demand (policy_demand_type demand)
919 m_policy_demand = demand;
921 void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; }
923 sew_lmul_demand_type get_sew_lmul_demand () const
925 return m_sew_lmul_demand;
927 policy_demand_type get_policy_demand () const { return m_policy_demand; }
928 avl_demand_type get_avl_demand () const { return m_avl_demand; }
930 void normalize_demand (unsigned demand_flags)
932 switch (demand_flags
933 & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P | DEMAND_GE_SEW_P))
935 case (unsigned) sew_lmul_demand_type::sew_lmul:
936 m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul;
937 break;
938 case (unsigned) sew_lmul_demand_type::ratio_only:
939 m_sew_lmul_demand = sew_lmul_demand_type::ratio_only;
940 break;
941 case (unsigned) sew_lmul_demand_type::sew_only:
942 m_sew_lmul_demand = sew_lmul_demand_type::sew_only;
943 break;
944 case (unsigned) sew_lmul_demand_type::ge_sew:
945 m_sew_lmul_demand = sew_lmul_demand_type::ge_sew;
946 break;
947 case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew:
948 m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew;
949 break;
950 default:
951 gcc_unreachable ();
954 switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P))
956 case (unsigned) policy_demand_type::tail_mask_policy:
957 m_policy_demand = policy_demand_type::tail_mask_policy;
958 break;
959 case (unsigned) policy_demand_type::tail_policy_only:
960 m_policy_demand = policy_demand_type::tail_policy_only;
961 break;
962 case (unsigned) policy_demand_type::mask_policy_only:
963 m_policy_demand = policy_demand_type::mask_policy_only;
964 break;
965 case (unsigned) policy_demand_type::ignore_policy:
966 m_policy_demand = policy_demand_type::ignore_policy;
967 break;
968 default:
969 gcc_unreachable ();
972 switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P))
974 case (unsigned) avl_demand_type::avl:
975 m_avl_demand = avl_demand_type::avl;
976 break;
977 case (unsigned) avl_demand_type::non_zero_avl:
978 m_avl_demand = avl_demand_type::non_zero_avl;
979 break;
980 case (unsigned) avl_demand_type::ignore_avl:
981 m_avl_demand = avl_demand_type::ignore_avl;
982 break;
983 default:
984 gcc_unreachable ();
988 void parse_insn (rtx_insn *rinsn)
990 if (!NONDEBUG_INSN_P (rinsn))
991 return;
992 if (optimize == 0 && !has_vtype_op (rinsn))
993 return;
994 gcc_assert (!vsetvl_discard_result_insn_p (rinsn));
995 set_valid ();
996 extract_insn_cached (rinsn);
997 m_avl = ::get_avl (rinsn);
998 if (has_vlmax_avl () || vsetvl_insn_p (rinsn))
999 m_vl = ::get_vl (rinsn);
1000 m_sew = ::get_sew (rinsn);
1001 m_vlmul = ::get_vlmul (rinsn);
1002 m_ta = tail_agnostic_p (rinsn);
1003 m_ma = mask_agnostic_p (rinsn);
1006 void parse_insn (insn_info *insn)
1008 /* The VL dest of the insn */
1009 rtx dest_vl = NULL_RTX;
1011 m_insn = insn;
1012 m_bb = insn->bb ();
1013 /* Return if it is debug insn for the consistency with optimize == 0. */
1014 if (insn->is_debug_insn ())
1015 return;
1017 /* We set it as unknown since we don't what will happen in CALL or ASM. */
1018 if (insn->is_call () || insn->is_asm ())
1020 set_unknown ();
1021 return;
1024 /* If this is something that updates VL/VTYPE that we don't know about, set
1025 the state to unknown. */
1026 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
1027 && (find_access (insn->defs (), VL_REGNUM)
1028 || find_access (insn->defs (), VTYPE_REGNUM)))
1030 set_unknown ();
1031 return;
1034 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
1035 /* uninitialized */
1036 return;
1038 set_valid ();
1040 m_avl = ::get_avl (insn->rtl ());
1041 if (m_avl)
1043 if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
1045 m_vl = ::get_vl (insn->rtl ());
1046 dest_vl = m_vl;
1049 if (has_nonvlmax_reg_avl ())
1050 m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
1053 m_sew = ::get_sew (insn->rtl ());
1054 m_vlmul = ::get_vlmul (insn->rtl ());
1055 m_ratio = get_attr_ratio (insn->rtl ());
1056 /* when get_attr_ratio is invalid, this kind of instructions
1057 doesn't care about ratio. However, we still need this value
1058 in demand info backward analysis. */
1059 if (m_ratio == INVALID_ATTRIBUTE)
1060 m_ratio = calculate_ratio (m_sew, m_vlmul);
1061 m_ta = tail_agnostic_p (insn->rtl ());
1062 m_ma = mask_agnostic_p (insn->rtl ());
1064 /* If merge operand is undef value, we prefer agnostic. */
1065 int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
1066 if (merge_op_idx != INVALID_ATTRIBUTE
1067 && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
1069 m_ta = true;
1070 m_ma = true;
1073 /* Determine the demand info of the RVV insn. */
1074 m_max_sew = get_max_int_sew ();
1075 unsigned dflags = 0;
1076 if (vector_config_insn_p (insn->rtl ()))
1078 dflags |= demand_flags::DEMAND_AVL_P;
1079 dflags |= demand_flags::DEMAND_RATIO_P;
1081 else
1083 if (has_vl_op (insn->rtl ()))
1085 if (scalar_move_insn_p (insn->rtl ()))
1087 /* If the avl for vmv.s.x comes from the vsetvl instruction, we
1088 don't know if the avl is non-zero, so it is set to
1089 DEMAND_AVL_P for now. it may be corrected to
1090 DEMAND_NON_ZERO_AVL_P later when more information is
1091 available.
1093 if (has_non_zero_avl ())
1094 dflags |= demand_flags::DEMAND_NON_ZERO_AVL_P;
1095 else
1096 dflags |= demand_flags::DEMAND_AVL_P;
1098 else
1099 dflags |= demand_flags::DEMAND_AVL_P;
1102 if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
1103 dflags |= demand_flags::DEMAND_RATIO_P;
1104 else
1106 if (scalar_move_insn_p (insn->rtl ()) && m_ta)
1108 dflags |= demand_flags::DEMAND_GE_SEW_P;
1109 m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV
1110 ? get_max_float_sew ()
1111 : get_max_int_sew ();
1113 else
1114 dflags |= demand_flags::DEMAND_SEW_P;
1116 if (!ignore_vlmul_insn_p (insn->rtl ()))
1117 dflags |= demand_flags::DEMAND_LMUL_P;
1120 if (!m_ta)
1121 dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
1122 if (!m_ma)
1123 dflags |= demand_flags::DEMAND_MASK_POLICY_P;
1126 normalize_demand (dflags);
1128 /* Optimize AVL from the vsetvl instruction. */
1129 insn_info *def_insn = extract_single_source (get_avl_def ());
1130 if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
1132 vsetvl_info def_info = vsetvl_info (def_insn);
1133 if ((scalar_move_insn_p (insn->rtl ())
1134 || def_info.get_ratio () == get_ratio ())
1135 && (def_info.has_vlmax_avl () || def_info.has_imm_avl ()))
1137 update_avl (def_info);
1138 if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ())
1139 m_avl_demand = avl_demand_type::non_zero_avl;
1143 /* Determine if dest operand(vl) has been used by non-RVV instructions. */
1144 if (dest_vl)
1146 const hash_set<use_info *> vl_uses
1147 = get_all_real_uses (get_insn (), REGNO (dest_vl));
1148 for (use_info *use : vl_uses)
1150 gcc_assert (use->insn ()->is_real ());
1151 rtx_insn *rinsn = use->insn ()->rtl ();
1152 if (!has_vl_op (rinsn)
1153 || count_regno_occurrences (rinsn, REGNO (dest_vl)) != 1)
1155 m_vl_used_by_non_rvv_insn = true;
1156 break;
1158 rtx avl = ::get_avl (rinsn);
1159 if (!avl || !REG_P (avl) || REGNO (dest_vl) != REGNO (avl))
1161 m_vl_used_by_non_rvv_insn = true;
1162 break;
1167 /* Collect the read vl insn for the fault-only-first rvv loads. */
1168 if (fault_first_load_p (insn->rtl ()))
1170 for (insn_info *i = insn->next_nondebug_insn ();
1171 i->bb () == insn->bb (); i = i->next_nondebug_insn ())
1173 if (find_access (i->defs (), VL_REGNUM))
1174 break;
1175 if (i->rtl () && read_vl_insn_p (i->rtl ()))
1177 m_read_vl_insn = i;
1178 break;
1184 /* Returns the corresponding vsetvl rtx pat. */
1185 rtx get_vsetvl_pat (bool ignore_vl = false) const
1187 rtx avl = get_avl ();
1188 /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s,
1189 set the value of avl to (const_int 0) so that VSETVL PASS will
1190 insert vsetvl correctly.*/
1191 if (!get_avl ())
1192 avl = GEN_INT (0);
1193 rtx sew = gen_int_mode (get_sew (), Pmode);
1194 rtx vlmul = gen_int_mode (get_vlmul (), Pmode);
1195 rtx ta = gen_int_mode (get_ta (), Pmode);
1196 rtx ma = gen_int_mode (get_ma (), Pmode);
1198 if (change_vtype_only_p ())
1199 return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
1200 else if (has_vl () && !ignore_vl)
1201 return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma);
1202 else
1203 return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
1206 /* Return true that the non-AVL operands of THIS will be modified
1207 if we fuse the VL modification from OTHER into THIS. */
1208 bool vl_modify_non_avl_op_p (const vsetvl_info &other) const
1210 /* We don't need to worry about any operands from THIS be
1211 modified by OTHER vsetvl since we OTHER vsetvl doesn't
1212 modify any operand. */
1213 if (!other.has_vl ())
1214 return false;
1216 /* THIS VL operand always preempt OTHER VL operand. */
1217 if (this->has_vl ())
1218 return false;
1220 /* If THIS has non IMM AVL and THIS is AVL compatible with
1221 OTHER, the AVL value of THIS is same as VL value of OTHER. */
1222 if (!this->has_imm_avl ())
1223 return false;
1224 return find_access (this->get_insn ()->uses (), REGNO (other.get_vl ()));
1227 bool operator== (const vsetvl_info &other) const
1229 gcc_assert (!uninit_p () && !other.uninit_p ()
1230 && "Uninitialization should not happen");
1232 if (empty_p ())
1233 return other.empty_p ();
1234 if (unknown_p ())
1235 return other.unknown_p ();
1237 return get_insn () == other.get_insn () && get_bb () == other.get_bb ()
1238 && get_avl () == other.get_avl () && get_vl () == other.get_vl ()
1239 && get_avl_def () == other.get_avl_def ()
1240 && get_sew () == other.get_sew ()
1241 && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta ()
1242 && get_ma () == other.get_ma ()
1243 && get_avl_demand () == other.get_avl_demand ()
1244 && get_sew_lmul_demand () == other.get_sew_lmul_demand ()
1245 && get_policy_demand () == other.get_policy_demand ();
1248 void dump (FILE *file, const char *indent = "") const
1250 if (uninit_p ())
1252 fprintf (file, "UNINITIALIZED.\n");
1253 return;
1255 else if (unknown_p ())
1257 fprintf (file, "UNKNOWN.\n");
1258 return;
1260 else if (empty_p ())
1262 fprintf (file, "EMPTY.\n");
1263 return;
1265 else if (valid_p ())
1266 fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (),
1267 get_bb ()->index (), delete_p () ? " (deleted)" : "");
1268 else
1269 gcc_unreachable ();
1271 fprintf (file, "%sDemand fields:", indent);
1272 if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul)
1273 fprintf (file, " demand_sew_lmul");
1274 else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only)
1275 fprintf (file, " demand_ratio_only");
1276 else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only)
1277 fprintf (file, " demand_sew_only");
1278 else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew)
1279 fprintf (file, " demand_ge_sew");
1280 else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew)
1281 fprintf (file, " demand_ratio_and_ge_sew");
1283 if (m_policy_demand == policy_demand_type::tail_mask_policy)
1284 fprintf (file, " demand_tail_mask_policy");
1285 else if (m_policy_demand == policy_demand_type::tail_policy_only)
1286 fprintf (file, " demand_tail_policy_only");
1287 else if (m_policy_demand == policy_demand_type::mask_policy_only)
1288 fprintf (file, " demand_mask_policy_only");
1290 if (m_avl_demand == avl_demand_type::avl)
1291 fprintf (file, " demand_avl");
1292 else if (m_avl_demand == avl_demand_type::non_zero_avl)
1293 fprintf (file, " demand_non_zero_avl");
1294 fprintf (file, "\n");
1296 fprintf (file, "%sSEW=%d, ", indent, get_sew ());
1297 fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ()));
1298 fprintf (file, "RATIO=%d, ", get_ratio ());
1299 fprintf (file, "MAX_SEW=%d\n", get_max_sew ());
1301 fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ()));
1302 fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ()));
1304 fprintf (file, "%sAVL=", indent);
1305 print_rtl_single (file, get_avl ());
1306 fprintf (file, "%sVL=", indent);
1307 print_rtl_single (file, get_vl ());
1308 if (change_vtype_only_p ())
1309 fprintf (file, "%schange vtype only\n", indent);
1310 if (get_read_vl_insn ())
1311 fprintf (file, "%sread_vl_insn: insn %u\n", indent,
1312 get_read_vl_insn ()->uid ());
1313 if (vl_used_by_non_rvv_insn_p ())
1314 fprintf (file, "%suse_by_non_rvv_insn=true\n", indent);
1318 class vsetvl_block_info
1320 public:
1321 /* The static execute probability of the demand info. */
1322 profile_probability probability;
1324 auto_vec<vsetvl_info> local_infos;
1325 vsetvl_info global_info;
1326 bb_info *bb;
1328 vsetvl_block_info () : bb (nullptr)
1330 local_infos.safe_grow_cleared (0);
1331 global_info.set_empty ();
1333 vsetvl_block_info (const vsetvl_block_info &other)
1334 : probability (other.probability), local_infos (other.local_infos.copy ()),
1335 global_info (other.global_info), bb (other.bb)
1338 vsetvl_info &get_entry_info ()
1340 gcc_assert (!empty_p ());
1341 return local_infos.is_empty () ? global_info : local_infos[0];
1343 vsetvl_info &get_exit_info ()
1345 gcc_assert (!empty_p ());
1346 return local_infos.is_empty () ? global_info
1347 : local_infos[local_infos.length () - 1];
1349 const vsetvl_info &get_entry_info () const
1351 gcc_assert (!empty_p ());
1352 return local_infos.is_empty () ? global_info : local_infos[0];
1354 const vsetvl_info &get_exit_info () const
1356 gcc_assert (!empty_p ());
1357 return local_infos.is_empty () ? global_info
1358 : local_infos[local_infos.length () - 1];
1361 bool empty_p () const { return local_infos.is_empty () && !has_info (); }
1362 bool has_info () const { return !global_info.empty_p (); }
1363 void set_info (const vsetvl_info &info)
1365 gcc_assert (local_infos.is_empty ());
1366 global_info = info;
1367 global_info.set_bb (bb);
1369 void set_empty_info () { global_info.set_empty (); }
1372 /* Demand system is the RVV-based VSETVL info analysis tools wrapper.
1373 It defines compatible rules for SEW/LMUL, POLICY and AVL.
1374 Also, it provides 3 interfaces available_p, compatible_p and
1375 merge for the VSETVL PASS analysis and optimization.
1377 - available_p: Determine whether the next info can get the
1378 available VSETVL status from previous info.
1379 e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16).
1380 Since bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3
1381 demand, the VSETVL instruction in bb 3 can be elided.
1382 available_p (previous, next) is true in such situation.
1383 - compatible_p: Determine whether prev_info is compatible with next_info
1384 so that we can have a new merged info that is available to both of them.
1385 - merge: Merge the stricter demand information from
1386 next_info into prev_info so that prev_info becomes available to
1387 next_info. */
1388 class demand_system
1390 private:
1391 /* predictors. */
1393 inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1394 const vsetvl_info &next ATTRIBUTE_UNUSED)
1396 return true;
1398 inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1399 const vsetvl_info &next ATTRIBUTE_UNUSED)
1401 return false;
1404 /* predictors for sew and lmul */
1406 inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1408 return prev.get_vlmul () == next.get_vlmul ();
1410 inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1412 return prev.get_sew () == next.get_sew ();
1414 inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1416 return lmul_eq_p (prev, next) && sew_eq_p (prev, next);
1418 inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next)
1420 return prev.get_sew () == next.get_sew ()
1421 || (next.get_ta () && prev.get_sew () > next.get_sew ());
1423 inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next)
1425 return prev.get_sew () == next.get_sew ()
1426 || (prev.get_ta () && prev.get_sew () < next.get_sew ());
1428 inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
1429 const vsetvl_info &next)
1431 return prev.get_sew () <= next.get_max_sew ();
1433 inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
1434 const vsetvl_info &next)
1436 return next.get_sew () <= prev.get_max_sew ();
1438 inline bool max_sew_overlap_p (const vsetvl_info &prev,
1439 const vsetvl_info &next)
1441 return !(prev.get_sew () > next.get_max_sew ()
1442 || next.get_sew () > prev.get_max_sew ());
1444 inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1446 return prev.has_same_ratio (next);
1448 inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
1449 const vsetvl_info &next)
1451 return prev.get_ratio () >= (next.get_sew () / MAX_LMUL);
1453 inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
1454 const vsetvl_info &next)
1456 return next.get_ratio () >= (prev.get_sew () / MAX_LMUL);
1458 inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev,
1459 const vsetvl_info &next)
1461 return sew_ge_p (prev, next) && ratio_eq_p (prev, next);
1463 inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
1464 const vsetvl_info &next)
1466 return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next);
1468 inline bool
1469 sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p (
1470 const vsetvl_info &prev, const vsetvl_info &next)
1472 return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
1473 && next_ratio_valid_for_prev_sew_p (prev, next);
1475 inline bool
1476 sew_ge_and_prev_sew_le_next_max_sew_and_ratio_eq_p (
1477 const vsetvl_info &prev, const vsetvl_info &next)
1479 return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
1480 && ratio_eq_p (prev, next);
1482 inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
1483 const vsetvl_info &next)
1485 return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next);
1487 inline bool
1488 max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
1489 const vsetvl_info &next)
1491 if (next_ratio_valid_for_prev_sew_p (prev, next)
1492 && max_sew_overlap_p (prev, next))
1494 if (next.get_sew () < prev.get_sew ()
1495 && (!next.get_ta () || !next.get_ma ()))
1496 return false;
1497 return true;
1499 return false;
1501 inline bool
1502 sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev,
1503 const vsetvl_info &next)
1505 return sew_le_p (prev, next) && ratio_eq_p (prev, next)
1506 && next_sew_le_prev_max_sew_p (prev, next);
1508 inline bool
1509 max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
1510 const vsetvl_info &next)
1512 return prev_ratio_valid_for_next_sew_p (prev, next)
1513 && max_sew_overlap_p (prev, next);
1515 inline bool
1516 sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p (
1517 const vsetvl_info &prev, const vsetvl_info &next)
1519 return sew_le_p (prev, next) && prev_ratio_valid_for_next_sew_p (prev, next)
1520 && next_sew_le_prev_max_sew_p (prev, next);
1522 inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev,
1523 const vsetvl_info &next)
1525 return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next);
1528 /* predictors for tail and mask policy */
1530 inline bool tail_policy_eq_p (const vsetvl_info &prev,
1531 const vsetvl_info &next)
1533 return prev.get_ta () == next.get_ta ();
1535 inline bool mask_policy_eq_p (const vsetvl_info &prev,
1536 const vsetvl_info &next)
1538 return prev.get_ma () == next.get_ma ();
1540 inline bool tail_mask_policy_eq_p (const vsetvl_info &prev,
1541 const vsetvl_info &next)
1543 return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next);
1546 /* predictors for avl */
1548 inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
1550 if (info.has_vl ())
1552 if (find_access (i->defs (), REGNO (info.get_vl ())))
1553 return true;
1554 if (find_access (i->uses (), REGNO (info.get_vl ())))
1556 resource_info resource = full_register (REGNO (info.get_vl ()));
1557 def_lookup dl1 = crtl->ssa->find_def (resource, i);
1558 def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
1559 if (dl1.matching_set () || dl2.matching_set ())
1560 return true;
1561 /* If their VLs are coming from same def, we still want to fuse
1562 their VSETVL demand info to gain better performance. */
1563 return dl1.prev_def (i) != dl2.prev_def (i);
1566 return false;
1568 inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
1570 return info.has_nonvlmax_reg_avl ()
1571 && find_access (i->defs (), REGNO (info.get_avl ()));
1574 inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn,
1575 unsigned regno)
1577 gcc_assert (prev_insn->compare_with (curr_insn) < 0);
1578 for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn;
1579 i = i->prev_nondebug_insn ())
1581 // no def of regno
1582 if (find_access (i->defs (), regno))
1583 return true;
1585 return false;
1588 inline bool reg_avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
1590 if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ())
1591 return false;
1593 if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ()))
1594 return true;
1596 if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ()))
1597 return false;
1599 insn_info *prev_insn = prev.get_insn ();
1600 if (prev.get_bb () != prev_insn->bb ())
1601 prev_insn = prev.get_bb ()->end_insn ();
1603 insn_info *next_insn = next.get_insn ();
1604 if (next.get_bb () != next_insn->bb ())
1605 next_insn = next.get_bb ()->end_insn ();
1607 return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
1610 inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
1612 gcc_assert (prev.valid_p () && next.valid_p ());
1614 if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
1615 return false;
1617 if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def ()
1618 && next.get_avl_def ()->insn () == prev.get_insn ())
1619 return true;
1621 if (prev.get_read_vl_insn ())
1623 if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ())
1624 return false;
1625 insn_info *avl_def_insn = extract_single_source (next.get_avl_def ());
1626 return avl_def_insn == prev.get_read_vl_insn ();
1629 if (prev == next && prev.has_nonvlmax_reg_avl ())
1631 insn_info *insn = prev.get_insn ();
1632 bb_info *bb = insn->bb ();
1633 for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
1634 i = i->next_nondebug_insn ())
1635 if (find_access (i->defs (), REGNO (prev.get_avl ())))
1636 return false;
1639 if (prev.has_vlmax_avl () && next.has_vlmax_avl ())
1640 return true;
1641 else if (prev.has_imm_avl () && next.has_imm_avl ())
1642 return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ());
1643 else if (prev.has_vl () && next.has_nonvlmax_reg_avl ()
1644 && REGNO (prev.get_vl ()) == REGNO (next.get_avl ()))
1646 insn_info *prev_insn = prev.insn_inside_bb_p ()
1647 ? prev.get_insn ()
1648 : prev.get_bb ()->end_insn ();
1650 insn_info *next_insn = next.insn_inside_bb_p ()
1651 ? next.get_insn ()
1652 : next.get_bb ()->end_insn ();
1653 return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
1655 else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ())
1656 return reg_avl_equal_p (prev, next);
1658 return false;
1660 inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev,
1661 const vsetvl_info &next)
1663 return avl_equal_p (prev, next) || prev.has_non_zero_avl ();
1666 inline bool can_use_next_avl_p (const vsetvl_info &prev,
1667 const vsetvl_info &next)
1669 /* Forbid the AVL/VL propagation if VL of NEXT is used
1670 by non-RVV instructions. This is because:
1672 bb 2:
1673 PREV: scalar move (no AVL)
1674 bb 3:
1675 NEXT: vsetvl a5(VL), a4(AVL) ...
1676 branch a5,zero
1678 Since user vsetvl instruction is no side effect instruction
1679 which should be placed in the correct and optimal location
1680 of the program by the previous PASS, it is unreasonable that
1681 VSETVL PASS tries to move it to another places if it used by
1682 non-RVV instructions.
1684 Note: We only forbid the cases that VL is used by the following
1685 non-RVV instructions which will cause issues. We don't forbid
1686 other cases since it won't cause correctness issues and we still
1687 more demand info are fused backward. The later LCM algorithm
1688 should know the optimal location of the vsetvl. */
1689 if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
1690 return false;
1692 if (!next.has_nonvlmax_reg_avl () && !next.has_vl ())
1693 return true;
1695 insn_info *prev_insn = prev.get_insn ();
1696 if (prev.get_bb () != prev_insn->bb ())
1697 prev_insn = prev.get_bb ()->end_insn ();
1699 insn_info *next_insn = next.get_insn ();
1700 if (next.get_bb () != next_insn->bb ())
1701 next_insn = next.get_bb ()->end_insn ();
1703 return avl_vl_unmodified_between_p (prev_insn, next_insn, next);
1706 inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p (
1707 const vsetvl_info &prev, const vsetvl_info &next)
1709 return avl_equal_p (prev, next)
1710 || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next));
1713 /* modifiers */
1715 inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1716 const vsetvl_info &next ATTRIBUTE_UNUSED)
1719 /* modifiers for sew and lmul */
1721 inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next)
1723 prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ()));
1725 inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next)
1727 prev.set_sew (next.get_sew ());
1728 use_min_of_max_sew (prev, next);
1730 inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
1732 int max_sew = MAX (prev.get_sew (), next.get_sew ());
1733 prev.set_sew (max_sew);
1734 prev.set_ratio (calculate_ratio (prev.get_sew (), prev.get_vlmul ()));
1735 use_min_of_max_sew (prev, next);
1737 inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
1739 use_next_sew (prev, next);
1740 prev.set_vlmul (next.get_vlmul ());
1741 prev.set_ratio (next.get_ratio ());
1743 inline void use_next_sew_with_prev_ratio (vsetvl_info &prev,
1744 const vsetvl_info &next)
1746 use_next_sew (prev, next);
1747 prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ()));
1749 inline void modify_lmul_with_next_ratio (vsetvl_info &prev,
1750 const vsetvl_info &next)
1752 prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
1753 prev.set_ratio (next.get_ratio ());
1756 inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
1757 const vsetvl_info &next)
1759 prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
1760 use_max_sew (prev, next);
1761 prev.set_ratio (next.get_ratio ());
1764 inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev,
1765 const vsetvl_info &next)
1767 int max_sew = MAX (prev.get_sew (), next.get_sew ());
1768 prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ()));
1769 prev.set_sew (max_sew);
1772 /* modifiers for tail and mask policy */
1774 inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next)
1776 if (!next.get_ta ())
1777 prev.set_ta (next.get_ta ());
1779 inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
1781 if (!next.get_ma ())
1782 prev.set_ma (next.get_ma ());
1784 inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
1786 use_tail_policy (prev, next);
1787 use_mask_policy (prev, next);
1790 /* modifiers for avl */
1792 inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next)
1794 gcc_assert (can_use_next_avl_p (prev, next));
1795 prev.update_avl (next);
1798 inline void use_next_avl_when_not_equal (vsetvl_info &prev,
1799 const vsetvl_info &next)
1801 if (avl_equal_p (prev, next))
1802 return;
1803 gcc_assert (next.has_non_zero_avl ());
1804 use_next_avl (prev, next);
1807 public:
1808 /* Can we move vsetvl info between prev_insn and next_insn safe? */
1809 bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn,
1810 const vsetvl_info &info,
1811 bool ignore_vl = false)
1813 gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ())
1814 || (info.has_nonvlmax_reg_avl () || info.has_vl ()));
1816 gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ());
1817 if (prev_insn->bb () == next_insn->bb ()
1818 && prev_insn->compare_with (next_insn) < 0)
1820 for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
1821 i = i->prev_nondebug_insn ())
1823 // no def and use of vl
1824 if (!ignore_vl && modify_or_use_vl_p (i, info))
1825 return false;
1827 // no def of avl
1828 if (modify_avl_p (i, info))
1829 return false;
1831 return true;
1833 else
1835 basic_block prev_cfg_bb = prev_insn->bb ()->cfg_bb ();
1836 if (!ignore_vl && info.has_vl ())
1838 bitmap live_out = df_get_live_out (prev_cfg_bb);
1839 if (bitmap_bit_p (live_out, REGNO (info.get_vl ())))
1840 return false;
1843 /* Find set_info at location of PREV_INSN and NEXT_INSN, Return
1844 false if those 2 set_info are different.
1846 PREV_INSN --- multiple nested blocks --- NEXT_INSN.
1848 Return false if there is any modifications of AVL inside those
1849 multiple nested blocks. */
1850 if (info.has_nonvlmax_reg_avl ())
1852 resource_info resource = full_register (REGNO (info.get_avl ()));
1853 def_lookup dl1 = crtl->ssa->find_def (resource, prev_insn);
1854 def_lookup dl2 = crtl->ssa->find_def (resource, next_insn);
1855 if (dl2.matching_set ())
1856 return false;
1858 auto is_phi_or_real
1859 = [&] (insn_info *h) { return h->is_real () || h->is_phi (); };
1861 def_info *def1 = dl1.matching_set_or_last_def_of_prev_group ();
1862 def_info *def2 = dl2.prev_def (next_insn);
1863 set_info *set1 = safe_dyn_cast<set_info *> (def1);
1864 set_info *set2 = safe_dyn_cast<set_info *> (def2);
1865 if (!set1 || !set2)
1866 return false;
1868 auto is_same_ultimate_def = [&] (set_info *s1, set_info *s2) {
1869 return s1->insn ()->is_phi () && s2->insn ()->is_phi ()
1870 && look_through_degenerate_phi (s1)
1871 == look_through_degenerate_phi (s2);
1874 if (set1 != set2 && !is_same_ultimate_def (set1, set2))
1876 if (!is_phi_or_real (set1->insn ())
1877 || !is_phi_or_real (set2->insn ()))
1878 return false;
1880 if (set1->insn ()->is_real () && set2->insn ()->is_phi ())
1882 hash_set<set_info *> sets
1883 = get_all_sets (set2, true, false, true);
1884 if (!sets.contains (set1))
1885 return false;
1887 else
1889 insn_info *def_insn1 = extract_single_source (set1);
1890 insn_info *def_insn2 = extract_single_source (set2);
1891 if (!def_insn1 || !def_insn2 || def_insn1 != def_insn2)
1892 return false;
1897 for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn ();
1898 i = i->prev_nondebug_insn ())
1900 // no def and use of vl
1901 if (!ignore_vl && modify_or_use_vl_p (i, info))
1902 return false;
1904 // no def of avl
1905 if (modify_avl_p (i, info))
1906 return false;
1909 for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn;
1910 i = i->prev_nondebug_insn ())
1912 // no def mad use of vl
1913 if (!ignore_vl && modify_or_use_vl_p (i, info))
1914 return false;
1916 // no def of avl
1917 if (modify_avl_p (i, info))
1918 return false;
1921 return true;
1924 bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1926 gcc_assert (prev.valid_p () && next.valid_p ());
1927 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1928 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1929 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1930 AVAILABLE_P, FUSE) \
1931 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1932 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1933 return COMPATIBLE_P (prev, next);
1935 #include "riscv-vsetvl.def"
1937 gcc_unreachable ();
1940 bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1942 gcc_assert (prev.valid_p () && next.valid_p ());
1943 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1944 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1945 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1946 AVAILABLE_P, FUSE) \
1947 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1948 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1949 return AVAILABLE_P (prev, next);
1951 #include "riscv-vsetvl.def"
1953 gcc_unreachable ();
1956 void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
1958 gcc_assert (prev.valid_p () && next.valid_p ());
1959 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1960 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1961 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1962 AVAILABLE_P, FUSE) \
1963 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1964 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1966 gcc_assert (COMPATIBLE_P (prev, next)); \
1967 FUSE (prev, next); \
1968 prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \
1969 return; \
1972 #include "riscv-vsetvl.def"
1974 gcc_unreachable ();
1977 bool policy_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1979 gcc_assert (prev.valid_p () && next.valid_p ());
1980 policy_demand_type prev_flags = prev.get_policy_demand ();
1981 policy_demand_type next_flags = next.get_policy_demand ();
1982 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1983 AVAILABLE_P, FUSE) \
1984 if (prev_flags == policy_demand_type::PREV_FLAGS \
1985 && next_flags == policy_demand_type::NEXT_FLAGS) \
1986 return COMPATIBLE_P (prev, next);
1988 #include "riscv-vsetvl.def"
1990 gcc_unreachable ();
1993 bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1995 gcc_assert (prev.valid_p () && next.valid_p ());
1996 policy_demand_type prev_flags = prev.get_policy_demand ();
1997 policy_demand_type next_flags = next.get_policy_demand ();
1998 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1999 AVAILABLE_P, FUSE) \
2000 if (prev_flags == policy_demand_type::PREV_FLAGS \
2001 && next_flags == policy_demand_type::NEXT_FLAGS) \
2002 return AVAILABLE_P (prev, next);
2004 #include "riscv-vsetvl.def"
2006 gcc_unreachable ();
2009 void merge_policy (vsetvl_info &prev, const vsetvl_info &next)
2011 gcc_assert (prev.valid_p () && next.valid_p ());
2012 policy_demand_type prev_flags = prev.get_policy_demand ();
2013 policy_demand_type next_flags = next.get_policy_demand ();
2014 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
2015 AVAILABLE_P, FUSE) \
2016 if (prev_flags == policy_demand_type::PREV_FLAGS \
2017 && next_flags == policy_demand_type::NEXT_FLAGS) \
2019 gcc_assert (COMPATIBLE_P (prev, next)); \
2020 FUSE (prev, next); \
2021 prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \
2022 return; \
2025 #include "riscv-vsetvl.def"
2027 gcc_unreachable ();
2030 bool vl_not_in_conflict_p (const vsetvl_info &prev, const vsetvl_info &next)
2032 /* We don't fuse this following case:
2034 li a5, -1
2035 vmv.s.x v0, a5 -- PREV
2036 vsetvli a5, ... -- NEXT
2038 Don't fuse NEXT into PREV.
2040 return !prev.vl_modify_non_avl_op_p (next)
2041 && !next.vl_modify_non_avl_op_p (prev);
2044 bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
2046 gcc_assert (prev.valid_p () && next.valid_p ());
2047 avl_demand_type prev_flags = prev.get_avl_demand ();
2048 avl_demand_type next_flags = next.get_avl_demand ();
2049 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
2050 AVAILABLE_P, FUSE) \
2051 if (prev_flags == avl_demand_type::PREV_FLAGS \
2052 && next_flags == avl_demand_type::NEXT_FLAGS) \
2053 return COMPATIBLE_P (prev, next);
2055 #include "riscv-vsetvl.def"
2057 gcc_unreachable ();
2060 bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next)
2062 gcc_assert (prev.valid_p () && next.valid_p ());
2063 avl_demand_type prev_flags = prev.get_avl_demand ();
2064 avl_demand_type next_flags = next.get_avl_demand ();
2065 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
2066 AVAILABLE_P, FUSE) \
2067 if (prev_flags == avl_demand_type::PREV_FLAGS \
2068 && next_flags == avl_demand_type::NEXT_FLAGS) \
2069 return AVAILABLE_P (prev, next);
2071 #include "riscv-vsetvl.def"
2073 gcc_unreachable ();
2076 void merge_avl (vsetvl_info &prev, const vsetvl_info &next)
2078 gcc_assert (prev.valid_p () && next.valid_p ());
2079 avl_demand_type prev_flags = prev.get_avl_demand ();
2080 avl_demand_type next_flags = next.get_avl_demand ();
2081 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
2082 AVAILABLE_P, FUSE) \
2083 if (prev_flags == avl_demand_type::PREV_FLAGS \
2084 && next_flags == avl_demand_type::NEXT_FLAGS) \
2086 gcc_assert (COMPATIBLE_P (prev, next)); \
2087 FUSE (prev, next); \
2088 prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \
2089 return; \
2092 #include "riscv-vsetvl.def"
2094 gcc_unreachable ();
2097 bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
2099 bool compatible_p = sew_lmul_compatible_p (prev, next)
2100 && policy_compatible_p (prev, next)
2101 && avl_compatible_p (prev, next)
2102 && vl_not_in_conflict_p (prev, next);
2103 return compatible_p;
2106 bool available_p (const vsetvl_info &prev, const vsetvl_info &next)
2108 bool available_p = sew_lmul_available_p (prev, next)
2109 && policy_available_p (prev, next)
2110 && avl_available_p (prev, next)
2111 && vl_not_in_conflict_p (prev, next);
2112 gcc_assert (!available_p || compatible_p (prev, next));
2113 return available_p;
2116 void merge (vsetvl_info &prev, const vsetvl_info &next)
2118 gcc_assert (compatible_p (prev, next));
2119 merge_sew_lmul (prev, next);
2120 merge_policy (prev, next);
2121 merge_avl (prev, next);
2122 gcc_assert (available_p (prev, next));
2127 class pre_vsetvl
2129 private:
2130 demand_system m_dem;
2131 auto_vec<vsetvl_block_info> m_vector_block_infos;
2133 /* data for avl reaching definition. */
2134 sbitmap *m_reg_def_loc;
2136 /* data for vsetvl info reaching definition. */
2137 vsetvl_info m_unknown_info;
2138 auto_vec<vsetvl_info *> m_vsetvl_def_exprs;
2139 sbitmap *m_vsetvl_def_in;
2140 sbitmap *m_vsetvl_def_out;
2142 /* data for lcm */
2143 auto_vec<vsetvl_info *> m_exprs;
2144 sbitmap *m_avloc;
2145 sbitmap *m_avin;
2146 sbitmap *m_avout;
2147 sbitmap *m_kill;
2148 sbitmap *m_antloc;
2149 sbitmap *m_transp;
2150 sbitmap *m_insert;
2151 sbitmap *m_del;
2152 struct edge_list *m_edges;
2154 auto_vec<vsetvl_info> m_delete_list;
2156 vsetvl_block_info &get_block_info (const bb_info *bb)
2158 return m_vector_block_infos[bb->index ()];
2160 const vsetvl_block_info &get_block_info (const basic_block bb) const
2162 return m_vector_block_infos[bb->index];
2165 vsetvl_block_info &get_block_info (const basic_block bb)
2167 return m_vector_block_infos[bb->index];
2170 void add_expr (auto_vec<vsetvl_info *> &m_exprs, vsetvl_info &info)
2172 for (vsetvl_info *item : m_exprs)
2174 if (*item == info)
2175 return;
2177 m_exprs.safe_push (&info);
2180 unsigned get_expr_index (auto_vec<vsetvl_info *> &m_exprs,
2181 const vsetvl_info &info)
2183 for (size_t i = 0; i < m_exprs.length (); i += 1)
2185 if (*m_exprs[i] == info)
2186 return i;
2188 gcc_unreachable ();
2191 bool anticipated_exp_p (const vsetvl_info &header_info)
2193 if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ())
2194 return true;
2196 bb_info *bb = header_info.get_bb ();
2197 insn_info *prev_insn = bb->head_insn ();
2198 insn_info *next_insn = header_info.insn_inside_bb_p ()
2199 ? header_info.get_insn ()
2200 : header_info.get_bb ()->end_insn ();
2202 return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn,
2203 header_info);
2206 bool available_exp_p (const vsetvl_info &prev_info,
2207 const vsetvl_info &next_info)
2209 return m_dem.available_p (prev_info, next_info);
2212 void compute_probabilities ()
2214 edge e;
2215 edge_iterator ei;
2217 for (const bb_info *bb : crtl->ssa->bbs ())
2219 basic_block cfg_bb = bb->cfg_bb ();
2220 auto &curr_prob = get_block_info (cfg_bb).probability;
2222 /* GCC assume entry block (bb 0) are always so
2223 executed so set its probability as "always". */
2224 if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2225 curr_prob = profile_probability::always ();
2226 /* Exit block (bb 1) is the block we don't need to process. */
2227 if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2228 continue;
2230 gcc_assert (curr_prob.initialized_p ());
2231 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2233 auto &new_prob = get_block_info (e->dest).probability;
2234 /* Normally, the edge probability should be initialized.
2235 However, some special testing code which is written in
2236 GIMPLE IR style force the edge probability uninitialized,
2237 we conservatively set it as never so that it will not
2238 affect PRE (Phase 3 && Phase 4). */
2239 if (!e->probability.initialized_p ())
2240 new_prob = profile_probability::never ();
2241 else if (!new_prob.initialized_p ())
2242 new_prob = curr_prob * e->probability;
2243 else if (new_prob == profile_probability::always ())
2244 continue;
2245 else
2246 new_prob += curr_prob * e->probability;
2251 void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info)
2253 rtx pat = info.get_vsetvl_pat ();
2254 rtx_insn *rinsn = info.get_insn ()->rtl ();
2256 if (emit_type == EMIT_DIRECT)
2258 emit_insn (pat);
2259 if (dump_file)
2261 fprintf (dump_file, " Insert vsetvl insn %d:\n",
2262 INSN_UID (get_last_insn ()));
2263 print_rtl_single (dump_file, get_last_insn ());
2266 else if (emit_type == EMIT_BEFORE)
2268 emit_insn_before (pat, rinsn);
2269 if (dump_file)
2271 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
2272 INSN_UID (rinsn));
2273 print_rtl_single (dump_file, PREV_INSN (rinsn));
2276 else
2278 emit_insn_after (pat, rinsn);
2279 if (dump_file)
2281 fprintf (dump_file, " Insert vsetvl insn after insn %d:\n",
2282 INSN_UID (rinsn));
2283 print_rtl_single (dump_file, NEXT_INSN (rinsn));
2288 void change_vsetvl_insn (const vsetvl_info &info)
2290 rtx_insn *rinsn = info.get_insn ()->rtl ();
2291 rtx new_pat = info.get_vsetvl_pat ();
2293 if (dump_file)
2295 fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn));
2296 print_rtl_single (dump_file, rinsn);
2299 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
2301 if (dump_file)
2303 fprintf (dump_file, "\n to:\n");
2304 print_rtl_single (dump_file, rinsn);
2308 void remove_vsetvl_insn (rtx_insn *rinsn)
2310 if (dump_file)
2312 fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn));
2313 print_rtl_single (dump_file, rinsn);
2315 if (in_sequence_p ())
2316 remove_insn (rinsn);
2317 else
2318 delete_insn (rinsn);
2321 bool successors_probability_equal_p (const basic_block cfg_bb) const
2323 edge e;
2324 edge_iterator ei;
2325 profile_probability prob = profile_probability::uninitialized ();
2326 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2328 if (prob == profile_probability::uninitialized ())
2329 prob = m_vector_block_infos[e->dest->index].probability;
2330 else if (prob == m_vector_block_infos[e->dest->index].probability)
2331 continue;
2332 else
2333 /* We pick the highest probability among those incompatible VSETVL
2334 infos. When all incompatible VSETVL infos have same probability, we
2335 don't pick any of them. */
2336 return false;
2338 return true;
2341 bool has_compatible_reaching_vsetvl_p (vsetvl_info info)
2343 unsigned int index;
2344 sbitmap_iterator sbi;
2345 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[info.get_bb ()->index ()], 0,
2346 index, sbi)
2348 const auto prev_info = *m_vsetvl_def_exprs[index];
2349 if (!prev_info.valid_p ())
2350 continue;
2351 if (m_dem.compatible_p (prev_info, info))
2352 return true;
2354 return false;
2357 bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
2359 gcc_assert (
2360 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
2362 unsigned expr_index;
2363 sbitmap_iterator sbi;
2364 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()], 0,
2365 expr_index, sbi)
2367 const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
2368 if (!prev_info.valid_p ()
2369 || !m_dem.avl_available_p (prev_info, curr_info)
2370 || prev_info.get_ratio () != curr_info.get_ratio ())
2371 return false;
2374 return true;
2377 public:
2378 pre_vsetvl ()
2379 : m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr),
2380 m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr),
2381 m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr)
2383 /* Initialization of RTL_SSA. */
2384 calculate_dominance_info (CDI_DOMINATORS);
2385 loop_optimizer_init (LOOPS_NORMAL);
2386 /* Create FAKE edges for infinite loops. */
2387 connect_infinite_loops_to_exit ();
2388 df_analyze ();
2389 crtl->ssa = new function_info (cfun);
2390 m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2391 compute_probabilities ();
2392 m_unknown_info.set_unknown ();
2395 void finish ()
2397 free_dominance_info (CDI_DOMINATORS);
2398 loop_optimizer_finalize ();
2399 if (crtl->ssa->perform_pending_updates ())
2400 cleanup_cfg (0);
2401 delete crtl->ssa;
2402 crtl->ssa = nullptr;
2404 if (m_reg_def_loc)
2405 sbitmap_vector_free (m_reg_def_loc);
2407 if (m_vsetvl_def_in)
2408 sbitmap_vector_free (m_vsetvl_def_in);
2409 if (m_vsetvl_def_out)
2410 sbitmap_vector_free (m_vsetvl_def_out);
2412 if (m_avloc)
2413 sbitmap_vector_free (m_avloc);
2414 if (m_kill)
2415 sbitmap_vector_free (m_kill);
2416 if (m_antloc)
2417 sbitmap_vector_free (m_antloc);
2418 if (m_transp)
2419 sbitmap_vector_free (m_transp);
2420 if (m_insert)
2421 sbitmap_vector_free (m_insert);
2422 if (m_del)
2423 sbitmap_vector_free (m_del);
2424 if (m_avin)
2425 sbitmap_vector_free (m_avin);
2426 if (m_avout)
2427 sbitmap_vector_free (m_avout);
2429 if (m_edges)
2430 free_edge_list (m_edges);
2433 void compute_vsetvl_def_data ();
2434 void compute_transparent (const bb_info *);
2435 void compute_lcm_local_properties ();
2437 void fuse_local_vsetvl_info ();
2438 bool earliest_fuse_vsetvl_info (int iter);
2439 void pre_global_vsetvl_info ();
2440 void emit_vsetvl ();
2441 void cleanup ();
2442 void remove_avl_operand ();
2443 void remove_unused_dest_operand ();
2444 void remove_vsetvl_pre_insns ();
2446 void dump (FILE *file, const char *title) const
2448 fprintf (file, "\nVSETVL infos after %s\n\n", title);
2449 for (const bb_info *bb : crtl->ssa->bbs ())
2451 const auto &block_info = m_vector_block_infos[bb->index ()];
2452 fprintf (file, " bb %d:\n", bb->index ());
2453 fprintf (file, " probability: ");
2454 block_info.probability.dump (file);
2455 fprintf (file, "\n");
2456 if (!block_info.empty_p ())
2458 fprintf (file, " Header vsetvl info:");
2459 block_info.get_entry_info ().dump (file, " ");
2460 fprintf (file, " Footer vsetvl info:");
2461 block_info.get_exit_info ().dump (file, " ");
2462 for (const auto &info : block_info.local_infos)
2464 fprintf (file,
2465 " insn %d vsetvl info:", info.get_insn ()->uid ());
2466 info.dump (file, " ");
2473 void
2474 pre_vsetvl::compute_vsetvl_def_data ()
2476 m_vsetvl_def_exprs.truncate (0);
2477 add_expr (m_vsetvl_def_exprs, m_unknown_info);
2478 for (const bb_info *bb : crtl->ssa->bbs ())
2480 vsetvl_block_info &block_info = get_block_info (bb);
2481 if (block_info.empty_p ())
2482 continue;
2483 vsetvl_info &footer_info = block_info.get_exit_info ();
2484 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2485 add_expr (m_vsetvl_def_exprs, footer_info);
2488 if (m_vsetvl_def_in)
2489 sbitmap_vector_free (m_vsetvl_def_in);
2490 if (m_vsetvl_def_out)
2491 sbitmap_vector_free (m_vsetvl_def_out);
2493 sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2494 m_vsetvl_def_exprs.length ());
2495 sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2496 m_vsetvl_def_exprs.length ());
2498 m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2499 m_vsetvl_def_exprs.length ());
2500 m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2501 m_vsetvl_def_exprs.length ());
2503 bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun));
2504 bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun));
2505 bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun));
2507 for (const bb_info *bb : crtl->ssa->bbs ())
2509 vsetvl_block_info &block_info = get_block_info (bb);
2510 if (block_info.empty_p ())
2512 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1)
2514 auto *info = m_vsetvl_def_exprs[i];
2515 if (info->has_nonvlmax_reg_avl ()
2516 && bitmap_bit_p (m_reg_def_loc[bb->index ()],
2517 REGNO (info->get_avl ())))
2519 bitmap_set_bit (m_kill[bb->index ()], i);
2520 bitmap_set_bit (def_loc[bb->index ()],
2521 get_expr_index (m_vsetvl_def_exprs,
2522 m_unknown_info));
2525 continue;
2528 vsetvl_info &footer_info = block_info.get_exit_info ();
2529 bitmap_ones (m_kill[bb->index ()]);
2530 bitmap_set_bit (def_loc[bb->index ()],
2531 get_expr_index (m_vsetvl_def_exprs, footer_info));
2534 /* Set the def_out of the ENTRY basic block to m_unknown_info expr. */
2535 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2536 bitmap_set_bit (m_vsetvl_def_out[entry->index],
2537 get_expr_index (m_vsetvl_def_exprs, m_unknown_info));
2539 compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in,
2540 m_vsetvl_def_out);
2542 if (dump_file && (dump_flags & TDF_DETAILS))
2544 fprintf (dump_file,
2545 "\n Compute vsetvl info reaching definition data:\n\n");
2546 fprintf (dump_file, " Expression List (%d):\n",
2547 m_vsetvl_def_exprs.length ());
2548 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++)
2550 const auto &info = *m_vsetvl_def_exprs[i];
2551 fprintf (dump_file, " Expr[%u]: ", i);
2552 info.dump (dump_file, " ");
2554 fprintf (dump_file, "\n bitmap data:\n");
2555 for (const bb_info *bb : crtl->ssa->bbs ())
2557 unsigned int i = bb->index ();
2558 fprintf (dump_file, " BB %u:\n", i);
2559 fprintf (dump_file, " def_loc: ");
2560 dump_bitmap_file (dump_file, def_loc[i]);
2561 fprintf (dump_file, " kill: ");
2562 dump_bitmap_file (dump_file, m_kill[i]);
2563 fprintf (dump_file, " vsetvl_def_in: ");
2564 dump_bitmap_file (dump_file, m_vsetvl_def_in[i]);
2565 fprintf (dump_file, " vsetvl_def_out: ");
2566 dump_bitmap_file (dump_file, m_vsetvl_def_out[i]);
2570 sbitmap_vector_free (def_loc);
2571 sbitmap_vector_free (m_kill);
2574 /* Subroutine of compute_lcm_local_properties which Compute local transparent
2575 BB. Note that the compile time is very sensitive to compute_transparent and
2576 compute_lcm_local_properties, any change of these 2 functions should be
2577 aware of the compile time changing of the program which has a large number of
2578 blocks, e.g SPEC 2017 wrf.
2580 Current compile time profile of SPEC 2017 wrf:
2582 1. scheduling - 27%
2583 2. machine dep reorg (VSETVL PASS) - 18%
2585 VSETVL pass should not spend more time than scheduling in compilation. */
2586 void
2587 pre_vsetvl::compute_transparent (const bb_info *bb)
2589 int num_exprs = m_exprs.length ();
2590 unsigned bb_index = bb->index ();
2591 for (int i = 0; i < num_exprs; i++)
2593 auto *info = m_exprs[i];
2594 if (info->has_nonvlmax_reg_avl ()
2595 && bitmap_bit_p (m_reg_def_loc[bb_index], REGNO (info->get_avl ())))
2596 bitmap_clear_bit (m_transp[bb_index], i);
2597 else if (info->has_vl ()
2598 && bitmap_bit_p (m_reg_def_loc[bb_index],
2599 REGNO (info->get_vl ())))
2600 bitmap_clear_bit (m_transp[bb_index], i);
2604 /* Compute the local properties of each recorded expression.
2606 Local properties are those that are defined by the block, irrespective of
2607 other blocks.
2609 An expression is transparent in a block if its operands are not modified
2610 in the block.
2612 An expression is computed (locally available) in a block if it is computed
2613 at least once and expression would contain the same value if the
2614 computation was moved to the end of the block.
2616 An expression is locally anticipatable in a block if it is computed at
2617 least once and expression would contain the same value if the computation
2618 was moved to the beginning of the block. */
2619 void
2620 pre_vsetvl::compute_lcm_local_properties ()
2622 m_exprs.truncate (0);
2623 for (const bb_info *bb : crtl->ssa->bbs ())
2625 vsetvl_block_info &block_info = get_block_info (bb);
2626 if (block_info.empty_p ())
2627 continue;
2628 vsetvl_info &header_info = block_info.get_entry_info ();
2629 vsetvl_info &footer_info = block_info.get_exit_info ();
2630 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2631 if (header_info.valid_p ())
2632 add_expr (m_exprs, header_info);
2633 if (footer_info.valid_p ())
2634 add_expr (m_exprs, footer_info);
2637 int num_exprs = m_exprs.length ();
2638 if (m_avloc)
2639 sbitmap_vector_free (m_avloc);
2640 if (m_kill)
2641 sbitmap_vector_free (m_kill);
2642 if (m_antloc)
2643 sbitmap_vector_free (m_antloc);
2644 if (m_transp)
2645 sbitmap_vector_free (m_transp);
2646 if (m_avin)
2647 sbitmap_vector_free (m_avin);
2648 if (m_avout)
2649 sbitmap_vector_free (m_avout);
2651 m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2652 m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2653 m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2654 m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2655 m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2656 m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2658 bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
2659 bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
2660 bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun));
2662 /* - If T is locally available at the end of a block, then T' must be
2663 available at the end of the same block. Since some optimization has
2664 occurred earlier, T' might not be locally available, however, it must
2665 have been previously computed on all paths. As a formula, T at AVLOC(B)
2666 implies that T' at AVOUT(B).
2667 An "available occurrence" is one that is the last occurrence in the
2668 basic block and the operands are not modified by following statements in
2669 the basic block [including this insn].
2671 - If T is locally anticipated at the beginning of a block, then either
2672 T', is locally anticipated or it is already available from previous
2673 blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
2674 ANTLOC(B) at AVIN(B).
2675 An "anticipatable occurrence" is one that is the first occurrence in the
2676 basic block, the operands are not modified in the basic block prior
2677 to the occurrence and the output is not used between the start of
2678 the block and the occurrence. */
2679 for (const bb_info *bb : crtl->ssa->bbs ())
2681 unsigned bb_index = bb->index ();
2682 vsetvl_block_info &block_info = get_block_info (bb);
2684 /* Compute m_transp */
2685 if (block_info.empty_p ())
2686 compute_transparent (bb);
2687 else
2689 bitmap_clear (m_transp[bb_index]);
2690 vsetvl_info &header_info = block_info.get_entry_info ();
2691 vsetvl_info &footer_info = block_info.get_exit_info ();
2693 if (header_info.valid_p () && anticipated_exp_p (header_info))
2694 bitmap_set_bit (m_antloc[bb_index],
2695 get_expr_index (m_exprs, header_info));
2697 if (footer_info.valid_p ())
2698 for (int i = 0; i < num_exprs; i += 1)
2700 const vsetvl_info &info = *m_exprs[i];
2701 if (!info.valid_p ())
2702 continue;
2703 if (available_exp_p (footer_info, info))
2704 bitmap_set_bit (m_avloc[bb_index], i);
2708 if (invalid_opt_bb_p (bb->cfg_bb ()))
2710 bitmap_clear (m_antloc[bb_index]);
2711 bitmap_clear (m_transp[bb_index]);
2714 /* Compute ae_kill for each basic block using:
2716 ~(TRANSP | COMP)
2718 bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
2719 bitmap_not (m_kill[bb_index], m_kill[bb_index]);
2723 void
2724 pre_vsetvl::fuse_local_vsetvl_info ()
2726 m_reg_def_loc
2727 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1);
2728 bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun));
2729 bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]);
2731 for (bb_info *bb : crtl->ssa->bbs ())
2733 auto &block_info = get_block_info (bb);
2734 block_info.bb = bb;
2735 if (dump_file && (dump_flags & TDF_DETAILS))
2737 fprintf (dump_file, " Try fuse basic block %d\n", bb->index ());
2739 auto_vec<vsetvl_info> infos;
2740 for (insn_info *insn : bb->real_nondebug_insns ())
2742 vsetvl_info curr_info = vsetvl_info (insn);
2743 if (curr_info.valid_p () || curr_info.unknown_p ())
2744 infos.safe_push (curr_info);
2746 /* Collecting GP registers modified by the current bb. */
2747 if (insn->is_real ())
2748 for (def_info *def : insn->defs ())
2749 if (def->is_reg () && GP_REG_P (def->regno ()))
2750 bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ());
2753 vsetvl_info prev_info = vsetvl_info ();
2754 prev_info.set_empty ();
2755 for (auto &curr_info : infos)
2757 if (prev_info.empty_p ())
2758 prev_info = curr_info;
2759 else if ((curr_info.unknown_p () && prev_info.valid_p ())
2760 || (curr_info.valid_p () && prev_info.unknown_p ()))
2762 block_info.local_infos.safe_push (prev_info);
2763 prev_info = curr_info;
2765 else if (curr_info.valid_p () && prev_info.valid_p ())
2767 if (m_dem.available_p (prev_info, curr_info))
2769 if (dump_file && (dump_flags & TDF_DETAILS))
2771 fprintf (dump_file,
2772 " Ignore curr info since prev info "
2773 "available with it:\n");
2774 fprintf (dump_file, " prev_info: ");
2775 prev_info.dump (dump_file, " ");
2776 fprintf (dump_file, " curr_info: ");
2777 curr_info.dump (dump_file, " ");
2778 fprintf (dump_file, "\n");
2780 /* Even though prev_info is available with curr_info,
2781 we need to update the MAX_SEW of prev_info since
2782 we don't check MAX_SEW in available_p check.
2784 prev_info:
2785 Demand fields: demand_ratio_and_ge_sew demand_avl
2786 SEW=16, VLMUL=mf4, RATIO=64, MAX_SEW=64
2788 curr_info:
2789 Demand fields: demand_ge_sew demand_non_zero_avl
2790 SEW=16, VLMUL=m1, RATIO=16, MAX_SEW=32
2792 In the example above, prev_info is available with
2793 curr_info, we need to update prev_info MAX_SEW from
2794 64 into 32. */
2795 prev_info.set_max_sew (
2796 MIN (prev_info.get_max_sew (), curr_info.get_max_sew ()));
2797 if (!curr_info.vl_used_by_non_rvv_insn_p ()
2798 && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
2799 m_delete_list.safe_push (curr_info);
2801 if (curr_info.get_read_vl_insn ())
2802 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
2804 else if (m_dem.compatible_p (prev_info, curr_info))
2806 if (dump_file && (dump_flags & TDF_DETAILS))
2808 fprintf (dump_file, " Fuse curr info since prev info "
2809 "compatible with it:\n");
2810 fprintf (dump_file, " prev_info: ");
2811 prev_info.dump (dump_file, " ");
2812 fprintf (dump_file, " curr_info: ");
2813 curr_info.dump (dump_file, " ");
2815 m_dem.merge (prev_info, curr_info);
2816 if (!curr_info.vl_used_by_non_rvv_insn_p ()
2817 && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
2818 m_delete_list.safe_push (curr_info);
2819 if (curr_info.get_read_vl_insn ())
2820 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
2821 if (dump_file && (dump_flags & TDF_DETAILS))
2823 fprintf (dump_file, " prev_info after fused: ");
2824 prev_info.dump (dump_file, " ");
2825 fprintf (dump_file, "\n");
2828 else
2830 if (dump_file && (dump_flags & TDF_DETAILS))
2832 fprintf (dump_file,
2833 " Cannot fuse incompatible infos:\n");
2834 fprintf (dump_file, " prev_info: ");
2835 prev_info.dump (dump_file, " ");
2836 fprintf (dump_file, " curr_info: ");
2837 curr_info.dump (dump_file, " ");
2839 block_info.local_infos.safe_push (prev_info);
2840 prev_info = curr_info;
2845 if (prev_info.valid_p () || prev_info.unknown_p ())
2846 block_info.local_infos.safe_push (prev_info);
2851 bool
2852 pre_vsetvl::earliest_fuse_vsetvl_info (int iter)
2854 compute_vsetvl_def_data ();
2855 compute_lcm_local_properties ();
2857 unsigned num_exprs = m_exprs.length ();
2858 struct edge_list *m_edges = create_edge_list ();
2859 unsigned num_edges = NUM_EDGES (m_edges);
2860 sbitmap *antin
2861 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2862 sbitmap *antout
2863 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2865 sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs);
2867 compute_available (m_avloc, m_kill, m_avout, m_avin);
2868 compute_antinout_edge (m_antloc, m_transp, antin, antout);
2869 compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill,
2870 earliest);
2872 if (dump_file && (dump_flags & TDF_DETAILS))
2874 fprintf (dump_file, "\n Compute LCM earliest insert data (lift %d):\n\n",
2875 iter);
2876 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
2877 for (unsigned i = 0; i < num_exprs; i++)
2879 const auto &info = *m_exprs[i];
2880 fprintf (dump_file, " Expr[%u]: ", i);
2881 info.dump (dump_file, " ");
2883 fprintf (dump_file, "\n bitmap data:\n");
2884 for (const bb_info *bb : crtl->ssa->bbs ())
2886 unsigned int i = bb->index ();
2887 fprintf (dump_file, " BB %u:\n", i);
2888 fprintf (dump_file, " avloc: ");
2889 dump_bitmap_file (dump_file, m_avloc[i]);
2890 fprintf (dump_file, " kill: ");
2891 dump_bitmap_file (dump_file, m_kill[i]);
2892 fprintf (dump_file, " antloc: ");
2893 dump_bitmap_file (dump_file, m_antloc[i]);
2894 fprintf (dump_file, " transp: ");
2895 dump_bitmap_file (dump_file, m_transp[i]);
2897 fprintf (dump_file, " avin: ");
2898 dump_bitmap_file (dump_file, m_avin[i]);
2899 fprintf (dump_file, " avout: ");
2900 dump_bitmap_file (dump_file, m_avout[i]);
2901 fprintf (dump_file, " antin: ");
2902 dump_bitmap_file (dump_file, antin[i]);
2903 fprintf (dump_file, " antout: ");
2904 dump_bitmap_file (dump_file, antout[i]);
2906 fprintf (dump_file, "\n");
2907 fprintf (dump_file, " earliest:\n");
2908 for (unsigned ed = 0; ed < num_edges; ed++)
2910 edge eg = INDEX_EDGE (m_edges, ed);
2912 if (bitmap_empty_p (earliest[ed]))
2913 continue;
2914 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
2915 eg->dest->index);
2916 dump_bitmap_file (dump_file, earliest[ed]);
2918 fprintf (dump_file, "\n");
2921 if (dump_file && (dump_flags & TDF_DETAILS))
2923 fprintf (dump_file, " Fused global info result (lift %d):\n", iter);
2926 bool changed = false;
2927 for (unsigned ed = 0; ed < num_edges; ed++)
2929 sbitmap e = earliest[ed];
2930 if (bitmap_empty_p (e))
2931 continue;
2933 unsigned int expr_index;
2934 sbitmap_iterator sbi;
2935 EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi)
2937 vsetvl_info &curr_info = *m_exprs[expr_index];
2938 edge eg = INDEX_EDGE (m_edges, ed);
2939 vsetvl_block_info &src_block_info = get_block_info (eg->src);
2940 vsetvl_block_info &dest_block_info = get_block_info (eg->dest);
2942 if (!curr_info.valid_p ()
2943 || eg->probability == profile_probability::never ()
2944 || src_block_info.probability
2945 == profile_probability::uninitialized ()
2946 /* When multiple set bits in earliest edge, such edge may
2947 have infinite loop in preds or succs or multiple conflict
2948 vsetvl expression which make such edge is unrelated. We
2949 don't perform fusion for such situation. */
2950 || bitmap_count_bits (e) != 1)
2951 continue;
2953 if (src_block_info.empty_p ())
2955 vsetvl_info new_curr_info = curr_info;
2956 new_curr_info.set_bb (crtl->ssa->bb (eg->dest));
2957 bool has_compatible_p
2958 = has_compatible_reaching_vsetvl_p (new_curr_info);
2959 if (!has_compatible_p)
2961 if (dump_file && (dump_flags & TDF_DETAILS))
2963 fprintf (dump_file,
2964 " Forbidden lift up vsetvl info into bb %u "
2965 "since there is no vsetvl info that reaching in "
2966 "is compatible with it:",
2967 eg->src->index);
2968 curr_info.dump (dump_file, " ");
2970 continue;
2973 if (dump_file && (dump_flags & TDF_DETAILS))
2975 fprintf (dump_file,
2976 " Set empty bb %u to info:", eg->src->index);
2977 curr_info.dump (dump_file, " ");
2979 src_block_info.set_info (curr_info);
2980 src_block_info.probability = dest_block_info.probability;
2981 changed = true;
2983 else if (src_block_info.has_info ())
2985 vsetvl_info &prev_info = src_block_info.get_exit_info ();
2986 gcc_assert (prev_info.valid_p ());
2988 if (m_dem.compatible_p (prev_info, curr_info))
2990 if (dump_file && (dump_flags & TDF_DETAILS))
2992 fprintf (dump_file, " Fuse curr info since prev info "
2993 "compatible with it:\n");
2994 fprintf (dump_file, " prev_info: ");
2995 prev_info.dump (dump_file, " ");
2996 fprintf (dump_file, " curr_info: ");
2997 curr_info.dump (dump_file, " ");
2999 m_dem.merge (prev_info, curr_info);
3000 if (dump_file && (dump_flags & TDF_DETAILS))
3002 fprintf (dump_file, " prev_info after fused: ");
3003 prev_info.dump (dump_file, " ");
3004 fprintf (dump_file, "\n");
3006 changed = true;
3007 if (src_block_info.has_info ())
3008 src_block_info.probability += dest_block_info.probability;
3010 else
3012 /* Cancel lift up if probabilities are equal. */
3013 if (successors_probability_equal_p (eg->src)
3014 || (dest_block_info.probability
3015 > src_block_info.probability
3016 && !has_compatible_reaching_vsetvl_p (curr_info)))
3018 if (dump_file && (dump_flags & TDF_DETAILS))
3020 fprintf (dump_file,
3021 " Reset bb %u:",
3022 eg->src->index);
3023 prev_info.dump (dump_file, " ");
3024 fprintf (dump_file, " due to (same probability or no "
3025 "compatible reaching):");
3026 curr_info.dump (dump_file, " ");
3028 src_block_info.set_empty_info ();
3029 src_block_info.probability
3030 = profile_probability::uninitialized ();
3031 /* See PR113696, we should reset immediate dominator to
3032 empty since we may uplift ineffective vsetvl which
3033 locate at low probability block. */
3034 basic_block dom
3035 = get_immediate_dominator (CDI_DOMINATORS, eg->src);
3036 auto &dom_block_info = get_block_info (dom);
3037 if (dom_block_info.has_info ()
3038 && !m_dem.compatible_p (
3039 dom_block_info.get_exit_info (), curr_info))
3041 dom_block_info.set_empty_info ();
3042 dom_block_info.probability
3043 = profile_probability::uninitialized ();
3044 if (dump_file && (dump_flags & TDF_DETAILS))
3046 fprintf (dump_file,
3047 " Reset dominator bb %u:",
3048 dom->index);
3049 prev_info.dump (dump_file, " ");
3050 fprintf (dump_file,
3051 " due to (same probability or no "
3052 "compatible reaching):");
3053 curr_info.dump (dump_file, " ");
3056 changed = true;
3058 /* Choose the one with higher probability. */
3059 else if (dest_block_info.probability
3060 > src_block_info.probability)
3062 if (dump_file && (dump_flags & TDF_DETAILS))
3064 fprintf (dump_file,
3065 " Change bb %u from:",
3066 eg->src->index);
3067 prev_info.dump (dump_file, " ");
3068 fprintf (dump_file,
3069 " to (higher probability):");
3070 curr_info.dump (dump_file, " ");
3072 src_block_info.set_info (curr_info);
3073 src_block_info.probability = dest_block_info.probability;
3074 changed = true;
3078 else
3080 vsetvl_info &prev_info = src_block_info.get_exit_info ();
3081 if (!prev_info.valid_p ()
3082 || m_dem.available_p (prev_info, curr_info)
3083 || !m_dem.compatible_p (prev_info, curr_info))
3084 continue;
3086 if (dump_file && (dump_flags & TDF_DETAILS))
3088 fprintf (dump_file, " Fuse curr info since prev info "
3089 "compatible with it:\n");
3090 fprintf (dump_file, " prev_info: ");
3091 prev_info.dump (dump_file, " ");
3092 fprintf (dump_file, " curr_info: ");
3093 curr_info.dump (dump_file, " ");
3095 m_dem.merge (prev_info, curr_info);
3096 if (dump_file && (dump_flags & TDF_DETAILS))
3098 fprintf (dump_file, " prev_info after fused: ");
3099 prev_info.dump (dump_file, " ");
3100 fprintf (dump_file, "\n");
3102 changed = true;
3107 if (dump_file && (dump_flags & TDF_DETAILS))
3109 fprintf (dump_file, "\n");
3112 sbitmap_vector_free (antin);
3113 sbitmap_vector_free (antout);
3114 sbitmap_vector_free (earliest);
3115 free_edge_list (m_edges);
3117 return changed;
3120 void
3121 pre_vsetvl::pre_global_vsetvl_info ()
3123 compute_vsetvl_def_data ();
3124 compute_lcm_local_properties ();
3126 unsigned num_exprs = m_exprs.length ();
3127 m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill,
3128 m_avin, m_avout, &m_insert, &m_del);
3129 unsigned num_edges = NUM_EDGES (m_edges);
3131 if (dump_file && (dump_flags & TDF_DETAILS))
3133 fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n");
3134 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
3135 for (unsigned i = 0; i < num_exprs; i++)
3137 const auto &info = *m_exprs[i];
3138 fprintf (dump_file, " Expr[%u]: ", i);
3139 info.dump (dump_file, " ");
3141 fprintf (dump_file, "\n bitmap data:\n");
3142 for (const bb_info *bb : crtl->ssa->bbs ())
3144 unsigned i = bb->index ();
3145 fprintf (dump_file, " BB %u:\n", i);
3146 fprintf (dump_file, " avloc: ");
3147 dump_bitmap_file (dump_file, m_avloc[i]);
3148 fprintf (dump_file, " kill: ");
3149 dump_bitmap_file (dump_file, m_kill[i]);
3150 fprintf (dump_file, " antloc: ");
3151 dump_bitmap_file (dump_file, m_antloc[i]);
3152 fprintf (dump_file, " transp: ");
3153 dump_bitmap_file (dump_file, m_transp[i]);
3155 fprintf (dump_file, " avin: ");
3156 dump_bitmap_file (dump_file, m_avin[i]);
3157 fprintf (dump_file, " avout: ");
3158 dump_bitmap_file (dump_file, m_avout[i]);
3159 fprintf (dump_file, " del: ");
3160 dump_bitmap_file (dump_file, m_del[i]);
3162 fprintf (dump_file, "\n");
3163 fprintf (dump_file, " insert:\n");
3164 for (unsigned ed = 0; ed < num_edges; ed++)
3166 edge eg = INDEX_EDGE (m_edges, ed);
3168 if (bitmap_empty_p (m_insert[ed]))
3169 continue;
3170 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
3171 eg->dest->index);
3172 dump_bitmap_file (dump_file, m_insert[ed]);
3176 /* Remove vsetvl infos as LCM suggest */
3177 for (const bb_info *bb : crtl->ssa->bbs ())
3179 sbitmap d = m_del[bb->index ()];
3180 if (bitmap_count_bits (d) == 0)
3181 continue;
3182 gcc_assert (bitmap_count_bits (d) == 1);
3183 unsigned expr_index = bitmap_first_set_bit (d);
3184 vsetvl_info &info = *m_exprs[expr_index];
3185 gcc_assert (info.valid_p ());
3186 gcc_assert (info.get_bb () == bb);
3187 const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
3188 gcc_assert (block_info.get_entry_info () == info);
3189 info.set_delete ();
3190 if (dump_file && (dump_flags & TDF_DETAILS))
3192 fprintf (dump_file,
3193 "\nLCM deleting vsetvl of block %d, it has predecessors: \n",
3194 bb->index ());
3195 hash_set<basic_block> all_preds
3196 = get_all_predecessors (bb->cfg_bb ());
3197 int i = 0;
3198 for (const auto pred : all_preds)
3200 fprintf (dump_file, "%d ", pred->index);
3201 i++;
3202 if (i % 32 == 0)
3203 fprintf (dump_file, "\n");
3205 fprintf (dump_file, "\n");
3209 /* Remove vsetvl infos if all predecessors are available to the block. */
3210 for (const bb_info *bb : crtl->ssa->bbs ())
3212 vsetvl_block_info &block_info = get_block_info (bb);
3213 if (block_info.empty_p ())
3214 continue;
3215 vsetvl_info &curr_info = block_info.get_entry_info ();
3216 if (!curr_info.valid_p ())
3217 continue;
3219 unsigned int expr_index;
3220 sbitmap_iterator sbi;
3221 gcc_assert (
3222 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
3223 bool full_available = true;
3224 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index,
3225 sbi)
3227 vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
3228 if (!prev_info.valid_p ()
3229 || !m_dem.available_p (prev_info, curr_info))
3231 full_available = false;
3232 break;
3235 if (full_available)
3236 curr_info.set_delete ();
3239 for (const bb_info *bb : crtl->ssa->bbs ())
3241 vsetvl_block_info &block_info = get_block_info (bb);
3242 if (block_info.empty_p ())
3243 continue;
3244 vsetvl_info &curr_info = block_info.get_entry_info ();
3245 if (curr_info.delete_p ())
3247 if (block_info.local_infos.is_empty ())
3248 continue;
3249 curr_info = block_info.local_infos[0];
3251 if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
3252 && preds_all_same_avl_and_ratio_p (curr_info))
3253 curr_info.set_change_vtype_only ();
3255 vsetvl_info prev_info = vsetvl_info ();
3256 prev_info.set_empty ();
3257 for (auto &curr_info : block_info.local_infos)
3259 if (prev_info.valid_p () && curr_info.valid_p ()
3260 && m_dem.avl_available_p (prev_info, curr_info)
3261 && prev_info.get_ratio () == curr_info.get_ratio ())
3262 curr_info.set_change_vtype_only ();
3263 prev_info = curr_info;
3268 void
3269 pre_vsetvl::emit_vsetvl ()
3271 bool need_commit = false;
3273 /* Fake edge is created by connect infinite loops to exit function.
3274 We should commit vsetvl edge after fake edges removes, otherwise,
3275 it will cause ICE. */
3276 remove_fake_exit_edges ();
3277 for (const bb_info *bb : crtl->ssa->bbs ())
3279 for (const auto &curr_info : get_block_info (bb).local_infos)
3281 insn_info *insn = curr_info.get_insn ();
3282 if (curr_info.delete_p ())
3284 if (vsetvl_insn_p (insn->rtl ()))
3285 remove_vsetvl_insn (curr_info.get_insn ()->rtl ());
3286 continue;
3288 else if (curr_info.valid_p ())
3290 if (vsetvl_insn_p (insn->rtl ()))
3292 const vsetvl_info temp = vsetvl_info (insn);
3293 if (!(curr_info == temp))
3295 if (dump_file)
3297 fprintf (dump_file, "\n Change vsetvl info from: ");
3298 temp.dump (dump_file, " ");
3299 fprintf (dump_file, " to: ");
3300 curr_info.dump (dump_file, " ");
3302 change_vsetvl_insn (curr_info);
3305 else
3307 if (dump_file)
3309 fprintf (dump_file,
3310 "\n Insert vsetvl info before insn %d: ",
3311 insn->uid ());
3312 curr_info.dump (dump_file, " ");
3314 insert_vsetvl_insn (EMIT_BEFORE, curr_info);
3320 for (const vsetvl_info &item : m_delete_list)
3322 gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ()));
3323 remove_vsetvl_insn (item.get_insn ()->rtl ());
3326 /* Insert vsetvl info that was not deleted after lift up. */
3327 for (const bb_info *bb : crtl->ssa->bbs ())
3329 const vsetvl_block_info &block_info = get_block_info (bb);
3330 if (!block_info.has_info ())
3331 continue;
3333 const vsetvl_info &footer_info = block_info.get_exit_info ();
3335 if (footer_info.delete_p ())
3336 continue;
3338 edge eg;
3339 edge_iterator eg_iterator;
3340 FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs)
3342 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3343 if (dump_file)
3345 fprintf (
3346 dump_file,
3347 "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ",
3348 eg->src->index, eg->dest->index);
3349 footer_info.dump (dump_file, " ");
3351 start_sequence ();
3352 insert_vsetvl_insn (EMIT_DIRECT, footer_info);
3353 rtx_insn *rinsn = get_insns ();
3354 end_sequence ();
3355 default_rtl_profile ();
3356 insert_insn_on_edge (rinsn, eg);
3357 need_commit = true;
3361 /* m_insert vsetvl as LCM suggest. */
3362 for (int ed = 0; ed < NUM_EDGES (m_edges); ed++)
3364 edge eg = INDEX_EDGE (m_edges, ed);
3365 sbitmap i = m_insert[ed];
3366 if (bitmap_count_bits (i) != 1)
3367 /* For code with infinite loop (e.g. pr61634.c), The data flow is
3368 completely wrong. */
3369 continue;
3371 unsigned expr_index = bitmap_first_set_bit (i);
3372 const vsetvl_info &info = *m_exprs[expr_index];
3373 gcc_assert (info.valid_p ());
3374 if (dump_file)
3376 fprintf (dump_file,
3377 "\n Insert vsetvl info at edge(bb %u -> bb %u): ",
3378 eg->src->index, eg->dest->index);
3379 info.dump (dump_file, " ");
3381 rtl_profile_for_edge (eg);
3382 start_sequence ();
3384 insert_vsetvl_insn (EMIT_DIRECT, info);
3385 rtx_insn *rinsn = get_insns ();
3386 end_sequence ();
3387 default_rtl_profile ();
3389 /* We should not get an abnormal edge here. */
3390 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3391 need_commit = true;
3392 insert_insn_on_edge (rinsn, eg);
3395 if (need_commit)
3396 commit_edge_insertions ();
3399 void
3400 pre_vsetvl::cleanup ()
3402 remove_avl_operand ();
3403 remove_unused_dest_operand ();
3404 remove_vsetvl_pre_insns ();
3407 void
3408 pre_vsetvl::remove_avl_operand ()
3410 basic_block cfg_bb;
3411 rtx_insn *rinsn;
3412 FOR_ALL_BB_FN (cfg_bb, cfun)
3413 FOR_BB_INSNS (cfg_bb, rinsn)
3414 if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn)
3415 && REG_P (get_vl (rinsn)))
3417 rtx avl = get_vl (rinsn);
3418 if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
3420 rtx new_pat;
3421 if (fault_first_load_p (rinsn))
3422 new_pat
3423 = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
3424 else
3426 rtx set = single_set (rinsn);
3427 rtx src
3428 = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
3429 new_pat = gen_rtx_SET (SET_DEST (set), src);
3431 if (dump_file)
3433 fprintf (dump_file, " Cleanup insn %u's avl operand:\n",
3434 INSN_UID (rinsn));
3435 print_rtl_single (dump_file, rinsn);
3437 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
3442 void
3443 pre_vsetvl::remove_unused_dest_operand ()
3445 df_analyze ();
3446 basic_block cfg_bb;
3447 rtx_insn *rinsn;
3448 FOR_ALL_BB_FN (cfg_bb, cfun)
3449 FOR_BB_INSNS (cfg_bb, rinsn)
3450 if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
3452 rtx vl = get_vl (rinsn);
3453 vsetvl_info info = vsetvl_info (rinsn);
3454 if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
3455 if (!info.has_vlmax_avl ())
3457 rtx new_pat = info.get_vsetvl_pat (true);
3458 if (dump_file)
3460 fprintf (dump_file,
3461 " Remove vsetvl insn %u's dest(vl) operand since "
3462 "it unused:\n",
3463 INSN_UID (rinsn));
3464 print_rtl_single (dump_file, rinsn);
3466 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
3467 false);
3472 /* Remove all bogus vsetvl_pre instructions. */
3473 void
3474 pre_vsetvl::remove_vsetvl_pre_insns ()
3476 basic_block cfg_bb;
3477 rtx_insn *rinsn;
3478 FOR_ALL_BB_FN (cfg_bb, cfun)
3479 FOR_BB_INSNS (cfg_bb, rinsn)
3480 if (NONDEBUG_INSN_P (rinsn) && vsetvl_pre_insn_p (rinsn))
3482 if (dump_file)
3484 fprintf (dump_file, " Eliminate vsetvl_pre insn %d:\n",
3485 INSN_UID (rinsn));
3486 print_rtl_single (dump_file, rinsn);
3488 remove_vsetvl_insn (rinsn);
3492 const pass_data pass_data_vsetvl = {
3493 RTL_PASS, /* type */
3494 "vsetvl", /* name */
3495 OPTGROUP_NONE, /* optinfo_flags */
3496 TV_MACH_DEP, /* tv_id */
3497 0, /* properties_required */
3498 0, /* properties_provided */
3499 0, /* properties_destroyed */
3500 0, /* todo_flags_start */
3501 0, /* todo_flags_finish */
3504 class pass_vsetvl : public rtl_opt_pass
3506 private:
3507 void simple_vsetvl ();
3508 void lazy_vsetvl ();
3510 public:
3511 pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
3513 /* opt_pass methods: */
3514 virtual bool gate (function *) final override { return TARGET_VECTOR; }
3515 virtual unsigned int execute (function *) final override;
3516 }; // class pass_vsetvl
3518 void
3519 pass_vsetvl::simple_vsetvl ()
3521 if (dump_file)
3522 fprintf (dump_file, "\nEntering Simple VSETVL PASS\n");
3524 basic_block cfg_bb;
3525 rtx_insn *rinsn;
3526 FOR_ALL_BB_FN (cfg_bb, cfun)
3528 FOR_BB_INSNS (cfg_bb, rinsn)
3530 if (!NONDEBUG_INSN_P (rinsn))
3531 continue;
3532 if (has_vtype_op (rinsn))
3534 const auto &info = vsetvl_info (rinsn);
3535 rtx pat = info.get_vsetvl_pat ();
3536 emit_insn_before (pat, rinsn);
3537 if (dump_file)
3539 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
3540 INSN_UID (rinsn));
3541 print_rtl_single (dump_file, PREV_INSN (rinsn));
3548 /* Lazy vsetvl insertion for optimize > 0. */
3549 void
3550 pass_vsetvl::lazy_vsetvl ()
3552 if (dump_file)
3553 fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n");
3555 pre_vsetvl pre = pre_vsetvl ();
3557 if (dump_file)
3558 fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n");
3559 pre.fuse_local_vsetvl_info ();
3560 if (dump_file && (dump_flags & TDF_DETAILS))
3561 pre.dump (dump_file, "phase 1");
3563 /* Phase 2: Fuse header and footer vsetvl infos between basic blocks. */
3564 if (dump_file)
3565 fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n");
3566 if (vsetvl_strategy != VSETVL_OPT_NO_FUSION)
3568 bool changed = true;
3569 int fused_count = 0;
3572 if (dump_file)
3573 fprintf (dump_file, " Try lift up %d.\n\n", fused_count);
3574 changed = pre.earliest_fuse_vsetvl_info (fused_count);
3575 fused_count += 1;
3576 } while (changed);
3578 if (dump_file && (dump_flags & TDF_DETAILS))
3579 pre.dump (dump_file, "phase 2");
3581 /* Phase 3: Reducing redundant vsetvl infos using LCM. */
3582 if (dump_file)
3583 fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n");
3584 pre.pre_global_vsetvl_info ();
3585 if (dump_file && (dump_flags & TDF_DETAILS))
3586 pre.dump (dump_file, "phase 3");
3588 /* Phase 4: Insert, modify and remove vsetvl insns. */
3589 if (dump_file)
3590 fprintf (dump_file,
3591 "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n");
3592 pre.emit_vsetvl ();
3594 /* Phase 5: Cleanup */
3595 if (dump_file)
3596 fprintf (dump_file, "\nPhase 5: Cleanup\n\n");
3597 pre.cleanup ();
3599 pre.finish ();
3602 /* Main entry point for this pass. */
3603 unsigned int
3604 pass_vsetvl::execute (function *)
3606 if (n_basic_blocks_for_fn (cfun) <= 0)
3607 return 0;
3609 /* The RVV instruction may change after split which is not a stable
3610 instruction. We need to split it here to avoid potential issue
3611 since the VSETVL PASS is insert before split PASS. */
3612 split_all_insns ();
3614 /* Early return for there is no vector instructions. */
3615 if (!has_vector_insn (cfun))
3616 return 0;
3618 if (!optimize || vsetvl_strategy == VSETVL_SIMPLE)
3619 simple_vsetvl ();
3620 else
3621 lazy_vsetvl ();
3623 return 0;
3626 rtl_opt_pass *
3627 make_pass_vsetvl (gcc::context *ctxt)
3629 return new pass_vsetvl (ctxt);