gcc/config/riscv/riscv-vsetvl.cc
/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
   Copyright (C) 2022-2024 Free Software Foundation, Inc.
   Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* The values of the vl and vtype registers will affect the behavior of RVV
   insns.  That is, when we need to execute an RVV instruction, we need to set
   the correct vl and vtype values by executing the vsetvl instruction before.
   Executing the fewest number of vsetvl instructions while keeping the
   behavior the same is the problem this pass is trying to solve.  This vsetvl
   pass is divided into 5 phases:

     - Phase 1 (fuse local vsetvl infos): traverses each Basic Block, parses
       each instruction in it that affects vl and vtype state and generates an
       array of vsetvl_info objects.  Then traverse the vsetvl_info array from
       front to back and perform fusion according to the fusion rules.  The
       fused vsetvl infos are stored in the vsetvl_block_info object's `infos`
       field.

     - Phase 2 (earliest fuse global vsetvl infos): The header_info and
       footer_info of vsetvl_block_info are used as expressions, and the
       earliest of each expression is computed.  Based on the earliest
       information, try to lift up the corresponding vsetvl info to the src
       basic block of the edge (mainly to reduce the total number of vsetvl
       instructions; this uplift will cause some execution paths to execute
       vsetvl instructions that shouldn't be there).

     - Phase 3 (pre global vsetvl info): The header_info and footer_info of
       vsetvl_block_info are used as expressions, and the LCM algorithm is
       used to compute the header_info that needs to be deleted and the one
       that needs to be inserted on some edges.

     - Phase 4 (emit vsetvl insns): Based on the fusion result of Phase 1 and
       the deletion and insertion information of Phase 3, the mandatory vsetvl
       instruction insertion, modification and deletion are performed.

     - Phase 5 (cleanup): Clean up the avl operand in the RVV operator
       instruction and clean up the unused dest operand of the vsetvl insn.

   After Phase 1 a virtual CFG of vsetvl_info is generated.  The virtual
   basic block is represented by vsetvl_block_info, and the virtual vsetvl
   statements inside are represented by vsetvl_info.  The later phases 2 and 3
   are constantly modifying and adjusting this virtual CFG.  Phase 4 performs
   insertion, modification and deletion of vsetvl instructions based on the
   optimized virtual CFG.  Phases 1, 2 and 3 do not involve modifications to
   the RTL.  */
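
/* As a small illustrative example (not taken from real pass output), two
   adjacent RVV instructions that agree on SEW/LMUL, policies and AVL only
   need one vsetvl:

	vsetvli	zero,a2,e32,m1,ta,ma
	vadd.vv	v1,v2,v3
	vsub.vv	v4,v1,v2	;; reuses the vl/vtype state set above

   whereas a naive expansion would emit one vsetvli per RVV instruction.  */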
#define IN_TARGET_CODE 1
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#define INCLUDE_ARRAY

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "backend.h"
#include "rtl.h"
#include "target.h"
#include "tree-pass.h"
#include "df.h"
#include "rtl-ssa.h"
#include "cfgcleanup.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "tm-constrs.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "predict.h"
#include "profile-count.h"
#include "gcse.h"
#include "cfgloop.h"

using namespace rtl_ssa;
using namespace riscv_vector;
/* Set the bitmap DST to the union of SRC of predecessors of
   basic block B.
   It's a bit different from bitmap_union_of_preds in cfganal.cc.  This
   function takes into account the case where pred is the ENTRY basic block.
   The main reason for this difference is to make it easier to insert some
   special value into the ENTRY basic block, for example a vsetvl_info with a
   status of UNKNOWN.  */
static void
bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b)
{
  unsigned int set_size = dst->size;
  edge e;
  unsigned ix;

  for (ix = 0; ix < EDGE_COUNT (b->preds); ix++)
    {
      e = EDGE_PRED (b, ix);
      bitmap_copy (dst, src[e->src->index]);
      break;
    }

  if (ix == EDGE_COUNT (b->preds))
    bitmap_clear (dst);
  else
    for (ix++; ix < EDGE_COUNT (b->preds); ix++)
      {
	unsigned int i;
	SBITMAP_ELT_TYPE *p, *r;

	e = EDGE_PRED (b, ix);
	p = src[e->src->index]->elms;
	r = dst->elms;
	for (i = 0; i < set_size; i++)
	  *r++ |= *p++;
      }
}
/* Compute the reaching definition in and out based on the GEN and KILL
   information in each basic block.
   This function references the compute_available implementation in
   lcm.cc.  */
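/* A sketch of the dataflow problem solved below, in the usual GEN/KILL
   notation (a forward reaching-definitions problem, cf. compute_available
   in lcm.cc):

     IN[b]  = union of OUT[p] over all predecessors p of b
     OUT[b] = GEN[b] | (IN[b] & ~KILL[b])

   iterated over a worklist until a fixed point is reached.  */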
static void
compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in,
			    sbitmap *out)
{
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of AVOUT above.  Use reverse postorder
     to make the forward dataflow problem require fewer iterations.  */
  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
  for (int i = 0; i < n; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
      *qin++ = bb;
      bb->aux = bb;
    }
  free (rpo);

  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      /* Do not clear the aux field for blocks which are successors of the
	 ENTRY block.  That way we never add them to the worklist again.  */
      if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun))
	bb->aux = NULL;

      bitmap_union_of_preds_with_entry (in[bb->index], out, bb);

      if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index],
				kill[bb->index]))
	/* If the out state of this block changed, then we need
	   to add the successors of this block to the worklist
	   if they are not already on the worklist.  */
	FOR_EACH_EDGE (e, ei, bb->succs)
	  if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      *qin++ = e->dest;
	      e->dest->aux = e;
	      qlen++;

	      if (qin >= qend)
		qin = worklist;
	    }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}
/* Classification of vsetvl instruction.  */
enum vsetvl_type
{
  VSETVL_NORMAL,
  VSETVL_VTYPE_CHANGE_ONLY,
  VSETVL_DISCARD_RESULT,
  NUM_VSETVL_TYPE
};

enum emit_type
{
  /* emit_insn directly.  */
  EMIT_DIRECT,
  EMIT_BEFORE,
  EMIT_AFTER,
};
/* dump helper functions.  */
static const char *
vlmul_to_str (vlmul_type vlmul)
{
  switch (vlmul)
    {
    case LMUL_1:
      return "m1";
    case LMUL_2:
      return "m2";
    case LMUL_4:
      return "m4";
    case LMUL_8:
      return "m8";
    case LMUL_RESERVED:
      return "INVALID LMUL";
    case LMUL_F8:
      return "mf8";
    case LMUL_F4:
      return "mf4";
    case LMUL_F2:
      return "mf2";

    default:
      gcc_unreachable ();
    }
}

static const char *
policy_to_str (bool agnostic_p)
{
  return agnostic_p ? "agnostic" : "undisturbed";
}
/* Return true if it is an RVV instruction that depends on the VTYPE global
   status register.  */
static bool
has_vtype_op (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
}

/* Return true if the instruction ignores the VLMUL field of VTYPE.  */
static bool
ignore_vlmul_insn_p (rtx_insn *rinsn)
{
  return get_attr_type (rinsn) == TYPE_VIMOVVX
	 || get_attr_type (rinsn) == TYPE_VFMOVVF
	 || get_attr_type (rinsn) == TYPE_VIMOVXV
	 || get_attr_type (rinsn) == TYPE_VFMOVFV;
}

/* Return true if the instruction is a scalar move instruction.  */
static bool
scalar_move_insn_p (rtx_insn *rinsn)
{
  return get_attr_type (rinsn) == TYPE_VIMOVXV
	 || get_attr_type (rinsn) == TYPE_VFMOVFV;
}

/* Return true if the instruction is a fault-only-first load instruction.  */
static bool
fault_first_load_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0
	 && (get_attr_type (rinsn) == TYPE_VLDFF
	     || get_attr_type (rinsn) == TYPE_VLSEGDFF);
}

/* Return true if the instruction is a read vl instruction.  */
static bool
read_vl_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
}

/* Return true if it is a vsetvl instruction.  */
static bool
vector_config_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
}

/* Return true if it is vsetvldi or vsetvlsi.  */
static bool
vsetvl_insn_p (rtx_insn *rinsn)
{
  if (!rinsn || !vector_config_insn_p (rinsn))
    return false;
  return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
	  || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
}

/* Return true if it is the bogus vsetvl_pre instruction:

     (define_insn "@vlmax_avl<mode>"
       [(set (match_operand:P 0 "register_operand" "=r")
	     (unspec:P [(match_operand:P 1 "const_int_operand" "i")]
		       UNSPEC_VLMAX))]
       "TARGET_VECTOR"
       ""
       [(set_attr "type" "vsetvl_pre")])

   As described above, it's a bogus instruction which doesn't emit any
   assembly and should be removed eventually.  It's used for occupying a
   scalar register for the VLMAX avl RVV instruction before register
   allocation.

   Before RA:

     ...
     vsetvl_pre (set r136)
     vadd.vv (use r136 with VLMAX avl)
     ...

   After RA:

     ...
     vsetvl_pre (set a5)
     vadd.vv (use a5 with VLMAX avl)
     ...

   VSETVL PASS:

     ...
     vsetvl_pre (set a5) ---> removed.
     vsetvl a5,zero,... ---> Inserted.
     vadd.vv
     ...  */
static bool
vsetvl_pre_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0
	 && get_attr_type (rinsn) == TYPE_VSETVL_PRE;
}

/* Return true if it is vsetvl zero, rs1.  */
static bool
vsetvl_discard_result_insn_p (rtx_insn *rinsn)
{
  if (!vector_config_insn_p (rinsn))
    return false;
  return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
	  || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
}

static bool
real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
{
  return insn != nullptr && insn->is_real () && insn->bb () == bb;
}
/* Helper function to get VL operand for VLMAX insn.  */
static rtx
get_vl (rtx_insn *rinsn)
{
  if (has_vl_op (rinsn))
    {
      extract_insn_cached (rinsn);
      return recog_data.operand[get_attr_vl_op_idx (rinsn)];
    }
  return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
}

/* Helper function to get AVL operand.  */
static rtx
get_avl (rtx_insn *rinsn)
{
  if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
    return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);

  if (!has_vl_op (rinsn))
    return NULL_RTX;
  if (vlmax_avl_type_p (rinsn))
    return RVV_VLMAX;
  extract_insn_cached (rinsn);
  return recog_data.operand[get_attr_vl_op_idx (rinsn)];
}

/* Get default mask policy.  */
static bool
get_default_ma ()
{
  /* For an instruction that doesn't require MA, we still need a default
     value to emit vsetvl.  We pick the default value according to the
     preferred policy.  */
  return (bool) (get_prefer_mask_policy () & 0x1
		 || (get_prefer_mask_policy () >> 1 & 0x1));
}

/* Helper function to get MA operand.  */
static bool
mask_agnostic_p (rtx_insn *rinsn)
{
  /* If it doesn't have MA, we return agnostic by default.  */
  extract_insn_cached (rinsn);
  int ma = get_attr_ma (rinsn);
  return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
}
/* Return true if FN has a vector instruction that uses VL/VTYPE.  */
static bool
has_vector_insn (function *fn)
{
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, fn)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
	return true;
  return false;
}

static vlmul_type
calculate_vlmul (unsigned int sew, unsigned int ratio)
{
  const vlmul_type ALL_LMUL[]
    = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
  for (const vlmul_type vlmul : ALL_LMUL)
    if (calculate_ratio (sew, vlmul) == ratio)
      return vlmul;
  return LMUL_RESERVED;
}
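
/* Illustrative example: the SEW/LMUL ratio of SEW = 32, LMUL = m2 is
   32 / 2 = 16, so calculate_vlmul (32, 16) returns LMUL_2; if no LMUL in
   ALL_LMUL produces the requested ratio, LMUL_RESERVED is returned.  */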
/* Get the currently supported maximum sew used in the int rvv
   instructions.  */
static uint8_t
get_max_int_sew ()
{
  if (TARGET_VECTOR_ELEN_64)
    return 64;
  else if (TARGET_VECTOR_ELEN_32)
    return 32;
  gcc_unreachable ();
}

/* Get the currently supported maximum sew used in the float rvv
   instructions.  */
static uint8_t
get_max_float_sew ()
{
  if (TARGET_VECTOR_ELEN_FP_64)
    return 64;
  else if (TARGET_VECTOR_ELEN_FP_32)
    return 32;
  else if (TARGET_VECTOR_ELEN_FP_16)
    return 16;
  gcc_unreachable ();
}
enum def_type
{
  REAL_SET = 1 << 0,
  PHI_SET = 1 << 1,
  BB_HEAD_SET = 1 << 2,
  BB_END_SET = 1 << 3,
  /* ??? TODO: In the RTL_SSA framework, we have REAL_SET,
     PHI_SET, BB_HEAD_SET, BB_END_SET and
     CLOBBER_DEF def_info types.  Currently,
     we conservatively do not optimize clobber
     defs since we haven't seen a case where we
     need to optimize them.  */
  CLOBBER_DEF = 1 << 4
};

static bool
insn_should_be_added_p (const insn_info *insn, unsigned int types)
{
  if (insn->is_real () && (types & REAL_SET))
    return true;
  if (insn->is_phi () && (types & PHI_SET))
    return true;
  if (insn->is_bb_head () && (types & BB_HEAD_SET))
    return true;
  if (insn->is_bb_end () && (types & BB_END_SET))
    return true;
  return false;
}
/* Get all the real (non-debug) uses of the REGNO register defined by INSN,
   looking through any phi nodes the definition reaches.  */
static const hash_set<use_info *>
get_all_real_uses (insn_info *insn, unsigned regno)
{
  gcc_assert (insn->is_real ());

  hash_set<use_info *> uses;
  auto_vec<phi_info *> work_list;
  hash_set<phi_info *> visited_list;

  for (def_info *def : insn->defs ())
    {
      if (!def->is_reg () || def->regno () != regno)
	continue;
      set_info *set = safe_dyn_cast<set_info *> (def);
      if (!set)
	continue;
      for (use_info *use : set->nondebug_insn_uses ())
	if (use->insn ()->is_real ())
	  uses.add (use);
      for (use_info *use : set->phi_uses ())
	work_list.safe_push (use->phi ());
    }

  while (!work_list.is_empty ())
    {
      phi_info *phi = work_list.pop ();
      visited_list.add (phi);

      for (use_info *use : phi->nondebug_insn_uses ())
	if (use->insn ()->is_real ())
	  uses.add (use);
      for (use_info *use : phi->phi_uses ())
	if (!visited_list.contains (use->phi ()))
	  work_list.safe_push (use->phi ());
    }
  return uses;
}
/* Recursively find all define instructions.  The kind of instruction is
   specified by the DEF_TYPE.  */
static hash_set<set_info *>
get_all_sets (phi_info *phi, unsigned int types)
{
  hash_set<set_info *> insns;
  auto_vec<phi_info *> work_list;
  hash_set<phi_info *> visited_list;
  if (!phi)
    return hash_set<set_info *> ();
  work_list.safe_push (phi);

  while (!work_list.is_empty ())
    {
      phi_info *phi = work_list.pop ();
      visited_list.add (phi);
      for (use_info *use : phi->inputs ())
	{
	  def_info *def = use->def ();
	  set_info *set = safe_dyn_cast<set_info *> (def);
	  if (!set)
	    return hash_set<set_info *> ();

	  gcc_assert (!set->insn ()->is_debug_insn ());

	  if (insn_should_be_added_p (set->insn (), types))
	    insns.add (set);
	  if (set->insn ()->is_phi ())
	    {
	      phi_info *new_phi = as_a<phi_info *> (set);
	      if (!visited_list.contains (new_phi))
		work_list.safe_push (new_phi);
	    }
	}
    }
  return insns;
}

static hash_set<set_info *>
get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
	      bool /* get_phi */ phi_p,
	      bool /* get_function_parameter */ param_p)
{
  if (real_p && phi_p && param_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set),
			 REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);

  else if (real_p && param_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set),
			 REAL_SET | BB_HEAD_SET | BB_END_SET);

  else if (real_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
  return hash_set<set_info *> ();
}
/* Return true if INSN1 and INSN2 are known to compute the same value,
   judged by their REG_EQUIV notes.  */
static bool
source_equal_p (insn_info *insn1, insn_info *insn2)
{
  if (!insn1 || !insn2)
    return false;
  rtx_insn *rinsn1 = insn1->rtl ();
  rtx_insn *rinsn2 = insn2->rtl ();
  if (!rinsn1 || !rinsn2)
    return false;

  rtx note1 = find_reg_equal_equiv_note (rinsn1);
  rtx note2 = find_reg_equal_equiv_note (rinsn2);
  /* We could handle the case of similar-looking REG_EQUALs as well but
     would need to verify that no insn in between modifies any of the source
     operands.  */
  if (note1 && note2 && rtx_equal_p (note1, note2)
      && REG_NOTE_KIND (note1) == REG_EQUIV)
    return true;
  return false;
}

/* If SET (possibly through phi nodes) is defined by a single real insn,
   return that insn; otherwise return nullptr.  */
static insn_info *
extract_single_source (set_info *set)
{
  if (!set)
    return nullptr;
  if (set->insn ()->is_real ())
    return set->insn ();
  if (!set->insn ()->is_phi ())
    return nullptr;
  hash_set<set_info *> sets = get_all_sets (set, true, false, true);
  if (sets.is_empty ())
    return nullptr;

  insn_info *first_insn = (*sets.begin ())->insn ();
  if (first_insn->is_artificial ())
    return nullptr;
  for (const set_info *set : sets)
    {
      /* If there is a head or end insn, we conservatively return
	 NULL so that the VSETVL PASS will insert vsetvl directly.  */
      if (set->insn ()->is_artificial ())
	return nullptr;
      if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn))
	return nullptr;
    }

  return first_insn;
}

static bool
same_equiv_note_p (set_info *set1, set_info *set2)
{
  insn_info *insn1 = extract_single_source (set1);
  insn_info *insn2 = extract_single_source (set2);
  if (!insn1 || !insn2)
    return false;
  return source_equal_p (insn1, insn2);
}
/* Return true if the SET result is not used by any instructions.  */
static bool
has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
{
  if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
    return false;

  rtx_insn *iter;
  for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb));
       iter = NEXT_INSN (iter))
    if (df_find_use (iter, regno_reg_rtx[regno]))
      return false;

  return true;
}

/* Return true for the special blocks to which we can't apply LCM
   optimization.  */
static bool
invalid_opt_bb_p (basic_block cfg_bb)
{
  edge e;
  edge_iterator ei;

  /* We don't do LCM optimizations on complex edges.  */
  FOR_EACH_EDGE (e, ei, cfg_bb->preds)
    if (e->flags & EDGE_COMPLEX)
      return true;

  /* We only do LCM optimizations on blocks that are post dominated by
     the EXIT block, that is, we don't do LCM optimizations on an infinite
     loop.  */
  FOR_EACH_EDGE (e, ei, cfg_bb->succs)
    if (e->flags & EDGE_FAKE)
      return true;

  return false;
}
/* Get all predecessors of BB.  */
static hash_set<basic_block>
get_all_predecessors (basic_block bb)
{
  hash_set<basic_block> blocks;
  auto_vec<basic_block> work_list;
  hash_set<basic_block> visited_list;
  work_list.safe_push (bb);

  while (!work_list.is_empty ())
    {
      basic_block new_bb = work_list.pop ();
      visited_list.add (new_bb);
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, new_bb->preds)
	{
	  if (!visited_list.contains (e->src))
	    work_list.safe_push (e->src);
	  blocks.add (e->src);
	}
    }
  return blocks;
}
/* These flags describe the minimum demand of the vl and vtype values by an
   RVV instruction.  For example, DEMAND_RATIO_P indicates that this RVV
   instruction only needs the SEW/LMUL ratio to remain the same, and does not
   require SEW and LMUL to be fixed.
   Therefore, if the former RVV instruction needs DEMAND_RATIO_P and the
   latter instruction needs DEMAND_SEW_LMUL_P and its SEW/LMUL is the same as
   that of the former instruction, then we can strengthen the minimum demand
   of the former instruction to DEMAND_SEW_LMUL_P, with its required SEW and
   LMUL being the SEW and LMUL of the latter instruction; the vsetvl
   instruction generated according to the new demand can then also be used
   for the latter instruction, so there is no need to insert a separate
   vsetvl instruction for it.  */
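
/* A concrete instance of the relaxation described above (illustrative
   numbers): SEW = 32 with LMUL = m2 gives SEW/LMUL ratio 32 / 2 = 16, so an
   instruction demanding only DEMAND_RATIO_P with ratio 16 can be covered by
   a vsetvli ...,e32,m2,... emitted for a neighbor demanding
   DEMAND_SEW_LMUL_P with SEW = 32, LMUL = m2.  */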
enum demand_flags : unsigned
{
  DEMAND_EMPTY_P = 0,
  DEMAND_SEW_P = 1 << 0,
  DEMAND_LMUL_P = 1 << 1,
  DEMAND_RATIO_P = 1 << 2,
  DEMAND_GE_SEW_P = 1 << 3,
  DEMAND_TAIL_POLICY_P = 1 << 4,
  DEMAND_MASK_POLICY_P = 1 << 5,
  DEMAND_AVL_P = 1 << 6,
  DEMAND_NON_ZERO_AVL_P = 1 << 7,
};
/* We split the demand information into three parts.  They are sew and lmul
   related (sew_lmul_demand_type), tail and mask policy related
   (policy_demand_type) and avl related (avl_demand_type).  Then we define
   three interfaces: available_p, compatible_p and merge.  available_p is
   used to determine whether the two vsetvl infos prev_info and next_info are
   available or not.  If prev_info is available for next_info, it means that
   the RVV insn corresponding to next_info on the path from prev_info to
   next_info can be used without inserting a separate vsetvl instruction.
   compatible_p is used to determine whether prev_info is compatible with
   next_info, and if so, merge can be used to merge the stricter demand
   information from next_info into prev_info so that prev_info becomes
   available to next_info.  */

enum class sew_lmul_demand_type : unsigned
{
  sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P,
  ratio_only = demand_flags::DEMAND_RATIO_P,
  sew_only = demand_flags::DEMAND_SEW_P,
  ge_sew = demand_flags::DEMAND_GE_SEW_P,
  ratio_and_ge_sew
    = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P,
};

enum class policy_demand_type : unsigned
{
  tail_mask_policy
    = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P,
  tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P,
  mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P,
  ignore_policy = demand_flags::DEMAND_EMPTY_P,
};

enum class avl_demand_type : unsigned
{
  avl = demand_flags::DEMAND_AVL_P,
  non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P,
  ignore_avl = demand_flags::DEMAND_EMPTY_P,
};
class vsetvl_info
{
private:
  insn_info *m_insn;
  bb_info *m_bb;
  rtx m_avl;
  rtx m_vl;
  set_info *m_avl_def;
  uint8_t m_sew;
  uint8_t m_max_sew;
  vlmul_type m_vlmul;
  uint8_t m_ratio;
  bool m_ta;
  bool m_ma;

  sew_lmul_demand_type m_sew_lmul_demand;
  policy_demand_type m_policy_demand;
  avl_demand_type m_avl_demand;

  enum class state_type
  {
    UNINITIALIZED,
    VALID,
    UNKNOWN,
    EMPTY,
  };
  state_type m_state;

  bool m_delete;
  bool m_change_vtype_only;
  insn_info *m_read_vl_insn;
  bool m_vl_used_by_non_rvv_insn;

public:
  vsetvl_info ()
    : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX),
      m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED),
      m_ratio (0), m_ta (false), m_ma (false),
      m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul),
      m_policy_demand (policy_demand_type::tail_mask_policy),
      m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED),
      m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr),
      m_vl_used_by_non_rvv_insn (false)
  {}

  vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); }

  vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); }

  void set_avl (rtx avl) { m_avl = avl; }
  void set_vl (rtx vl) { m_vl = vl; }
  void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; }
  void set_sew (uint8_t sew) { m_sew = sew; }
  void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; }
  void set_ratio (uint8_t ratio) { m_ratio = ratio; }
  void set_ta (bool ta) { m_ta = ta; }
  void set_ma (bool ma) { m_ma = ma; }
  void set_delete () { m_delete = true; }
  void set_bb (bb_info *bb) { m_bb = bb; }
  void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; }
  void set_change_vtype_only () { m_change_vtype_only = true; }
  void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; }

  rtx get_avl () const { return m_avl; }
  rtx get_vl () const { return m_vl; }
  set_info *get_avl_def () const { return m_avl_def; }
  uint8_t get_sew () const { return m_sew; }
  vlmul_type get_vlmul () const { return m_vlmul; }
  uint8_t get_ratio () const { return m_ratio; }
  bool get_ta () const { return m_ta; }
  bool get_ma () const { return m_ma; }
  insn_info *get_insn () const { return m_insn; }
  bool delete_p () const { return m_delete; }
  bb_info *get_bb () const { return m_bb; }
  uint8_t get_max_sew () const { return m_max_sew; }
  insn_info *get_read_vl_insn () const { return m_read_vl_insn; }
  bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; }

  bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); }
  bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); }
  bool has_nonvlmax_reg_avl () const
  {
    return m_avl && REG_P (m_avl) && !has_vlmax_avl ();
  }
  bool has_non_zero_avl () const
  {
    if (has_imm_avl ())
      return INTVAL (m_avl) > 0;
    return has_vlmax_avl ();
  }
  bool has_vl () const
  {
    /* The VL operand can only be either a NULL_RTX or a register.  */
    gcc_assert (!m_vl || REG_P (m_vl));
    return m_vl != NULL_RTX;
  }
  bool has_same_ratio (const vsetvl_info &other) const
  {
    return get_ratio () == other.get_ratio ();
  }

  /* The block of INSN isn't always the same as the block of the VSETVL_INFO,
     meaning we may have 'get_insn ()->bb () != get_bb ()'.

       E.g.  BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1)

     BB 2 has an empty VSETVL_INFO, whereas BB 3 has a VSETVL_INFO that
     satisfies get_insn ()->bb () == get_bb ().  In earliest fusion, we may
     fuse bb 3 and bb 2 so that the 'get_bb ()' of the BB2 VSETVL_INFO will be
     BB2 whereas its 'get_insn ()' will be the rvv insn 1 (which is located in
     BB3).  */
  bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); }
  void update_avl (const vsetvl_info &other)
  {
    m_avl = other.get_avl ();
    m_vl = other.get_vl ();
    m_avl_def = other.get_avl_def ();
  }

  bool uninit_p () const { return m_state == state_type::UNINITIALIZED; }
  bool valid_p () const { return m_state == state_type::VALID; }
  bool unknown_p () const { return m_state == state_type::UNKNOWN; }
  bool empty_p () const { return m_state == state_type::EMPTY; }
  bool change_vtype_only_p () const { return m_change_vtype_only; }

  void set_valid () { m_state = state_type::VALID; }
  void set_unknown () { m_state = state_type::UNKNOWN; }
  void set_empty () { m_state = state_type::EMPTY; }

  void set_sew_lmul_demand (sew_lmul_demand_type demand)
  {
    m_sew_lmul_demand = demand;
  }
  void set_policy_demand (policy_demand_type demand)
  {
    m_policy_demand = demand;
  }
  void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; }

  sew_lmul_demand_type get_sew_lmul_demand () const
  {
    return m_sew_lmul_demand;
  }
  policy_demand_type get_policy_demand () const { return m_policy_demand; }
  avl_demand_type get_avl_demand () const { return m_avl_demand; }
  void normalize_demand (unsigned demand_flags)
  {
    switch (demand_flags
	    & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P
	       | DEMAND_GE_SEW_P))
      {
      case (unsigned) sew_lmul_demand_type::sew_lmul:
	m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul;
	break;
      case (unsigned) sew_lmul_demand_type::ratio_only:
	m_sew_lmul_demand = sew_lmul_demand_type::ratio_only;
	break;
      case (unsigned) sew_lmul_demand_type::sew_only:
	m_sew_lmul_demand = sew_lmul_demand_type::sew_only;
	break;
      case (unsigned) sew_lmul_demand_type::ge_sew:
	m_sew_lmul_demand = sew_lmul_demand_type::ge_sew;
	break;
      case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew:
	m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew;
	break;
      default:
	gcc_unreachable ();
      }

    switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P))
      {
      case (unsigned) policy_demand_type::tail_mask_policy:
	m_policy_demand = policy_demand_type::tail_mask_policy;
	break;
      case (unsigned) policy_demand_type::tail_policy_only:
	m_policy_demand = policy_demand_type::tail_policy_only;
	break;
      case (unsigned) policy_demand_type::mask_policy_only:
	m_policy_demand = policy_demand_type::mask_policy_only;
	break;
      case (unsigned) policy_demand_type::ignore_policy:
	m_policy_demand = policy_demand_type::ignore_policy;
	break;
      default:
	gcc_unreachable ();
      }

    switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P))
      {
      case (unsigned) avl_demand_type::avl:
	m_avl_demand = avl_demand_type::avl;
	break;
      case (unsigned) avl_demand_type::non_zero_avl:
	m_avl_demand = avl_demand_type::non_zero_avl;
	break;
      case (unsigned) avl_demand_type::ignore_avl:
	m_avl_demand = avl_demand_type::ignore_avl;
	break;
      default:
	gcc_unreachable ();
      }
  }
  void parse_insn (rtx_insn *rinsn)
  {
    if (!NONDEBUG_INSN_P (rinsn))
      return;
    if (optimize == 0 && !has_vtype_op (rinsn))
      return;
    gcc_assert (!vsetvl_discard_result_insn_p (rinsn));
    set_valid ();
    extract_insn_cached (rinsn);
    m_avl = ::get_avl (rinsn);
    if (has_vlmax_avl () || vsetvl_insn_p (rinsn))
      m_vl = ::get_vl (rinsn);
    m_sew = ::get_sew (rinsn);
    m_vlmul = ::get_vlmul (rinsn);
    m_ta = tail_agnostic_p (rinsn);
    m_ma = mask_agnostic_p (rinsn);
  }
  void parse_insn (insn_info *insn)
  {
    /* The VL dest of the insn.  */
    rtx dest_vl = NULL_RTX;

    m_insn = insn;
    m_bb = insn->bb ();
    /* Return if it is a debug insn, for consistency with optimize == 0.  */
    if (insn->is_debug_insn ())
      return;

    /* We set it as unknown since we don't know what will happen in a CALL
       or an ASM.  */
    if (insn->is_call () || insn->is_asm ())
      {
	set_unknown ();
	return;
      }

    /* If this is something that updates VL/VTYPE that we don't know about,
       set the state to unknown.  */
    if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
	&& (find_access (insn->defs (), VL_REGNUM)
	    || find_access (insn->defs (), VTYPE_REGNUM)))
      {
	set_unknown ();
	return;
      }

    if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
      /* Uninitialized.  */
      return;

    set_valid ();

    m_avl = ::get_avl (insn->rtl ());
    if (m_avl)
      {
	if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
	  {
	    m_vl = ::get_vl (insn->rtl ());
	    dest_vl = m_vl;
	  }

	if (has_nonvlmax_reg_avl ())
	  m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
      }

    m_sew = ::get_sew (insn->rtl ());
    m_vlmul = ::get_vlmul (insn->rtl ());
    m_ratio = get_attr_ratio (insn->rtl ());
    /* When get_attr_ratio is invalid, this kind of instruction
       doesn't care about the ratio.  However, we still need this value
       in the demand info backward analysis.  */
    if (m_ratio == INVALID_ATTRIBUTE)
      m_ratio = calculate_ratio (m_sew, m_vlmul);
    m_ta = tail_agnostic_p (insn->rtl ());
    m_ma = mask_agnostic_p (insn->rtl ());

    /* If the merge operand is an undef value, we prefer agnostic.  */
    int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
    if (merge_op_idx != INVALID_ATTRIBUTE
	&& satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
      {
	m_ta = true;
	m_ma = true;
      }

    /* Determine the demand info of the RVV insn.  */
    m_max_sew = get_max_int_sew ();
    unsigned dflags = 0;
    if (vector_config_insn_p (insn->rtl ()))
      {
	dflags |= demand_flags::DEMAND_AVL_P;
	dflags |= demand_flags::DEMAND_RATIO_P;
      }
    else
      {
	if (has_vl_op (insn->rtl ()))
	  {
	    if (scalar_move_insn_p (insn->rtl ()))
	      {
		/* If the avl for vmv.s.x comes from the vsetvl instruction,
		   we don't know if the avl is non-zero, so it is set to
		   DEMAND_AVL_P for now.  It may be corrected to
		   DEMAND_NON_ZERO_AVL_P later when more information is
		   available.  */
		if (has_non_zero_avl ())
		  dflags |= demand_flags::DEMAND_NON_ZERO_AVL_P;
		else
		  dflags |= demand_flags::DEMAND_AVL_P;
	      }
	    else
	      dflags |= demand_flags::DEMAND_AVL_P;
	  }

	if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
	  dflags |= demand_flags::DEMAND_RATIO_P;
	else
	  {
	    if (scalar_move_insn_p (insn->rtl ()) && m_ta)
	      {
		dflags |= demand_flags::DEMAND_GE_SEW_P;
		m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV
			      ? get_max_float_sew ()
			      : get_max_int_sew ();
	      }
	    else
	      dflags |= demand_flags::DEMAND_SEW_P;

	    if (!ignore_vlmul_insn_p (insn->rtl ()))
	      dflags |= demand_flags::DEMAND_LMUL_P;
	  }

	if (!m_ta)
	  dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
	if (!m_ma)
	  dflags |= demand_flags::DEMAND_MASK_POLICY_P;
      }

    normalize_demand (dflags);
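
    /* Summary of the demand computation above (illustrative): a vsetvl insn
       itself demands only AVL and RATIO; an RVV insn with a valid RATIO
       attribute demands just the ratio, otherwise it demands an explicit SEW
       (or GE_SEW for a tail-agnostic scalar move) plus LMUL unless it
       ignores VLMUL; tail/mask policy is demanded only when not agnostic.  */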
    /* Optimize AVL from the vsetvl instruction.  */
    insn_info *def_insn = extract_single_source (get_avl_def ());
    if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
      {
	vsetvl_info def_info = vsetvl_info (def_insn);
	if ((scalar_move_insn_p (insn->rtl ())
	     || def_info.get_ratio () == get_ratio ())
	    && (def_info.has_vlmax_avl () || def_info.has_imm_avl ()))
	  {
	    update_avl (def_info);
	    if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ())
	      m_avl_demand = avl_demand_type::non_zero_avl;
	  }
      }

    /* Determine if the dest operand (vl) has been used by non-RVV
       instructions.  */
    if (dest_vl)
      {
	const hash_set<use_info *> vl_uses
	  = get_all_real_uses (get_insn (), REGNO (dest_vl));
	for (use_info *use : vl_uses)
	  {
	    gcc_assert (use->insn ()->is_real ());
	    rtx_insn *rinsn = use->insn ()->rtl ();
	    if (!has_vl_op (rinsn)
		|| count_regno_occurrences (rinsn, REGNO (dest_vl)) != 1)
	      {
		m_vl_used_by_non_rvv_insn = true;
		break;
	      }
	    rtx avl = ::get_avl (rinsn);
	    if (!avl || !REG_P (avl) || REGNO (dest_vl) != REGNO (avl))
	      {
		m_vl_used_by_non_rvv_insn = true;
		break;
	      }
	  }
      }

    /* Collect the read vl insn for the fault-only-first rvv loads.  */
    if (fault_first_load_p (insn->rtl ()))
      {
	for (insn_info *i = insn->next_nondebug_insn ();
	     i->bb () == insn->bb (); i = i->next_nondebug_insn ())
	  {
	    if (find_access (i->defs (), VL_REGNUM))
	      break;
	    if (i->rtl () && read_vl_insn_p (i->rtl ()))
	      {
		m_read_vl_insn = i;
		break;
	      }
	  }
      }
  }
  /* Returns the corresponding vsetvl rtx pat.  */
  rtx get_vsetvl_pat (bool ignore_vl = false) const
  {
    rtx avl = get_avl ();
    /* If optimize == 0 and the instruction is vmv.x.s/vfmv.f.s,
       set the value of avl to (const_int 0) so that the VSETVL PASS will
       insert vsetvl correctly.  */
    if (!get_avl ())
      avl = GEN_INT (0);
    rtx sew = gen_int_mode (get_sew (), Pmode);
    rtx vlmul = gen_int_mode (get_vlmul (), Pmode);
    rtx ta = gen_int_mode (get_ta (), Pmode);
    rtx ma = gen_int_mode (get_ma (), Pmode);

    if (change_vtype_only_p ())
      return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
    else if (has_vl () && !ignore_vl)
      return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma);
    else
      return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
  }
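
  /* The three cases above correspond to the three assembly forms of the
     configuration instruction (registers here are illustrative):

	vsetvli	zero,zero,e32,m1,ta,ma	;; change vtype only, vl preserved
	vsetvli	a5,a4,e32,m1,ta,ma	;; write the new vl to a5
	vsetvli	zero,a4,e32,m1,ta,ma	;; discard the vl result

     matching gen_vsetvl_vtype_change_only, gen_vsetvl and
     gen_vsetvl_discard_result respectively.  */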
  /* Return true if the non-AVL operands of THIS will be modified
     if we fuse the VL modification from OTHER into THIS.  */
  bool vl_modify_non_avl_op_p (const vsetvl_info &other) const
  {
    /* We don't need to worry about any operands of THIS being
       modified by the OTHER vsetvl since the OTHER vsetvl doesn't
       modify any operand.  */
    if (!other.has_vl ())
      return false;

    /* THIS VL operand always preempts the OTHER VL operand.  */
    if (this->has_vl ())
      return false;

    /* If THIS has a non-IMM AVL and THIS is AVL compatible with
       OTHER, the AVL value of THIS is the same as the VL value of OTHER.  */
    if (!this->has_imm_avl ())
      return false;
    return find_access (this->get_insn ()->uses (), REGNO (other.get_vl ()));
  }

  bool operator== (const vsetvl_info &other) const
  {
    gcc_assert (!uninit_p () && !other.uninit_p ()
		&& "Uninitialization should not happen");

    if (empty_p ())
      return other.empty_p ();
    if (unknown_p ())
      return other.unknown_p ();

    return get_insn () == other.get_insn () && get_bb () == other.get_bb ()
	   && get_avl () == other.get_avl () && get_vl () == other.get_vl ()
	   && get_avl_def () == other.get_avl_def ()
	   && get_sew () == other.get_sew ()
	   && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta ()
	   && get_ma () == other.get_ma ()
	   && get_avl_demand () == other.get_avl_demand ()
	   && get_sew_lmul_demand () == other.get_sew_lmul_demand ()
	   && get_policy_demand () == other.get_policy_demand ();
  }
  void dump (FILE *file, const char *indent = "") const
  {
    if (uninit_p ())
      {
	fprintf (file, "UNINITIALIZED.\n");
	return;
      }
    else if (unknown_p ())
      {
	fprintf (file, "UNKNOWN.\n");
	return;
      }
    else if (empty_p ())
      {
	fprintf (file, "EMPTY.\n");
	return;
      }
    else if (valid_p ())
      fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (),
	       get_bb ()->index (), delete_p () ? " (deleted)" : "");
    else
      gcc_unreachable ();

    fprintf (file, "%sDemand fields:", indent);
    if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul)
      fprintf (file, " demand_sew_lmul");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only)
      fprintf (file, " demand_ratio_only");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only)
      fprintf (file, " demand_sew_only");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew)
      fprintf (file, " demand_ge_sew");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew)
      fprintf (file, " demand_ratio_and_ge_sew");

    if (m_policy_demand == policy_demand_type::tail_mask_policy)
      fprintf (file, " demand_tail_mask_policy");
    else if (m_policy_demand == policy_demand_type::tail_policy_only)
      fprintf (file, " demand_tail_policy_only");
    else if (m_policy_demand == policy_demand_type::mask_policy_only)
      fprintf (file, " demand_mask_policy_only");

    if (m_avl_demand == avl_demand_type::avl)
      fprintf (file, " demand_avl");
    else if (m_avl_demand == avl_demand_type::non_zero_avl)
      fprintf (file, " demand_non_zero_avl");
    fprintf (file, "\n");

    fprintf (file, "%sSEW=%d, ", indent, get_sew ());
    fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ()));
    fprintf (file, "RATIO=%d, ", get_ratio ());
    fprintf (file, "MAX_SEW=%d\n", get_max_sew ());

    fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ()));
    fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ()));

    fprintf (file, "%sAVL=", indent);
    print_rtl_single (file, get_avl ());
    fprintf (file, "%sVL=", indent);
    print_rtl_single (file, get_vl ());
    if (change_vtype_only_p ())
      fprintf (file, "%schange vtype only\n", indent);
    if (get_read_vl_insn ())
      fprintf (file, "%sread_vl_insn: insn %u\n", indent,
	       get_read_vl_insn ()->uid ());
    if (vl_used_by_non_rvv_insn_p ())
      fprintf (file, "%suse_by_non_rvv_insn=true\n", indent);
  }
};
class vsetvl_block_info
{
public:
  /* The static execute probability of the demand info.  */
  profile_probability probability;

  auto_vec<vsetvl_info> local_infos;
  vsetvl_info global_info;
  bb_info *bb;

  vsetvl_block_info () : bb (nullptr)
  {
    local_infos.safe_grow_cleared (0);
    global_info.set_empty ();
  }
  vsetvl_block_info (const vsetvl_block_info &other)
    : probability (other.probability), local_infos (other.local_infos.copy ()),
      global_info (other.global_info), bb (other.bb)
  {}

  vsetvl_info &get_entry_info ()
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info : local_infos[0];
  }
  vsetvl_info &get_exit_info ()
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info
				   : local_infos[local_infos.length () - 1];
  }
  const vsetvl_info &get_entry_info () const
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info : local_infos[0];
  }
  const vsetvl_info &get_exit_info () const
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info
				   : local_infos[local_infos.length () - 1];
  }

  bool empty_p () const { return local_infos.is_empty () && !has_info (); }
  bool has_info () const { return !global_info.empty_p (); }
  void set_info (const vsetvl_info &info)
  {
    gcc_assert (local_infos.is_empty ());
    global_info = info;
    global_info.set_bb (bb);
  }
  void set_empty_info () { global_info.set_empty (); }
};
/* The demand system is the RVV-based VSETVL info analysis tools wrapper.
   It defines compatibility rules for SEW/LMUL, POLICY and AVL.
   Also, it provides 3 interfaces, available_p, compatible_p and
   merge, for the VSETVL PASS analysis and optimization.

     - available_p: Determine whether the next info can get the
       available VSETVL status from the previous info.
       e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16).
       Since the bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3
       demand, the VSETVL instruction in bb 3 can be elided.
       available_p (previous, next) is true in such a situation.
     - compatible_p: Determine whether prev_info is compatible with next_info
       so that we can have a new merged info that is available to both of
       them.
     - merge: Merge the stricter demand information from
       next_info into prev_info so that prev_info becomes available to
       next_info.  */
class demand_system
{
private:
  /* predictors.  */

  inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED,
			   const vsetvl_info &next ATTRIBUTE_UNUSED)
  {
    return true;
  }
  inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED,
			    const vsetvl_info &next ATTRIBUTE_UNUSED)
  {
    return false;
  }

  /* predictors for sew and lmul */

  inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_vlmul () == next.get_vlmul ();
  }
  inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_sew () == next.get_sew ();
  }
  inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return lmul_eq_p (prev, next) && sew_eq_p (prev, next);
  }
  inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_sew () == next.get_sew ()
	   || (next.get_ta () && prev.get_sew () > next.get_sew ());
  }
  inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_sew () == next.get_sew ()
	   || (prev.get_ta () && prev.get_sew () < next.get_sew ());
  }
  inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
					  const vsetvl_info &next)
  {
    return prev.get_sew () <= next.get_max_sew ();
  }
  inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
					  const vsetvl_info &next)
  {
    return next.get_sew () <= prev.get_max_sew ();
  }
  inline bool max_sew_overlap_p (const vsetvl_info &prev,
				 const vsetvl_info &next)
  {
    return !(prev.get_sew () > next.get_max_sew ()
	     || next.get_sew () > prev.get_max_sew ());
  }
  inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.has_same_ratio (next);
  }
  inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
					       const vsetvl_info &next)
  {
    return prev.get_ratio () >= (next.get_sew () / 8);
  }
  inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
					       const vsetvl_info &next)
  {
    return next.get_ratio () >= (prev.get_sew () / 8);
  }
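
  /* Note on the two predicates above (RATIO = SEW / LMUL): since the largest
     LMUL is m8, the smallest ratio a given SEW can produce is SEW / 8.
     Requiring ratio >= sew / 8 therefore checks that LMUL = sew / ratio does
     not exceed m8, i.e. that a legal LMUL exists for that SEW at that
     ratio.  */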
  inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev,
				     const vsetvl_info &next)
  {
    return sew_ge_p (prev, next) && ratio_eq_p (prev, next);
  }
  inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
						     const vsetvl_info &next)
  {
    return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next);
  }
  inline bool
  sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p (
    const vsetvl_info &prev, const vsetvl_info &next)
  {
    return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
	   && next_ratio_valid_for_prev_sew_p (prev, next);
  }
  inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
						     const vsetvl_info &next)
  {
    return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next);
  }
  inline bool
  max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
						       const vsetvl_info &next)
  {
    return next_ratio_valid_for_prev_sew_p (prev, next)
	   && max_sew_overlap_p (prev, next);
  }
  inline bool
  sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev,
						      const vsetvl_info &next)
  {
    return sew_le_p (prev, next) && ratio_eq_p (prev, next)
	   && next_sew_le_prev_max_sew_p (prev, next);
  }
  inline bool
  max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
						       const vsetvl_info &next)
  {
    return prev_ratio_valid_for_next_sew_p (prev, next)
	   && max_sew_overlap_p (prev, next);
  }
  inline bool
  sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p (
    const vsetvl_info &prev, const vsetvl_info &next)
  {
    return sew_le_p (prev, next)
	   && prev_ratio_valid_for_next_sew_p (prev, next)
	   && next_sew_le_prev_max_sew_p (prev, next);
  }
  inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev,
					      const vsetvl_info &next)
  {
    return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next);
  }

  /* predictors for tail and mask policy */

  inline bool tail_policy_eq_p (const vsetvl_info &prev,
				const vsetvl_info &next)
  {
    return prev.get_ta () == next.get_ta ();
  }
  inline bool mask_policy_eq_p (const vsetvl_info &prev,
				const vsetvl_info &next)
  {
    return prev.get_ma () == next.get_ma ();
  }
  inline bool tail_mask_policy_eq_p (const vsetvl_info &prev,
				     const vsetvl_info &next)
  {
    return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next);
  }
  /* predictors for avl */

  inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
  {
    if (info.has_vl ())
      {
	if (find_access (i->defs (), REGNO (info.get_vl ())))
	  return true;
	if (find_access (i->uses (), REGNO (info.get_vl ())))
	  {
	    resource_info resource = full_register (REGNO (info.get_vl ()));
	    def_lookup dl1 = crtl->ssa->find_def (resource, i);
	    def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
	    if (dl1.matching_set () || dl2.matching_set ())
	      return true;
	    /* If their VLs are coming from the same def, we still want to
	       fuse their VSETVL demand info to gain better performance.  */
	    return dl1.prev_def (i) != dl2.prev_def (i);
	  }
      }
    return false;
  }
  inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
  {
    return info.has_nonvlmax_reg_avl ()
	   && find_access (i->defs (), REGNO (info.get_avl ()));
  }

  inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn,
				    unsigned regno)
  {
    gcc_assert (prev_insn->compare_with (curr_insn) < 0);
    for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn;
	 i = i->prev_nondebug_insn ())
      {
	// no def of regno
	if (find_access (i->defs (), regno))
	  return true;
      }
    return false;
  }

  inline bool reg_avl_equal_p (const vsetvl_info &prev,
			       const vsetvl_info &next)
  {
    if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ())
      return false;

    if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ()))
      return true;

    if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ()))
      return false;

    insn_info *prev_insn = prev.get_insn ();
    if (prev.get_bb () != prev_insn->bb ())
      prev_insn = prev.get_bb ()->end_insn ();

    insn_info *next_insn = next.get_insn ();
    if (next.get_bb () != next_insn->bb ())
      next_insn = next.get_bb ()->end_insn ();

    return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
  }
  inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());

    if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
      return false;

    if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def ()
	&& next.get_avl_def ()->insn () == prev.get_insn ())
      return true;

    if (prev.get_read_vl_insn ())
      {
	if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ())
	  return false;
	insn_info *avl_def_insn = extract_single_source (next.get_avl_def ());
	return avl_def_insn == prev.get_read_vl_insn ();
      }

    if (prev == next && prev.has_nonvlmax_reg_avl ())
      {
	insn_info *insn = prev.get_insn ();
	bb_info *bb = insn->bb ();
	for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
	     i = i->next_nondebug_insn ())
	  if (find_access (i->defs (), REGNO (prev.get_avl ())))
	    return false;
      }

    if (prev.has_vlmax_avl () && next.has_vlmax_avl ())
      return true;
    else if (prev.has_imm_avl () && next.has_imm_avl ())
      return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ());
    else if (prev.has_vl () && next.has_nonvlmax_reg_avl ()
	     && REGNO (prev.get_vl ()) == REGNO (next.get_avl ()))
      {
	insn_info *prev_insn = prev.insn_inside_bb_p ()
				 ? prev.get_insn ()
				 : prev.get_bb ()->end_insn ();

	insn_info *next_insn = next.insn_inside_bb_p ()
				 ? next.get_insn ()
				 : next.get_bb ()->end_insn ();
	return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
      }
    else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ())
      return reg_avl_equal_p (prev, next);

    return false;
  }
  inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev,
						const vsetvl_info &next)
  {
    return avl_equal_p (prev, next) || prev.has_non_zero_avl ();
  }

  inline bool can_use_next_avl_p (const vsetvl_info &prev,
				  const vsetvl_info &next)
  {
    /* Forbid the AVL/VL propagation if the VL of NEXT is used
       by non-RVV instructions.  This is because:

	 bb 2:
	   PREV: scalar move (no AVL)
	 bb 3:
	   NEXT: vsetvl a5(VL), a4(AVL) ...
	   branch a5,zero

       Since a user vsetvl instruction has no side effects and should have
       been placed in the correct and optimal location of the program by the
       previous PASS, it is unreasonable for the VSETVL PASS to try to move
       it somewhere else if it is used by non-RVV instructions.

       Note: We only forbid the cases where VL is used by the following
       non-RVV instructions, which would cause issues.  We don't forbid
       other cases since they won't cause correctness issues and we still
       want more demand info to be fused backward.  The later LCM algorithm
       should know the optimal location of the vsetvl.  */
    if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
      return false;

    if (!next.has_nonvlmax_reg_avl () && !next.has_vl ())
      return true;

    insn_info *prev_insn = prev.get_insn ();
    if (prev.get_bb () != prev_insn->bb ())
      prev_insn = prev.get_bb ()->end_insn ();

    insn_info *next_insn = next.get_insn ();
    if (next.get_bb () != next_insn->bb ())
      next_insn = next.get_bb ()->end_insn ();

    return avl_vl_unmodified_between_p (prev_insn, next_insn, next);
  }

  inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p (
    const vsetvl_info &prev, const vsetvl_info &next)
  {
    return avl_equal_p (prev, next)
	   || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next));
  }
  /* modifiers */

  inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED,
		   const vsetvl_info &next ATTRIBUTE_UNUSED)
  {}

  /* modifiers for sew and lmul */

  inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
    prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ()));
  }
  inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
    prev.set_sew (next.get_sew ());
    use_min_of_max_sew (prev, next);
  }
  inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
    int max_sew = MAX (prev.get_sew (), next.get_sew ());
    prev.set_sew (max_sew);
    use_min_of_max_sew (prev, next);
  }
  inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
  {
    use_next_sew (prev, next);
    prev.set_vlmul (next.get_vlmul ());
    prev.set_ratio (next.get_ratio ());
  }
  inline void use_next_sew_with_prev_ratio (vsetvl_info &prev,
					    const vsetvl_info &next)
  {
    use_next_sew (prev, next);
    prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ()));
  }
  inline void modify_lmul_with_next_ratio (vsetvl_info &prev,
					   const vsetvl_info &next)
  {
    prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
    prev.set_ratio (next.get_ratio ());
  }

  inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
						    const vsetvl_info &next)
  {
    prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
    use_max_sew (prev, next);
    prev.set_ratio (next.get_ratio ());
  }

  inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev,
						    const vsetvl_info &next)
  {
    int max_sew = MAX (prev.get_sew (), next.get_sew ());
    prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ()));
    prev.set_sew (max_sew);
  }

  /* modifiers for tail and mask policy */

  inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next)
  {
    if (!next.get_ta ())
      prev.set_ta (next.get_ta ());
  }
  inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
  {
    if (!next.get_ma ())
      prev.set_ma (next.get_ma ());
  }
  inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
  {
    use_tail_policy (prev, next);
    use_mask_policy (prev, next);
  }

  /* modifiers for avl */

  inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (can_use_next_avl_p (prev, next));
    prev.update_avl (next);
  }

  inline void use_next_avl_when_not_equal (vsetvl_info &prev,
					   const vsetvl_info &next)
  {
    if (avl_equal_p (prev, next))
      return;
    gcc_assert (next.has_non_zero_avl ());
    use_next_avl (prev, next);
  }
1790 public:
1791 /* Can we move vsetvl info between prev_insn and next_insn safe? */
1792 bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn,
1793 const vsetvl_info &info,
1794 bool ignore_vl = false)
1796 gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ())
1797 || (info.has_nonvlmax_reg_avl () || info.has_vl ()));
1799 gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ());
1800 if (prev_insn->bb () == next_insn->bb ()
1801 && prev_insn->compare_with (next_insn) < 0)
1803 for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
1804 i = i->prev_nondebug_insn ())
1806 // no def and use of vl
1807 if (!ignore_vl && modify_or_use_vl_p (i, info))
1808 return false;
1810 // no def of avl
1811 if (modify_avl_p (i, info))
1812 return false;
1814 return true;
1816 else
1818 basic_block prev_cfg_bb = prev_insn->bb ()->cfg_bb ();
1819 if (!ignore_vl && info.has_vl ())
1821 bitmap live_out = df_get_live_out (prev_cfg_bb);
1822 if (bitmap_bit_p (live_out, REGNO (info.get_vl ())))
1823 return false;
1826 /* Find the set_info at the locations of PREV_INSN and NEXT_INSN;
1827 return false if those two set_infos are different.
1829 PREV_INSN --- multiple nested blocks --- NEXT_INSN.
1831 Return false if there are any modifications of AVL inside those
1832 multiple nested blocks. */
1833 if (info.has_nonvlmax_reg_avl ())
1835 resource_info resource = full_register (REGNO (info.get_avl ()));
1836 def_lookup dl1 = crtl->ssa->find_def (resource, prev_insn);
1837 def_lookup dl2 = crtl->ssa->find_def (resource, next_insn);
1838 if (dl2.matching_set ())
1839 return false;
1841 auto is_phi_or_real
1842 = [&] (insn_info *h) { return h->is_real () || h->is_phi (); };
1844 def_info *def1 = dl1.matching_set_or_last_def_of_prev_group ();
1845 def_info *def2 = dl2.prev_def (next_insn);
1846 set_info *set1 = safe_dyn_cast<set_info *> (def1);
1847 set_info *set2 = safe_dyn_cast<set_info *> (def2);
1848 if (!set1 || !set2)
1849 return false;
1851 auto is_same_ultimate_def = [&] (set_info *s1, set_info *s2) {
1852 return s1->insn ()->is_phi () && s2->insn ()->is_phi ()
1853 && look_through_degenerate_phi (s1)
1854 == look_through_degenerate_phi (s2);
1857 if (set1 != set2 && !is_same_ultimate_def (set1, set2))
1859 if (!is_phi_or_real (set1->insn ())
1860 || !is_phi_or_real (set2->insn ()))
1861 return false;
1863 if (set1->insn ()->is_real () && set2->insn ()->is_phi ())
1865 hash_set<set_info *> sets
1866 = get_all_sets (set2, true, false, true);
1867 if (!sets.contains (set1))
1868 return false;
1870 else
1872 insn_info *def_insn1 = extract_single_source (set1);
1873 insn_info *def_insn2 = extract_single_source (set2);
1874 if (!def_insn1 || !def_insn2 || def_insn1 != def_insn2)
1875 return false;
1880 for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn ();
1881 i = i->prev_nondebug_insn ())
1883 // no def or use of vl
1884 if (!ignore_vl && modify_or_use_vl_p (i, info))
1885 return false;
1887 // no def of avl
1888 if (modify_avl_p (i, info))
1889 return false;
1892 for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn;
1893 i = i->prev_nondebug_insn ())
1895 // no def or use of vl
1896 if (!ignore_vl && modify_or_use_vl_p (i, info))
1897 return false;
1899 // no def of avl
1900 if (modify_avl_p (i, info))
1901 return false;
1904 return true;
1907 bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1909 gcc_assert (prev.valid_p () && next.valid_p ());
1910 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1911 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1912 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1913 AVAILABLE_P, FUSE) \
1914 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1915 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1916 return COMPATIBLE_P (prev, next);
1918 #include "riscv-vsetvl.def"
1920 gcc_unreachable ();
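  /* Editorial sketch of the rule table that expands above (layout is
     illustrative; see riscv-vsetvl.def for the actual rows):

	DEF_SEW_LMUL_RULE (sew_lmul, sew_lmul, sew_lmul,
			   sew_lmul_equal_p, sew_lmul_equal_p, nop)

     i.e. when both infos demand an exact SEW and LMUL, they are
     compatible (and available) iff the pairs are equal, and fusing them
     is a no-op.  */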
1923 bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1925 gcc_assert (prev.valid_p () && next.valid_p ());
1926 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1927 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1928 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1929 AVAILABLE_P, FUSE) \
1930 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1931 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1932 return AVAILABLE_P (prev, next);
1934 #include "riscv-vsetvl.def"
1936 gcc_unreachable ();
1939 void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
1941 gcc_assert (prev.valid_p () && next.valid_p ());
1942 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1943 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1944 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1945 AVAILABLE_P, FUSE) \
1946 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1947 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1949 gcc_assert (COMPATIBLE_P (prev, next)); \
1950 FUSE (prev, next); \
1951 prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \
1952 return; \
1955 #include "riscv-vsetvl.def"
1957 gcc_unreachable ();
1960 bool policy_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1962 gcc_assert (prev.valid_p () && next.valid_p ());
1963 policy_demand_type prev_flags = prev.get_policy_demand ();
1964 policy_demand_type next_flags = next.get_policy_demand ();
1965 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1966 AVAILABLE_P, FUSE) \
1967 if (prev_flags == policy_demand_type::PREV_FLAGS \
1968 && next_flags == policy_demand_type::NEXT_FLAGS) \
1969 return COMPATIBLE_P (prev, next);
1971 #include "riscv-vsetvl.def"
1973 gcc_unreachable ();
1976 bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1978 gcc_assert (prev.valid_p () && next.valid_p ());
1979 policy_demand_type prev_flags = prev.get_policy_demand ();
1980 policy_demand_type next_flags = next.get_policy_demand ();
1981 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1982 AVAILABLE_P, FUSE) \
1983 if (prev_flags == policy_demand_type::PREV_FLAGS \
1984 && next_flags == policy_demand_type::NEXT_FLAGS) \
1985 return AVAILABLE_P (prev, next);
1987 #include "riscv-vsetvl.def"
1989 gcc_unreachable ();
1992 void merge_policy (vsetvl_info &prev, const vsetvl_info &next)
1994 gcc_assert (prev.valid_p () && next.valid_p ());
1995 policy_demand_type prev_flags = prev.get_policy_demand ();
1996 policy_demand_type next_flags = next.get_policy_demand ();
1997 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1998 AVAILABLE_P, FUSE) \
1999 if (prev_flags == policy_demand_type::PREV_FLAGS \
2000 && next_flags == policy_demand_type::NEXT_FLAGS) \
2002 gcc_assert (COMPATIBLE_P (prev, next)); \
2003 FUSE (prev, next); \
2004 prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \
2005 return; \
2008 #include "riscv-vsetvl.def"
2010 gcc_unreachable ();
2013 bool vl_not_in_conflict_p (const vsetvl_info &prev, const vsetvl_info &next)
2015 /* We don't fuse the following case:
2017 li a5, -1
2018 vmv.s.x v0, a5 -- PREV
2019 vsetvli a5, ... -- NEXT
2021 NEXT clobbers a5, which PREV reads as a normal (non-AVL)
2022 operand, so don't fuse NEXT into PREV.
2023 return !prev.vl_modify_non_avl_op_p (next)
2024 && !next.vl_modify_non_avl_op_p (prev);
2027 bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
2029 gcc_assert (prev.valid_p () && next.valid_p ());
2030 avl_demand_type prev_flags = prev.get_avl_demand ();
2031 avl_demand_type next_flags = next.get_avl_demand ();
2032 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
2033 AVAILABLE_P, FUSE) \
2034 if (prev_flags == avl_demand_type::PREV_FLAGS \
2035 && next_flags == avl_demand_type::NEXT_FLAGS) \
2036 return COMPATIBLE_P (prev, next);
2038 #include "riscv-vsetvl.def"
2040 gcc_unreachable ();
2043 bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next)
2045 gcc_assert (prev.valid_p () && next.valid_p ());
2046 avl_demand_type prev_flags = prev.get_avl_demand ();
2047 avl_demand_type next_flags = next.get_avl_demand ();
2048 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
2049 AVAILABLE_P, FUSE) \
2050 if (prev_flags == avl_demand_type::PREV_FLAGS \
2051 && next_flags == avl_demand_type::NEXT_FLAGS) \
2052 return AVAILABLE_P (prev, next);
2054 #include "riscv-vsetvl.def"
2056 gcc_unreachable ();
2059 void merge_avl (vsetvl_info &prev, const vsetvl_info &next)
2061 gcc_assert (prev.valid_p () && next.valid_p ());
2062 avl_demand_type prev_flags = prev.get_avl_demand ();
2063 avl_demand_type next_flags = next.get_avl_demand ();
2064 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
2065 AVAILABLE_P, FUSE) \
2066 if (prev_flags == avl_demand_type::PREV_FLAGS \
2067 && next_flags == avl_demand_type::NEXT_FLAGS) \
2069 gcc_assert (COMPATIBLE_P (prev, next)); \
2070 FUSE (prev, next); \
2071 prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \
2072 return; \
2075 #include "riscv-vsetvl.def"
2077 gcc_unreachable ();
2080 bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
2082 bool compatible_p = sew_lmul_compatible_p (prev, next)
2083 && policy_compatible_p (prev, next)
2084 && avl_compatible_p (prev, next)
2085 && vl_not_in_conflict_p (prev, next);
2086 return compatible_p;
2089 bool available_p (const vsetvl_info &prev, const vsetvl_info &next)
2091 bool available_p = sew_lmul_available_p (prev, next)
2092 && policy_available_p (prev, next)
2093 && avl_available_p (prev, next)
2094 && vl_not_in_conflict_p (prev, next);
2095 gcc_assert (!available_p || compatible_p (prev, next));
2096 return available_p;
2099 void merge (vsetvl_info &prev, const vsetvl_info &next)
2101 gcc_assert (compatible_p (prev, next));
2102 merge_sew_lmul (prev, next);
2103 merge_policy (prev, next);
2104 merge_avl (prev, next);
2105 gcc_assert (available_p (prev, next));
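  /* Editorial summary of the three predicate families above:
     compatible_p means the demands of prev and next can be merged into a
     single vsetvl info; available_p additionally means prev already
     satisfies next, so next can simply be dropped; merge fuses next's
     demands into prev and must leave prev available to next.  */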
2110 class pre_vsetvl
2112 private:
2113 demand_system m_dem;
2114 auto_vec<vsetvl_block_info> m_vector_block_infos;
2116 /* Data for avl reaching definition.  */
2117 sbitmap *m_reg_def_loc;
2119 /* Data for vsetvl info reaching definition.  */
2120 vsetvl_info m_unknown_info;
2121 auto_vec<vsetvl_info *> m_vsetvl_def_exprs;
2122 sbitmap *m_vsetvl_def_in;
2123 sbitmap *m_vsetvl_def_out;
2125 /* Data for LCM.  */
2126 auto_vec<vsetvl_info *> m_exprs;
2127 sbitmap *m_avloc;
2128 sbitmap *m_avin;
2129 sbitmap *m_avout;
2130 sbitmap *m_kill;
2131 sbitmap *m_antloc;
2132 sbitmap *m_transp;
2133 sbitmap *m_insert;
2134 sbitmap *m_del;
2135 struct edge_list *m_edges;
2137 auto_vec<vsetvl_info> m_delete_list;
2139 vsetvl_block_info &get_block_info (const bb_info *bb)
2141 return m_vector_block_infos[bb->index ()];
2143 const vsetvl_block_info &get_block_info (const basic_block bb) const
2145 return m_vector_block_infos[bb->index];
2148 vsetvl_block_info &get_block_info (const basic_block bb)
2150 return m_vector_block_infos[bb->index];
2153 void add_expr (auto_vec<vsetvl_info *> &m_exprs, vsetvl_info &info)
2155 for (vsetvl_info *item : m_exprs)
2157 if (*item == info)
2158 return;
2160 m_exprs.safe_push (&info);
2163 unsigned get_expr_index (auto_vec<vsetvl_info *> &m_exprs,
2164 const vsetvl_info &info)
2166 for (size_t i = 0; i < m_exprs.length (); i += 1)
2168 if (*m_exprs[i] == info)
2169 return i;
2171 gcc_unreachable ();
2174 bool anticipated_exp_p (const vsetvl_info &header_info)
2176 if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ())
2177 return true;
2179 bb_info *bb = header_info.get_bb ();
2180 insn_info *prev_insn = bb->head_insn ();
2181 insn_info *next_insn = header_info.insn_inside_bb_p ()
2182 ? header_info.get_insn ()
2183 : header_info.get_bb ()->end_insn ();
2185 return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn,
2186 header_info);
2189 bool available_exp_p (const vsetvl_info &prev_info,
2190 const vsetvl_info &next_info)
2192 return m_dem.available_p (prev_info, next_info);
2195 void compute_probabilities ()
2197 edge e;
2198 edge_iterator ei;
2200 for (const bb_info *bb : crtl->ssa->bbs ())
2202 basic_block cfg_bb = bb->cfg_bb ();
2203 auto &curr_prob = get_block_info (cfg_bb).probability;
2205 /* GCC assumes the entry block (bb 0) is always executed,
2206 so set its probability to "always". */
2207 if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2208 curr_prob = profile_probability::always ();
2209 /* The exit block (bb 1) doesn't need to be processed. */
2210 if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2211 continue;
2213 gcc_assert (curr_prob.initialized_p ());
2214 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2216 auto &new_prob = get_block_info (e->dest).probability;
2217 /* Normally, the edge probability should be initialized.
2218 However, some special testing code written in GIMPLE IR
2219 style leaves the edge probability uninitialized; we
2220 conservatively set it to "never" so that it will not
2221 affect PRE (Phase 3 and Phase 4). */
2222 if (!e->probability.initialized_p ())
2223 new_prob = profile_probability::never ();
2224 else if (!new_prob.initialized_p ())
2225 new_prob = curr_prob * e->probability;
2226 else if (new_prob == profile_probability::always ())
2227 continue;
2228 else
2229 new_prob += curr_prob * e->probability;
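	/* Editorial note: this loop is a forward propagation over the CFG,
	   i.e. probability (dest) accumulates probability (src) *
	   probability (edge) over all incoming edges, saturating once a
	   block has already reached "always".  */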
2234 void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info)
2236 rtx pat = info.get_vsetvl_pat ();
2237 rtx_insn *rinsn = info.get_insn ()->rtl ();
2239 if (emit_type == EMIT_DIRECT)
2241 emit_insn (pat);
2242 if (dump_file)
2244 fprintf (dump_file, " Insert vsetvl insn %d:\n",
2245 INSN_UID (get_last_insn ()));
2246 print_rtl_single (dump_file, get_last_insn ());
2249 else if (emit_type == EMIT_BEFORE)
2251 emit_insn_before (pat, rinsn);
2252 if (dump_file)
2254 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
2255 INSN_UID (rinsn));
2256 print_rtl_single (dump_file, PREV_INSN (rinsn));
2259 else
2261 emit_insn_after (pat, rinsn);
2262 if (dump_file)
2264 fprintf (dump_file, " Insert vsetvl insn after insn %d:\n",
2265 INSN_UID (rinsn));
2266 print_rtl_single (dump_file, NEXT_INSN (rinsn));
2271 void change_vsetvl_insn (const vsetvl_info &info)
2273 rtx_insn *rinsn = info.get_insn ()->rtl ();
2274 rtx new_pat = info.get_vsetvl_pat ();
2276 if (dump_file)
2278 fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn));
2279 print_rtl_single (dump_file, rinsn);
2282 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
2284 if (dump_file)
2286 fprintf (dump_file, "\n to:\n");
2287 print_rtl_single (dump_file, rinsn);
2291 void remove_vsetvl_insn (rtx_insn *rinsn)
2293 if (dump_file)
2295 fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn));
2296 print_rtl_single (dump_file, rinsn);
2298 if (in_sequence_p ())
2299 remove_insn (rinsn);
2300 else
2301 delete_insn (rinsn);
2304 bool successors_probability_equal_p (const basic_block cfg_bb) const
2306 edge e;
2307 edge_iterator ei;
2308 profile_probability prob = profile_probability::uninitialized ();
2309 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2311 if (prob == profile_probability::uninitialized ())
2312 prob = m_vector_block_infos[e->dest->index].probability;
2313 else if (prob == m_vector_block_infos[e->dest->index].probability)
2314 continue;
2315 else
2316 /* We pick the highest probability among those incompatible VSETVL
2317 infos. When all incompatible VSETVL infos have the same
2318 probability, we don't pick any of them. */
2319 return false;
2321 return true;
2324 bool has_compatible_reaching_vsetvl_p (vsetvl_info info)
2326 unsigned int index;
2327 sbitmap_iterator sbi;
2328 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[info.get_bb ()->index ()], 0,
2329 index, sbi)
2331 const auto prev_info = *m_vsetvl_def_exprs[index];
2332 if (!prev_info.valid_p ())
2333 continue;
2334 if (m_dem.compatible_p (prev_info, info))
2335 return true;
2337 return false;
2340 bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
2342 gcc_assert (
2343 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
2345 unsigned expr_index;
2346 sbitmap_iterator sbi;
2347 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()], 0,
2348 expr_index, sbi)
2350 const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
2351 if (!prev_info.valid_p ()
2352 || !m_dem.avl_available_p (prev_info, curr_info)
2353 || prev_info.get_ratio () != curr_info.get_ratio ())
2354 return false;
2357 return true;
2360 public:
2361 pre_vsetvl ()
2362 : m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr),
2363 m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr),
2364 m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr)
2366 /* Initialization of RTL_SSA. */
2367 calculate_dominance_info (CDI_DOMINATORS);
2368 loop_optimizer_init (LOOPS_NORMAL);
2369 /* Create FAKE edges for infinite loops. */
2370 connect_infinite_loops_to_exit ();
2371 df_analyze ();
2372 crtl->ssa = new function_info (cfun);
2373 m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2374 compute_probabilities ();
2375 m_unknown_info.set_unknown ();
2378 void finish ()
2380 free_dominance_info (CDI_DOMINATORS);
2381 loop_optimizer_finalize ();
2382 if (crtl->ssa->perform_pending_updates ())
2383 cleanup_cfg (0);
2384 delete crtl->ssa;
2385 crtl->ssa = nullptr;
2387 if (m_reg_def_loc)
2388 sbitmap_vector_free (m_reg_def_loc);
2390 if (m_vsetvl_def_in)
2391 sbitmap_vector_free (m_vsetvl_def_in);
2392 if (m_vsetvl_def_out)
2393 sbitmap_vector_free (m_vsetvl_def_out);
2395 if (m_avloc)
2396 sbitmap_vector_free (m_avloc);
2397 if (m_kill)
2398 sbitmap_vector_free (m_kill);
2399 if (m_antloc)
2400 sbitmap_vector_free (m_antloc);
2401 if (m_transp)
2402 sbitmap_vector_free (m_transp);
2403 if (m_insert)
2404 sbitmap_vector_free (m_insert);
2405 if (m_del)
2406 sbitmap_vector_free (m_del);
2407 if (m_avin)
2408 sbitmap_vector_free (m_avin);
2409 if (m_avout)
2410 sbitmap_vector_free (m_avout);
2412 if (m_edges)
2413 free_edge_list (m_edges);
2416 void compute_vsetvl_def_data ();
2417 void compute_transparent (const bb_info *);
2418 void compute_lcm_local_properties ();
2420 void fuse_local_vsetvl_info ();
2421 bool earliest_fuse_vsetvl_info (int iter);
2422 void pre_global_vsetvl_info ();
2423 void emit_vsetvl ();
2424 void cleanup ();
2425 void remove_avl_operand ();
2426 void remove_unused_dest_operand ();
2427 void remove_vsetvl_pre_insns ();
2429 void dump (FILE *file, const char *title) const
2431 fprintf (file, "\nVSETVL infos after %s\n\n", title);
2432 for (const bb_info *bb : crtl->ssa->bbs ())
2434 const auto &block_info = m_vector_block_infos[bb->index ()];
2435 fprintf (file, " bb %d:\n", bb->index ());
2436 fprintf (file, " probability: ");
2437 block_info.probability.dump (file);
2438 fprintf (file, "\n");
2439 if (!block_info.empty_p ())
2441 fprintf (file, " Header vsetvl info:");
2442 block_info.get_entry_info ().dump (file, " ");
2443 fprintf (file, " Footer vsetvl info:");
2444 block_info.get_exit_info ().dump (file, " ");
2445 for (const auto &info : block_info.local_infos)
2447 fprintf (file,
2448 " insn %d vsetvl info:", info.get_insn ()->uid ());
2449 info.dump (file, " ");
2456 void
2457 pre_vsetvl::compute_vsetvl_def_data ()
2459 m_vsetvl_def_exprs.truncate (0);
2460 add_expr (m_vsetvl_def_exprs, m_unknown_info);
2461 for (const bb_info *bb : crtl->ssa->bbs ())
2463 vsetvl_block_info &block_info = get_block_info (bb);
2464 if (block_info.empty_p ())
2465 continue;
2466 vsetvl_info &footer_info = block_info.get_exit_info ();
2467 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2468 add_expr (m_vsetvl_def_exprs, footer_info);
2471 if (m_vsetvl_def_in)
2472 sbitmap_vector_free (m_vsetvl_def_in);
2473 if (m_vsetvl_def_out)
2474 sbitmap_vector_free (m_vsetvl_def_out);
2476 sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2477 m_vsetvl_def_exprs.length ());
2478 sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2479 m_vsetvl_def_exprs.length ());
2481 m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2482 m_vsetvl_def_exprs.length ());
2483 m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2484 m_vsetvl_def_exprs.length ());
2486 bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun));
2487 bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun));
2488 bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun));
2490 for (const bb_info *bb : crtl->ssa->bbs ())
2492 vsetvl_block_info &block_info = get_block_info (bb);
2493 if (block_info.empty_p ())
2495 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1)
2497 auto *info = m_vsetvl_def_exprs[i];
2498 if (info->has_nonvlmax_reg_avl ()
2499 && bitmap_bit_p (m_reg_def_loc[bb->index ()],
2500 REGNO (info->get_avl ())))
2502 bitmap_set_bit (m_kill[bb->index ()], i);
2503 bitmap_set_bit (def_loc[bb->index ()],
2504 get_expr_index (m_vsetvl_def_exprs,
2505 m_unknown_info));
2508 continue;
2511 vsetvl_info &footer_info = block_info.get_exit_info ();
2512 bitmap_ones (m_kill[bb->index ()]);
2513 bitmap_set_bit (def_loc[bb->index ()],
2514 get_expr_index (m_vsetvl_def_exprs, footer_info));
2517 /* Set the def_out of the ENTRY basic block to m_unknown_info expr. */
2518 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2519 bitmap_set_bit (m_vsetvl_def_out[entry->index],
2520 get_expr_index (m_vsetvl_def_exprs, m_unknown_info));
2522 compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in,
2523 m_vsetvl_def_out);
2525 if (dump_file && (dump_flags & TDF_DETAILS))
2527 fprintf (dump_file,
2528 "\n Compute vsetvl info reaching definition data:\n\n");
2529 fprintf (dump_file, " Expression List (%d):\n",
2530 m_vsetvl_def_exprs.length ());
2531 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++)
2533 const auto &info = *m_vsetvl_def_exprs[i];
2534 fprintf (dump_file, " Expr[%u]: ", i);
2535 info.dump (dump_file, " ");
2537 fprintf (dump_file, "\n bitmap data:\n");
2538 for (const bb_info *bb : crtl->ssa->bbs ())
2540 unsigned int i = bb->index ();
2541 fprintf (dump_file, " BB %u:\n", i);
2542 fprintf (dump_file, " def_loc: ");
2543 dump_bitmap_file (dump_file, def_loc[i]);
2544 fprintf (dump_file, " kill: ");
2545 dump_bitmap_file (dump_file, m_kill[i]);
2546 fprintf (dump_file, " vsetvl_def_in: ");
2547 dump_bitmap_file (dump_file, m_vsetvl_def_in[i]);
2548 fprintf (dump_file, " vsetvl_def_out: ");
2549 dump_bitmap_file (dump_file, m_vsetvl_def_out[i]);
2553 sbitmap_vector_free (def_loc);
2554 sbitmap_vector_free (m_kill);
2557 /* Subroutine of compute_lcm_local_properties that computes the
2558 transparency of a local BB. Note that compile time is very sensitive
2559 to compute_transparent and compute_lcm_local_properties; any change to
2560 these 2 functions should be checked against the compile time of
2561 programs with a large number of blocks, e.g. SPEC 2017 wrf.
2563 Current compile-time profile of SPEC 2017 wrf:
2565 1. scheduling - 27%
2566 2. machine dep reorg (VSETVL PASS) - 18%
2568 The VSETVL pass should not spend more compile time than scheduling. */
2569 void
2570 pre_vsetvl::compute_transparent (const bb_info *bb)
2572 int num_exprs = m_exprs.length ();
2573 unsigned bb_index = bb->index ();
2574 for (int i = 0; i < num_exprs; i++)
2576 auto *info = m_exprs[i];
2577 if (info->has_nonvlmax_reg_avl ()
2578 && bitmap_bit_p (m_reg_def_loc[bb_index], REGNO (info->get_avl ())))
2579 bitmap_clear_bit (m_transp[bb_index], i);
2580 else if (info->has_vl ()
2581 && bitmap_bit_p (m_reg_def_loc[bb_index],
2582 REGNO (info->get_vl ())))
2583 bitmap_clear_bit (m_transp[bb_index], i);
2587 /* Compute the local properties of each recorded expression.
2589 Local properties are those that are defined by the block, irrespective of
2590 other blocks.
2592 An expression is transparent in a block if its operands are not modified
2593 in the block.
2595 An expression is computed (locally available) in a block if it is computed
2596 at least once and the expression would contain the same value if the
2597 computation were moved to the end of the block.
2599 An expression is locally anticipatable in a block if it is computed at
2600 least once and the expression would contain the same value if the
2601 computation were moved to the beginning of the block. */
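/* Editorial example (hypothetical block, not from the source): in

	bb:
	  vsetvli a5,a0,e32,m1,ta,ma
	  vadd.vv v1,v2,v3

   the vsetvl expression is locally anticipatable (first occurrence, a0
   not modified before it) and locally available (last occurrence, a0 and
   a5 not redefined after it).  Note that non-empty blocks are never
   transparent here, since compute_lcm_local_properties clears m_transp
   for them.  */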
2602 void
2603 pre_vsetvl::compute_lcm_local_properties ()
2605 m_exprs.truncate (0);
2606 for (const bb_info *bb : crtl->ssa->bbs ())
2608 vsetvl_block_info &block_info = get_block_info (bb);
2609 if (block_info.empty_p ())
2610 continue;
2611 vsetvl_info &header_info = block_info.get_entry_info ();
2612 vsetvl_info &footer_info = block_info.get_exit_info ();
2613 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2614 if (header_info.valid_p ())
2615 add_expr (m_exprs, header_info);
2616 if (footer_info.valid_p ())
2617 add_expr (m_exprs, footer_info);
2620 int num_exprs = m_exprs.length ();
2621 if (m_avloc)
2622 sbitmap_vector_free (m_avloc);
2623 if (m_kill)
2624 sbitmap_vector_free (m_kill);
2625 if (m_antloc)
2626 sbitmap_vector_free (m_antloc);
2627 if (m_transp)
2628 sbitmap_vector_free (m_transp);
2629 if (m_avin)
2630 sbitmap_vector_free (m_avin);
2631 if (m_avout)
2632 sbitmap_vector_free (m_avout);
2634 m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2635 m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2636 m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2637 m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2638 m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2639 m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2641 bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
2642 bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
2643 bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun));
2645 /* - If T is locally available at the end of a block, then T' must be
2646 available at the end of the same block. Since some optimization has
2647 occurred earlier, T' might not be locally available; however, it must
2648 have been previously computed on all paths. As a formula, T in AVLOC(B)
2649 implies that T' is in AVOUT(B).
2650 An "available occurrence" is one that is the last occurrence in the
2651 basic block and whose operands are not modified by following statements
2652 in the basic block [including this insn].
2654 - If T is locally anticipated at the beginning of a block, then either
2655 T' is locally anticipated or it is already available from previous
2656 blocks. As a formula, this means that T in ANTLOC(B) implies that T' is
2657 in ANTLOC(B) or in AVIN(B).
2658 An "anticipatable occurrence" is one that is the first occurrence in the
2659 basic block, whose operands are not modified in the basic block prior
2660 to the occurrence, and whose output is not used between the start of
2661 the block and the occurrence. */
2662 for (const bb_info *bb : crtl->ssa->bbs ())
2664 unsigned bb_index = bb->index ();
2665 vsetvl_block_info &block_info = get_block_info (bb);
2667 /* Compute m_transp */
2668 if (block_info.empty_p ())
2669 compute_transparent (bb);
2670 else
2672 bitmap_clear (m_transp[bb_index]);
2673 vsetvl_info &header_info = block_info.get_entry_info ();
2674 vsetvl_info &footer_info = block_info.get_exit_info ();
2676 if (header_info.valid_p () && anticipated_exp_p (header_info))
2677 bitmap_set_bit (m_antloc[bb_index],
2678 get_expr_index (m_exprs, header_info));
2680 if (footer_info.valid_p ())
2681 for (int i = 0; i < num_exprs; i += 1)
2683 const vsetvl_info &info = *m_exprs[i];
2684 if (!info.valid_p ())
2685 continue;
2686 if (available_exp_p (footer_info, info))
2687 bitmap_set_bit (m_avloc[bb_index], i);
2691 if (invalid_opt_bb_p (bb->cfg_bb ()))
2693 bitmap_clear (m_antloc[bb_index]);
2694 bitmap_clear (m_transp[bb_index]);
2697 /* Compute ae_kill for each basic block using:
2699 ~(TRANSP | COMP)
2701 bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
2702 bitmap_not (m_kill[bb_index], m_kill[bb_index]);
2706 void
2707 pre_vsetvl::fuse_local_vsetvl_info ()
2709 m_reg_def_loc
2710 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1);
2711 bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun));
2712 bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]);
2714 for (bb_info *bb : crtl->ssa->bbs ())
2716 auto &block_info = get_block_info (bb);
2717 block_info.bb = bb;
2718 if (dump_file && (dump_flags & TDF_DETAILS))
2720 fprintf (dump_file, " Try fuse basic block %d\n", bb->index ());
2722 auto_vec<vsetvl_info> infos;
2723 for (insn_info *insn : bb->real_nondebug_insns ())
2725 vsetvl_info curr_info = vsetvl_info (insn);
2726 if (curr_info.valid_p () || curr_info.unknown_p ())
2727 infos.safe_push (curr_info);
2729 /* Collect GP registers modified by the current bb. */
2730 if (insn->is_real ())
2731 for (def_info *def : insn->defs ())
2732 if (def->is_reg () && GP_REG_P (def->regno ()))
2733 bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ());
2736 vsetvl_info prev_info = vsetvl_info ();
2737 prev_info.set_empty ();
2738 for (auto &curr_info : infos)
2740 if (prev_info.empty_p ())
2741 prev_info = curr_info;
2742 else if ((curr_info.unknown_p () && prev_info.valid_p ())
2743 || (curr_info.valid_p () && prev_info.unknown_p ()))
2745 block_info.local_infos.safe_push (prev_info);
2746 prev_info = curr_info;
2748 else if (curr_info.valid_p () && prev_info.valid_p ())
2750 if (m_dem.available_p (prev_info, curr_info))
2752 if (dump_file && (dump_flags & TDF_DETAILS))
2754 fprintf (dump_file,
2755 " Ignore curr info since prev info "
2756 "available with it:\n");
2757 fprintf (dump_file, " prev_info: ");
2758 prev_info.dump (dump_file, " ");
2759 fprintf (dump_file, " curr_info: ");
2760 curr_info.dump (dump_file, " ");
2761 fprintf (dump_file, "\n");
2763 /* Even though prev_info is available with curr_info,
2764 we need to update the MAX_SEW of prev_info since
2765 we don't check MAX_SEW in the available_p check.
2767 prev_info:
2768 Demand fields: demand_ratio_and_ge_sew demand_avl
2769 SEW=16, VLMUL=mf4, RATIO=64, MAX_SEW=64
2771 curr_info:
2772 Demand fields: demand_ge_sew demand_non_zero_avl
2773 SEW=16, VLMUL=m1, RATIO=16, MAX_SEW=32
2775 In the example above, prev_info is available with
2776 curr_info, so we need to update prev_info's MAX_SEW
2777 from 64 to 32. */
2778 prev_info.set_max_sew (
2779 MIN (prev_info.get_max_sew (), curr_info.get_max_sew ()));
2780 if (!curr_info.vl_used_by_non_rvv_insn_p ()
2781 && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
2782 m_delete_list.safe_push (curr_info);
2784 if (curr_info.get_read_vl_insn ())
2785 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
2787 else if (m_dem.compatible_p (prev_info, curr_info))
2789 if (dump_file && (dump_flags & TDF_DETAILS))
2791 fprintf (dump_file, " Fuse curr info since prev info "
2792 "compatible with it:\n");
2793 fprintf (dump_file, " prev_info: ");
2794 prev_info.dump (dump_file, " ");
2795 fprintf (dump_file, " curr_info: ");
2796 curr_info.dump (dump_file, " ");
2798 m_dem.merge (prev_info, curr_info);
2799 if (!curr_info.vl_used_by_non_rvv_insn_p ()
2800 && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
2801 m_delete_list.safe_push (curr_info);
2802 if (curr_info.get_read_vl_insn ())
2803 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
2804 if (dump_file && (dump_flags & TDF_DETAILS))
2806 fprintf (dump_file, " prev_info after fused: ");
2807 prev_info.dump (dump_file, " ");
2808 fprintf (dump_file, "\n");
2811 else
2813 if (dump_file && (dump_flags & TDF_DETAILS))
2815 fprintf (dump_file,
2816 " Cannot fuse incompatible infos:\n");
2817 fprintf (dump_file, " prev_info: ");
2818 prev_info.dump (dump_file, " ");
2819 fprintf (dump_file, " curr_info: ");
2820 curr_info.dump (dump_file, " ");
2822 block_info.local_infos.safe_push (prev_info);
2823 prev_info = curr_info;
2828 if (prev_info.valid_p () || prev_info.unknown_p ())
2829 block_info.local_infos.safe_push (prev_info);
2834 bool
2835 pre_vsetvl::earliest_fuse_vsetvl_info (int iter)
2837 compute_vsetvl_def_data ();
2838 compute_lcm_local_properties ();
2840 unsigned num_exprs = m_exprs.length ();
2841 struct edge_list *m_edges = create_edge_list ();
2842 unsigned num_edges = NUM_EDGES (m_edges);
2843 sbitmap *antin
2844 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2845 sbitmap *antout
2846 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2848 sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs);
2850 compute_available (m_avloc, m_kill, m_avout, m_avin);
2851 compute_antinout_edge (m_antloc, m_transp, antin, antout);
2852 compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill,
2853 earliest);
2855 if (dump_file && (dump_flags & TDF_DETAILS))
2857 fprintf (dump_file, "\n Compute LCM earliest insert data (lift %d):\n\n",
2858 iter);
2859 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
2860 for (unsigned i = 0; i < num_exprs; i++)
2862 const auto &info = *m_exprs[i];
2863 fprintf (dump_file, " Expr[%u]: ", i);
2864 info.dump (dump_file, " ");
2866 fprintf (dump_file, "\n bitmap data:\n");
2867 for (const bb_info *bb : crtl->ssa->bbs ())
2869 unsigned int i = bb->index ();
2870 fprintf (dump_file, " BB %u:\n", i);
2871 fprintf (dump_file, " avloc: ");
2872 dump_bitmap_file (dump_file, m_avloc[i]);
2873 fprintf (dump_file, " kill: ");
2874 dump_bitmap_file (dump_file, m_kill[i]);
2875 fprintf (dump_file, " antloc: ");
2876 dump_bitmap_file (dump_file, m_antloc[i]);
2877 fprintf (dump_file, " transp: ");
2878 dump_bitmap_file (dump_file, m_transp[i]);
2880 fprintf (dump_file, " avin: ");
2881 dump_bitmap_file (dump_file, m_avin[i]);
2882 fprintf (dump_file, " avout: ");
2883 dump_bitmap_file (dump_file, m_avout[i]);
2884 fprintf (dump_file, " antin: ");
2885 dump_bitmap_file (dump_file, antin[i]);
2886 fprintf (dump_file, " antout: ");
2887 dump_bitmap_file (dump_file, antout[i]);
2889 fprintf (dump_file, "\n");
2890 fprintf (dump_file, " earliest:\n");
2891 for (unsigned ed = 0; ed < num_edges; ed++)
2893 edge eg = INDEX_EDGE (m_edges, ed);
2895 if (bitmap_empty_p (earliest[ed]))
2896 continue;
2897 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
2898 eg->dest->index);
2899 dump_bitmap_file (dump_file, earliest[ed]);
2901 fprintf (dump_file, "\n");
2904 if (dump_file && (dump_flags & TDF_DETAILS))
2906 fprintf (dump_file, " Fused global info result (lift %d):\n", iter);
2909 bool changed = false;
2910 for (unsigned ed = 0; ed < num_edges; ed++)
2912 sbitmap e = earliest[ed];
2913 if (bitmap_empty_p (e))
2914 continue;
2916 unsigned int expr_index;
2917 sbitmap_iterator sbi;
2918 EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi)
2920 vsetvl_info &curr_info = *m_exprs[expr_index];
2921 edge eg = INDEX_EDGE (m_edges, ed);
2922 vsetvl_block_info &src_block_info = get_block_info (eg->src);
2923 vsetvl_block_info &dest_block_info = get_block_info (eg->dest);
2925 if (!curr_info.valid_p ()
2926 || eg->probability == profile_probability::never ()
2927 || src_block_info.probability
2928 == profile_probability::uninitialized ()
2929 /* When an earliest edge has multiple set bits, the edge may
2930 have an infinite loop in its preds or succs, or multiple
2931 conflicting vsetvl expressions, which makes it unsuitable.
2932 We don't perform fusion in such a situation. */
2933 || bitmap_count_bits (e) != 1)
2934 continue;
2936 if (src_block_info.empty_p ())
2938 vsetvl_info new_curr_info = curr_info;
2939 new_curr_info.set_bb (crtl->ssa->bb (eg->dest));
2940 bool has_compatible_p
2941 = has_compatible_reaching_vsetvl_p (new_curr_info);
2942 if (!has_compatible_p)
2944 if (dump_file && (dump_flags & TDF_DETAILS))
2946 fprintf (dump_file,
2947 " Forbidden lift up vsetvl info into bb %u "
2948 "since there is no vsetvl info that reaching in "
2949 "is compatible with it:",
2950 eg->src->index);
2951 curr_info.dump (dump_file, " ");
2953 continue;
2956 if (dump_file && (dump_flags & TDF_DETAILS))
2958 fprintf (dump_file,
2959 " Set empty bb %u to info:", eg->src->index);
2960 curr_info.dump (dump_file, " ");
2962 src_block_info.set_info (curr_info);
2963 src_block_info.probability = dest_block_info.probability;
2964 changed = true;
2966 else if (src_block_info.has_info ())
2968 vsetvl_info &prev_info = src_block_info.get_exit_info ();
2969 gcc_assert (prev_info.valid_p ());
2971 if (m_dem.compatible_p (prev_info, curr_info))
2973 if (dump_file && (dump_flags & TDF_DETAILS))
2975 fprintf (dump_file, " Fuse curr info since prev info "
2976 "compatible with it:\n");
2977 fprintf (dump_file, " prev_info: ");
2978 prev_info.dump (dump_file, " ");
2979 fprintf (dump_file, " curr_info: ");
2980 curr_info.dump (dump_file, " ");
2982 m_dem.merge (prev_info, curr_info);
2983 if (dump_file && (dump_flags & TDF_DETAILS))
2985 fprintf (dump_file, " prev_info after fused: ");
2986 prev_info.dump (dump_file, " ");
2987 fprintf (dump_file, "\n");
2989 changed = true;
2990 if (src_block_info.has_info ())
2991 src_block_info.probability += dest_block_info.probability;
2993 else
2995 /* Cancel lift up if probabilities are equal. */
2996 if (successors_probability_equal_p (eg->src)
2997 || (dest_block_info.probability
2998 > src_block_info.probability
2999 && !has_compatible_reaching_vsetvl_p (curr_info)))
3001 if (dump_file && (dump_flags & TDF_DETAILS))
3003 fprintf (dump_file,
3004 " Reset bb %u:",
3005 eg->src->index);
3006 prev_info.dump (dump_file, " ");
3007 fprintf (dump_file, " due to (same probability or no "
3008 "compatible reaching):");
3009 curr_info.dump (dump_file, " ");
3011 src_block_info.set_empty_info ();
3012 src_block_info.probability
3013 = profile_probability::uninitialized ();
3014 /* See PR113696: we should reset the immediate dominator to
3015 empty since we may have uplifted an ineffective vsetvl
3016 located in a low-probability block. */
3017 basic_block dom
3018 = get_immediate_dominator (CDI_DOMINATORS, eg->src);
3019 auto &dom_block_info = get_block_info (dom);
3020 if (dom_block_info.has_info ()
3021 && !m_dem.compatible_p (
3022 dom_block_info.get_exit_info (), curr_info))
3024 dom_block_info.set_empty_info ();
3025 dom_block_info.probability
3026 = profile_probability::uninitialized ();
3027 if (dump_file && (dump_flags & TDF_DETAILS))
3029 fprintf (dump_file,
3030 " Reset dominator bb %u:",
3031 dom->index);
3032 prev_info.dump (dump_file, " ");
3033 fprintf (dump_file,
3034 " due to (same probability or no "
3035 "compatible reaching):");
3036 curr_info.dump (dump_file, " ");
3039 changed = true;
3041 /* Choose the one with the higher probability. */
3042 else if (dest_block_info.probability
3043 > src_block_info.probability)
3045 if (dump_file && (dump_flags & TDF_DETAILS))
3047 fprintf (dump_file,
3048 " Change bb %u from:",
3049 eg->src->index);
3050 prev_info.dump (dump_file, " ");
3051 fprintf (dump_file,
3052 " to (higher probability):");
3053 curr_info.dump (dump_file, " ");
3055 src_block_info.set_info (curr_info);
3056 src_block_info.probability = dest_block_info.probability;
3057 changed = true;
3061 else
3063 vsetvl_info &prev_info = src_block_info.get_exit_info ();
3064 if (!prev_info.valid_p ()
3065 || m_dem.available_p (prev_info, curr_info)
3066 || !m_dem.compatible_p (prev_info, curr_info))
3067 continue;
3069 if (dump_file && (dump_flags & TDF_DETAILS))
3071 fprintf (dump_file, " Fuse curr info since prev info "
3072 "compatible with it:\n");
3073 fprintf (dump_file, " prev_info: ");
3074 prev_info.dump (dump_file, " ");
3075 fprintf (dump_file, " curr_info: ");
3076 curr_info.dump (dump_file, " ");
3078 m_dem.merge (prev_info, curr_info);
3079 if (dump_file && (dump_flags & TDF_DETAILS))
3081 fprintf (dump_file, " prev_info after fused: ");
3082 prev_info.dump (dump_file, " ");
3083 fprintf (dump_file, "\n");
3085 changed = true;
3090 if (dump_file && (dump_flags & TDF_DETAILS))
3092 fprintf (dump_file, "\n");
3095 sbitmap_vector_free (antin);
3096 sbitmap_vector_free (antout);
3097 sbitmap_vector_free (earliest);
3098 free_edge_list (m_edges);
3100 return changed;
3103 void
3104 pre_vsetvl::pre_global_vsetvl_info ()
3106 compute_vsetvl_def_data ();
3107 compute_lcm_local_properties ();
3109 unsigned num_exprs = m_exprs.length ();
3110 m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill,
3111 m_avin, m_avout, &m_insert, &m_del);
3112 unsigned num_edges = NUM_EDGES (m_edges);
3114 if (dump_file && (dump_flags & TDF_DETAILS))
3116 fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n");
3117 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
3118 for (unsigned i = 0; i < num_exprs; i++)
3120 const auto &info = *m_exprs[i];
3121 fprintf (dump_file, " Expr[%u]: ", i);
3122 info.dump (dump_file, " ");
3124 fprintf (dump_file, "\n bitmap data:\n");
3125 for (const bb_info *bb : crtl->ssa->bbs ())
3127 unsigned i = bb->index ();
3128 fprintf (dump_file, " BB %u:\n", i);
3129 fprintf (dump_file, " avloc: ");
3130 dump_bitmap_file (dump_file, m_avloc[i]);
3131 fprintf (dump_file, " kill: ");
3132 dump_bitmap_file (dump_file, m_kill[i]);
3133 fprintf (dump_file, " antloc: ");
3134 dump_bitmap_file (dump_file, m_antloc[i]);
3135 fprintf (dump_file, " transp: ");
3136 dump_bitmap_file (dump_file, m_transp[i]);
3138 fprintf (dump_file, " avin: ");
3139 dump_bitmap_file (dump_file, m_avin[i]);
3140 fprintf (dump_file, " avout: ");
3141 dump_bitmap_file (dump_file, m_avout[i]);
3142 fprintf (dump_file, " del: ");
3143 dump_bitmap_file (dump_file, m_del[i]);
3145 fprintf (dump_file, "\n");
3146 fprintf (dump_file, " insert:\n");
3147 for (unsigned ed = 0; ed < num_edges; ed++)
3149 edge eg = INDEX_EDGE (m_edges, ed);
3151 if (bitmap_empty_p (m_insert[ed]))
3152 continue;
3153 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
3154 eg->dest->index);
3155 dump_bitmap_file (dump_file, m_insert[ed]);
3159 /* Remove vsetvl infos as LCM suggests. */
3160 for (const bb_info *bb : crtl->ssa->bbs ())
3162 sbitmap d = m_del[bb->index ()];
3163 if (bitmap_count_bits (d) == 0)
3164 continue;
3165 gcc_assert (bitmap_count_bits (d) == 1);
3166 unsigned expr_index = bitmap_first_set_bit (d);
3167 vsetvl_info &info = *m_exprs[expr_index];
3168 gcc_assert (info.valid_p ());
3169 gcc_assert (info.get_bb () == bb);
3170 const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
3171 gcc_assert (block_info.get_entry_info () == info);
3172 info.set_delete ();
3173 if (dump_file && (dump_flags & TDF_DETAILS))
3175 fprintf (dump_file,
3176 "\nLCM deleting vsetvl of block %d, it has predecessors: \n",
3177 bb->index ());
3178 hash_set<basic_block> all_preds
3179 = get_all_predecessors (bb->cfg_bb ());
3180 int i = 0;
3181 for (const auto pred : all_preds)
3183 fprintf (dump_file, "%d ", pred->index);
3184 i++;
3185 if (i % 32 == 0)
3186 fprintf (dump_file, "\n");
3188 fprintf (dump_file, "\n");
3192 /* Remove a block's vsetvl info if the infos from all its predecessors are available to it. */
3193 for (const bb_info *bb : crtl->ssa->bbs ())
3195 vsetvl_block_info &block_info = get_block_info (bb);
3196 if (block_info.empty_p ())
3197 continue;
3198 vsetvl_info &curr_info = block_info.get_entry_info ();
3199 if (!curr_info.valid_p ())
3200 continue;
3202 unsigned int expr_index;
3203 sbitmap_iterator sbi;
3204 gcc_assert (
3205 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
3206 bool full_available = true;
3207 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index,
3208 sbi)
3210 vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
3211 if (!prev_info.valid_p ()
3212 || !m_dem.available_p (prev_info, curr_info))
3214 full_available = false;
3215 break;
3218 if (full_available)
3219 curr_info.set_delete ();
3222 for (const bb_info *bb : crtl->ssa->bbs ())
3224 vsetvl_block_info &block_info = get_block_info (bb);
3225 if (block_info.empty_p ())
3226 continue;
3227 vsetvl_info &curr_info = block_info.get_entry_info ();
3228 if (curr_info.delete_p ())
3230 if (block_info.local_infos.is_empty ())
3231 continue;
3232 curr_info = block_info.local_infos[0];
3234 if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
3235 && preds_all_same_avl_and_ratio_p (curr_info))
3236 curr_info.set_change_vtype_only ();
3238 vsetvl_info prev_info = vsetvl_info ();
3239 prev_info.set_empty ();
3240 for (auto &curr_info : block_info.local_infos)
3242 if (prev_info.valid_p () && curr_info.valid_p ()
3243 && m_dem.avl_available_p (prev_info, curr_info)
3244 && prev_info.get_ratio () == curr_info.get_ratio ())
3245 curr_info.set_change_vtype_only ();
3246 prev_info = curr_info;
3251 void
3252 pre_vsetvl::emit_vsetvl ()
3254 bool need_commit = false;
3256 /* Fake edges are created by connect_infinite_loops_to_exit.
3257 We should commit vsetvl edge insertions after the fake edges
3258 have been removed; otherwise, it will cause an ICE. */
3259 remove_fake_exit_edges ();
3260 for (const bb_info *bb : crtl->ssa->bbs ())
3262 for (const auto &curr_info : get_block_info (bb).local_infos)
3264 insn_info *insn = curr_info.get_insn ();
3265 if (curr_info.delete_p ())
3267 if (vsetvl_insn_p (insn->rtl ()))
3268 remove_vsetvl_insn (curr_info.get_insn ()->rtl ());
3269 continue;
3271 else if (curr_info.valid_p ())
3273 if (vsetvl_insn_p (insn->rtl ()))
3275 const vsetvl_info temp = vsetvl_info (insn);
3276 if (!(curr_info == temp))
3278 if (dump_file)
3280 fprintf (dump_file, "\n Change vsetvl info from: ");
3281 temp.dump (dump_file, " ");
3282 fprintf (dump_file, " to: ");
3283 curr_info.dump (dump_file, " ");
3285 change_vsetvl_insn (curr_info);
3288 else
3290 if (dump_file)
3292 fprintf (dump_file,
3293 "\n Insert vsetvl info before insn %d: ",
3294 insn->uid ());
3295 curr_info.dump (dump_file, " ");
3297 insert_vsetvl_insn (EMIT_BEFORE, curr_info);
3303 for (const vsetvl_info &item : m_delete_list)
3305 gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ()));
3306 remove_vsetvl_insn (item.get_insn ()->rtl ());
3309 /* Insert vsetvl infos that were not deleted after lift up. */
3310 for (const bb_info *bb : crtl->ssa->bbs ())
3312 const vsetvl_block_info &block_info = get_block_info (bb);
3313 if (!block_info.has_info ())
3314 continue;
3316 const vsetvl_info &footer_info = block_info.get_exit_info ();
3318 if (footer_info.delete_p ())
3319 continue;
3321 edge eg;
3322 edge_iterator eg_iterator;
3323 FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs)
3325 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3326 if (dump_file)
3328 fprintf (
3329 dump_file,
3330 "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ",
3331 eg->src->index, eg->dest->index);
3332 footer_info.dump (dump_file, " ");
3334 start_sequence ();
3335 insert_vsetvl_insn (EMIT_DIRECT, footer_info);
3336 rtx_insn *rinsn = get_insns ();
3337 end_sequence ();
3338 default_rtl_profile ();
3339 insert_insn_on_edge (rinsn, eg);
3340 need_commit = true;
3344 /* Insert vsetvls on edges as LCM (m_insert) suggests. */
3345 for (int ed = 0; ed < NUM_EDGES (m_edges); ed++)
3347 edge eg = INDEX_EDGE (m_edges, ed);
3348 sbitmap i = m_insert[ed];
3349 if (bitmap_count_bits (i) != 1)
3350 /* For code with an infinite loop (e.g. pr61634.c), the data
3351 flow is completely wrong. */
3352 continue;
3354 unsigned expr_index = bitmap_first_set_bit (i);
3355 const vsetvl_info &info = *m_exprs[expr_index];
3356 gcc_assert (info.valid_p ());
3357 if (dump_file)
3359 fprintf (dump_file,
3360 "\n Insert vsetvl info at edge(bb %u -> bb %u): ",
3361 eg->src->index, eg->dest->index);
3362 info.dump (dump_file, " ");
3364 rtl_profile_for_edge (eg);
3365 start_sequence ();
3367 insert_vsetvl_insn (EMIT_DIRECT, info);
3368 rtx_insn *rinsn = get_insns ();
3369 end_sequence ();
3370 default_rtl_profile ();
3372 /* We should not get an abnormal edge here. */
3373 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3374 need_commit = true;
3375 insert_insn_on_edge (rinsn, eg);
3378 if (need_commit)
3379 commit_edge_insertions ();
3382 void
3383 pre_vsetvl::cleanup ()
3385 remove_avl_operand ();
3386 remove_unused_dest_operand ();
3387 remove_vsetvl_pre_insns ();
3390 void
3391 pre_vsetvl::remove_avl_operand ()
3393 basic_block cfg_bb;
3394 rtx_insn *rinsn;
3395 FOR_ALL_BB_FN (cfg_bb, cfun)
3396 FOR_BB_INSNS (cfg_bb, rinsn)
3397 if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn)
3398 && REG_P (get_vl (rinsn)))
3400 rtx avl = get_vl (rinsn);
3401 if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
3403 rtx new_pat;
3404 if (fault_first_load_p (rinsn))
3405 new_pat
3406 = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
3407 else
3409 rtx set = single_set (rinsn);
3410 rtx src
3411 = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
3412 new_pat = gen_rtx_SET (SET_DEST (set), src);
3414 if (dump_file)
3416 fprintf (dump_file, " Cleanup insn %u's avl operand:\n",
3417 INSN_UID (rinsn));
3418 print_rtl_single (dump_file, rinsn);
3420 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
3425 void
3426 pre_vsetvl::remove_unused_dest_operand ()
3428 df_analyze ();
3429 basic_block cfg_bb;
3430 rtx_insn *rinsn;
3431 FOR_ALL_BB_FN (cfg_bb, cfun)
3432 FOR_BB_INSNS (cfg_bb, rinsn)
3433 if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
3435 rtx vl = get_vl (rinsn);
3436 vsetvl_info info = vsetvl_info (rinsn);
3437 if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
3438 if (!info.has_vlmax_avl ())
3440 rtx new_pat = info.get_vsetvl_pat (true);
3441 if (dump_file)
3443 fprintf (dump_file,
3444 " Remove vsetvl insn %u's dest(vl) operand since "
3445 "it unused:\n",
3446 INSN_UID (rinsn));
3447 print_rtl_single (dump_file, rinsn);
3449 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
3450 false);
3455 /* Remove all bogus vsetvl_pre instructions. */
3456 void
3457 pre_vsetvl::remove_vsetvl_pre_insns ()
3459 basic_block cfg_bb;
3460 rtx_insn *rinsn;
3461 FOR_ALL_BB_FN (cfg_bb, cfun)
3462 FOR_BB_INSNS (cfg_bb, rinsn)
3463 if (NONDEBUG_INSN_P (rinsn) && vsetvl_pre_insn_p (rinsn))
3465 if (dump_file)
3467 fprintf (dump_file, " Eliminate vsetvl_pre insn %d:\n",
3468 INSN_UID (rinsn));
3469 print_rtl_single (dump_file, rinsn);
3471 remove_vsetvl_insn (rinsn);
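/* Editorial end-to-end illustration (hypothetical input, not from the
   source): for straight-line code such as

	vsetvli a5,a0,e32,m1,ta,ma
	vle32.v v1,(a1)
	vsetvli a5,a0,e32,m1,ta,ma
	vadd.vv v1,v1,v1

   Phase 1 fuses the two identical infos, Phase 4 deletes the second
   vsetvli, and Phase 5 cleans up any now-unused vl/avl operands.  */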
3475 const pass_data pass_data_vsetvl = {
3476 RTL_PASS, /* type */
3477 "vsetvl", /* name */
3478 OPTGROUP_NONE, /* optinfo_flags */
3479 TV_MACH_DEP, /* tv_id */
3480 0, /* properties_required */
3481 0, /* properties_provided */
3482 0, /* properties_destroyed */
3483 0, /* todo_flags_start */
3484 0, /* todo_flags_finish */
3487 class pass_vsetvl : public rtl_opt_pass
3489 private:
3490 void simple_vsetvl ();
3491 void lazy_vsetvl ();
3493 public:
3494 pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
3496 /* opt_pass methods: */
3497 virtual bool gate (function *) final override { return TARGET_VECTOR; }
3498 virtual unsigned int execute (function *) final override;
3499 }; // class pass_vsetvl
3501 void
3502 pass_vsetvl::simple_vsetvl ()
3504 if (dump_file)
3505 fprintf (dump_file, "\nEntering Simple VSETVL PASS\n");
3507 basic_block cfg_bb;
3508 rtx_insn *rinsn;
3509 FOR_ALL_BB_FN (cfg_bb, cfun)
3511 FOR_BB_INSNS (cfg_bb, rinsn)
3513 if (!NONDEBUG_INSN_P (rinsn))
3514 continue;
3515 if (has_vtype_op (rinsn))
3517 const auto &info = vsetvl_info (rinsn);
3518 rtx pat = info.get_vsetvl_pat ();
3519 emit_insn_before (pat, rinsn);
3520 if (dump_file)
3522 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
3523 INSN_UID (rinsn));
3524 print_rtl_single (dump_file, PREV_INSN (rinsn));
3531 /* Lazy vsetvl insertion for optimize > 0. */
3532 void
3533 pass_vsetvl::lazy_vsetvl ()
3535 if (dump_file)
3536 fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n");
3538 pre_vsetvl pre = pre_vsetvl ();
3540 if (dump_file)
3541 fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n");
3542 pre.fuse_local_vsetvl_info ();
3543 if (dump_file && (dump_flags & TDF_DETAILS))
3544 pre.dump (dump_file, "phase 1");
3546 /* Phase 2: Fuse header and footer vsetvl infos between basic blocks. */
3547 if (dump_file)
3548 fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n");
3549 if (vsetvl_strategy != VSETVL_OPT_NO_FUSION)
3551 bool changed = true;
3552 int fused_count = 0;
3555 if (dump_file)
3556 fprintf (dump_file, " Try lift up %d.\n\n", fused_count);
3557 changed = pre.earliest_fuse_vsetvl_info (fused_count);
3558 fused_count += 1;
3559 } while (changed);
3561 if (dump_file && (dump_flags & TDF_DETAILS))
3562 pre.dump (dump_file, "phase 2");
3564 /* Phase 3: Reduce redundant vsetvl infos using LCM. */
3565 if (dump_file)
3566 fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n");
3567 pre.pre_global_vsetvl_info ();
3568 if (dump_file && (dump_flags & TDF_DETAILS))
3569 pre.dump (dump_file, "phase 3");
3571 /* Phase 4: Insert, modify and remove vsetvl insns. */
3572 if (dump_file)
3573 fprintf (dump_file,
3574 "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n");
3575 pre.emit_vsetvl ();
3577 /* Phase 5: Cleanup */
3578 if (dump_file)
3579 fprintf (dump_file, "\nPhase 5: Cleanup\n\n");
3580 pre.cleanup ();
3582 pre.finish ();
3585 /* Main entry point for this pass. */
3586 unsigned int
3587 pass_vsetvl::execute (function *)
3589 if (n_basic_blocks_for_fn (cfun) <= 0)
3590 return 0;
3592 /* RVV instructions are not stable and may change after being split.
3593 We need to split them here to avoid potential issues, since the
3594 VSETVL pass runs before the split pass. */
3595 split_all_insns ();
3597 /* Return early if there are no vector instructions. */
3598 if (!has_vector_insn (cfun))
3599 return 0;
3601 if (!optimize || vsetvl_strategy == VSETVL_SIMPLE)
3602 simple_vsetvl ();
3603 else
3604 lazy_vsetvl ();
3606 return 0;
3609 rtl_opt_pass *
3610 make_pass_vsetvl (gcc::context *ctxt)
3612 return new pass_vsetvl (ctxt);