1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
,
95 stmt_vec_info stmt_info
, slp_tree node
,
96 tree vectype
, int misalign
,
97 enum vect_cost_model_location where
)
99 if ((kind
== vector_load
|| kind
== unaligned_load
)
100 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
101 kind
= vector_gather_load
;
102 if ((kind
== vector_store
|| kind
== unaligned_store
)
103 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
104 kind
= vector_scatter_store
;
106 stmt_info_for_cost si
107 = { count
, kind
, where
, stmt_info
, node
, vectype
, misalign
};
108 body_cost_vec
->safe_push (si
);
111 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
115 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
116 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
117 tree vectype
, int misalign
,
118 enum vect_cost_model_location where
)
120 return record_stmt_cost (body_cost_vec
, count
, kind
, stmt_info
, NULL
,
121 vectype
, misalign
, where
);
125 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
126 enum vect_cost_for_stmt kind
, slp_tree node
,
127 tree vectype
, int misalign
,
128 enum vect_cost_model_location where
)
130 return record_stmt_cost (body_cost_vec
, count
, kind
, NULL
, node
,
131 vectype
, misalign
, where
);
135 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
136 enum vect_cost_for_stmt kind
,
137 enum vect_cost_model_location where
)
139 gcc_assert (kind
== cond_branch_taken
|| kind
== cond_branch_not_taken
140 || kind
== scalar_stmt
);
141 return record_stmt_cost (body_cost_vec
, count
, kind
, NULL
, NULL
,
142 NULL_TREE
, 0, where
);
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
148 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
150 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT_INFO and the vector is associated
157 with scalar destination SCALAR_DEST. */
160 read_vector_array (vec_info
*vinfo
,
161 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
162 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
164 tree vect_type
, vect
, vect_name
, array_ref
;
167 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
168 vect_type
= TREE_TYPE (TREE_TYPE (array
));
169 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
170 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
171 build_int_cst (size_type_node
, n
),
172 NULL_TREE
, NULL_TREE
);
174 new_stmt
= gimple_build_assign (vect
, array_ref
);
175 vect_name
= make_ssa_name (vect
, new_stmt
);
176 gimple_assign_set_lhs (new_stmt
, vect_name
);
177 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
182 /* ARRAY is an array of vectors created by create_vector_array.
183 Emit code to store SSA_NAME VECT in index N of the array.
184 The store is part of the vectorization of STMT_INFO. */
187 write_vector_array (vec_info
*vinfo
,
188 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
189 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
194 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
195 build_int_cst (size_type_node
, n
),
196 NULL_TREE
, NULL_TREE
);
198 new_stmt
= gimple_build_assign (array_ref
, vect
);
199 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
202 /* PTR is a pointer to an array of type TYPE. Return a representation
203 of *PTR. The memory reference replaces those in FIRST_DR
207 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
211 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
212 /* Arrays have the same alignment as their type. */
213 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218 Emit the clobber before *GSI. */
221 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
222 gimple_stmt_iterator
*gsi
, tree var
)
224 tree clobber
= build_clobber (TREE_TYPE (var
));
225 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
226 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
231 /* Function vect_mark_relevant.
233 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
236 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
237 enum vect_relevant relevant
, bool live_p
)
239 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
240 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE
, vect_location
,
244 "mark relevant %d, live %d: %G", relevant
, live_p
,
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
253 /* This is the last stmt in a sequence that was detected as a
254 pattern that can potentially be vectorized. Don't mark the stmt
255 as relevant/live because it's not going to be vectorized.
256 Instead mark the pattern-stmt that replaces it. */
258 if (dump_enabled_p ())
259 dump_printf_loc (MSG_NOTE
, vect_location
,
260 "last stmt in pattern. don't mark"
261 " relevant/live.\n");
262 stmt_vec_info old_stmt_info
= stmt_info
;
263 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
265 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
266 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
269 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
270 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
271 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
273 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE
, vect_location
,
278 "already marked relevant/live.\n");
282 worklist
->safe_push (stmt_info
);
286 /* Function is_simple_and_all_uses_invariant
288 Return true if STMT_INFO is simple and all uses of it are invariant. */
291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
292 loop_vec_info loop_vinfo
)
297 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
301 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
303 enum vect_def_type dt
= vect_uninitialized_def
;
305 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
309 "use not simple.\n");
313 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
319 /* Function vect_stmt_relevant_p.
321 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322 is "relevant for vectorization".
324 A stmt is considered "relevant for vectorization" if:
325 - it has uses outside the loop.
326 - it has vdefs (it alters memory).
327 - control stmts in the loop (except for the exit condition).
329 CHECKME: what other side effects would the vectorizer allow? */
332 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
333 enum vect_relevant
*relevant
, bool *live_p
)
335 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
337 imm_use_iterator imm_iter
;
341 *relevant
= vect_unused_in_scope
;
344 /* cond stmt other than loop exit cond. */
345 if (is_ctrl_stmt (stmt_info
->stmt
)
346 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
347 *relevant
= vect_used_in_scope
;
349 /* changing memory. */
350 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
351 if (gimple_vdef (stmt_info
->stmt
)
352 && !gimple_clobber_p (stmt_info
->stmt
))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE
, vect_location
,
356 "vec_stmt_relevant_p: stmt has vdefs.\n");
357 *relevant
= vect_used_in_scope
;
360 /* uses outside the loop. */
361 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
363 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
365 basic_block bb
= gimple_bb (USE_STMT (use_p
));
366 if (!flow_bb_inside_loop_p (loop
, bb
))
368 if (is_gimple_debug (USE_STMT (use_p
)))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE
, vect_location
,
373 "vec_stmt_relevant_p: used out of loop.\n");
375 /* We expect all such uses to be in the loop exit phis
376 (because of loop closed form) */
377 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
378 gcc_assert (bb
== single_exit (loop
)->dest
);
385 if (*live_p
&& *relevant
== vect_unused_in_scope
386 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE
, vect_location
,
390 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391 *relevant
= vect_used_only_live
;
394 return (*live_p
|| *relevant
);
398 /* Function exist_non_indexing_operands_for_use_p
400 USE is one of the uses attached to STMT_INFO. Check if USE is
401 used in STMT_INFO for anything other than indexing an array. */
404 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info
))
414 /* STMT has a data_ref. FORNOW this means that its of one of
418 (This should have been verified in analyze_data_refs).
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
427 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
428 if (!assign
|| !gimple_assign_copy_p (assign
))
430 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
431 if (call
&& gimple_call_internal_p (call
))
433 internal_fn ifn
= gimple_call_internal_fn (call
);
434 int mask_index
= internal_fn_mask_index (ifn
);
436 && use
== gimple_call_arg (call
, mask_index
))
438 int stored_value_index
= internal_fn_stored_value_index (ifn
);
439 if (stored_value_index
>= 0
440 && use
== gimple_call_arg (call
, stored_value_index
))
442 if (internal_gather_scatter_fn_p (ifn
)
443 && use
== gimple_call_arg (call
, 1))
449 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
451 operand
= gimple_assign_rhs1 (assign
);
452 if (TREE_CODE (operand
) != SSA_NAME
)
463 Function process_use.
466 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468 that defined USE. This is done by calling mark_relevant and passing it
469 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
474 Generally, LIVE_P and RELEVANT are used to define the liveness and
475 relevance info of the DEF_STMT of this USE:
476 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
479 - case 1: If USE is used only for address computations (e.g. array indexing),
480 which does not need to be directly vectorized, then the liveness/relevance
481 of the respective DEF_STMT is left unchanged.
482 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
483 we skip DEF_STMT cause it had already been processed.
484 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485 "relevant" will be modified accordingly.
487 Return true if everything is as expected. Return false otherwise. */
490 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
491 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
494 stmt_vec_info dstmt_vinfo
;
495 enum vect_def_type dt
;
497 /* case 1: we are only interested in uses that need to be vectorized. Uses
498 that are used for address computation are not considered relevant. */
499 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
500 return opt_result::success ();
502 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
503 return opt_result::failure_at (stmt_vinfo
->stmt
,
505 " unsupported use in stmt.\n");
508 return opt_result::success ();
510 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
511 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514 We have to force the stmt live since the epilogue loop needs it to
515 continue computing the reduction. */
516 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
518 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
520 && bb
->loop_father
== def_bb
->loop_father
)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE
, vect_location
,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
526 return opt_result::success ();
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
536 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE
, vect_location
,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
544 case vect_unused_in_scope
:
545 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
546 vect_used_in_scope
: vect_unused_in_scope
;
549 case vect_used_in_outer_by_reduction
:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
551 relevant
= vect_used_by_reduction
;
554 case vect_used_in_outer
:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
556 relevant
= vect_used_in_scope
;
559 case vect_used_in_scope
:
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
574 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE
, vect_location
,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
582 case vect_unused_in_scope
:
583 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
585 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
588 case vect_used_by_reduction
:
589 case vect_used_only_live
:
590 relevant
= vect_used_in_outer_by_reduction
;
593 case vect_used_in_scope
:
594 relevant
= vect_used_in_outer
;
601 /* We are also not interested in uses on loop PHI backedges that are
602 inductions. Otherwise we'll needlessly vectorize the IV increment
603 and cause hybrid SLP for SLP inductions. Unless the PHI is live
605 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
606 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
607 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
608 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
609 loop_latch_edge (bb
->loop_father
))
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE
, vect_location
,
614 "induction value on backedge.\n");
615 return opt_result::success ();
619 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
620 return opt_result::success ();
624 /* Function vect_mark_stmts_to_be_vectorized.
626 Not all stmts in the loop need to be vectorized. For example:
635 Stmt 1 and 3 do not need to be vectorized, because loop control and
636 addressing of vectorized data-refs are handled differently.
638 This pass detects such stmts. */
641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
643 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
644 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
645 unsigned int nbbs
= loop
->num_nodes
;
646 gimple_stmt_iterator si
;
650 enum vect_relevant relevant
;
652 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
654 auto_vec
<stmt_vec_info
, 64> worklist
;
656 /* 1. Init worklist. */
657 for (i
= 0; i
< nbbs
; i
++)
660 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
662 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
667 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
668 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
670 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
672 if (is_gimple_debug (gsi_stmt (si
)))
674 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE
, vect_location
,
677 "init: stmt relevant? %G", stmt_info
->stmt
);
679 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
680 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
684 /* 2. Process_worklist */
685 while (worklist
.length () > 0)
690 stmt_vec_info stmt_vinfo
= worklist
.pop ();
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE
, vect_location
,
693 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant according to the relevance property
698 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
700 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 propagated as is to the DEF_STMTs of its USEs.
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the relevance to vect_used_by_reduction.
705 This is because we distinguish between two kinds of relevant stmts -
706 those that are used by a reduction computation, and those that are
707 (also) used by a regular computation. This allows us later on to
708 identify stmts that are used solely by a reduction, and therefore the
709 order of the results that they produce does not have to be kept. */
711 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
713 case vect_reduction_def
:
714 gcc_assert (relevant
!= vect_unused_in_scope
);
715 if (relevant
!= vect_unused_in_scope
716 && relevant
!= vect_used_in_scope
717 && relevant
!= vect_used_by_reduction
718 && relevant
!= vect_used_only_live
)
719 return opt_result::failure_at
720 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
723 case vect_nested_cycle
:
724 if (relevant
!= vect_unused_in_scope
725 && relevant
!= vect_used_in_outer_by_reduction
726 && relevant
!= vect_used_in_outer
)
727 return opt_result::failure_at
728 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
731 case vect_double_reduction_def
:
732 if (relevant
!= vect_unused_in_scope
733 && relevant
!= vect_used_by_reduction
734 && relevant
!= vect_used_only_live
)
735 return opt_result::failure_at
736 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
743 if (is_pattern_stmt_p (stmt_vinfo
))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
750 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
751 tree op
= gimple_assign_rhs1 (assign
);
754 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
757 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
758 loop_vinfo
, relevant
, &worklist
, false);
761 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
762 loop_vinfo
, relevant
, &worklist
, false);
767 for (; i
< gimple_num_ops (assign
); i
++)
769 op
= gimple_op (assign
, i
);
770 if (TREE_CODE (op
) == SSA_NAME
)
773 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
780 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
782 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
784 tree arg
= gimple_call_arg (call
, i
);
786 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
794 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
796 tree op
= USE_FROM_PTR (use_p
);
798 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
806 gather_scatter_info gs_info
;
807 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
810 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
819 } /* while worklist */
821 return opt_result::success ();
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
831 vect_model_simple_cost (vec_info
*,
832 stmt_vec_info stmt_info
, int ncopies
,
833 enum vect_def_type
*dt
,
836 stmt_vector_for_cost
*cost_vec
,
837 vect_cost_for_stmt kind
= vector_stmt
)
839 int inside_cost
= 0, prologue_cost
= 0;
841 gcc_assert (cost_vec
!= NULL
);
843 /* ??? Somehow we need to fix this at the callers. */
845 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
848 /* Cost the "broadcast" of a scalar operand in to a vector operand.
849 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
851 for (int i
= 0; i
< ndts
; i
++)
852 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
853 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
854 stmt_info
, 0, vect_prologue
);
856 /* Pass the inside-of-loop statements to the target-specific cost model. */
857 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
858 stmt_info
, 0, vect_body
);
860 if (dump_enabled_p ())
861 dump_printf_loc (MSG_NOTE
, vect_location
,
862 "vect_model_simple_cost: inside_cost = %d, "
863 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
867 /* Model cost for type demotion and promotion operations. PWR is
868 normally zero for single-step promotions and demotions. It will be
869 one if two-step promotion/demotion is required, and so on. NCOPIES
870 is the number of vector results (and thus number of instructions)
871 for the narrowest end of the operation chain. Each additional
872 step doubles the number of instructions required. If WIDEN_ARITH
873 is true the stmt is doing widening arithmetic. */
876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
877 enum vect_def_type
*dt
,
878 unsigned int ncopies
, int pwr
,
879 stmt_vector_for_cost
*cost_vec
,
883 int inside_cost
= 0, prologue_cost
= 0;
885 for (i
= 0; i
< pwr
+ 1; i
++)
887 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
,
889 ? vector_stmt
: vec_promote_demote
,
890 stmt_info
, 0, vect_body
);
894 /* FORNOW: Assuming maximum 2 args per stmts. */
895 for (i
= 0; i
< 2; i
++)
896 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
897 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
898 stmt_info
, 0, vect_prologue
);
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE
, vect_location
,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
906 /* Returns true if the current function returns DECL. */
909 cfun_returns (tree decl
)
913 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
915 greturn
*ret
= safe_dyn_cast
<greturn
*> (*gsi_last_bb (e
->src
));
918 if (gimple_return_retval (ret
) == decl
)
920 /* We often end up with an aggregate copy to the result decl,
921 handle that case as well. First skip intermediate clobbers
926 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
928 while (gimple_clobber_p (def
));
929 if (is_a
<gassign
*> (def
)
930 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
931 && gimple_assign_rhs1 (def
) == decl
)
937 /* Function vect_model_store_cost
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
943 vect_model_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
944 vect_memory_access_type memory_access_type
,
945 gather_scatter_info
*gs_info
,
946 dr_alignment_support alignment_support_scheme
,
948 vec_load_store_type vls_type
, slp_tree slp_node
,
949 stmt_vector_for_cost
*cost_vec
)
951 unsigned int inside_cost
= 0, prologue_cost
= 0;
952 stmt_vec_info first_stmt_info
= stmt_info
;
953 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
955 /* ??? Somehow we need to fix this at the callers. */
957 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
959 if (vls_type
== VLS_STORE_INVARIANT
)
962 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
963 stmt_info
, 0, vect_prologue
);
966 /* Grouped stores update all elements in the group at once,
967 so we want the DR for the first statement. */
968 if (!slp_node
&& grouped_access_p
)
969 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
971 /* True if we should include any once-per-group costs as well as
972 the cost of the statement itself. For SLP we only get called
973 once per group anyhow. */
974 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
976 /* We assume that the cost of a single store-lanes instruction is
977 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
978 access is instead being provided by a permute-and-store operation,
979 include the cost of the permutes. */
981 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
983 /* Uses a high and low interleave or shuffle operations for each
985 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
986 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
987 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
988 stmt_info
, 0, vect_body
);
990 if (dump_enabled_p ())
991 dump_printf_loc (MSG_NOTE
, vect_location
,
992 "vect_model_store_cost: strided group_size = %d .\n",
996 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
997 /* Costs of the stores. */
998 if (memory_access_type
== VMAT_ELEMENTWISE
999 || memory_access_type
== VMAT_GATHER_SCATTER
)
1001 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1002 if (memory_access_type
== VMAT_GATHER_SCATTER
1003 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
1004 /* For emulated scatter N offset vector element extracts
1005 (we assume the scalar scaling and ptr + offset add is consumed by
1007 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
* assumed_nunits
,
1008 vec_to_scalar
, stmt_info
, 0,
1010 /* N scalar stores plus extracting the elements. */
1011 inside_cost
+= record_stmt_cost (cost_vec
,
1012 ncopies
* assumed_nunits
,
1013 scalar_store
, stmt_info
, 0, vect_body
);
1016 vect_get_store_cost (vinfo
, stmt_info
, ncopies
, alignment_support_scheme
,
1017 misalignment
, &inside_cost
, cost_vec
);
1019 if (memory_access_type
== VMAT_ELEMENTWISE
1020 || memory_access_type
== VMAT_STRIDED_SLP
1021 || (memory_access_type
== VMAT_GATHER_SCATTER
1022 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
))
1024 /* N scalar stores plus extracting the elements. */
1025 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1026 inside_cost
+= record_stmt_cost (cost_vec
,
1027 ncopies
* assumed_nunits
,
1028 vec_to_scalar
, stmt_info
, 0, vect_body
);
1031 /* When vectorizing a store into the function result assign
1032 a penalty if the function returns in a multi-register location.
1033 In this case we assume we'll end up with having to spill the
1034 vector result and do piecewise loads as a conservative estimate. */
1035 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
1037 && (TREE_CODE (base
) == RESULT_DECL
1038 || (DECL_P (base
) && cfun_returns (base
)))
1039 && !aggregate_value_p (base
, cfun
->decl
))
1041 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
1042 /* ??? Handle PARALLEL in some way. */
1045 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1046 /* Assume that a single reg-reg move is possible and cheap,
1047 do not account for vector to gp register move cost. */
1051 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1053 stmt_info
, 0, vect_epilogue
);
1055 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1057 stmt_info
, 0, vect_epilogue
);
1062 if (dump_enabled_p ())
1063 dump_printf_loc (MSG_NOTE
, vect_location
,
1064 "vect_model_store_cost: inside_cost = %d, "
1065 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1069 /* Calculate cost of DR's memory access. */
1071 vect_get_store_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1072 dr_alignment_support alignment_support_scheme
,
1074 unsigned int *inside_cost
,
1075 stmt_vector_for_cost
*body_cost_vec
)
1077 switch (alignment_support_scheme
)
1081 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1082 vector_store
, stmt_info
, 0,
1085 if (dump_enabled_p ())
1086 dump_printf_loc (MSG_NOTE
, vect_location
,
1087 "vect_model_store_cost: aligned.\n");
1091 case dr_unaligned_supported
:
1093 /* Here, we assign an additional cost for the unaligned store. */
1094 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1095 unaligned_store
, stmt_info
,
1096 misalignment
, vect_body
);
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_NOTE
, vect_location
,
1099 "vect_model_store_cost: unaligned supported by "
1104 case dr_unaligned_unsupported
:
1106 *inside_cost
= VECT_MAX_COST
;
1108 if (dump_enabled_p ())
1109 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1110 "vect_model_store_cost: unsupported access.\n");
1120 /* Function vect_model_load_cost
1122 Models cost for loads. In the case of grouped accesses, one access has
1123 the overhead of the grouped access attributed to it. Since unaligned
1124 accesses are supported for loads, we also account for the costs of the
1125 access scheme chosen. */
1128 vect_model_load_cost (vec_info
*vinfo
,
1129 stmt_vec_info stmt_info
, unsigned ncopies
, poly_uint64 vf
,
1130 vect_memory_access_type memory_access_type
,
1131 dr_alignment_support alignment_support_scheme
,
1133 gather_scatter_info
*gs_info
,
1135 stmt_vector_for_cost
*cost_vec
)
1137 unsigned int inside_cost
= 0, prologue_cost
= 0;
1138 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1140 gcc_assert (cost_vec
);
1142 /* ??? Somehow we need to fix this at the callers. */
1144 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1146 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1148 /* If the load is permuted then the alignment is determined by
1149 the first group element not by the first scalar stmt DR. */
1150 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1151 /* Record the cost for the permutation. */
1152 unsigned n_perms
, n_loads
;
1153 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1154 vf
, true, &n_perms
, &n_loads
);
1155 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1156 first_stmt_info
, 0, vect_body
);
1158 /* And adjust the number of loads performed. This handles
1159 redundancies as well as loads that are later dead. */
1163 /* Grouped loads read all elements in the group at once,
1164 so we want the DR for the first statement. */
1165 stmt_vec_info first_stmt_info
= stmt_info
;
1166 if (!slp_node
&& grouped_access_p
)
1167 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1169 /* True if we should include any once-per-group costs as well as
1170 the cost of the statement itself. For SLP we only get called
1171 once per group anyhow. */
1172 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1174 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1175 ones we actually need. Account for the cost of unused results. */
1176 if (first_stmt_p
&& !slp_node
&& memory_access_type
== VMAT_LOAD_STORE_LANES
)
1178 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
1179 stmt_vec_info next_stmt_info
= first_stmt_info
;
1183 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
1185 while (next_stmt_info
);
1188 if (dump_enabled_p ())
1189 dump_printf_loc (MSG_NOTE
, vect_location
,
1190 "vect_model_load_cost: %d unused vectors.\n",
1192 vect_get_load_cost (vinfo
, stmt_info
, ncopies
* gaps
,
1193 alignment_support_scheme
, misalignment
, false,
1194 &inside_cost
, &prologue_cost
,
1195 cost_vec
, cost_vec
, true);
1199 /* We assume that the cost of a single load-lanes instruction is
1200 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1201 access is instead being provided by a load-and-permute operation,
1202 include the cost of the permutes. */
1204 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1206 /* Uses an even and odd extract operations or shuffle operations
1207 for each needed permute. */
1208 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1209 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1210 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1211 stmt_info
, 0, vect_body
);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE
, vect_location
,
1215 "vect_model_load_cost: strided group_size = %d .\n",
1219 /* The loads themselves. */
1220 if (memory_access_type
== VMAT_ELEMENTWISE
1221 || memory_access_type
== VMAT_GATHER_SCATTER
)
1223 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1224 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1225 if (memory_access_type
== VMAT_GATHER_SCATTER
1226 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
1227 /* For emulated gathers N offset vector element extracts
1228 (we assume the scalar scaling and ptr + offset add is consumed by
1230 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
* assumed_nunits
,
1231 vec_to_scalar
, stmt_info
, 0,
1233 /* N scalar loads plus gathering them into a vector. */
1234 inside_cost
+= record_stmt_cost (cost_vec
,
1235 ncopies
* assumed_nunits
,
1236 scalar_load
, stmt_info
, 0, vect_body
);
1238 else if (memory_access_type
== VMAT_INVARIANT
)
1240 /* Invariant loads will ideally be hoisted and splat to a vector. */
1241 prologue_cost
+= record_stmt_cost (cost_vec
, 1,
1242 scalar_load
, stmt_info
, 0,
1244 prologue_cost
+= record_stmt_cost (cost_vec
, 1,
1245 scalar_to_vec
, stmt_info
, 0,
1249 vect_get_load_cost (vinfo
, stmt_info
, ncopies
,
1250 alignment_support_scheme
, misalignment
, first_stmt_p
,
1251 &inside_cost
, &prologue_cost
,
1252 cost_vec
, cost_vec
, true);
1253 if (memory_access_type
== VMAT_ELEMENTWISE
1254 || memory_access_type
== VMAT_STRIDED_SLP
1255 || (memory_access_type
== VMAT_GATHER_SCATTER
1256 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
))
1257 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1258 stmt_info
, 0, vect_body
);
1260 if (dump_enabled_p ())
1261 dump_printf_loc (MSG_NOTE
, vect_location
,
1262 "vect_model_load_cost: inside_cost = %d, "
1263 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1267 /* Calculate cost of DR's memory access. */
1269 vect_get_load_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1270 dr_alignment_support alignment_support_scheme
,
1272 bool add_realign_cost
, unsigned int *inside_cost
,
1273 unsigned int *prologue_cost
,
1274 stmt_vector_for_cost
*prologue_cost_vec
,
1275 stmt_vector_for_cost
*body_cost_vec
,
1276 bool record_prologue_costs
)
1278 switch (alignment_support_scheme
)
1282 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1283 stmt_info
, 0, vect_body
);
1285 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE
, vect_location
,
1287 "vect_model_load_cost: aligned.\n");
1291 case dr_unaligned_supported
:
1293 /* Here, we assign an additional cost for the unaligned load. */
1294 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1295 unaligned_load
, stmt_info
,
1296 misalignment
, vect_body
);
1298 if (dump_enabled_p ())
1299 dump_printf_loc (MSG_NOTE
, vect_location
,
1300 "vect_model_load_cost: unaligned supported by "
1305 case dr_explicit_realign
:
1307 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1308 vector_load
, stmt_info
, 0, vect_body
);
1309 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1310 vec_perm
, stmt_info
, 0, vect_body
);
1312 /* FIXME: If the misalignment remains fixed across the iterations of
1313 the containing loop, the following cost should be added to the
1315 if (targetm
.vectorize
.builtin_mask_for_load
)
1316 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1317 stmt_info
, 0, vect_body
);
1319 if (dump_enabled_p ())
1320 dump_printf_loc (MSG_NOTE
, vect_location
,
1321 "vect_model_load_cost: explicit realign\n");
1325 case dr_explicit_realign_optimized
:
1327 if (dump_enabled_p ())
1328 dump_printf_loc (MSG_NOTE
, vect_location
,
1329 "vect_model_load_cost: unaligned software "
1332 /* Unaligned software pipeline has a load of an address, an initial
1333 load, and possibly a mask operation to "prime" the loop. However,
1334 if this is an access in a group of loads, which provide grouped
1335 access, then the above cost should only be considered for one
1336 access in the group. Inside the loop, there is a load op
1337 and a realignment op. */
1339 if (add_realign_cost
&& record_prologue_costs
)
1341 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1342 vector_stmt
, stmt_info
,
1344 if (targetm
.vectorize
.builtin_mask_for_load
)
1345 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1346 vector_stmt
, stmt_info
,
1350 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1351 stmt_info
, 0, vect_body
);
1352 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1353 stmt_info
, 0, vect_body
);
1355 if (dump_enabled_p ())
1356 dump_printf_loc (MSG_NOTE
, vect_location
,
1357 "vect_model_load_cost: explicit realign optimized"
1363 case dr_unaligned_unsupported
:
1365 *inside_cost
= VECT_MAX_COST
;
1367 if (dump_enabled_p ())
1368 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1369 "vect_model_load_cost: unsupported access.\n");
1378 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1379 the loop preheader for the vectorized stmt STMT_VINFO. */
1382 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1383 gimple_stmt_iterator
*gsi
)
1386 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1388 vinfo
->insert_on_entry (stmt_vinfo
, new_stmt
);
1390 if (dump_enabled_p ())
1391 dump_printf_loc (MSG_NOTE
, vect_location
,
1392 "created new init_stmt: %G", new_stmt
);
1395 /* Function vect_init_vector.
1397 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1398 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1399 vector type a vector with all elements equal to VAL is created first.
1400 Place the initialization at GSI if it is not NULL. Otherwise, place the
1401 initialization at the loop preheader.
1402 Return the DEF of INIT_STMT.
1403 It will be used in the vectorization of STMT_INFO. */
1406 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1407 gimple_stmt_iterator
*gsi
)
1412 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1413 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1415 gcc_assert (VECTOR_TYPE_P (type
));
1416 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1418 /* Scalar boolean value should be transformed into
1419 all zeros or all ones value before building a vector. */
1420 if (VECTOR_BOOLEAN_TYPE_P (type
))
1422 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1423 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1425 if (CONSTANT_CLASS_P (val
))
1426 val
= integer_zerop (val
) ? false_val
: true_val
;
1429 new_temp
= make_ssa_name (TREE_TYPE (type
));
1430 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1431 val
, true_val
, false_val
);
1432 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1438 gimple_seq stmts
= NULL
;
1439 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1440 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1441 TREE_TYPE (type
), val
);
1443 /* ??? Condition vectorization expects us to do
1444 promotion of invariant/external defs. */
1445 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1446 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1447 !gsi_end_p (gsi2
); )
1449 init_stmt
= gsi_stmt (gsi2
);
1450 gsi_remove (&gsi2
, false);
1451 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1455 val
= build_vector_from_val (type
, val
);
1458 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1459 init_stmt
= gimple_build_assign (new_temp
, val
);
1460 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1465 /* Function vect_get_vec_defs_for_operand.
1467 OP is an operand in STMT_VINFO. This function returns a vector of
1468 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1470 In the case that OP is an SSA_NAME which is defined in the loop, then
1471 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1473 In case OP is an invariant or constant, a new stmt that creates a vector def
1474 needs to be introduced. VECTYPE may be used to specify a required type for
1475 vector invariant. */
1478 vect_get_vec_defs_for_operand (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1480 tree op
, vec
<tree
> *vec_oprnds
, tree vectype
)
1483 enum vect_def_type dt
;
1485 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1487 if (dump_enabled_p ())
1488 dump_printf_loc (MSG_NOTE
, vect_location
,
1489 "vect_get_vec_defs_for_operand: %T\n", op
);
1491 stmt_vec_info def_stmt_info
;
1492 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1493 &def_stmt_info
, &def_stmt
);
1494 gcc_assert (is_simple_use
);
1495 if (def_stmt
&& dump_enabled_p ())
1496 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1498 vec_oprnds
->create (ncopies
);
1499 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1501 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1505 vector_type
= vectype
;
1506 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1507 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1508 vector_type
= truth_type_for (stmt_vectype
);
1510 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1512 gcc_assert (vector_type
);
1513 tree vop
= vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1515 vec_oprnds
->quick_push (vop
);
1519 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
1520 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info
).length () == ncopies
);
1521 for (unsigned i
= 0; i
< ncopies
; ++i
)
1522 vec_oprnds
->quick_push (gimple_get_lhs
1523 (STMT_VINFO_VEC_STMTS (def_stmt_info
)[i
]));
1528 /* Get vectorized definitions for OP0 and OP1. */
1531 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1533 tree op0
, vec
<tree
> *vec_oprnds0
, tree vectype0
,
1534 tree op1
, vec
<tree
> *vec_oprnds1
, tree vectype1
,
1535 tree op2
, vec
<tree
> *vec_oprnds2
, tree vectype2
,
1536 tree op3
, vec
<tree
> *vec_oprnds3
, tree vectype3
)
1541 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_oprnds0
);
1543 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[1], vec_oprnds1
);
1545 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[2], vec_oprnds2
);
1547 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[3], vec_oprnds3
);
1552 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1553 op0
, vec_oprnds0
, vectype0
);
1555 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1556 op1
, vec_oprnds1
, vectype1
);
1558 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1559 op2
, vec_oprnds2
, vectype2
);
1561 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1562 op3
, vec_oprnds3
, vectype3
);
1567 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1569 tree op0
, vec
<tree
> *vec_oprnds0
,
1570 tree op1
, vec
<tree
> *vec_oprnds1
,
1571 tree op2
, vec
<tree
> *vec_oprnds2
,
1572 tree op3
, vec
<tree
> *vec_oprnds3
)
1574 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
1575 op0
, vec_oprnds0
, NULL_TREE
,
1576 op1
, vec_oprnds1
, NULL_TREE
,
1577 op2
, vec_oprnds2
, NULL_TREE
,
1578 op3
, vec_oprnds3
, NULL_TREE
);
1581 /* Helper function called by vect_finish_replace_stmt and
1582 vect_finish_stmt_generation. Set the location of the new
1583 statement and create and return a stmt_vec_info for it. */
1586 vect_finish_stmt_generation_1 (vec_info
*,
1587 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1594 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1596 /* While EH edges will generally prevent vectorization, stmt might
1597 e.g. be in a must-not-throw region. Ensure newly created stmts
1598 that could throw are part of the same region. */
1599 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1600 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1601 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1604 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
1607 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1608 which sets the same scalar result as STMT_INFO did. Create and return a
1609 stmt_vec_info for VEC_STMT. */
1612 vect_finish_replace_stmt (vec_info
*vinfo
,
1613 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1615 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1616 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1618 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1619 gsi_replace (&gsi
, vec_stmt
, true);
1621 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1624 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1625 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1628 vect_finish_stmt_generation (vec_info
*vinfo
,
1629 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1630 gimple_stmt_iterator
*gsi
)
1632 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1634 if (!gsi_end_p (*gsi
)
1635 && gimple_has_mem_ops (vec_stmt
))
1637 gimple
*at_stmt
= gsi_stmt (*gsi
);
1638 tree vuse
= gimple_vuse (at_stmt
);
1639 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1641 tree vdef
= gimple_vdef (at_stmt
);
1642 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1643 gimple_set_modified (vec_stmt
, true);
1644 /* If we have an SSA vuse and insert a store, update virtual
1645 SSA form to avoid triggering the renamer. Do so only
1646 if we can easily see all uses - which is what almost always
1647 happens with the way vectorized stmts are inserted. */
1648 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1649 && ((is_gimple_assign (vec_stmt
)
1650 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1651 || (is_gimple_call (vec_stmt
)
1652 && (!(gimple_call_flags (vec_stmt
)
1653 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
))
1654 || (gimple_call_lhs (vec_stmt
)
1655 && !is_gimple_reg (gimple_call_lhs (vec_stmt
)))))))
1657 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1658 gimple_set_vdef (vec_stmt
, new_vdef
);
1659 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1663 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1664 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1667 /* We want to vectorize a call to combined function CFN with function
1668 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1669 as the types of all inputs. Check whether this is possible using
1670 an internal function, returning its code if so or IFN_LAST if not. */
1673 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1674 tree vectype_out
, tree vectype_in
)
1677 if (internal_fn_p (cfn
))
1678 ifn
= as_internal_fn (cfn
);
1680 ifn
= associated_internal_fn (fndecl
);
1681 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1683 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1684 if (info
.vectorizable
)
1686 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1687 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1688 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1689 OPTIMIZE_FOR_SPEED
))
1697 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1698 gimple_stmt_iterator
*);
1700 /* Check whether a load or store statement in the loop described by
1701 LOOP_VINFO is possible in a loop using partial vectors. This is
1702 testing whether the vectorizer pass has the appropriate support,
1703 as well as whether the target does.
1705 VLS_TYPE says whether the statement is a load or store and VECTYPE
1706 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1707 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1708 says how the load or store is going to be implemented and GROUP_SIZE
1709 is the number of load or store statements in the containing group.
1710 If the access is a gather load or scatter store, GS_INFO describes
1711 its arguments. If the load or store is conditional, SCALAR_MASK is the
1712 condition under which it occurs.
1714 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1715 vectors is not supported, otherwise record the required rgroup control
1719 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo
, tree vectype
,
1721 vec_load_store_type vls_type
,
1723 vect_memory_access_type
1725 gather_scatter_info
*gs_info
,
1728 /* Invariant loads need no special support. */
1729 if (memory_access_type
== VMAT_INVARIANT
)
1732 unsigned int nvectors
;
1734 nvectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1736 nvectors
= vect_get_num_copies (loop_vinfo
, vectype
);
1738 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1739 machine_mode vecmode
= TYPE_MODE (vectype
);
1740 bool is_load
= (vls_type
== VLS_LOAD
);
1741 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1744 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1745 : !vect_store_lanes_supported (vectype
, group_size
, true))
1747 if (dump_enabled_p ())
1748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1749 "can't operate on partial vectors because"
1750 " the target doesn't have an appropriate"
1751 " load/store-lanes instruction.\n");
1752 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1755 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
,
1760 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1762 internal_fn ifn
= (is_load
1763 ? IFN_MASK_GATHER_LOAD
1764 : IFN_MASK_SCATTER_STORE
);
1765 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1766 gs_info
->memory_type
,
1767 gs_info
->offset_vectype
,
1770 if (dump_enabled_p ())
1771 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1772 "can't operate on partial vectors because"
1773 " the target doesn't have an appropriate"
1774 " gather load or scatter store instruction.\n");
1775 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1778 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
,
1783 if (memory_access_type
!= VMAT_CONTIGUOUS
1784 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1786 /* Element X of the data must come from iteration i * VF + X of the
1787 scalar loop. We need more work to support other mappings. */
1788 if (dump_enabled_p ())
1789 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1790 "can't operate on partial vectors because an"
1791 " access isn't contiguous.\n");
1792 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1796 if (!VECTOR_MODE_P (vecmode
))
1798 if (dump_enabled_p ())
1799 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1800 "can't operate on partial vectors when emulating"
1801 " vector operations.\n");
1802 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1806 /* We might load more scalars than we need for permuting SLP loads.
1807 We checked in get_group_load_store_type that the extra elements
1808 don't leak into a new vector. */
1809 auto group_memory_nvectors
= [](poly_uint64 size
, poly_uint64 nunits
)
1811 unsigned int nvectors
;
1812 if (can_div_away_from_zero_p (size
, nunits
, &nvectors
))
1817 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1818 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1819 machine_mode mask_mode
;
1820 bool using_partial_vectors_p
= false;
1821 if (targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1822 && can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1824 nvectors
= group_memory_nvectors (group_size
* vf
, nunits
);
1825 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1826 using_partial_vectors_p
= true;
1830 if (get_len_load_store_mode (vecmode
, is_load
).exists (&vmode
))
1832 nvectors
= group_memory_nvectors (group_size
* vf
, nunits
);
1833 vec_loop_lens
*lens
= &LOOP_VINFO_LENS (loop_vinfo
);
1834 unsigned factor
= (vecmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vecmode
);
1835 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, factor
);
1836 using_partial_vectors_p
= true;
1839 if (!using_partial_vectors_p
)
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1843 "can't operate on partial vectors because the"
1844 " target doesn't have the appropriate partial"
1845 " vectorization load or store.\n");
1846 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1850 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1851 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1852 that needs to be applied to all loads and stores in a vectorized loop.
1853 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1854 otherwise return VEC_MASK & LOOP_MASK.
1856 MASK_TYPE is the type of both masks. If new statements are needed,
1857 insert them before GSI. */
1860 prepare_vec_mask (loop_vec_info loop_vinfo
, tree mask_type
, tree loop_mask
,
1861 tree vec_mask
, gimple_stmt_iterator
*gsi
)
1863 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1867 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1869 if (loop_vinfo
->vec_cond_masked_set
.contains ({ vec_mask
, loop_mask
}))
1872 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1873 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1874 vec_mask
, loop_mask
);
1876 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1880 /* Determine whether we can use a gather load or scatter store to vectorize
1881 strided load or store STMT_INFO by truncating the current offset to a
1882 smaller width. We need to be able to construct an offset vector:
1884 { 0, X, X*2, X*3, ... }
1886 without loss of precision, where X is STMT_INFO's DR_STEP.
1888 Return true if this is possible, describing the gather load or scatter
1889 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1892 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1893 loop_vec_info loop_vinfo
, bool masked_p
,
1894 gather_scatter_info
*gs_info
)
1896 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1897 data_reference
*dr
= dr_info
->dr
;
1898 tree step
= DR_STEP (dr
);
1899 if (TREE_CODE (step
) != INTEGER_CST
)
1901 /* ??? Perhaps we could use range information here? */
1902 if (dump_enabled_p ())
1903 dump_printf_loc (MSG_NOTE
, vect_location
,
1904 "cannot truncate variable step.\n");
1908 /* Get the number of bits in an element. */
1909 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1910 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1911 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1913 /* Set COUNT to the upper limit on the number of elements - 1.
1914 Start with the maximum vectorization factor. */
1915 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1917 /* Try lowering COUNT to the number of scalar latch iterations. */
1918 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1919 widest_int max_iters
;
1920 if (max_loop_iterations (loop
, &max_iters
)
1921 && max_iters
< count
)
1922 count
= max_iters
.to_shwi ();
1924 /* Try scales of 1 and the element size. */
1925 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1926 wi::overflow_type overflow
= wi::OVF_NONE
;
1927 for (int i
= 0; i
< 2; ++i
)
1929 int scale
= scales
[i
];
1931 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1934 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1935 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1938 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1939 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1941 /* Find the narrowest viable offset type. */
1942 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1943 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1946 /* See whether the target supports the operation with an offset
1947 no narrower than OFFSET_TYPE. */
1948 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1949 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1950 vectype
, memory_type
, offset_type
, scale
,
1951 &gs_info
->ifn
, &gs_info
->offset_vectype
)
1952 || gs_info
->ifn
== IFN_LAST
)
1955 gs_info
->decl
= NULL_TREE
;
1956 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1957 but we don't need to store that here. */
1958 gs_info
->base
= NULL_TREE
;
1959 gs_info
->element_type
= TREE_TYPE (vectype
);
1960 gs_info
->offset
= fold_convert (offset_type
, step
);
1961 gs_info
->offset_dt
= vect_constant_def
;
1962 gs_info
->scale
= scale
;
1963 gs_info
->memory_type
= memory_type
;
1967 if (overflow
&& dump_enabled_p ())
1968 dump_printf_loc (MSG_NOTE
, vect_location
,
1969 "truncating gather/scatter offset to %d bits"
1970 " might change its value.\n", element_bits
);
1975 /* Return true if we can use gather/scatter internal functions to
1976 vectorize STMT_INFO, which is a grouped or strided load or store.
1977 MASKED_P is true if load or store is conditional. When returning
1978 true, fill in GS_INFO with the information required to perform the
1982 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1983 loop_vec_info loop_vinfo
, bool masked_p
,
1984 gather_scatter_info
*gs_info
)
1986 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1987 || gs_info
->ifn
== IFN_LAST
)
1988 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
1991 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1992 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
1994 gcc_assert (TYPE_PRECISION (new_offset_type
)
1995 >= TYPE_PRECISION (old_offset_type
));
1996 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1998 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_NOTE
, vect_location
,
2000 "using gather/scatter for strided/grouped access,"
2001 " scale = %d\n", gs_info
->scale
);
2006 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2007 elements with a known constant step. Return -1 if that step
2008 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2011 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
2013 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2014 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
2018 /* If the target supports a permute mask that reverses the elements in
2019 a vector of type VECTYPE, return that mask, otherwise return null. */
2022 perm_mask_for_reverse (tree vectype
)
2024 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2026 /* The encoding has a single stepped pattern. */
2027 vec_perm_builder
sel (nunits
, 1, 3);
2028 for (int i
= 0; i
< 3; ++i
)
2029 sel
.quick_push (nunits
- 1 - i
);
2031 vec_perm_indices
indices (sel
, 1, nunits
);
2032 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), TYPE_MODE (vectype
),
2035 return vect_gen_perm_mask_checked (vectype
, indices
);
2038 /* A subroutine of get_load_store_type, with a subset of the same
2039 arguments. Handle the case where STMT_INFO is a load or store that
2040 accesses consecutive elements with a negative step. Sets *POFFSET
2041 to the offset to be applied to the DR for the first access. */
2043 static vect_memory_access_type
2044 get_negative_load_store_type (vec_info
*vinfo
,
2045 stmt_vec_info stmt_info
, tree vectype
,
2046 vec_load_store_type vls_type
,
2047 unsigned int ncopies
, poly_int64
*poffset
)
2049 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2050 dr_alignment_support alignment_support_scheme
;
2054 if (dump_enabled_p ())
2055 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2056 "multiple types with negative step.\n");
2057 return VMAT_ELEMENTWISE
;
2060 /* For backward running DRs the first access in vectype actually is
2061 N-1 elements before the address of the DR. */
2062 *poffset
= ((-TYPE_VECTOR_SUBPARTS (vectype
) + 1)
2063 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype
))));
2065 int misalignment
= dr_misalignment (dr_info
, vectype
, *poffset
);
2066 alignment_support_scheme
2067 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
2068 if (alignment_support_scheme
!= dr_aligned
2069 && alignment_support_scheme
!= dr_unaligned_supported
)
2071 if (dump_enabled_p ())
2072 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2073 "negative step but alignment required.\n");
2075 return VMAT_ELEMENTWISE
;
2078 if (vls_type
== VLS_STORE_INVARIANT
)
2080 if (dump_enabled_p ())
2081 dump_printf_loc (MSG_NOTE
, vect_location
,
2082 "negative step with invariant source;"
2083 " no permute needed.\n");
2084 return VMAT_CONTIGUOUS_DOWN
;
2087 if (!perm_mask_for_reverse (vectype
))
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2091 "negative step and reversing not supported.\n");
2093 return VMAT_ELEMENTWISE
;
2096 return VMAT_CONTIGUOUS_REVERSE
;
2099 /* STMT_INFO is either a masked or unconditional store. Return the value
2103 vect_get_store_rhs (stmt_vec_info stmt_info
)
2105 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2107 gcc_assert (gimple_assign_single_p (assign
));
2108 return gimple_assign_rhs1 (assign
);
2110 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2112 internal_fn ifn
= gimple_call_internal_fn (call
);
2113 int index
= internal_fn_stored_value_index (ifn
);
2114 gcc_assert (index
>= 0);
2115 return gimple_call_arg (call
, index
);
2120 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2122 This function returns a vector type which can be composed with NETLS pieces,
2123 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2124 same vector size as the return vector. It checks target whether supports
2125 pieces-size vector mode for construction firstly, if target fails to, check
2126 pieces-size scalar mode for construction further. It returns NULL_TREE if
2127 fails to find the available composition.
2129 For example, for (vtype=V16QI, nelts=4), we can probably get:
2130 - V16QI with PTYPE V4QI.
2131 - V4SI with PTYPE SI.
2135 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2137 gcc_assert (VECTOR_TYPE_P (vtype
));
2138 gcc_assert (known_gt (nelts
, 0U));
2140 machine_mode vmode
= TYPE_MODE (vtype
);
2141 if (!VECTOR_MODE_P (vmode
))
2144 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2145 unsigned int pbsize
;
2146 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2148 /* First check if vec_init optab supports construction from
2149 vector pieces directly. */
2150 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2151 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2153 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2154 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2155 != CODE_FOR_nothing
))
2157 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
2161 /* Otherwise check if exists an integer type of the same piece size and
2162 if vec_init optab supports construction from it directly. */
2163 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2164 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2165 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2166 != CODE_FOR_nothing
))
2168 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2169 return build_vector_type (*ptype
, nelts
);
2176 /* A subroutine of get_load_store_type, with a subset of the same
2177 arguments. Handle the case where STMT_INFO is part of a grouped load
2180 For stores, the statements in the group are all consecutive
2181 and there is no gap at the end. For loads, the statements in the
2182 group might not be consecutive; there can be gaps between statements
2183 as well as at the end. */
2186 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2187 tree vectype
, slp_tree slp_node
,
2188 bool masked_p
, vec_load_store_type vls_type
,
2189 vect_memory_access_type
*memory_access_type
,
2190 poly_int64
*poffset
,
2191 dr_alignment_support
*alignment_support_scheme
,
2193 gather_scatter_info
*gs_info
)
2195 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2196 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2197 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2198 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2199 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2200 bool single_element_p
= (stmt_info
== first_stmt_info
2201 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2202 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2203 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2205 /* True if the vectorized statements would access beyond the last
2206 statement in the group. */
2207 bool overrun_p
= false;
2209 /* True if we can cope with such overrun by peeling for gaps, so that
2210 there is at least one final scalar iteration after the vector loop. */
2211 bool can_overrun_p
= (!masked_p
2212 && vls_type
== VLS_LOAD
2216 /* There can only be a gap at the end of the group if the stride is
2217 known at compile time. */
2218 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2220 /* Stores can't yet have gaps. */
2221 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2225 /* For SLP vectorization we directly vectorize a subchain
2226 without permutation. */
2227 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2229 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2230 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2232 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2233 separated by the stride, until we have a complete vector.
2234 Fall back to scalar accesses if that isn't possible. */
2235 if (multiple_p (nunits
, group_size
))
2236 *memory_access_type
= VMAT_STRIDED_SLP
;
2238 *memory_access_type
= VMAT_ELEMENTWISE
;
2242 overrun_p
= loop_vinfo
&& gap
!= 0;
2243 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2246 "Grouped store with gaps requires"
2247 " non-consecutive accesses\n");
2250 /* An overrun is fine if the trailing elements are smaller
2251 than the alignment boundary B. Every vector access will
2252 be a multiple of B and so we are guaranteed to access a
2253 non-gap element in the same B-sized block. */
2255 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2257 / vect_get_scalar_dr_size (first_dr_info
)))
2260 /* If the gap splits the vector in half and the target
2261 can do half-vector operations avoid the epilogue peeling
2262 by simply loading half of the vector only. Usually
2263 the construction with an upper zero half will be elided. */
2264 dr_alignment_support alss
;
2265 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2269 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2270 vectype
, misalign
)))
2272 || alss
== dr_unaligned_supported
)
2273 && known_eq (nunits
, (group_size
- gap
) * 2)
2274 && known_eq (nunits
, group_size
)
2275 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2279 if (overrun_p
&& !can_overrun_p
)
2281 if (dump_enabled_p ())
2282 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2283 "Peeling for outer loop is not supported\n");
2286 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2289 if (single_element_p
)
2290 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2291 only correct for single element "interleaving" SLP. */
2292 *memory_access_type
= get_negative_load_store_type
2293 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2296 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2297 separated by the stride, until we have a complete vector.
2298 Fall back to scalar accesses if that isn't possible. */
2299 if (multiple_p (nunits
, group_size
))
2300 *memory_access_type
= VMAT_STRIDED_SLP
;
2302 *memory_access_type
= VMAT_ELEMENTWISE
;
2307 gcc_assert (!loop_vinfo
|| cmp
> 0);
2308 *memory_access_type
= VMAT_CONTIGUOUS
;
2311 /* When we have a contiguous access across loop iterations
2312 but the access in the loop doesn't cover the full vector
2313 we can end up with no gap recorded but still excess
2314 elements accessed, see PR103116. Make sure we peel for
2315 gaps if necessary and sufficient and give up if not. */
2317 && *memory_access_type
== VMAT_CONTIGUOUS
2318 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2319 && !multiple_p (group_size
* LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
2322 unsigned HOST_WIDE_INT cnunits
, cvf
;
2324 || !nunits
.is_constant (&cnunits
)
2325 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&cvf
)
2326 /* Peeling for gaps assumes that a single scalar iteration
2327 is enough to make sure the last vector iteration doesn't
2328 access excess elements.
2329 ??? Enhancements include peeling multiple iterations
2330 or using masked loads with a static mask. */
2331 || (group_size
* cvf
) % cnunits
+ group_size
< cnunits
)
2333 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2335 "peeling for gaps insufficient for "
2345 /* We can always handle this case using elementwise accesses,
2346 but see if something more efficient is available. */
2347 *memory_access_type
= VMAT_ELEMENTWISE
;
2349 /* If there is a gap at the end of the group then these optimizations
2350 would access excess elements in the last iteration. */
2351 bool would_overrun_p
= (gap
!= 0);
2352 /* An overrun is fine if the trailing elements are smaller than the
2353 alignment boundary B. Every vector access will be a multiple of B
2354 and so we are guaranteed to access a non-gap element in the
2355 same B-sized block. */
2358 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2359 / vect_get_scalar_dr_size (first_dr_info
)))
2360 would_overrun_p
= false;
2362 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2363 && (can_overrun_p
|| !would_overrun_p
)
2364 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2366 /* First cope with the degenerate case of a single-element
2368 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2371 /* Otherwise try using LOAD/STORE_LANES. */
2372 else if (vls_type
== VLS_LOAD
2373 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2374 : vect_store_lanes_supported (vectype
, group_size
,
2377 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2378 overrun_p
= would_overrun_p
;
2381 /* If that fails, try using permuting loads. */
2382 else if (vls_type
== VLS_LOAD
2383 ? vect_grouped_load_supported (vectype
, single_element_p
,
2385 : vect_grouped_store_supported (vectype
, group_size
))
2387 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2388 overrun_p
= would_overrun_p
;
2392 /* As a last resort, trying using a gather load or scatter store.
2394 ??? Although the code can handle all group sizes correctly,
2395 it probably isn't a win to use separate strided accesses based
2396 on nearby locations. Or, even if it's a win over scalar code,
2397 it might not be a win over vectorizing at a lower VF, if that
2398 allows us to use contiguous accesses. */
2399 if (*memory_access_type
== VMAT_ELEMENTWISE
2402 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2404 *memory_access_type
= VMAT_GATHER_SCATTER
;
2407 if (*memory_access_type
== VMAT_GATHER_SCATTER
2408 || *memory_access_type
== VMAT_ELEMENTWISE
)
2410 *alignment_support_scheme
= dr_unaligned_supported
;
2411 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2415 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2416 *alignment_support_scheme
2417 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2421 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2423 /* STMT is the leader of the group. Check the operands of all the
2424 stmts of the group. */
2425 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2426 while (next_stmt_info
)
2428 tree op
= vect_get_store_rhs (next_stmt_info
);
2429 enum vect_def_type dt
;
2430 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2432 if (dump_enabled_p ())
2433 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2434 "use not simple.\n");
2437 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2443 gcc_assert (can_overrun_p
);
2444 if (dump_enabled_p ())
2445 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2446 "Data access with gaps requires scalar "
2448 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2454 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2455 if there is a memory access type that the vectorized form can use,
2456 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2457 or scatters, fill in GS_INFO accordingly. In addition
2458 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2459 the target does not support the alignment scheme. *MISALIGNMENT
2460 is set according to the alignment of the access (including
2461 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2463 SLP says whether we're performing SLP rather than loop vectorization.
2464 MASKED_P is true if the statement is conditional on a vectorized mask.
2465 VECTYPE is the vector type that the vectorized statements will use.
2466 NCOPIES is the number of vector statements that will be needed. */
2469 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2470 tree vectype
, slp_tree slp_node
,
2471 bool masked_p
, vec_load_store_type vls_type
,
2472 unsigned int ncopies
,
2473 vect_memory_access_type
*memory_access_type
,
2474 poly_int64
*poffset
,
2475 dr_alignment_support
*alignment_support_scheme
,
2477 gather_scatter_info
*gs_info
)
2479 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2480 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2481 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2483 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2485 *memory_access_type
= VMAT_GATHER_SCATTER
;
2486 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2488 /* When using internal functions, we rely on pattern recognition
2489 to convert the type of the offset to the type that the target
2490 requires, with the result being a call to an internal function.
2491 If that failed for some reason (e.g. because another pattern
2492 took priority), just handle cases in which the offset already
2493 has the right type. */
2494 else if (gs_info
->ifn
!= IFN_LAST
2495 && !is_gimple_call (stmt_info
->stmt
)
2496 && !tree_nop_conversion_p (TREE_TYPE (gs_info
->offset
),
2497 TREE_TYPE (gs_info
->offset_vectype
)))
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2501 "%s offset requires a conversion\n",
2502 vls_type
== VLS_LOAD
? "gather" : "scatter");
2505 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2506 &gs_info
->offset_dt
,
2507 &gs_info
->offset_vectype
))
2509 if (dump_enabled_p ())
2510 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2511 "%s index use not simple.\n",
2512 vls_type
== VLS_LOAD
? "gather" : "scatter");
2515 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2517 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2518 || !TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
).is_constant ()
2519 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2520 (gs_info
->offset_vectype
),
2521 TYPE_VECTOR_SUBPARTS (vectype
)))
2523 if (dump_enabled_p ())
2524 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2525 "unsupported vector types for emulated "
2530 /* Gather-scatter accesses perform only component accesses, alignment
2531 is irrelevant for them. */
2532 *alignment_support_scheme
= dr_unaligned_supported
;
2534 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2536 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2538 vls_type
, memory_access_type
, poffset
,
2539 alignment_support_scheme
,
2540 misalignment
, gs_info
))
2543 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2545 gcc_assert (!slp_node
);
2547 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2549 *memory_access_type
= VMAT_GATHER_SCATTER
;
2551 *memory_access_type
= VMAT_ELEMENTWISE
;
2552 /* Alignment is irrelevant here. */
2553 *alignment_support_scheme
= dr_unaligned_supported
;
2557 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2560 gcc_assert (vls_type
== VLS_LOAD
);
2561 *memory_access_type
= VMAT_INVARIANT
;
2562 /* Invariant accesses perform only component accesses, alignment
2563 is irrelevant for them. */
2564 *alignment_support_scheme
= dr_unaligned_supported
;
2569 *memory_access_type
= get_negative_load_store_type
2570 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2572 *memory_access_type
= VMAT_CONTIGUOUS
;
2573 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2575 *alignment_support_scheme
2576 = vect_supportable_dr_alignment (vinfo
,
2577 STMT_VINFO_DR_INFO (stmt_info
),
2578 vectype
, *misalignment
);
2582 if ((*memory_access_type
== VMAT_ELEMENTWISE
2583 || *memory_access_type
== VMAT_STRIDED_SLP
)
2584 && !nunits
.is_constant ())
2586 if (dump_enabled_p ())
2587 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2588 "Not using elementwise accesses due to variable "
2589 "vectorization factor.\n");
2593 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2595 if (dump_enabled_p ())
2596 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2597 "unsupported unaligned access\n");
2601 /* FIXME: At the moment the cost model seems to underestimate the
2602 cost of using elementwise accesses. This check preserves the
2603 traditional behavior until that can be fixed. */
2604 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2605 if (!first_stmt_info
)
2606 first_stmt_info
= stmt_info
;
2607 if (*memory_access_type
== VMAT_ELEMENTWISE
2608 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2609 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2610 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2611 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2613 if (dump_enabled_p ())
2614 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2615 "not falling back to elementwise accesses\n");
2621 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2622 conditional operation STMT_INFO. When returning true, store the mask
2623 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2624 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2625 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2628 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2629 slp_tree slp_node
, unsigned mask_index
,
2630 tree
*mask
, slp_tree
*mask_node
,
2631 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2633 enum vect_def_type mask_dt
;
2635 slp_tree mask_node_1
;
2636 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2637 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2641 "mask use not simple.\n");
2645 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2647 if (dump_enabled_p ())
2648 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2649 "mask argument is not a boolean.\n");
2653 /* If the caller is not prepared for adjusting an external/constant
2654 SLP mask vector type fail. */
2657 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2659 if (dump_enabled_p ())
2660 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2661 "SLP mask argument is not vectorized.\n");
2665 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2667 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2669 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2671 if (dump_enabled_p ())
2672 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2673 "could not find an appropriate vector mask type.\n");
2677 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2678 TYPE_VECTOR_SUBPARTS (vectype
)))
2680 if (dump_enabled_p ())
2681 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2682 "vector mask type %T"
2683 " does not match vector data type %T.\n",
2684 mask_vectype
, vectype
);
2689 *mask_dt_out
= mask_dt
;
2690 *mask_vectype_out
= mask_vectype
;
2692 *mask_node
= mask_node_1
;
2696 /* Return true if stored value RHS is suitable for vectorizing store
2697 statement STMT_INFO. When returning true, store the type of the
2698 definition in *RHS_DT_OUT, the type of the vectorized store value in
2699 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2702 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2703 slp_tree slp_node
, tree rhs
,
2704 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2705 vec_load_store_type
*vls_type_out
)
2707 /* In the case this is a store from a constant make sure
2708 native_encode_expr can handle it. */
2709 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2711 if (dump_enabled_p ())
2712 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2713 "cannot encode constant as a byte sequence.\n");
2718 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2720 if (gimple_call_internal_p (call
)
2721 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2722 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2725 enum vect_def_type rhs_dt
;
2728 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2729 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2731 if (dump_enabled_p ())
2732 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2733 "use not simple.\n");
2737 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2738 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2740 if (dump_enabled_p ())
2741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2742 "incompatible vector types.\n");
2746 *rhs_dt_out
= rhs_dt
;
2747 *rhs_vectype_out
= rhs_vectype
;
2748 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2749 *vls_type_out
= VLS_STORE_INVARIANT
;
2751 *vls_type_out
= VLS_STORE
;
2755 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2756 Note that we support masks with floating-point type, in which case the
2757 floats are interpreted as a bitmask. */
2760 vect_build_all_ones_mask (vec_info
*vinfo
,
2761 stmt_vec_info stmt_info
, tree masktype
)
2763 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2764 return build_int_cst (masktype
, -1);
2765 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2767 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2768 mask
= build_vector_from_val (masktype
, mask
);
2769 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2771 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2775 for (int j
= 0; j
< 6; ++j
)
2777 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2778 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2779 mask
= build_vector_from_val (masktype
, mask
);
2780 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2785 /* Build an all-zero merge value of type VECTYPE while vectorizing
2786 STMT_INFO as a gather load. */
2789 vect_build_zero_merge_argument (vec_info
*vinfo
,
2790 stmt_vec_info stmt_info
, tree vectype
)
2793 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2794 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2795 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2799 for (int j
= 0; j
< 6; ++j
)
2801 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2802 merge
= build_real (TREE_TYPE (vectype
), r
);
2806 merge
= build_vector_from_val (vectype
, merge
);
2807 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2810 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2811 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2812 the gather load operation. If the load is conditional, MASK is the
2813 unvectorized condition and MASK_DT is its definition type, otherwise
2817 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2818 gimple_stmt_iterator
*gsi
,
2820 gather_scatter_info
*gs_info
,
2823 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2824 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2825 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2826 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2827 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2828 edge pe
= loop_preheader_edge (loop
);
2829 enum { NARROW
, NONE
, WIDEN
} modifier
;
2830 poly_uint64 gather_off_nunits
2831 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2833 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2834 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2835 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2836 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2837 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2838 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2839 tree scaletype
= TREE_VALUE (arglist
);
2840 tree real_masktype
= masktype
;
2841 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2843 || TREE_CODE (masktype
) == INTEGER_TYPE
2844 || types_compatible_p (srctype
, masktype
)));
2846 masktype
= truth_type_for (srctype
);
2848 tree mask_halftype
= masktype
;
2849 tree perm_mask
= NULL_TREE
;
2850 tree mask_perm_mask
= NULL_TREE
;
2851 if (known_eq (nunits
, gather_off_nunits
))
2853 else if (known_eq (nunits
* 2, gather_off_nunits
))
2857 /* Currently widening gathers and scatters are only supported for
2858 fixed-length vectors. */
2859 int count
= gather_off_nunits
.to_constant ();
2860 vec_perm_builder
sel (count
, count
, 1);
2861 for (int i
= 0; i
< count
; ++i
)
2862 sel
.quick_push (i
| (count
/ 2));
2864 vec_perm_indices
indices (sel
, 1, count
);
2865 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2868 else if (known_eq (nunits
, gather_off_nunits
* 2))
2872 /* Currently narrowing gathers and scatters are only supported for
2873 fixed-length vectors. */
2874 int count
= nunits
.to_constant ();
2875 vec_perm_builder
sel (count
, count
, 1);
2876 sel
.quick_grow (count
);
2877 for (int i
= 0; i
< count
; ++i
)
2878 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2879 vec_perm_indices
indices (sel
, 2, count
);
2880 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2884 if (mask
&& VECTOR_TYPE_P (real_masktype
))
2886 for (int i
= 0; i
< count
; ++i
)
2887 sel
[i
] = i
| (count
/ 2);
2888 indices
.new_vector (sel
, 2, count
);
2889 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2892 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2897 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2898 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2900 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2901 if (!is_gimple_min_invariant (ptr
))
2904 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2905 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2906 gcc_assert (!new_bb
);
2909 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2911 tree vec_oprnd0
= NULL_TREE
;
2912 tree vec_mask
= NULL_TREE
;
2913 tree src_op
= NULL_TREE
;
2914 tree mask_op
= NULL_TREE
;
2915 tree prev_res
= NULL_TREE
;
2919 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2920 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2923 auto_vec
<tree
> vec_oprnds0
;
2924 auto_vec
<tree
> vec_masks
;
2925 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2926 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2927 gs_info
->offset
, &vec_oprnds0
);
2929 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2930 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2931 mask
, &vec_masks
, masktype
);
2932 for (int j
= 0; j
< ncopies
; ++j
)
2935 if (modifier
== WIDEN
&& (j
& 1))
2936 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2937 perm_mask
, stmt_info
, gsi
);
2939 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2941 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2943 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2944 TYPE_VECTOR_SUBPARTS (idxtype
)));
2945 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2946 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2947 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2948 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2954 if (mask_perm_mask
&& (j
& 1))
2955 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2956 mask_perm_mask
, stmt_info
, gsi
);
2959 if (modifier
== NARROW
)
2962 vec_mask
= vec_masks
[j
/ 2];
2965 vec_mask
= vec_masks
[j
];
2968 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2970 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2971 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2972 gcc_assert (known_eq (sub1
, sub2
));
2973 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2974 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2976 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2977 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2981 if (modifier
== NARROW
&& !VECTOR_TYPE_P (real_masktype
))
2983 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2985 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2986 : VEC_UNPACK_LO_EXPR
,
2988 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2994 tree mask_arg
= mask_op
;
2995 if (masktype
!= real_masktype
)
2997 tree utype
, optype
= TREE_TYPE (mask_op
);
2998 if (VECTOR_TYPE_P (real_masktype
)
2999 || TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
3000 utype
= real_masktype
;
3002 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
3003 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
3004 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
3006 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
3007 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3009 if (!useless_type_conversion_p (real_masktype
, utype
))
3011 gcc_assert (TYPE_PRECISION (utype
)
3012 <= TYPE_PRECISION (real_masktype
));
3013 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
3014 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
3015 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3018 src_op
= build_zero_cst (srctype
);
3020 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
3023 if (!useless_type_conversion_p (vectype
, rettype
))
3025 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
3026 TYPE_VECTOR_SUBPARTS (rettype
)));
3027 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
3028 gimple_call_set_lhs (new_stmt
, op
);
3029 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3030 var
= make_ssa_name (vec_dest
);
3031 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
3032 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
3033 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3037 var
= make_ssa_name (vec_dest
, new_stmt
);
3038 gimple_call_set_lhs (new_stmt
, var
);
3039 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3042 if (modifier
== NARROW
)
3049 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
3051 new_stmt
= SSA_NAME_DEF_STMT (var
);
3054 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3056 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3059 /* Prepare the base and offset in GS_INFO for vectorization.
3060 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3061 to the vectorized offset argument for the first copy of STMT_INFO.
3062 STMT_INFO is the statement described by GS_INFO and LOOP is the
3066 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
3067 class loop
*loop
, stmt_vec_info stmt_info
,
3068 slp_tree slp_node
, gather_scatter_info
*gs_info
,
3069 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
3071 gimple_seq stmts
= NULL
;
3072 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
3076 edge pe
= loop_preheader_edge (loop
);
3077 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3078 gcc_assert (!new_bb
);
3081 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
);
3085 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
3086 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
3087 gs_info
->offset
, vec_offset
,
3088 gs_info
->offset_vectype
);
3092 /* Prepare to implement a grouped or strided load or store using
3093 the gather load or scatter store operation described by GS_INFO.
3094 STMT_INFO is the load or store statement.
3096 Set *DATAREF_BUMP to the amount that should be added to the base
3097 address after each copy of the vectorized statement. Set *VEC_OFFSET
3098 to an invariant offset vector in which element I has the value
3099 I * DR_STEP / SCALE. */
3102 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
3103 loop_vec_info loop_vinfo
,
3104 gather_scatter_info
*gs_info
,
3105 tree
*dataref_bump
, tree
*vec_offset
)
3107 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3108 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3110 tree bump
= size_binop (MULT_EXPR
,
3111 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
3112 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3113 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
);
3115 /* The offset given in GS_INFO can have pointer type, so use the element
3116 type of the vector instead. */
3117 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
3119 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3120 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
3121 ssize_int (gs_info
->scale
));
3122 step
= fold_convert (offset_type
, step
);
3124 /* Create {0, X, X*2, X*3, ...}. */
3125 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3126 build_zero_cst (offset_type
), step
);
3127 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
);
3130 /* Return the amount that should be added to a vector pointer to move
3131 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3132 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3136 vect_get_data_ptr_increment (vec_info
*vinfo
,
3137 dr_vec_info
*dr_info
, tree aggr_type
,
3138 vect_memory_access_type memory_access_type
)
3140 if (memory_access_type
== VMAT_INVARIANT
)
3141 return size_zero_node
;
3143 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3144 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3145 if (tree_int_cst_sgn (step
) == -1)
3146 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3150 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3153 vectorizable_bswap (vec_info
*vinfo
,
3154 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3155 gimple
**vec_stmt
, slp_tree slp_node
,
3157 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3160 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3161 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3164 op
= gimple_call_arg (stmt
, 0);
3165 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3166 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3168 /* Multiple types in SLP are handled by creating the appropriate number of
3169 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3174 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3176 gcc_assert (ncopies
>= 1);
3178 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3182 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3183 unsigned word_bytes
;
3184 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3187 /* The encoding uses one stepped pattern for each byte in the word. */
3188 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3189 for (unsigned i
= 0; i
< 3; ++i
)
3190 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3191 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3193 vec_perm_indices
indices (elts
, 1, num_bytes
);
3194 machine_mode vmode
= TYPE_MODE (char_vectype
);
3195 if (!can_vec_perm_const_p (vmode
, vmode
, indices
))
3201 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3203 if (dump_enabled_p ())
3204 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3205 "incompatible vector types for invariants\n");
3209 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3210 DUMP_VECT_SCOPE ("vectorizable_bswap");
3211 record_stmt_cost (cost_vec
,
3212 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3213 record_stmt_cost (cost_vec
,
3215 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3216 vec_perm
, stmt_info
, 0, vect_body
);
3220 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3223 vec
<tree
> vec_oprnds
= vNULL
;
3224 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3226 /* Arguments are ready. create the new vector stmt. */
3229 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3232 tree tem
= make_ssa_name (char_vectype
);
3233 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3234 char_vectype
, vop
));
3235 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3236 tree tem2
= make_ssa_name (char_vectype
);
3237 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3238 tem
, tem
, bswap_vconst
);
3239 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3240 tem
= make_ssa_name (vectype
);
3241 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3243 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3245 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3247 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3251 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3253 vec_oprnds
.release ();
3257 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3258 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3259 in a single step. On success, store the binary pack code in
3263 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3264 tree_code
*convert_code
)
3266 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3267 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3271 int multi_step_cvt
= 0;
3272 auto_vec
<tree
, 8> interm_types
;
3273 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3274 &code
, &multi_step_cvt
, &interm_types
)
3278 *convert_code
= code
;
3282 /* Function vectorizable_call.
3284 Check if STMT_INFO performs a function call that can be vectorized.
3285 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3286 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3287 Return true if STMT_INFO is vectorizable in this way. */
3290 vectorizable_call (vec_info
*vinfo
,
3291 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3292 gimple
**vec_stmt
, slp_tree slp_node
,
3293 stmt_vector_for_cost
*cost_vec
)
3299 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3300 tree vectype_out
, vectype_in
;
3301 poly_uint64 nunits_in
;
3302 poly_uint64 nunits_out
;
3303 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3304 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3305 tree fndecl
, new_temp
, rhs_type
;
3306 enum vect_def_type dt
[4]
3307 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3308 vect_unknown_def_type
};
3309 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3310 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3311 int ndts
= ARRAY_SIZE (dt
);
3313 auto_vec
<tree
, 8> vargs
;
3314 enum { NARROW
, NONE
, WIDEN
} modifier
;
3318 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3321 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3325 /* Is STMT_INFO a vectorizable call? */
3326 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3330 if (gimple_call_internal_p (stmt
)
3331 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3332 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3333 /* Handled by vectorizable_load and vectorizable_store. */
3336 if (gimple_call_lhs (stmt
) == NULL_TREE
3337 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3340 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3342 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3344 /* Process function arguments. */
3345 rhs_type
= NULL_TREE
;
3346 vectype_in
= NULL_TREE
;
3347 nargs
= gimple_call_num_args (stmt
);
3349 /* Bail out if the function has more than four arguments, we do not have
3350 interesting builtin functions to vectorize with more than two arguments
3351 except for fma. No arguments is also not good. */
3352 if (nargs
== 0 || nargs
> 4)
3355 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3356 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3357 if (cfn
== CFN_GOMP_SIMD_LANE
)
3360 rhs_type
= unsigned_type_node
;
3364 if (internal_fn_p (cfn
))
3365 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3367 for (i
= 0; i
< nargs
; i
++)
3369 if ((int) i
== mask_opno
)
3371 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3372 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3377 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3378 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3380 if (dump_enabled_p ())
3381 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3382 "use not simple.\n");
3386 /* We can only handle calls with arguments of the same type. */
3388 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3390 if (dump_enabled_p ())
3391 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3392 "argument types differ.\n");
3396 rhs_type
= TREE_TYPE (op
);
3399 vectype_in
= vectypes
[i
];
3400 else if (vectypes
[i
]
3401 && !types_compatible_p (vectypes
[i
], vectype_in
))
3403 if (dump_enabled_p ())
3404 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3405 "argument vector types differ.\n");
3409 /* If all arguments are external or constant defs, infer the vector type
3410 from the scalar type. */
3412 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3414 gcc_assert (vectype_in
);
3417 if (dump_enabled_p ())
3418 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3419 "no vectype for scalar type %T\n", rhs_type
);
3423 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3424 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3425 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3426 by a pack of the two vectors into an SI vector. We would need
3427 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3428 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3430 if (dump_enabled_p ())
3431 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3432 "mismatched vector sizes %T and %T\n",
3433 vectype_in
, vectype_out
);
3437 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3438 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3440 if (dump_enabled_p ())
3441 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3442 "mixed mask and nonmask vector types\n");
3446 if (vect_emulated_vector_p (vectype_in
) || vect_emulated_vector_p (vectype_out
))
3448 if (dump_enabled_p ())
3449 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3450 "use emulated vector type for call\n");
3455 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3456 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3457 if (known_eq (nunits_in
* 2, nunits_out
))
3459 else if (known_eq (nunits_out
, nunits_in
))
3461 else if (known_eq (nunits_out
* 2, nunits_in
))
3466 /* We only handle functions that do not read or clobber memory. */
3467 if (gimple_vuse (stmt
))
3469 if (dump_enabled_p ())
3470 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3471 "function reads from or writes to memory.\n");
3475 /* For now, we only vectorize functions if a target specific builtin
3476 is available. TODO -- in some cases, it might be profitable to
3477 insert the calls for pieces of the vector, in order to be able
3478 to vectorize other operations in the loop. */
3480 internal_fn ifn
= IFN_LAST
;
3481 tree callee
= gimple_call_fndecl (stmt
);
3483 /* First try using an internal function. */
3484 tree_code convert_code
= ERROR_MARK
;
3486 && (modifier
== NONE
3487 || (modifier
== NARROW
3488 && simple_integer_narrowing (vectype_out
, vectype_in
,
3490 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3493 /* If that fails, try asking for a target-specific built-in function. */
3494 if (ifn
== IFN_LAST
)
3496 if (cfn
!= CFN_LAST
)
3497 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3498 (cfn
, vectype_out
, vectype_in
);
3499 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3500 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3501 (callee
, vectype_out
, vectype_in
);
3504 if (ifn
== IFN_LAST
&& !fndecl
)
3506 if (cfn
== CFN_GOMP_SIMD_LANE
3509 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3510 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3511 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3512 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3514 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3515 { 0, 1, 2, ... vf - 1 } vector. */
3516 gcc_assert (nargs
== 0);
3518 else if (modifier
== NONE
3519 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3520 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3521 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3522 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3523 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3524 slp_op
, vectype_in
, cost_vec
);
3527 if (dump_enabled_p ())
3528 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3529 "function is not vectorizable.\n");
3536 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3537 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3539 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3541 /* Sanity check: make sure that at least one copy of the vectorized stmt
3542 needs to be generated. */
3543 gcc_assert (ncopies
>= 1);
3545 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3546 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3547 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3548 if (!vec_stmt
) /* transformation not required. */
3551 for (i
= 0; i
< nargs
; ++i
)
3552 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
],
3554 ? vectypes
[i
] : vectype_in
))
3556 if (dump_enabled_p ())
3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3558 "incompatible vector types for invariants\n");
3561 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3562 DUMP_VECT_SCOPE ("vectorizable_call");
3563 vect_model_simple_cost (vinfo
, stmt_info
,
3564 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3565 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3566 record_stmt_cost (cost_vec
, ncopies
/ 2,
3567 vec_promote_demote
, stmt_info
, 0, vect_body
);
3570 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3571 && (reduc_idx
>= 0 || mask_opno
>= 0))
3574 && (cond_fn
== IFN_LAST
3575 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3576 OPTIMIZE_FOR_SPEED
)))
3578 if (dump_enabled_p ())
3579 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3580 "can't use a fully-masked loop because no"
3581 " conditional operation is available.\n");
3582 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3586 unsigned int nvectors
3588 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3590 tree scalar_mask
= NULL_TREE
;
3592 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3593 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3594 vectype_out
, scalar_mask
);
3602 if (dump_enabled_p ())
3603 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3606 scalar_dest
= gimple_call_lhs (stmt
);
3607 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3609 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3610 unsigned int vect_nargs
= nargs
;
3611 if (masked_loop_p
&& reduc_idx
>= 0)
3617 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3619 tree prev_res
= NULL_TREE
;
3620 vargs
.safe_grow (vect_nargs
, true);
3621 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3622 for (j
= 0; j
< ncopies
; ++j
)
3624 /* Build argument list for the vectorized call. */
3627 vec
<tree
> vec_oprnds0
;
3629 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3630 vec_oprnds0
= vec_defs
[0];
3632 /* Arguments are ready. Create the new vector stmt. */
3633 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3636 if (masked_loop_p
&& reduc_idx
>= 0)
3638 unsigned int vec_num
= vec_oprnds0
.length ();
3639 /* Always true for SLP. */
3640 gcc_assert (ncopies
== 1);
3641 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, vec_num
,
3645 for (k
= 0; k
< nargs
; k
++)
3647 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3648 vargs
[varg
++] = vec_oprndsk
[i
];
3650 if (masked_loop_p
&& reduc_idx
>= 0)
3651 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3653 if (modifier
== NARROW
)
3655 /* We don't define any narrowing conditional functions
3657 gcc_assert (mask_opno
< 0);
3658 tree half_res
= make_ssa_name (vectype_in
);
3660 = gimple_build_call_internal_vec (ifn
, vargs
);
3661 gimple_call_set_lhs (call
, half_res
);
3662 gimple_call_set_nothrow (call
, true);
3663 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3666 prev_res
= half_res
;
3669 new_temp
= make_ssa_name (vec_dest
);
3670 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3671 prev_res
, half_res
);
3672 vect_finish_stmt_generation (vinfo
, stmt_info
,
3677 if (mask_opno
>= 0 && masked_loop_p
)
3679 unsigned int vec_num
= vec_oprnds0
.length ();
3680 /* Always true for SLP. */
3681 gcc_assert (ncopies
== 1);
3682 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3684 vargs
[mask_opno
] = prepare_vec_mask
3685 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3686 vargs
[mask_opno
], gsi
);
3690 if (ifn
!= IFN_LAST
)
3691 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3693 call
= gimple_build_call_vec (fndecl
, vargs
);
3694 new_temp
= make_ssa_name (vec_dest
, call
);
3695 gimple_call_set_lhs (call
, new_temp
);
3696 gimple_call_set_nothrow (call
, true);
3697 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3700 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3706 if (masked_loop_p
&& reduc_idx
>= 0)
3707 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, ncopies
,
3709 for (i
= 0; i
< nargs
; i
++)
3711 op
= gimple_call_arg (stmt
, i
);
3714 vec_defs
.quick_push (vNULL
);
3715 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3719 vargs
[varg
++] = vec_defs
[i
][j
];
3721 if (masked_loop_p
&& reduc_idx
>= 0)
3722 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3724 if (mask_opno
>= 0 && masked_loop_p
)
3726 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3729 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3730 vargs
[mask_opno
], gsi
);
3734 if (cfn
== CFN_GOMP_SIMD_LANE
)
3736 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3738 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3739 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3740 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3741 new_temp
= make_ssa_name (vec_dest
);
3742 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3743 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3745 else if (modifier
== NARROW
)
3747 /* We don't define any narrowing conditional functions at
3749 gcc_assert (mask_opno
< 0);
3750 tree half_res
= make_ssa_name (vectype_in
);
3751 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3752 gimple_call_set_lhs (call
, half_res
);
3753 gimple_call_set_nothrow (call
, true);
3754 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3757 prev_res
= half_res
;
3760 new_temp
= make_ssa_name (vec_dest
);
3761 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3762 prev_res
, half_res
);
3763 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3768 if (ifn
!= IFN_LAST
)
3769 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3771 call
= gimple_build_call_vec (fndecl
, vargs
);
3772 new_temp
= make_ssa_name (vec_dest
, call
);
3773 gimple_call_set_lhs (call
, new_temp
);
3774 gimple_call_set_nothrow (call
, true);
3775 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3779 if (j
== (modifier
== NARROW
? 1 : 0))
3780 *vec_stmt
= new_stmt
;
3781 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3783 for (i
= 0; i
< nargs
; i
++)
3785 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3786 vec_oprndsi
.release ();
3789 else if (modifier
== NARROW
)
3791 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3792 /* We don't define any narrowing conditional functions at present. */
3793 gcc_assert (mask_opno
< 0);
3794 for (j
= 0; j
< ncopies
; ++j
)
3796 /* Build argument list for the vectorized call. */
3798 vargs
.create (nargs
* 2);
3804 vec
<tree
> vec_oprnds0
;
3806 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3807 vec_oprnds0
= vec_defs
[0];
3809 /* Arguments are ready. Create the new vector stmt. */
3810 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3814 for (k
= 0; k
< nargs
; k
++)
3816 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3817 vargs
.quick_push (vec_oprndsk
[i
]);
3818 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3821 if (ifn
!= IFN_LAST
)
3822 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3824 call
= gimple_build_call_vec (fndecl
, vargs
);
3825 new_temp
= make_ssa_name (vec_dest
, call
);
3826 gimple_call_set_lhs (call
, new_temp
);
3827 gimple_call_set_nothrow (call
, true);
3828 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3829 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3834 for (i
= 0; i
< nargs
; i
++)
3836 op
= gimple_call_arg (stmt
, i
);
3839 vec_defs
.quick_push (vNULL
);
3840 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3841 op
, &vec_defs
[i
], vectypes
[i
]);
3843 vec_oprnd0
= vec_defs
[i
][2*j
];
3844 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3846 vargs
.quick_push (vec_oprnd0
);
3847 vargs
.quick_push (vec_oprnd1
);
3850 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3851 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3852 gimple_call_set_lhs (new_stmt
, new_temp
);
3853 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3855 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3859 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3861 for (i
= 0; i
< nargs
; i
++)
3863 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3864 vec_oprndsi
.release ();
3868 /* No current target implements this case. */
3873 /* The call in STMT might prevent it from being removed in dce.
3874 We however cannot remove it here, due to the way the ssa name
3875 it defines is mapped to the new definition. So just replace
3876 rhs of the statement with something harmless. */
3881 stmt_info
= vect_orig_stmt (stmt_info
);
3882 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3885 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3886 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3892 struct simd_call_arg_info
3896 HOST_WIDE_INT linear_step
;
3897 enum vect_def_type dt
;
3899 bool simd_lane_linear
;
3902 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3903 is linear within simd lane (but not within whole loop), note it in
3907 vect_simd_lane_linear (tree op
, class loop
*loop
,
3908 struct simd_call_arg_info
*arginfo
)
3910 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3912 if (!is_gimple_assign (def_stmt
)
3913 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3914 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3917 tree base
= gimple_assign_rhs1 (def_stmt
);
3918 HOST_WIDE_INT linear_step
= 0;
3919 tree v
= gimple_assign_rhs2 (def_stmt
);
3920 while (TREE_CODE (v
) == SSA_NAME
)
3923 def_stmt
= SSA_NAME_DEF_STMT (v
);
3924 if (is_gimple_assign (def_stmt
))
3925 switch (gimple_assign_rhs_code (def_stmt
))
3928 t
= gimple_assign_rhs2 (def_stmt
);
3929 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3931 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3932 v
= gimple_assign_rhs1 (def_stmt
);
3935 t
= gimple_assign_rhs2 (def_stmt
);
3936 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3938 linear_step
= tree_to_shwi (t
);
3939 v
= gimple_assign_rhs1 (def_stmt
);
3942 t
= gimple_assign_rhs1 (def_stmt
);
3943 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3944 || (TYPE_PRECISION (TREE_TYPE (v
))
3945 < TYPE_PRECISION (TREE_TYPE (t
))))
3954 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3956 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3957 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3962 arginfo
->linear_step
= linear_step
;
3964 arginfo
->simd_lane_linear
= true;
3970 /* Return the number of elements in vector type VECTYPE, which is associated
3971 with a SIMD clone. At present these vectors always have a constant
3974 static unsigned HOST_WIDE_INT
3975 simd_clone_subparts (tree vectype
)
3977 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3980 /* Function vectorizable_simd_clone_call.
3982 Check if STMT_INFO performs a function call that can be vectorized
3983 by calling a simd clone of the function.
3984 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3985 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3986 Return true if STMT_INFO is vectorizable in this way. */
3989 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3990 gimple_stmt_iterator
*gsi
,
3991 gimple
**vec_stmt
, slp_tree slp_node
,
3992 stmt_vector_for_cost
*)
3997 tree vec_oprnd0
= NULL_TREE
;
4000 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4001 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4002 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
4003 tree fndecl
, new_temp
;
4005 auto_vec
<simd_call_arg_info
> arginfo
;
4006 vec
<tree
> vargs
= vNULL
;
4008 tree lhs
, rtype
, ratype
;
4009 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
4012 /* Is STMT a vectorizable call? */
4013 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
4017 fndecl
= gimple_call_fndecl (stmt
);
4018 if (fndecl
== NULL_TREE
4019 && gimple_call_internal_p (stmt
, IFN_MASK_CALL
))
4021 fndecl
= gimple_call_arg (stmt
, 0);
4022 gcc_checking_assert (TREE_CODE (fndecl
) == ADDR_EXPR
);
4023 fndecl
= TREE_OPERAND (fndecl
, 0);
4024 gcc_checking_assert (TREE_CODE (fndecl
) == FUNCTION_DECL
);
4027 if (fndecl
== NULL_TREE
)
4030 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
4031 if (node
== NULL
|| node
->simd_clones
== NULL
)
4034 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4037 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4041 if (gimple_call_lhs (stmt
)
4042 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
4045 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
4047 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4049 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
4056 /* Process function arguments. */
4057 nargs
= gimple_call_num_args (stmt
) - arg_offset
;
4059 /* Bail out if the function has zero arguments. */
4063 arginfo
.reserve (nargs
, true);
4065 for (i
= 0; i
< nargs
; i
++)
4067 simd_call_arg_info thisarginfo
;
4070 thisarginfo
.linear_step
= 0;
4071 thisarginfo
.align
= 0;
4072 thisarginfo
.op
= NULL_TREE
;
4073 thisarginfo
.simd_lane_linear
= false;
4075 op
= gimple_call_arg (stmt
, i
+ arg_offset
);
4076 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
4077 &thisarginfo
.vectype
)
4078 || thisarginfo
.dt
== vect_uninitialized_def
)
4080 if (dump_enabled_p ())
4081 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4082 "use not simple.\n");
4086 if (thisarginfo
.dt
== vect_constant_def
4087 || thisarginfo
.dt
== vect_external_def
)
4088 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
4090 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
4092 /* For linear arguments, the analyze phase should have saved
4093 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4094 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
4095 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
4097 gcc_assert (vec_stmt
);
4098 thisarginfo
.linear_step
4099 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
4101 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
4102 thisarginfo
.simd_lane_linear
4103 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
4104 == boolean_true_node
);
4105 /* If loop has been peeled for alignment, we need to adjust it. */
4106 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4107 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4108 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4110 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4111 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
4112 tree opt
= TREE_TYPE (thisarginfo
.op
);
4113 bias
= fold_convert (TREE_TYPE (step
), bias
);
4114 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4116 = fold_build2 (POINTER_TYPE_P (opt
)
4117 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4118 thisarginfo
.op
, bias
);
4122 && thisarginfo
.dt
!= vect_constant_def
4123 && thisarginfo
.dt
!= vect_external_def
4125 && TREE_CODE (op
) == SSA_NAME
4126 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4128 && tree_fits_shwi_p (iv
.step
))
4130 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4131 thisarginfo
.op
= iv
.base
;
4133 else if ((thisarginfo
.dt
== vect_constant_def
4134 || thisarginfo
.dt
== vect_external_def
)
4135 && POINTER_TYPE_P (TREE_TYPE (op
)))
4136 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4137 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4139 if (POINTER_TYPE_P (TREE_TYPE (op
))
4140 && !thisarginfo
.linear_step
4142 && thisarginfo
.dt
!= vect_constant_def
4143 && thisarginfo
.dt
!= vect_external_def
4146 && TREE_CODE (op
) == SSA_NAME
)
4147 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4149 arginfo
.quick_push (thisarginfo
);
4152 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4153 if (!vf
.is_constant ())
4155 if (dump_enabled_p ())
4156 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4157 "not considering SIMD clones; not yet supported"
4158 " for variable-width vectors.\n");
4162 unsigned int badness
= 0;
4163 struct cgraph_node
*bestn
= NULL
;
4164 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4165 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4167 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4168 n
= n
->simdclone
->next_clone
)
4170 unsigned int this_badness
= 0;
4171 unsigned int num_calls
;
4172 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
4173 || n
->simdclone
->nargs
!= nargs
)
4176 this_badness
+= exact_log2 (num_calls
) * 4096;
4177 if (n
->simdclone
->inbranch
)
4178 this_badness
+= 8192;
4179 int target_badness
= targetm
.simd_clone
.usable (n
);
4180 if (target_badness
< 0)
4182 this_badness
+= target_badness
* 512;
4183 for (i
= 0; i
< nargs
; i
++)
4185 switch (n
->simdclone
->args
[i
].arg_type
)
4187 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4188 if (!useless_type_conversion_p
4189 (n
->simdclone
->args
[i
].orig_type
,
4190 TREE_TYPE (gimple_call_arg (stmt
, i
+ arg_offset
))))
4192 else if (arginfo
[i
].dt
== vect_constant_def
4193 || arginfo
[i
].dt
== vect_external_def
4194 || arginfo
[i
].linear_step
)
4197 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4198 if (arginfo
[i
].dt
!= vect_constant_def
4199 && arginfo
[i
].dt
!= vect_external_def
)
4202 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4203 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4204 if (arginfo
[i
].dt
== vect_constant_def
4205 || arginfo
[i
].dt
== vect_external_def
4206 || (arginfo
[i
].linear_step
4207 != n
->simdclone
->args
[i
].linear_step
))
4210 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4211 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4212 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4213 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4214 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4215 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4219 case SIMD_CLONE_ARG_TYPE_MASK
:
4222 if (i
== (size_t) -1)
4224 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4229 if (arginfo
[i
].align
)
4230 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4231 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4233 if (i
== (size_t) -1)
4235 if (bestn
== NULL
|| this_badness
< badness
)
4238 badness
= this_badness
;
4245 for (i
= 0; i
< nargs
; i
++)
4247 if ((arginfo
[i
].dt
== vect_constant_def
4248 || arginfo
[i
].dt
== vect_external_def
)
4249 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4251 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
+ arg_offset
));
4252 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4254 if (arginfo
[i
].vectype
== NULL
4255 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4256 simd_clone_subparts (arginfo
[i
].vectype
)))
4260 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
4261 && VECTOR_BOOLEAN_TYPE_P (bestn
->simdclone
->args
[i
].vector_type
))
4263 if (dump_enabled_p ())
4264 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4265 "vector mask arguments are not supported.\n");
4269 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
4270 && bestn
->simdclone
->mask_mode
== VOIDmode
4271 && (simd_clone_subparts (bestn
->simdclone
->args
[i
].vector_type
)
4272 != simd_clone_subparts (arginfo
[i
].vectype
)))
4274 /* FORNOW we only have partial support for vector-type masks that
4275 can't hold all of simdlen. */
4276 if (dump_enabled_p ())
4277 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4279 "in-branch vector clones are not yet"
4280 " supported for mismatched vector sizes.\n");
4283 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
4284 && bestn
->simdclone
->mask_mode
!= VOIDmode
)
4286 /* FORNOW don't support integer-type masks. */
4287 if (dump_enabled_p ())
4288 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4290 "in-branch vector clones are not yet"
4291 " supported for integer mask modes.\n");
4296 fndecl
= bestn
->decl
;
4297 nunits
= bestn
->simdclone
->simdlen
;
4298 ncopies
= vector_unroll_factor (vf
, nunits
);
4300 /* If the function isn't const, only allow it in simd loops where user
4301 has asserted that at least nunits consecutive iterations can be
4302 performed using SIMD instructions. */
4303 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4304 && gimple_vuse (stmt
))
4307 /* Sanity check: make sure that at least one copy of the vectorized stmt
4308 needs to be generated. */
4309 gcc_assert (ncopies
>= 1);
4311 if (!vec_stmt
) /* transformation not required. */
4313 /* When the original call is pure or const but the SIMD ABI dictates
4314 an aggregate return we will have to use a virtual definition and
4315 in a loop eventually even need to add a virtual PHI. That's
4316 not straight-forward so allow to fix this up via renaming. */
4317 if (gimple_call_lhs (stmt
)
4318 && !gimple_vdef (stmt
)
4319 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn
->decl
))) == ARRAY_TYPE
)
4320 vinfo
->any_known_not_updated_vssa
= true;
4321 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4322 for (i
= 0; i
< nargs
; i
++)
4323 if ((bestn
->simdclone
->args
[i
].arg_type
4324 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4325 || (bestn
->simdclone
->args
[i
].arg_type
4326 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4328 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4331 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4332 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4333 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4334 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4335 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4336 tree sll
= arginfo
[i
].simd_lane_linear
4337 ? boolean_true_node
: boolean_false_node
;
4338 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4340 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4341 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4342 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4343 dt, slp_node, cost_vec); */
4349 if (dump_enabled_p ())
4350 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4353 scalar_dest
= gimple_call_lhs (stmt
);
4354 vec_dest
= NULL_TREE
;
4359 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4360 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4361 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4364 rtype
= TREE_TYPE (ratype
);
4368 auto_vec
<vec
<tree
> > vec_oprnds
;
4369 auto_vec
<unsigned> vec_oprnds_i
;
4370 vec_oprnds
.safe_grow_cleared (nargs
, true);
4371 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4372 for (j
= 0; j
< ncopies
; ++j
)
4374 /* Build argument list for the vectorized call. */
4376 vargs
.create (nargs
);
4380 for (i
= 0; i
< nargs
; i
++)
4382 unsigned int k
, l
, m
, o
;
4384 op
= gimple_call_arg (stmt
, i
+ arg_offset
);
4385 switch (bestn
->simdclone
->args
[i
].arg_type
)
4387 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4388 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4389 o
= vector_unroll_factor (nunits
,
4390 simd_clone_subparts (atype
));
4391 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4393 if (simd_clone_subparts (atype
)
4394 < simd_clone_subparts (arginfo
[i
].vectype
))
4396 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4397 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4398 / simd_clone_subparts (atype
));
4399 gcc_assert ((k
& (k
- 1)) == 0);
4402 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4403 ncopies
* o
/ k
, op
,
4405 vec_oprnds_i
[i
] = 0;
4406 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4410 vec_oprnd0
= arginfo
[i
].op
;
4411 if ((m
& (k
- 1)) == 0)
4412 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4414 arginfo
[i
].op
= vec_oprnd0
;
4416 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4418 bitsize_int ((m
& (k
- 1)) * prec
));
4420 = gimple_build_assign (make_ssa_name (atype
),
4422 vect_finish_stmt_generation (vinfo
, stmt_info
,
4424 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4428 k
= (simd_clone_subparts (atype
)
4429 / simd_clone_subparts (arginfo
[i
].vectype
));
4430 gcc_assert ((k
& (k
- 1)) == 0);
4431 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4433 vec_alloc (ctor_elts
, k
);
4436 for (l
= 0; l
< k
; l
++)
4438 if (m
== 0 && l
== 0)
4440 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4444 vec_oprnds_i
[i
] = 0;
4445 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4448 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4449 arginfo
[i
].op
= vec_oprnd0
;
4452 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4456 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4460 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4462 = gimple_build_assign (make_ssa_name (atype
),
4464 vect_finish_stmt_generation (vinfo
, stmt_info
,
4466 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4469 vargs
.safe_push (vec_oprnd0
);
4472 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4474 = gimple_build_assign (make_ssa_name (atype
),
4476 vect_finish_stmt_generation (vinfo
, stmt_info
,
4478 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4483 case SIMD_CLONE_ARG_TYPE_MASK
:
4484 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4485 if (bestn
->simdclone
->mask_mode
!= VOIDmode
)
4487 /* FORNOW: this is disabled above. */
4492 tree elt_type
= TREE_TYPE (atype
);
4493 tree one
= fold_convert (elt_type
, integer_one_node
);
4494 tree zero
= fold_convert (elt_type
, integer_zero_node
);
4495 o
= vector_unroll_factor (nunits
,
4496 simd_clone_subparts (atype
));
4497 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4499 if (simd_clone_subparts (atype
)
4500 < simd_clone_subparts (arginfo
[i
].vectype
))
4502 /* The mask type has fewer elements than simdlen. */
4507 else if (simd_clone_subparts (atype
)
4508 == simd_clone_subparts (arginfo
[i
].vectype
))
4510 /* The SIMD clone function has the same number of
4511 elements as the current function. */
4514 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4518 vec_oprnds_i
[i
] = 0;
4520 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4522 = build3 (VEC_COND_EXPR
, atype
, vec_oprnd0
,
4523 build_vector_from_val (atype
, one
),
4524 build_vector_from_val (atype
, zero
));
4526 = gimple_build_assign (make_ssa_name (atype
),
4528 vect_finish_stmt_generation (vinfo
, stmt_info
,
4530 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4534 /* The mask type has more elements than simdlen. */
4542 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4543 vargs
.safe_push (op
);
4545 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4546 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4551 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4552 &stmts
, true, NULL_TREE
);
4556 edge pe
= loop_preheader_edge (loop
);
4557 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4558 gcc_assert (!new_bb
);
4560 if (arginfo
[i
].simd_lane_linear
)
4562 vargs
.safe_push (arginfo
[i
].op
);
4565 tree phi_res
= copy_ssa_name (op
);
4566 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4567 add_phi_arg (new_phi
, arginfo
[i
].op
,
4568 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4570 = POINTER_TYPE_P (TREE_TYPE (op
))
4571 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4572 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4573 ? sizetype
: TREE_TYPE (op
);
4575 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4577 tree tcst
= wide_int_to_tree (type
, cst
);
4578 tree phi_arg
= copy_ssa_name (op
);
4580 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4581 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4582 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4583 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4585 arginfo
[i
].op
= phi_res
;
4586 vargs
.safe_push (phi_res
);
4591 = POINTER_TYPE_P (TREE_TYPE (op
))
4592 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4593 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4594 ? sizetype
: TREE_TYPE (op
);
4596 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4598 tree tcst
= wide_int_to_tree (type
, cst
);
4599 new_temp
= make_ssa_name (TREE_TYPE (op
));
4601 = gimple_build_assign (new_temp
, code
,
4602 arginfo
[i
].op
, tcst
);
4603 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4604 vargs
.safe_push (new_temp
);
4607 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4608 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4609 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4610 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4611 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4612 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4618 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4622 || known_eq (simd_clone_subparts (rtype
), nunits
));
4624 new_temp
= create_tmp_var (ratype
);
4625 else if (useless_type_conversion_p (vectype
, rtype
))
4626 new_temp
= make_ssa_name (vec_dest
, new_call
);
4628 new_temp
= make_ssa_name (rtype
, new_call
);
4629 gimple_call_set_lhs (new_call
, new_temp
);
4631 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4632 gimple
*new_stmt
= new_call
;
4636 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4639 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4640 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4641 k
= vector_unroll_factor (nunits
,
4642 simd_clone_subparts (vectype
));
4643 gcc_assert ((k
& (k
- 1)) == 0);
4644 for (l
= 0; l
< k
; l
++)
4649 t
= build_fold_addr_expr (new_temp
);
4650 t
= build2 (MEM_REF
, vectype
, t
,
4651 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4654 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4655 bitsize_int (prec
), bitsize_int (l
* prec
));
4656 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4657 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4659 if (j
== 0 && l
== 0)
4660 *vec_stmt
= new_stmt
;
4661 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4665 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4668 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4670 unsigned int k
= (simd_clone_subparts (vectype
)
4671 / simd_clone_subparts (rtype
));
4672 gcc_assert ((k
& (k
- 1)) == 0);
4673 if ((j
& (k
- 1)) == 0)
4674 vec_alloc (ret_ctor_elts
, k
);
4678 o
= vector_unroll_factor (nunits
,
4679 simd_clone_subparts (rtype
));
4680 for (m
= 0; m
< o
; m
++)
4682 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4683 size_int (m
), NULL_TREE
, NULL_TREE
);
4684 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4686 vect_finish_stmt_generation (vinfo
, stmt_info
,
4688 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4689 gimple_assign_lhs (new_stmt
));
4691 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4694 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4695 if ((j
& (k
- 1)) != k
- 1)
4697 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4699 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4700 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4702 if ((unsigned) j
== k
- 1)
4703 *vec_stmt
= new_stmt
;
4704 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4709 tree t
= build_fold_addr_expr (new_temp
);
4710 t
= build2 (MEM_REF
, vectype
, t
,
4711 build_int_cst (TREE_TYPE (t
), 0));
4712 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4713 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4714 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4716 else if (!useless_type_conversion_p (vectype
, rtype
))
4718 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4720 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4721 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4726 *vec_stmt
= new_stmt
;
4727 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4730 for (i
= 0; i
< nargs
; ++i
)
4732 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4737 /* Mark the clone as no longer being a candidate for GC. */
4738 bestn
->gc_candidate
= false;
4740 /* The call in STMT might prevent it from being removed in dce.
4741 We however cannot remove it here, due to the way the ssa name
4742 it defines is mapped to the new definition. So just replace
4743 rhs of the statement with something harmless. */
4751 type
= TREE_TYPE (scalar_dest
);
4752 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4753 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4756 new_stmt
= gimple_build_nop ();
4757 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4758 unlink_stmt_vdef (stmt
);
// NOTE(review): extraction artifact — each original statement below is split
// across several physical lines and some original lines (local declarations
// such as new_stmt/new_temp, and the trailing `return new_stmt;`) are missing
// from this excerpt.  Code text is preserved byte-identical; comments only.
4764 /* Function vect_gen_widened_results_half
4766 Create a vector stmt whose code, type, number of arguments, and result
4767 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4768 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4769 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4770 needs to be created (DECL is a function-decl of a target-builtin).
4771 STMT_INFO is the original scalar stmt that we are vectorizing. */
// Builds a single gassign `vec_dest = CODE (vec_oprnd0 [, vec_oprnd1])`,
// gives it a fresh SSA name as lhs, and inserts it at GSI via
// vect_finish_stmt_generation.
4774 vect_gen_widened_results_half (vec_info
*vinfo
, enum tree_code code
,
4775 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4776 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4777 stmt_vec_info stmt_info
)
4782 /* Generate half of the widened result: */
// OP_TYPE must agree with the arity of CODE (checked by the assert below).
4783 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
// For the unary case the second operand is presumably cleared here (the body
// of this `if` is missing from the excerpt) — TODO confirm against upstream.
4784 if (op_type
!= binary_op
)
4786 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4787 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4788 gimple_assign_set_lhs (new_stmt
, new_temp
);
4789 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
// NOTE(review): garbled extraction — statements are split across physical
// lines and several original lines (braces, `if` headers, the recursion
// terminator) are absent from this excerpt.  Text kept byte-identical.
4795 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4796 For multi-step conversions store the resulting vectors and call the function
// Pairs up consecutive vectors in VEC_OPRNDS and packs each pair with a
// two-operand demotion statement (CODE), so each pass halves the operand
// count; the packed result of pair (i, i+1) is stored back at slot i/2.
4800 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4802 stmt_vec_info stmt_info
,
4803 vec
<tree
> &vec_dsts
,
4804 gimple_stmt_iterator
*gsi
,
4805 slp_tree slp_node
, enum tree_code code
)
4808 tree vop0
, vop1
, new_tmp
, vec_dest
;
// The destination var for this level is popped; it is pushed back at the end
// so VEC_DSTS is unchanged for the caller (see the final quick_push below).
4810 vec_dest
= vec_dsts
.pop ();
4812 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4814 /* Create demotion operation. */
4815 vop0
= (*vec_oprnds
)[i
];
4816 vop1
= (*vec_oprnds
)[i
+ 1];
4817 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4818 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4819 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4820 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4823 /* Store the resulting vector for next recursive call. */
4824 (*vec_oprnds
)[i
/2] = new_tmp
;
4827 /* This is the last step of the conversion sequence. Store the
4828 vectors in SLP_NODE or in vector info of the scalar statement
4829 (or in STMT_VINFO_RELATED_STMT chain). */
4831 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4833 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4837 /* For multi-step demotion operations we first generate demotion operations
4838 from the source type to the intermediate types, and then combine the
4839 results (stored in VEC_OPRNDS) in demotion operation to the destination
4843 /* At each level of recursion we have half of the operands we had at the
4845 vec_oprnds
->truncate ((i
+1)/2);
// Recursive call for the remaining levels uses VEC_PACK_TRUNC_EXPR; the
// recursion-depth argument is on an original line missing from this excerpt.
4846 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4848 stmt_info
, vec_dsts
, gsi
,
4849 slp_node
, VEC_PACK_TRUNC_EXPR
);
4852 vec_dsts
.quick_push (vec_dest
);
// NOTE(review): garbled extraction — statements are split across physical
// lines and some original lines (braces, an `else` header, trailing call
// arguments) are missing from this excerpt.  Text kept byte-identical.
4856 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4857 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4858 STMT_INFO. For multi-step conversions store the resulting vectors and
4859 call the function recursively. */
// For each input vector, emits the low/high halves of the widened result via
// two calls to vect_gen_widened_results_half (CODE1 and CODE2), then replaces
// *VEC_OPRNDS0 with the doubled-length list of results for the next step.
4862 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
4863 vec
<tree
> *vec_oprnds0
,
4864 vec
<tree
> *vec_oprnds1
,
4865 stmt_vec_info stmt_info
, tree vec_dest
,
4866 gimple_stmt_iterator
*gsi
,
4867 enum tree_code code1
,
4868 enum tree_code code2
, int op_type
)
4871 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4872 gimple
*new_stmt1
, *new_stmt2
;
4873 vec
<tree
> vec_tmp
= vNULL
;
// Each input produces two outputs, hence the 2x reservation.
4875 vec_tmp
.create (vec_oprnds0
->length () * 2);
4876 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4878 if (op_type
== binary_op
)
4879 vop1
= (*vec_oprnds1
)[i
];
4883 /* Generate the two halves of promotion operation. */
4884 new_stmt1
= vect_gen_widened_results_half (vinfo
, code1
, vop0
, vop1
,
4885 op_type
, vec_dest
, gsi
,
4887 new_stmt2
= vect_gen_widened_results_half (vinfo
, code2
, vop0
, vop1
,
4888 op_type
, vec_dest
, gsi
,
// The generated stmt can be a call when the widening op maps to a target
// builtin (see the comment on vect_gen_widened_results_half); pick the lhs
// accessor accordingly.
4890 if (is_gimple_call (new_stmt1
))
4892 new_tmp1
= gimple_call_lhs (new_stmt1
);
4893 new_tmp2
= gimple_call_lhs (new_stmt2
);
4897 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4898 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4901 /* Store the results for the next step. */
4902 vec_tmp
.quick_push (new_tmp1
);
4903 vec_tmp
.quick_push (new_tmp2
);
// Hand the result list back to the caller in place of the inputs.
4906 vec_oprnds0
->release ();
4907 *vec_oprnds0
= vec_tmp
;
// NOTE(review): garbled extraction — statements are split across physical
// lines and some original lines (trailing parameters after code1, local
// declarations, braces) are missing from this excerpt.  Text kept
// byte-identical; comments only added.
4910 /* Create vectorized promotion stmts for widening stmts using only half the
4911 potential vector size for input. */
// Unlike the two-half scheme above, this emits ONE result per input: it
// NOP_EXPR-extends vop0 (and vop1 too, when vop1 is itself a vector) to the
// destination vector type, then applies CODE1 once.
4913 vect_create_half_widening_stmts (vec_info
*vinfo
,
4914 vec
<tree
> *vec_oprnds0
,
4915 vec
<tree
> *vec_oprnds1
,
4916 stmt_vec_info stmt_info
, tree vec_dest
,
4917 gimple_stmt_iterator
*gsi
,
4918 enum tree_code code1
,
4926 vec
<tree
> vec_tmp
= vNULL
;
// One output per input vector (contrast with the 2x reservation in the
// full promotion path).
4928 vec_tmp
.create (vec_oprnds0
->length ());
4929 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4931 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
// This path only handles binary widening operations.
4933 gcc_assert (op_type
== binary_op
);
4934 vop1
= (*vec_oprnds1
)[i
];
4936 /* Widen the first vector input. */
4937 out_type
= TREE_TYPE (vec_dest
);
4938 new_tmp1
= make_ssa_name (out_type
);
4939 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
4940 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
// Second operand may be a scalar (e.g. a shift count) — only widen it when
// it is a vector.
4941 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
4943 /* Widen the second vector input. */
4944 new_tmp2
= make_ssa_name (out_type
);
4945 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
4946 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
4947 /* Perform the operation. With both vector inputs widened. */
4948 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, new_tmp2
);
4952 /* Perform the operation. With the single vector input widened. */
4953 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, vop1
);
4956 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
4957 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
4958 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
4960 /* Store the results for the next step. */
4961 vec_tmp
.quick_push (new_tmp3
);
// Hand the result list back to the caller in place of the inputs.
4964 vec_oprnds0
->release ();
4965 *vec_oprnds0
= vec_tmp
;
4969 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4970 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4971 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4972 Return true if STMT_INFO is vectorizable in this way. */
4975 vectorizable_conversion (vec_info
*vinfo
,
4976 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4977 gimple
**vec_stmt
, slp_tree slp_node
,
4978 stmt_vector_for_cost
*cost_vec
)
4982 tree op0
, op1
= NULL_TREE
;
4983 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4984 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4985 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4987 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4989 poly_uint64 nunits_in
;
4990 poly_uint64 nunits_out
;
4991 tree vectype_out
, vectype_in
;
4993 tree lhs_type
, rhs_type
;
4994 enum { NARROW
, NONE
, WIDEN
} modifier
;
4995 vec
<tree
> vec_oprnds0
= vNULL
;
4996 vec
<tree
> vec_oprnds1
= vNULL
;
4998 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4999 int multi_step_cvt
= 0;
5000 vec
<tree
> interm_types
= vNULL
;
5001 tree intermediate_type
, cvt_type
= NULL_TREE
;
5003 unsigned short fltsz
;
5005 /* Is STMT a vectorizable conversion? */
5007 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5010 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5014 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5018 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5021 code
= gimple_assign_rhs_code (stmt
);
5022 if (!CONVERT_EXPR_CODE_P (code
)
5023 && code
!= FIX_TRUNC_EXPR
5024 && code
!= FLOAT_EXPR
5025 && code
!= WIDEN_PLUS_EXPR
5026 && code
!= WIDEN_MINUS_EXPR
5027 && code
!= WIDEN_MULT_EXPR
5028 && code
!= WIDEN_LSHIFT_EXPR
)
5031 bool widen_arith
= (code
== WIDEN_PLUS_EXPR
5032 || code
== WIDEN_MINUS_EXPR
5033 || code
== WIDEN_MULT_EXPR
5034 || code
== WIDEN_LSHIFT_EXPR
);
5035 op_type
= TREE_CODE_LENGTH (code
);
5037 /* Check types of lhs and rhs. */
5038 scalar_dest
= gimple_assign_lhs (stmt
);
5039 lhs_type
= TREE_TYPE (scalar_dest
);
5040 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5042 /* Check the operands of the operation. */
5043 slp_tree slp_op0
, slp_op1
= NULL
;
5044 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5045 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
5047 if (dump_enabled_p ())
5048 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5049 "use not simple.\n");
5053 rhs_type
= TREE_TYPE (op0
);
5054 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
5055 && !((INTEGRAL_TYPE_P (lhs_type
)
5056 && INTEGRAL_TYPE_P (rhs_type
))
5057 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
5058 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
5061 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5062 && ((INTEGRAL_TYPE_P (lhs_type
)
5063 && !type_has_mode_precision_p (lhs_type
))
5064 || (INTEGRAL_TYPE_P (rhs_type
)
5065 && !type_has_mode_precision_p (rhs_type
))))
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5069 "type conversion to/from bit-precision unsupported."
5074 if (op_type
== binary_op
)
5076 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
5077 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
5079 op1
= gimple_assign_rhs2 (stmt
);
5081 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
5082 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5086 "use not simple.\n");
5089 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5092 vectype_in
= vectype1_in
;
5095 /* If op0 is an external or constant def, infer the vector type
5096 from the scalar type. */
5098 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
5100 gcc_assert (vectype_in
);
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5105 "no vectype for scalar type %T\n", rhs_type
);
5110 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5111 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5115 "can't convert between boolean and non "
5116 "boolean vectors %T\n", rhs_type
);
5121 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
5122 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5123 if (known_eq (nunits_out
, nunits_in
))
5128 else if (multiple_p (nunits_out
, nunits_in
))
5132 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
5136 /* Multiple types in SLP are handled by creating the appropriate number of
5137 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5141 else if (modifier
== NARROW
)
5142 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5144 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
5146 /* Sanity check: make sure that at least one copy of the vectorized stmt
5147 needs to be generated. */
5148 gcc_assert (ncopies
>= 1);
5150 bool found_mode
= false;
5151 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5152 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5153 opt_scalar_mode rhs_mode_iter
;
5155 /* Supportable by target? */
5159 if (code
!= FIX_TRUNC_EXPR
5160 && code
!= FLOAT_EXPR
5161 && !CONVERT_EXPR_CODE_P (code
))
5163 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
5167 if (dump_enabled_p ())
5168 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5169 "conversion not supported by target.\n");
5173 if (known_eq (nunits_in
, nunits_out
))
5175 if (!supportable_half_widening_operation (code
, vectype_out
,
5176 vectype_in
, &code1
))
5178 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5181 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5182 vectype_out
, vectype_in
, &code1
,
5183 &code2
, &multi_step_cvt
,
5186 /* Binary widening operation can only be supported directly by the
5188 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5192 if (code
!= FLOAT_EXPR
5193 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5196 fltsz
= GET_MODE_SIZE (lhs_mode
);
5197 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5199 rhs_mode
= rhs_mode_iter
.require ();
5200 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5204 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5205 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5206 if (cvt_type
== NULL_TREE
)
5209 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5211 if (!supportable_convert_operation (code
, vectype_out
,
5212 cvt_type
, &codecvt1
))
5215 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
5216 vectype_out
, cvt_type
,
5217 &codecvt1
, &codecvt2
,
5222 gcc_assert (multi_step_cvt
== 0);
5224 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5226 vectype_in
, &code1
, &code2
,
5227 &multi_step_cvt
, &interm_types
))
5237 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5238 codecvt2
= ERROR_MARK
;
5242 interm_types
.safe_push (cvt_type
);
5243 cvt_type
= NULL_TREE
;
5248 gcc_assert (op_type
== unary_op
);
5249 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5250 &code1
, &multi_step_cvt
,
5254 if (code
!= FIX_TRUNC_EXPR
5255 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5259 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5260 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5261 if (cvt_type
== NULL_TREE
)
5263 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
5266 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5267 &code1
, &multi_step_cvt
,
5276 if (!vec_stmt
) /* transformation not required. */
5279 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5280 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5282 if (dump_enabled_p ())
5283 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5284 "incompatible vector types for invariants\n");
5287 DUMP_VECT_SCOPE ("vectorizable_conversion");
5288 if (modifier
== NONE
)
5290 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5291 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5294 else if (modifier
== NARROW
)
5296 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5297 /* The final packing step produces one vector result per copy. */
5298 unsigned int nvectors
5299 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5300 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5301 multi_step_cvt
, cost_vec
,
5306 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5307 /* The initial unpacking step produces two vector results
5308 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5309 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5310 unsigned int nvectors
5312 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5314 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5315 multi_step_cvt
, cost_vec
,
5318 interm_types
.release ();
5323 if (dump_enabled_p ())
5324 dump_printf_loc (MSG_NOTE
, vect_location
,
5325 "transform conversion. ncopies = %d.\n", ncopies
);
5327 if (op_type
== binary_op
)
5329 if (CONSTANT_CLASS_P (op0
))
5330 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5331 else if (CONSTANT_CLASS_P (op1
))
5332 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5335 /* In case of multi-step conversion, we first generate conversion operations
5336 to the intermediate types, and then from that types to the final one.
5337 We create vector destinations for the intermediate type (TYPES) received
5338 from supportable_*_operation, and store them in the correct order
5339 for future use in vect_create_vectorized_*_stmts (). */
5340 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5341 vec_dest
= vect_create_destination_var (scalar_dest
,
5342 (cvt_type
&& modifier
== WIDEN
)
5343 ? cvt_type
: vectype_out
);
5344 vec_dsts
.quick_push (vec_dest
);
5348 for (i
= interm_types
.length () - 1;
5349 interm_types
.iterate (i
, &intermediate_type
); i
--)
5351 vec_dest
= vect_create_destination_var (scalar_dest
,
5353 vec_dsts
.quick_push (vec_dest
);
5358 vec_dest
= vect_create_destination_var (scalar_dest
,
5360 ? vectype_out
: cvt_type
);
5365 if (modifier
== WIDEN
)
5367 else if (modifier
== NARROW
)
5370 ninputs
= vect_pow2 (multi_step_cvt
);
5378 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5380 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5382 /* Arguments are ready, create the new vector stmt. */
5383 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5384 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5385 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5386 gimple_assign_set_lhs (new_stmt
, new_temp
);
5387 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5390 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5392 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5397 /* In case the vectorization factor (VF) is bigger than the number
5398 of elements that we can fit in a vectype (nunits), we have to
5399 generate more than one vector stmt - i.e - we need to "unroll"
5400 the vector stmt by a factor VF/nunits. */
5401 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5403 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5405 if (code
== WIDEN_LSHIFT_EXPR
)
5407 int oprnds_size
= vec_oprnds0
.length ();
5408 vec_oprnds1
.create (oprnds_size
);
5409 for (i
= 0; i
< oprnds_size
; ++i
)
5410 vec_oprnds1
.quick_push (op1
);
5412 /* Arguments are ready. Create the new vector stmts. */
5413 for (i
= multi_step_cvt
; i
>= 0; i
--)
5415 tree this_dest
= vec_dsts
[i
];
5416 enum tree_code c1
= code1
, c2
= code2
;
5417 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5422 if (known_eq (nunits_out
, nunits_in
))
5423 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
,
5424 &vec_oprnds1
, stmt_info
,
5428 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5429 &vec_oprnds1
, stmt_info
,
5434 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5439 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5440 new_temp
= make_ssa_name (vec_dest
);
5441 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
5442 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5445 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5448 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5450 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5455 /* In case the vectorization factor (VF) is bigger than the number
5456 of elements that we can fit in a vectype (nunits), we have to
5457 generate more than one vector stmt - i.e - we need to "unroll"
5458 the vector stmt by a factor VF/nunits. */
5459 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5461 /* Arguments are ready. Create the new vector stmts. */
5463 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5465 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5466 new_temp
= make_ssa_name (vec_dest
);
5468 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5469 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5470 vec_oprnds0
[i
] = new_temp
;
5473 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5475 stmt_info
, vec_dsts
, gsi
,
5480 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5482 vec_oprnds0
.release ();
5483 vec_oprnds1
.release ();
5484 interm_types
.release ();
5489 /* Return true if we can assume from the scalar form of STMT_INFO that
5490 neither the scalar nor the vector forms will generate code. STMT_INFO
5491 is known not to involve a data reference. */
5494 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5496 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5500 tree lhs
= gimple_assign_lhs (stmt
);
5501 tree_code code
= gimple_assign_rhs_code (stmt
);
5502 tree rhs
= gimple_assign_rhs1 (stmt
);
5504 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5507 if (CONVERT_EXPR_CODE_P (code
))
5508 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5513 /* Function vectorizable_assignment.
5515 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5516 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5517 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5518 Return true if STMT_INFO is vectorizable in this way. */
5521 vectorizable_assignment (vec_info
*vinfo
,
5522 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5523 gimple
**vec_stmt
, slp_tree slp_node
,
5524 stmt_vector_for_cost
*cost_vec
)
5529 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5531 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5535 vec
<tree
> vec_oprnds
= vNULL
;
5537 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5538 enum tree_code code
;
5541 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5544 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5548 /* Is vectorizable assignment? */
5549 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5553 scalar_dest
= gimple_assign_lhs (stmt
);
5554 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5557 if (STMT_VINFO_DATA_REF (stmt_info
))
5560 code
= gimple_assign_rhs_code (stmt
);
5561 if (!(gimple_assign_single_p (stmt
)
5562 || code
== PAREN_EXPR
5563 || CONVERT_EXPR_CODE_P (code
)))
5566 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5567 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5569 /* Multiple types in SLP are handled by creating the appropriate number of
5570 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5575 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5577 gcc_assert (ncopies
>= 1);
5580 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5581 &dt
[0], &vectype_in
))
5583 if (dump_enabled_p ())
5584 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5585 "use not simple.\n");
5589 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5591 /* We can handle NOP_EXPR conversions that do not change the number
5592 of elements or the vector size. */
5593 if ((CONVERT_EXPR_CODE_P (code
)
5594 || code
== VIEW_CONVERT_EXPR
)
5596 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5597 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5598 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5601 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5603 if (dump_enabled_p ())
5604 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5605 "can't convert between boolean and non "
5606 "boolean vectors %T\n", TREE_TYPE (op
));
5611 /* We do not handle bit-precision changes. */
5612 if ((CONVERT_EXPR_CODE_P (code
)
5613 || code
== VIEW_CONVERT_EXPR
)
5614 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5615 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5616 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5617 /* But a conversion that does not change the bit-pattern is ok. */
5618 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5619 > TYPE_PRECISION (TREE_TYPE (op
)))
5620 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5622 if (dump_enabled_p ())
5623 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5624 "type conversion to/from bit-precision "
5629 if (!vec_stmt
) /* transformation not required. */
5632 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5634 if (dump_enabled_p ())
5635 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5636 "incompatible vector types for invariants\n");
5639 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5640 DUMP_VECT_SCOPE ("vectorizable_assignment");
5641 if (!vect_nop_conversion_p (stmt_info
))
5642 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5648 if (dump_enabled_p ())
5649 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5652 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5655 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5657 /* Arguments are ready. create the new vector stmt. */
5658 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5660 if (CONVERT_EXPR_CODE_P (code
)
5661 || code
== VIEW_CONVERT_EXPR
)
5662 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5663 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5664 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5665 gimple_assign_set_lhs (new_stmt
, new_temp
);
5666 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5668 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5670 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5673 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5675 vec_oprnds
.release ();
5680 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5681 either as shift by a scalar or by a vector. */
5684 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5687 machine_mode vec_mode
;
5692 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5696 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5698 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5700 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5702 || (optab_handler (optab
, TYPE_MODE (vectype
))
5703 == CODE_FOR_nothing
))
5707 vec_mode
= TYPE_MODE (vectype
);
5708 icode
= (int) optab_handler (optab
, vec_mode
);
5709 if (icode
== CODE_FOR_nothing
)
5716 /* Function vectorizable_shift.
5718 Check if STMT_INFO performs a shift operation that can be vectorized.
5719 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5720 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5721 Return true if STMT_INFO is vectorizable in this way. */
5724 vectorizable_shift (vec_info
*vinfo
,
5725 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5726 gimple
**vec_stmt
, slp_tree slp_node
,
5727 stmt_vector_for_cost
*cost_vec
)
5731 tree op0
, op1
= NULL
;
5732 tree vec_oprnd1
= NULL_TREE
;
5734 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5735 enum tree_code code
;
5736 machine_mode vec_mode
;
5740 machine_mode optab_op2_mode
;
5741 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5743 poly_uint64 nunits_in
;
5744 poly_uint64 nunits_out
;
5749 vec
<tree
> vec_oprnds0
= vNULL
;
5750 vec
<tree
> vec_oprnds1
= vNULL
;
5753 bool scalar_shift_arg
= true;
5754 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5755 bool incompatible_op1_vectype_p
= false;
5757 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5760 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5761 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5765 /* Is STMT a vectorizable binary/unary operation? */
5766 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5770 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5773 code
= gimple_assign_rhs_code (stmt
);
5775 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5776 || code
== RROTATE_EXPR
))
5779 scalar_dest
= gimple_assign_lhs (stmt
);
5780 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5781 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5783 if (dump_enabled_p ())
5784 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5785 "bit-precision shifts not supported.\n");
5790 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5791 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5793 if (dump_enabled_p ())
5794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5795 "use not simple.\n");
5798 /* If op0 is an external or constant def, infer the vector type
5799 from the scalar type. */
5801 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5803 gcc_assert (vectype
);
5806 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5808 "no vectype for scalar type\n");
5812 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5813 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5814 if (maybe_ne (nunits_out
, nunits_in
))
5817 stmt_vec_info op1_def_stmt_info
;
5819 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5820 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5824 "use not simple.\n");
5828 /* Multiple types in SLP are handled by creating the appropriate number of
5829 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5834 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5836 gcc_assert (ncopies
>= 1);
5838 /* Determine whether the shift amount is a vector, or scalar. If the
5839 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5841 if ((dt
[1] == vect_internal_def
5842 || dt
[1] == vect_induction_def
5843 || dt
[1] == vect_nested_cycle
)
5845 scalar_shift_arg
= false;
5846 else if (dt
[1] == vect_constant_def
5847 || dt
[1] == vect_external_def
5848 || dt
[1] == vect_internal_def
)
5850 /* In SLP, need to check whether the shift count is the same,
5851 in loops if it is a constant or invariant, it is always
5855 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5856 stmt_vec_info slpstmt_info
;
5858 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5860 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5861 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5862 scalar_shift_arg
= false;
5865 /* For internal SLP defs we have to make sure we see scalar stmts
5866 for all vector elements.
5867 ??? For different vectors we could resort to a different
5868 scalar shift operand but code-generation below simply always
5870 if (dt
[1] == vect_internal_def
5871 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5873 scalar_shift_arg
= false;
5876 /* If the shift amount is computed by a pattern stmt we cannot
5877 use the scalar amount directly thus give up and use a vector
5879 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5880 scalar_shift_arg
= false;
5884 if (dump_enabled_p ())
5885 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5886 "operand mode requires invariant argument.\n");
5890 /* Vector shifted by vector. */
5891 bool was_scalar_shift_arg
= scalar_shift_arg
;
5892 if (!scalar_shift_arg
)
5894 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5895 if (dump_enabled_p ())
5896 dump_printf_loc (MSG_NOTE
, vect_location
,
5897 "vector/vector shift/rotate found.\n");
5900 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5902 incompatible_op1_vectype_p
5903 = (op1_vectype
== NULL_TREE
5904 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5905 TYPE_VECTOR_SUBPARTS (vectype
))
5906 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5907 if (incompatible_op1_vectype_p
5909 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5910 || slp_op1
->refcnt
!= 1))
5912 if (dump_enabled_p ())
5913 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5914 "unusable type for last operand in"
5915 " vector/vector shift/rotate.\n");
5919 /* See if the machine has a vector shifted by scalar insn and if not
5920 then see if it has a vector shifted by vector insn. */
5923 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5925 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5927 if (dump_enabled_p ())
5928 dump_printf_loc (MSG_NOTE
, vect_location
,
5929 "vector/scalar shift/rotate found.\n");
5933 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5935 && (optab_handler (optab
, TYPE_MODE (vectype
))
5936 != CODE_FOR_nothing
))
5938 scalar_shift_arg
= false;
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_NOTE
, vect_location
,
5942 "vector/vector shift/rotate found.\n");
5945 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5949 /* Unlike the other binary operators, shifts/rotates have
5950 the rhs being int, instead of the same type as the lhs,
5951 so make sure the scalar is the right type if we are
5952 dealing with vectors of long long/long/short/char. */
5953 incompatible_op1_vectype_p
5955 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5957 if (incompatible_op1_vectype_p
5958 && dt
[1] == vect_internal_def
)
5960 if (dump_enabled_p ())
5961 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5962 "unusable type for last operand in"
5963 " vector/vector shift/rotate.\n");
5970 /* Supportable by target? */
5973 if (dump_enabled_p ())
5974 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5978 vec_mode
= TYPE_MODE (vectype
);
5979 icode
= (int) optab_handler (optab
, vec_mode
);
5980 if (icode
== CODE_FOR_nothing
)
5982 if (dump_enabled_p ())
5983 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5984 "op not supported by target.\n");
5987 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5988 if (vect_emulated_vector_p (vectype
))
5991 if (!vec_stmt
) /* transformation not required. */
5994 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5995 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
5996 && (!incompatible_op1_vectype_p
5997 || dt
[1] == vect_constant_def
)
5998 && !vect_maybe_update_slp_op_vectype
6000 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
6002 if (dump_enabled_p ())
6003 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6004 "incompatible vector types for invariants\n");
6007 /* Now adjust the constant shift amount in place. */
6009 && incompatible_op1_vectype_p
6010 && dt
[1] == vect_constant_def
)
6012 for (unsigned i
= 0;
6013 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
6015 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
6016 = fold_convert (TREE_TYPE (vectype
),
6017 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
6018 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
6022 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
6023 DUMP_VECT_SCOPE ("vectorizable_shift");
6024 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
6025 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
6031 if (dump_enabled_p ())
6032 dump_printf_loc (MSG_NOTE
, vect_location
,
6033 "transform binary/unary operation.\n");
6035 if (incompatible_op1_vectype_p
&& !slp_node
)
6037 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
6038 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6039 if (dt
[1] != vect_constant_def
)
6040 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
6041 TREE_TYPE (vectype
), NULL
);
6045 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6047 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
6049 /* Vector shl and shr insn patterns can be defined with scalar
6050 operand 2 (shift operand). In this case, use constant or loop
6051 invariant op1 directly, without extending it to vector mode
6053 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
6054 if (!VECTOR_MODE_P (optab_op2_mode
))
6056 if (dump_enabled_p ())
6057 dump_printf_loc (MSG_NOTE
, vect_location
,
6058 "operand 1 using scalar mode.\n");
6060 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
6061 vec_oprnds1
.quick_push (vec_oprnd1
);
6062 /* Store vec_oprnd1 for every vector stmt to be created.
6063 We check during the analysis that all the shift arguments
6065 TODO: Allow different constants for different vector
6066 stmts generated for an SLP instance. */
6068 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
6069 vec_oprnds1
.quick_push (vec_oprnd1
);
6072 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
6074 if (was_scalar_shift_arg
)
6076 /* If the argument was the same in all lanes create
6077 the correctly typed vector shift amount directly. */
6078 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6079 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
6080 !loop_vinfo
? gsi
: NULL
);
6081 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
6082 !loop_vinfo
? gsi
: NULL
);
6083 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
6084 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
6085 vec_oprnds1
.quick_push (vec_oprnd1
);
6087 else if (dt
[1] == vect_constant_def
)
6088 /* The constant shift amount has been adjusted in place. */
6091 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
6094 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6095 (a special case for certain kind of vector shifts); otherwise,
6096 operand 1 should be of a vector type (the usual case). */
6097 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6099 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
6101 /* Arguments are ready. Create the new vector stmt. */
6102 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6104 /* For internal defs where we need to use a scalar shift arg
6105 extract the first lane. */
6106 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
6108 vop1
= vec_oprnds1
[0];
6109 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
6111 = gimple_build_assign (new_temp
,
6112 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
6114 TYPE_SIZE (TREE_TYPE (new_temp
)),
6115 bitsize_zero_node
));
6116 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6120 vop1
= vec_oprnds1
[i
];
6121 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
6122 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6123 gimple_assign_set_lhs (new_stmt
, new_temp
);
6124 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6126 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6128 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6132 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6134 vec_oprnds0
.release ();
6135 vec_oprnds1
.release ();
6140 /* Function vectorizable_operation.
6142 Check if STMT_INFO performs a binary, unary or ternary operation that can
6144 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6145 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6146 Return true if STMT_INFO is vectorizable in this way. */
6149 vectorizable_operation (vec_info
*vinfo
,
6150 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6151 gimple
**vec_stmt
, slp_tree slp_node
,
6152 stmt_vector_for_cost
*cost_vec
)
6156 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
6158 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6159 enum tree_code code
, orig_code
;
6160 machine_mode vec_mode
;
6164 bool target_support_p
;
6165 enum vect_def_type dt
[3]
6166 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6168 poly_uint64 nunits_in
;
6169 poly_uint64 nunits_out
;
6171 int ncopies
, vec_num
;
6173 vec
<tree
> vec_oprnds0
= vNULL
;
6174 vec
<tree
> vec_oprnds1
= vNULL
;
6175 vec
<tree
> vec_oprnds2
= vNULL
;
6176 tree vop0
, vop1
, vop2
;
6177 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6179 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6182 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6186 /* Is STMT a vectorizable binary/unary operation? */
6187 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6191 /* Loads and stores are handled in vectorizable_{load,store}. */
6192 if (STMT_VINFO_DATA_REF (stmt_info
))
6195 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6197 /* Shifts are handled in vectorizable_shift. */
6198 if (code
== LSHIFT_EXPR
6199 || code
== RSHIFT_EXPR
6200 || code
== LROTATE_EXPR
6201 || code
== RROTATE_EXPR
)
6204 /* Comparisons are handled in vectorizable_comparison. */
6205 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6208 /* Conditions are handled in vectorizable_condition. */
6209 if (code
== COND_EXPR
)
6212 /* For pointer addition and subtraction, we should use the normal
6213 plus and minus for the vector operation. */
6214 if (code
== POINTER_PLUS_EXPR
)
6216 if (code
== POINTER_DIFF_EXPR
)
6219 /* Support only unary or binary operations. */
6220 op_type
= TREE_CODE_LENGTH (code
);
6221 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6223 if (dump_enabled_p ())
6224 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6225 "num. args = %d (not unary/binary/ternary op).\n",
6230 scalar_dest
= gimple_assign_lhs (stmt
);
6231 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6233 /* Most operations cannot handle bit-precision types without extra
6235 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6237 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6238 /* Exception are bitwise binary operations. */
6239 && code
!= BIT_IOR_EXPR
6240 && code
!= BIT_XOR_EXPR
6241 && code
!= BIT_AND_EXPR
)
6243 if (dump_enabled_p ())
6244 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6245 "bit-precision arithmetic not supported.\n");
6250 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6251 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6253 if (dump_enabled_p ())
6254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6255 "use not simple.\n");
6258 bool is_invariant
= (dt
[0] == vect_external_def
6259 || dt
[0] == vect_constant_def
);
6260 /* If op0 is an external or constant def, infer the vector type
6261 from the scalar type. */
6264 /* For boolean type we cannot determine vectype by
6265 invariant value (don't know whether it is a vector
6266 of booleans or vector of integers). We use output
6267 vectype because operations on boolean don't change
6269 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6271 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6273 if (dump_enabled_p ())
6274 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6275 "not supported operation on bool value.\n");
6278 vectype
= vectype_out
;
6281 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6285 gcc_assert (vectype
);
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6290 "no vectype for scalar type %T\n",
6296 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6297 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6298 if (maybe_ne (nunits_out
, nunits_in
))
6301 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6302 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6303 if (op_type
== binary_op
|| op_type
== ternary_op
)
6305 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6306 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6308 if (dump_enabled_p ())
6309 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6310 "use not simple.\n");
6313 is_invariant
&= (dt
[1] == vect_external_def
6314 || dt
[1] == vect_constant_def
);
6316 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype2
)))
6319 if (op_type
== ternary_op
)
6321 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6322 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6324 if (dump_enabled_p ())
6325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6326 "use not simple.\n");
6329 is_invariant
&= (dt
[2] == vect_external_def
6330 || dt
[2] == vect_constant_def
);
6332 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype3
)))
6336 /* Multiple types in SLP are handled by creating the appropriate number of
6337 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6342 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6346 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6350 gcc_assert (ncopies
>= 1);
6352 /* Reject attempts to combine mask types with nonmask types, e.g. if
6353 we have an AND between a (nonmask) boolean loaded from memory and
6354 a (mask) boolean result of a comparison.
6356 TODO: We could easily fix these cases up using pattern statements. */
6357 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6358 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6359 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6361 if (dump_enabled_p ())
6362 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6363 "mixed mask and nonmask vector types\n");
6367 /* Supportable by target? */
6369 vec_mode
= TYPE_MODE (vectype
);
6370 if (code
== MULT_HIGHPART_EXPR
)
6371 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6374 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6377 if (dump_enabled_p ())
6378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6382 target_support_p
= (optab_handler (optab
, vec_mode
)
6383 != CODE_FOR_nothing
);
6386 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6387 if (!target_support_p
|| using_emulated_vectors_p
)
6389 if (dump_enabled_p ())
6390 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6391 "op not supported by target.\n");
6392 /* When vec_mode is not a vector mode and we verified ops we
6393 do not have to lower like AND are natively supported let
6394 those through even when the mode isn't word_mode. For
6395 ops we have to lower the lowering code assumes we are
6396 dealing with word_mode. */
6397 if ((((code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
)
6398 || !target_support_p
)
6399 && maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
))
6400 /* Check only during analysis. */
6401 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6403 if (dump_enabled_p ())
6404 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6407 if (dump_enabled_p ())
6408 dump_printf_loc (MSG_NOTE
, vect_location
,
6409 "proceeding using word mode.\n");
6410 using_emulated_vectors_p
= true;
6413 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6414 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6415 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6417 /* If operating on inactive elements could generate spurious traps,
6418 we need to restrict the operation to active lanes. Note that this
6419 specifically doesn't apply to unhoisted invariants, since they
6420 operate on the same value for every lane.
6422 Similarly, if this operation is part of a reduction, a fully-masked
6423 loop should only change the active lanes of the reduction chain,
6424 keeping the inactive lanes as-is. */
6425 bool mask_out_inactive
= ((!is_invariant
&& gimple_could_trap_p (stmt
))
6428 if (!vec_stmt
) /* transformation not required. */
6431 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6432 && mask_out_inactive
)
6434 if (cond_fn
== IFN_LAST
6435 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6436 OPTIMIZE_FOR_SPEED
))
6438 if (dump_enabled_p ())
6439 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6440 "can't use a fully-masked loop because no"
6441 " conditional operation is available.\n");
6442 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6445 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6449 /* Put types on constant and invariant SLP children. */
6451 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6452 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6453 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6455 if (dump_enabled_p ())
6456 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6457 "incompatible vector types for invariants\n");
6461 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6462 DUMP_VECT_SCOPE ("vectorizable_operation");
6463 vect_model_simple_cost (vinfo
, stmt_info
,
6464 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6465 if (using_emulated_vectors_p
)
6467 /* The above vect_model_simple_cost call handles constants
6468 in the prologue and (mis-)costs one of the stmts as
6469 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6470 for the actual lowering that will be applied. */
6472 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6486 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
, 0, vect_body
);
6493 if (dump_enabled_p ())
6494 dump_printf_loc (MSG_NOTE
, vect_location
,
6495 "transform binary/unary operation.\n");
6497 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6499 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6500 vectors with unsigned elements, but the result is signed. So, we
6501 need to compute the MINUS_EXPR into vectype temporary and
6502 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6503 tree vec_cvt_dest
= NULL_TREE
;
6504 if (orig_code
== POINTER_DIFF_EXPR
)
6506 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6507 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6511 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6513 /* In case the vectorization factor (VF) is bigger than the number
6514 of elements that we can fit in a vectype (nunits), we have to generate
6515 more than one vector stmt - i.e - we need to "unroll" the
6516 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6517 from one copy of the vector stmt to the next, in the field
6518 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6519 stages to find the correct vector defs to be used when vectorizing
6520 stmts that use the defs of the current stmt. The example below
6521 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6522 we need to create 4 vectorized stmts):
6524 before vectorization:
6525 RELATED_STMT VEC_STMT
6529 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6531 RELATED_STMT VEC_STMT
6532 VS1_0: vx0 = memref0 VS1_1 -
6533 VS1_1: vx1 = memref1 VS1_2 -
6534 VS1_2: vx2 = memref2 VS1_3 -
6535 VS1_3: vx3 = memref3 - -
6536 S1: x = load - VS1_0
6539 step2: vectorize stmt S2 (done here):
6540 To vectorize stmt S2 we first need to find the relevant vector
6541 def for the first operand 'x'. This is, as usual, obtained from
6542 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6543 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6544 relevant vector def 'vx0'. Having found 'vx0' we can generate
6545 the vector stmt VS2_0, and as usual, record it in the
6546 STMT_VINFO_VEC_STMT of stmt S2.
6547 When creating the second copy (VS2_1), we obtain the relevant vector
6548 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6549 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6550 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6551 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6552 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6553 chain of stmts and pointers:
6554 RELATED_STMT VEC_STMT
6555 VS1_0: vx0 = memref0 VS1_1 -
6556 VS1_1: vx1 = memref1 VS1_2 -
6557 VS1_2: vx2 = memref2 VS1_3 -
6558 VS1_3: vx3 = memref3 - -
6559 S1: x = load - VS1_0
6560 VS2_0: vz0 = vx0 + v1 VS2_1 -
6561 VS2_1: vz1 = vx1 + v1 VS2_2 -
6562 VS2_2: vz2 = vx2 + v1 VS2_3 -
6563 VS2_3: vz3 = vx3 + v1 - -
6564 S2: z = x + 1 - VS2_0 */
6566 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6567 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6568 /* Arguments are ready. Create the new vector stmt. */
6569 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6571 gimple
*new_stmt
= NULL
;
6572 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6573 ? vec_oprnds1
[i
] : NULL_TREE
);
6574 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6575 if (using_emulated_vectors_p
6576 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
))
6578 /* Lower the operation. This follows vector lowering. */
6579 unsigned int width
= vector_element_bits (vectype
);
6580 tree inner_type
= TREE_TYPE (vectype
);
6582 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode
), 1);
6583 HOST_WIDE_INT max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
6584 tree low_bits
= build_replicated_int_cst (word_type
, width
, max
>> 1);
6586 = build_replicated_int_cst (word_type
, width
, max
& ~(max
>> 1));
6587 tree wvop0
= make_ssa_name (word_type
);
6588 new_stmt
= gimple_build_assign (wvop0
, VIEW_CONVERT_EXPR
,
6589 build1 (VIEW_CONVERT_EXPR
,
6591 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6592 tree result_low
, signs
;
6593 if (code
== PLUS_EXPR
|| code
== MINUS_EXPR
)
6595 tree wvop1
= make_ssa_name (word_type
);
6596 new_stmt
= gimple_build_assign (wvop1
, VIEW_CONVERT_EXPR
,
6597 build1 (VIEW_CONVERT_EXPR
,
6599 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6600 signs
= make_ssa_name (word_type
);
6601 new_stmt
= gimple_build_assign (signs
,
6602 BIT_XOR_EXPR
, wvop0
, wvop1
);
6603 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6604 tree b_low
= make_ssa_name (word_type
);
6605 new_stmt
= gimple_build_assign (b_low
,
6606 BIT_AND_EXPR
, wvop1
, low_bits
);
6607 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6608 tree a_low
= make_ssa_name (word_type
);
6609 if (code
== PLUS_EXPR
)
6610 new_stmt
= gimple_build_assign (a_low
,
6611 BIT_AND_EXPR
, wvop0
, low_bits
);
6613 new_stmt
= gimple_build_assign (a_low
,
6614 BIT_IOR_EXPR
, wvop0
, high_bits
);
6615 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6616 if (code
== MINUS_EXPR
)
6618 new_stmt
= gimple_build_assign (NULL_TREE
,
6619 BIT_NOT_EXPR
, signs
);
6620 signs
= make_ssa_name (word_type
);
6621 gimple_assign_set_lhs (new_stmt
, signs
);
6622 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6624 new_stmt
= gimple_build_assign (NULL_TREE
,
6625 BIT_AND_EXPR
, signs
, high_bits
);
6626 signs
= make_ssa_name (word_type
);
6627 gimple_assign_set_lhs (new_stmt
, signs
);
6628 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6629 result_low
= make_ssa_name (word_type
);
6630 new_stmt
= gimple_build_assign (result_low
, code
, a_low
, b_low
);
6631 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6635 tree a_low
= make_ssa_name (word_type
);
6636 new_stmt
= gimple_build_assign (a_low
,
6637 BIT_AND_EXPR
, wvop0
, low_bits
);
6638 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6639 signs
= make_ssa_name (word_type
);
6640 new_stmt
= gimple_build_assign (signs
, BIT_NOT_EXPR
, wvop0
);
6641 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6642 new_stmt
= gimple_build_assign (NULL_TREE
,
6643 BIT_AND_EXPR
, signs
, high_bits
);
6644 signs
= make_ssa_name (word_type
);
6645 gimple_assign_set_lhs (new_stmt
, signs
);
6646 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6647 result_low
= make_ssa_name (word_type
);
6648 new_stmt
= gimple_build_assign (result_low
,
6649 MINUS_EXPR
, high_bits
, a_low
);
6650 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6652 new_stmt
= gimple_build_assign (NULL_TREE
, BIT_XOR_EXPR
, result_low
,
6654 result_low
= make_ssa_name (word_type
);
6655 gimple_assign_set_lhs (new_stmt
, result_low
);
6656 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6657 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
,
6658 build1 (VIEW_CONVERT_EXPR
,
6659 vectype
, result_low
));
6660 new_temp
= make_ssa_name (vectype
);
6661 gimple_assign_set_lhs (new_stmt
, new_temp
);
6662 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6664 else if (masked_loop_p
&& mask_out_inactive
)
6666 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6668 auto_vec
<tree
> vops (5);
6669 vops
.quick_push (mask
);
6670 vops
.quick_push (vop0
);
6672 vops
.quick_push (vop1
);
6674 vops
.quick_push (vop2
);
6677 /* Perform the operation on active elements only and take
6678 inactive elements from the reduction chain input. */
6680 vops
.quick_push (reduc_idx
== 1 ? vop1
: vop0
);
6684 auto else_value
= targetm
.preferred_else_value
6685 (cond_fn
, vectype
, vops
.length () - 1, &vops
[1]);
6686 vops
.quick_push (else_value
);
6688 gcall
*call
= gimple_build_call_internal_vec (cond_fn
, vops
);
6689 new_temp
= make_ssa_name (vec_dest
, call
);
6690 gimple_call_set_lhs (call
, new_temp
);
6691 gimple_call_set_nothrow (call
, true);
6692 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6697 tree mask
= NULL_TREE
;
6698 /* When combining two masks check if either of them is elsewhere
6699 combined with a loop mask, if that's the case we can mark that the
6700 new combined mask doesn't need to be combined with a loop mask. */
6702 && code
== BIT_AND_EXPR
6703 && VECTOR_BOOLEAN_TYPE_P (vectype
))
6705 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
6708 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6711 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6715 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
6718 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6721 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6726 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6727 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6728 gimple_assign_set_lhs (new_stmt
, new_temp
);
6729 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6730 if (using_emulated_vectors_p
)
6731 suppress_warning (new_stmt
, OPT_Wvector_operation_performance
);
6733 /* Enter the combined value into the vector cond hash so we don't
6734 AND it with a loop mask again. */
6736 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
6741 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6742 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6744 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6745 gimple_assign_set_lhs (new_stmt
, new_temp
);
6746 vect_finish_stmt_generation (vinfo
, stmt_info
,
6751 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6753 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6757 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6759 vec_oprnds0
.release ();
6760 vec_oprnds1
.release ();
6761 vec_oprnds2
.release ();
6766 /* A helper function to ensure data reference DR_INFO's base alignment. */
/* NOTE(review): this region appears whitespace-mangled by extraction --
   statements are split across lines and some original lines (the return-type
   line, braces) look dropped.  All original tokens are preserved verbatim
   below; only comments are added.  */
/* If the base object behind DR_INFO is not aligned to the target alignment
   the vectorizer computed, raise the declared alignment of the underlying
   decl (either through the symbol table or directly via DECL_ALIGN).  */
6769 ensure_base_align (dr_vec_info
*dr_info
)
6771 /* Alignment is only analyzed for the first element of a DR group,
6772 use that to look at base alignment we need to enforce. */
6773 if (STMT_VINFO_GROUPED_ACCESS (dr_info
->stmt
))
6774 dr_info
= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info
->stmt
))
/* Misalignment analysis must have run before we get here.  */
;
6776 gcc_assert (dr_info
->misalignment
!= DR_MISALIGNMENT_UNINITIALIZED
);
6778 if (dr_info
->base_misaligned
)
6780 tree base_decl
= dr_info
->base_decl
;
6782 // We should only be able to increase the alignment of a base object if
6783 // we know what its new alignment should be at compile time.
6784 unsigned HOST_WIDE_INT align_base_to
=
6785 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
/* Decls known to the symbol table must be realigned through their symtab
   node; other decls get DECL_ALIGN bumped directly (and marked user-aligned
   so later passes do not lower it again).  */
6787 if (decl_in_symtab_p (base_decl
))
6788 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6789 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6791 SET_DECL_ALIGN (base_decl
, align_base_to
);
6792 DECL_USER_ALIGN (base_decl
) = 1;
6794 dr_info
->base_misaligned
= false;
6799 /* Function get_group_alias_ptr_type.
6801 Return the alias type for the group starting at FIRST_STMT_INFO. */
6804 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6806 struct data_reference
*first_dr
, *next_dr
;
6808 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6809 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
/* Walk every member of the DR group: if any member's alias set differs
   from the first member's, fall back to the most general ptr_type_node.  */
6810 while (next_stmt_info
)
6812 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6813 if (get_alias_set (DR_REF (first_dr
))
6814 != get_alias_set (DR_REF (next_dr
)))
6816 if (dump_enabled_p ())
6817 dump_printf_loc (MSG_NOTE
, vect_location
,
6818 "conflicting alias set types.\n");
6819 return ptr_type_node
;
6821 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
/* All members agree: use the first reference's alias pointer type.  */
6823 return reference_alias_ptr_type (DR_REF (first_dr
));
6827 /* Function scan_operand_equal_p.
6829 Helper function for check_scan_store. Compare two references
6830 with .GOMP_SIMD_LANE bases. */
6833 scan_operand_equal_p (tree ref1
, tree ref2
)
6835 tree ref
[2] = { ref1
, ref2
};
6836 poly_int64 bitsize
[2], bitpos
[2];
6837 tree offset
[2], base
[2];
/* Phase 1: decompose both references with get_inner_reference, and for a
   MEM_REF whose address is a POINTER_PLUS_EXPR of an ADDR_EXPR and an
   SSA_NAME, strip it down to the underlying decl plus SSA offset.  */
6838 for (int i
= 0; i
< 2; ++i
)
6841 int unsignedp
, reversep
, volatilep
= 0;
6842 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6843 &offset
[i
], &mode
, &unsignedp
,
6844 &reversep
, &volatilep
);
6845 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6847 if (TREE_CODE (base
[i
]) == MEM_REF
6848 && offset
[i
] == NULL_TREE
6849 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6851 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6852 if (is_gimple_assign (def_stmt
)
6853 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6854 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6855 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6857 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6859 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6860 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
/* Phase 2: the bases and sizes must agree, and the offsets must either be
   the same tree or compare operand_equal_p.  */
6865 if (!operand_equal_p (base
[0], base
[1], 0))
6867 if (maybe_ne (bitsize
[0], bitsize
[1]))
6869 if (offset
[0] != offset
[1])
6871 if (!offset
[0] || !offset
[1])
6873 if (!operand_equal_p (offset
[0], offset
[1], 0))
/* Phase 3: canonicalize each offset into (offset, step) form, peeling a
   MULT_EXPR (either as a defining statement of an SSA_NAME or directly)
   and a widening integral conversion off each offset.  */
6876 for (int i
= 0; i
< 2; ++i
)
6878 step
[i
] = integer_one_node
;
6879 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6881 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6882 if (is_gimple_assign (def_stmt
)
6883 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6884 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6887 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6888 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6891 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6893 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6894 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6896 tree rhs1
= NULL_TREE
;
6897 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6899 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6900 if (gimple_assign_cast_p (def_stmt
))
6901 rhs1
= gimple_assign_rhs1 (def_stmt
);
6903 else if (CONVERT_EXPR_P (offset
[i
]))
6904 rhs1
= TREE_OPERAND (offset
[i
], 0);
6906 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6907 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6908 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6909 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
/* Finally both canonicalized (offset, step) pairs must match.  */
6912 if (!operand_equal_p (offset
[0], offset
[1], 0)
6913 || !operand_equal_p (step
[0], step
[1], 0))
/* The kind of operation used at each step when expanding an OpenMP scan
   store; computed per step by scan_store_can_perm_p.  */
6921 enum scan_store_kind
{
6922 /* Normal permutation. */
6923 scan_store_kind_perm
,
6925 /* Whole vector left shift permutation with zero init. */
6926 scan_store_kind_lshift_zero
,
6928 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6929 scan_store_kind_lshift_cond
6932 /* Function scan_store_can_perm_p.
6934 Verify if we can perform the needed permutations or whole vector shifts.
6935 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6936 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6937 to do at each step. */
6940 scan_store_can_perm_p (tree vectype
, tree init
,
6941 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
6943 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
/* Only constant-length vectors whose element count is a power of two
   (> 1) are supported.  */
6944 unsigned HOST_WIDE_INT nunits
;
6945 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6947 int units_log2
= exact_log2 (nunits
);
6948 if (units_log2
<= 0)
6952 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
/* One iteration per scan step plus a final one (i == units_log2) that
   broadcasts the last element.  */
6953 for (i
= 0; i
<= units_log2
; ++i
)
6955 unsigned HOST_WIDE_INT j
, k
;
6956 enum scan_store_kind kind
= scan_store_kind_perm
;
6957 vec_perm_builder
sel (nunits
, nunits
, 1);
6958 sel
.quick_grow (nunits
);
6959 if (i
== units_log2
)
6961 for (j
= 0; j
< nunits
; ++j
)
6962 sel
[j
] = nunits
- 1;
6966 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6968 for (k
= 0; j
< nunits
; ++j
, ++k
)
6969 sel
[j
] = nunits
+ k
;
6971 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
/* If the target cannot do the permutation directly, try the whole-vector
   left shift fallback (vec_shl_optab); depending on INIT this may also
   need a VEC_COND_EXPR to supply the shifted-in elements.  */
6972 if (!can_vec_perm_const_p (vec_mode
, vec_mode
, indices
))
6974 if (i
== units_log2
)
6977 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6979 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6981 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6982 /* Whole vector shifts shift in zeros, so if init is all zero
6983 constant, there is no need to do anything further. */
6984 if ((TREE_CODE (init
) != INTEGER_CST
6985 && TREE_CODE (init
) != REAL_CST
)
6986 || !initializer_zerop (init
))
6988 tree masktype
= truth_type_for (vectype
);
6989 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6991 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6994 kind
= whole_vector_shift_kind
;
/* Record the per-step kinds lazily: the vector stays empty while all
   steps are plain permutations.  */
6996 if (use_whole_vector
)
6998 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6999 use_whole_vector
->safe_grow_cleared (i
, true);
7000 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
7001 use_whole_vector
->safe_push (kind
);
7009 /* Function check_scan_store.
7011 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7014 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
7015 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
7016 vect_memory_access_type memory_access_type
)
7018 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7019 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7022 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
/* Basic sanity checks: contiguous store to a plain VAR_P base at offset
   zero, in a loop that is not fully masked and not a grouped access.  */
7025 || memory_access_type
!= VMAT_CONTIGUOUS
7026 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
7027 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
7028 || loop_vinfo
== NULL
7029 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7030 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7031 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
7032 || !integer_zerop (DR_INIT (dr_info
->dr
))
7033 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
7034 || !alias_sets_conflict_p (get_alias_set (vectype
),
7035 get_alias_set (TREE_TYPE (ref_type
))))
7037 if (dump_enabled_p ())
7038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7039 "unsupported OpenMP scan store.\n");
7043 /* We need to pattern match code built by OpenMP lowering and simplified
7044 by following optimizations into something we can handle.
7045 #pragma omp simd reduction(inscan,+:r)
7049 #pragma omp scan inclusive (r)
7052 shall have body with:
7053 // Initialization for input phase, store the reduction initializer:
7054 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7055 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7057 // Actual input phase:
7059 r.0_5 = D.2042[_20];
7062 // Initialization for scan phase:
7063 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7069 // Actual scan phase:
7071 r.1_8 = D.2042[_20];
7073 The "omp simd array" variable D.2042 holds the privatized copy used
7074 inside of the loop and D.2043 is another one that holds copies of
7075 the current original list item. The separate GOMP_SIMD_LANE ifn
7076 kinds are there in order to allow optimizing the initializer store
7077 and combiner sequence, e.g. if it is originally some C++ish user
7078 defined reduction, but allow the vectorizer to pattern recognize it
7079 and turn into the appropriate vectorized scan.
7081 For exclusive scan, this is slightly different:
7082 #pragma omp simd reduction(inscan,+:r)
7086 #pragma omp scan exclusive (r)
7089 shall have body with:
7090 // Initialization for input phase, store the reduction initializer:
7091 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7092 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7094 // Actual input phase:
7096 r.0_5 = D.2042[_20];
7099 // Initialization for scan phase:
7100 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7106 // Actual scan phase:
7108 r.1_8 = D.2044[_20];
/* SIMD_LANE_ACCESS == 2: the reduction-initializer store; cache its rhs
   in loop_vinfo->scan_map keyed by the destination variable.  */
7111 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
7113 /* Match the D.2042[_21] = 0; store above. Just require that
7114 it is a constant or external definition store. */
7115 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
7118 if (dump_enabled_p ())
7119 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7120 "unsupported OpenMP scan initializer store.\n");
7124 if (! loop_vinfo
->scan_map
)
7125 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
7126 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7127 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
7130 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
7132 /* These stores can be vectorized normally. */
7136 if (rhs_dt
!= vect_internal_def
)
7139 if (dump_enabled_p ())
7140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7141 "unsupported OpenMP scan combiner pattern.\n");
7145 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7146 tree rhs
= gimple_assign_rhs1 (stmt
);
7147 if (TREE_CODE (rhs
) != SSA_NAME
)
7150 gimple
*other_store_stmt
= NULL
;
7151 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7152 bool inscan_var_store
7153 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
/* Locate the second ("other") store of the combined value; the exact walk
   depends on whether this is the exclusive-scan (== 4) or inclusive-scan
   (== 3) form.  */
7155 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7157 if (!inscan_var_store
)
7159 use_operand_p use_p
;
7160 imm_use_iterator iter
;
7161 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7163 gimple
*use_stmt
= USE_STMT (use_p
);
7164 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7166 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
7167 || !is_gimple_assign (use_stmt
)
7168 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
7170 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
7172 other_store_stmt
= use_stmt
;
7174 if (other_store_stmt
== NULL
)
7176 rhs
= gimple_assign_lhs (other_store_stmt
);
7177 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
7181 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
7183 use_operand_p use_p
;
7184 imm_use_iterator iter
;
7185 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7187 gimple
*use_stmt
= USE_STMT (use_p
);
7188 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7190 if (other_store_stmt
)
7192 other_store_stmt
= use_stmt
;
/* The stored value must be defined by a commutative binary assign in the
   same block; POINTER_PLUS_EXPR and MULT_HIGHPART_EXPR are special-cased.  */
7198 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7199 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
7200 || !is_gimple_assign (def_stmt
)
7201 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
7204 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7205 /* For pointer addition, we should use the normal plus for the vector
7209 case POINTER_PLUS_EXPR
:
7212 case MULT_HIGHPART_EXPR
:
7217 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
7220 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7221 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7222 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
/* Both combiner operands must be loads in the same block with the same
   SIMD_LANE_ACCESS kind as the store being checked.  */
7225 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7226 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7227 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
7228 || !gimple_assign_load_p (load1_stmt
)
7229 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
7230 || !gimple_assign_load_p (load2_stmt
))
7233 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7234 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7235 if (load1_stmt_info
== NULL
7236 || load2_stmt_info
== NULL
7237 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
7238 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
7239 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
7240 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7243 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
7245 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7246 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
7247 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
7249 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7251 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7255 use_operand_p use_p
;
7256 imm_use_iterator iter
;
7257 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
7259 gimple
*use_stmt
= USE_STMT (use_p
);
7260 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
7262 if (other_store_stmt
)
7264 other_store_stmt
= use_stmt
;
7268 if (other_store_stmt
== NULL
)
7270 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
7271 || !gimple_store_p (other_store_stmt
))
7274 stmt_vec_info other_store_stmt_info
7275 = loop_vinfo
->lookup_stmt (other_store_stmt
)
;
7276 if (other_store_stmt_info
== NULL
7277 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
7278 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
/* Order the two stores (and the two loads, via scan_operand_equal_p) so
   that load1 corresponds to the first store's location.  */
7281 gimple
*stmt1
= stmt
;
7282 gimple
*stmt2
= other_store_stmt
;
7283 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7284 std::swap (stmt1
, stmt2
);
7285 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7286 gimple_assign_rhs1 (load2_stmt
)))
7288 std::swap (rhs1
, rhs2
);
7289 std::swap (load1_stmt
, load2_stmt
);
7290 std::swap (load1_stmt_info
, load2_stmt_info
);
7292 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7293 gimple_assign_rhs1 (load1_stmt
)))
7296 tree var3
= NULL_TREE
;
7297 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
7298 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
7299 gimple_assign_rhs1 (load2_stmt
)))
7301 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7303 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7304 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
7305 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
7307 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7308 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
7309 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
7310 || lookup_attribute ("omp simd inscan exclusive",
7311 DECL_ATTRIBUTES (var3
)))
/* Verify the two store destinations are the expected pair of "omp simd
   array" variables, exactly one of which is the inscan variable.  */
7315 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
7316 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
7317 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
7320 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7321 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
7322 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
7323 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
7324 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7325 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7328 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7329 std::swap (var1
, var2
);
7331 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7333 if (!lookup_attribute ("omp simd inscan exclusive",
7334 DECL_ATTRIBUTES (var1
)))
/* The cached reduction initializer (recorded for the == 2 store above)
   must exist for the inscan variable.  */
7339 if (loop_vinfo
->scan_map
== NULL
)
7341 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7345 /* The IL is as expected, now check if we can actually vectorize it.
7352 should be vectorized as (where _40 is the vectorized rhs
7353 from the D.2042[_21] = 0; store):
7354 _30 = MEM <vector(8) int> [(int *)&D.2043];
7355 _31 = MEM <vector(8) int> [(int *)&D.2042];
7356 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7358 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7359 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7361 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7362 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7363 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7365 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7366 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7368 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7369 MEM <vector(8) int> [(int *)&D.2043] = _39;
7370 MEM <vector(8) int> [(int *)&D.2042] = _38;
7377 should be vectorized as (where _40 is the vectorized rhs
7378 from the D.2042[_21] = 0; store):
7379 _30 = MEM <vector(8) int> [(int *)&D.2043];
7380 _31 = MEM <vector(8) int> [(int *)&D.2042];
7381 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7382 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7384 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7385 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7386 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7388 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7389 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7390 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7392 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7393 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7396 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7397 MEM <vector(8) int> [(int *)&D.2044] = _39;
7398 MEM <vector(8) int> [(int *)&D.2042] = _51; */
/* Target support: the combiner operation must have a vector optab and the
   required permutations must be supported (scan_store_can_perm_p).  */
7399 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7400 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7401 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7404 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7405 if (units_log2
== -1)
7412 /* Function vectorizable_scan_store.
7414 Helper of vectorizable_score, arguments like on vectorizable_store.
7415 Handle only the transformation, checking is done in check_scan_store. */
7418 vectorizable_scan_store (vec_info
*vinfo
,
7419 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7420 gimple
**vec_stmt
, int ncopies
)
7422 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7423 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7424 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7425 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7427 if (dump_enabled_p ())
7428 dump_printf_loc (MSG_NOTE
, vect_location
,
7429 "transform scan store. ncopies = %d\n", ncopies
);
7431 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7432 tree rhs
= gimple_assign_rhs1 (stmt
);
7433 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7435 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7436 bool inscan_var_store
7437 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7439 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7441 use_operand_p use_p
;
7442 imm_use_iterator iter
;
7443 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7445 gimple
*use_stmt
= USE_STMT (use_p
);
7446 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7448 rhs
= gimple_assign_lhs (use_stmt
);
7453 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7454 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7455 if (code
== POINTER_PLUS_EXPR
)
7457 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7458 && commutative_tree_code (code
));
7459 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7460 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7461 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7462 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7463 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7464 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7465 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7466 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7467 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7468 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7469 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7471 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7473 std::swap (rhs1
, rhs2
);
7474 std::swap (var1
, var2
);
7475 std::swap (load1_dr_info
, load2_dr_info
);
7478 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7481 unsigned HOST_WIDE_INT nunits
;
7482 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7484 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7485 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7486 gcc_assert (units_log2
> 0);
7487 auto_vec
<tree
, 16> perms
;
7488 perms
.quick_grow (units_log2
+ 1);
7489 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7490 for (int i
= 0; i
<= units_log2
; ++i
)
7492 unsigned HOST_WIDE_INT j
, k
;
7493 vec_perm_builder
sel (nunits
, nunits
, 1);
7494 sel
.quick_grow (nunits
);
7495 if (i
== units_log2
)
7496 for (j
= 0; j
< nunits
; ++j
)
7497 sel
[j
] = nunits
- 1;
7500 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7502 for (k
= 0; j
< nunits
; ++j
, ++k
)
7503 sel
[j
] = nunits
+ k
;
7505 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7506 if (!use_whole_vector
.is_empty ()
7507 && use_whole_vector
[i
] != scan_store_kind_perm
)
7509 if (zero_vec
== NULL_TREE
)
7510 zero_vec
= build_zero_cst (vectype
);
7511 if (masktype
== NULL_TREE
7512 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7513 masktype
= truth_type_for (vectype
);
7514 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7517 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7520 tree vec_oprnd1
= NULL_TREE
;
7521 tree vec_oprnd2
= NULL_TREE
;
7522 tree vec_oprnd3
= NULL_TREE
;
7523 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7524 tree dataref_offset
= build_int_cst (ref_type
, 0);
7525 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7526 vectype
, VMAT_CONTIGUOUS
);
7527 tree ldataref_ptr
= NULL_TREE
;
7528 tree orig
= NULL_TREE
;
7529 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7530 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7531 auto_vec
<tree
> vec_oprnds1
;
7532 auto_vec
<tree
> vec_oprnds2
;
7533 auto_vec
<tree
> vec_oprnds3
;
7534 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7535 *init
, &vec_oprnds1
,
7536 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7537 rhs2
, &vec_oprnds3
);
7538 for (int j
= 0; j
< ncopies
; j
++)
7540 vec_oprnd1
= vec_oprnds1
[j
];
7541 if (ldataref_ptr
== NULL
)
7542 vec_oprnd2
= vec_oprnds2
[j
];
7543 vec_oprnd3
= vec_oprnds3
[j
];
7546 else if (!inscan_var_store
)
7547 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7551 vec_oprnd2
= make_ssa_name (vectype
);
7552 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7553 unshare_expr (ldataref_ptr
),
7555 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7556 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7557 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7558 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7559 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7562 tree v
= vec_oprnd2
;
7563 for (int i
= 0; i
< units_log2
; ++i
)
7565 tree new_temp
= make_ssa_name (vectype
);
7566 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7568 && (use_whole_vector
[i
]
7569 != scan_store_kind_perm
))
7570 ? zero_vec
: vec_oprnd1
, v
,
7572 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7573 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7574 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7576 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7578 /* Whole vector shift shifted in zero bits, but if *init
7579 is not initializer_zerop, we need to replace those elements
7580 with elements from vec_oprnd1. */
7581 tree_vector_builder
vb (masktype
, nunits
, 1);
7582 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7583 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7584 ? boolean_false_node
: boolean_true_node
);
7586 tree new_temp2
= make_ssa_name (vectype
);
7587 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7588 new_temp
, vec_oprnd1
);
7589 vect_finish_stmt_generation (vinfo
, stmt_info
,
7591 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7592 new_temp
= new_temp2
;
7595 /* For exclusive scan, perform the perms[i] permutation once
7598 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7606 tree new_temp2
= make_ssa_name (vectype
);
7607 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7608 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7609 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7614 tree new_temp
= make_ssa_name (vectype
);
7615 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7616 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7617 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7619 tree last_perm_arg
= new_temp
;
7620 /* For exclusive scan, new_temp computed above is the exclusive scan
7621 prefix sum. Turn it into inclusive prefix sum for the broadcast
7622 of the last element into orig. */
7623 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7625 last_perm_arg
= make_ssa_name (vectype
);
7626 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7627 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7628 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7631 orig
= make_ssa_name (vectype
);
7632 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7633 last_perm_arg
, perms
[units_log2
]);
7634 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7635 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7637 if (!inscan_var_store
)
7639 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7640 unshare_expr (dataref_ptr
),
7642 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7643 g
= gimple_build_assign (data_ref
, new_temp
);
7644 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7645 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7649 if (inscan_var_store
)
7650 for (int j
= 0; j
< ncopies
; j
++)
7653 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7655 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7656 unshare_expr (dataref_ptr
),
7658 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7659 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7660 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7661 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7667 /* Function vectorizable_store.
7669 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7670 that can be vectorized.
7671 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7672 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7673 Return true if STMT_INFO is vectorizable in this way. */
7676 vectorizable_store (vec_info
*vinfo
,
7677 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7678 gimple
**vec_stmt
, slp_tree slp_node
,
7679 stmt_vector_for_cost
*cost_vec
)
7683 tree vec_oprnd
= NULL_TREE
;
7685 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7686 class loop
*loop
= NULL
;
7687 machine_mode vec_mode
;
7689 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7690 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7691 tree dataref_ptr
= NULL_TREE
;
7692 tree dataref_offset
= NULL_TREE
;
7693 gimple
*ptr_incr
= NULL
;
7696 stmt_vec_info first_stmt_info
;
7698 unsigned int group_size
, i
;
7699 vec
<tree
> oprnds
= vNULL
;
7700 vec
<tree
> result_chain
= vNULL
;
7701 vec
<tree
> vec_oprnds
= vNULL
;
7702 bool slp
= (slp_node
!= NULL
);
7703 unsigned int vec_num
;
7704 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7706 gather_scatter_info gs_info
;
7708 vec_load_store_type vls_type
;
7711 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7714 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7718 /* Is vectorizable store? */
7720 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7721 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7723 tree scalar_dest
= gimple_assign_lhs (assign
);
7724 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7725 && is_pattern_stmt_p (stmt_info
))
7726 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7727 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7728 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7729 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7730 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7731 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7732 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7733 && TREE_CODE (scalar_dest
) != MEM_REF
)
7738 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7739 if (!call
|| !gimple_call_internal_p (call
))
7742 internal_fn ifn
= gimple_call_internal_fn (call
);
7743 if (!internal_store_fn_p (ifn
))
7746 if (slp_node
!= NULL
)
7748 if (dump_enabled_p ())
7749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7750 "SLP of masked stores not supported.\n");
7754 int mask_index
= internal_fn_mask_index (ifn
);
7756 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
7757 &mask
, NULL
, &mask_dt
, &mask_vectype
))
7761 op
= vect_get_store_rhs (stmt_info
);
7763 /* Cannot have hybrid store SLP -- that would mean storing to the
7764 same location twice. */
7765 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7767 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7768 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7772 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7773 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7778 /* Multiple types in SLP are handled by creating the appropriate number of
7779 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7784 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7786 gcc_assert (ncopies
>= 1);
7788 /* FORNOW. This restriction should be relaxed. */
7789 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7791 if (dump_enabled_p ())
7792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7793 "multiple types in nested loop.\n");
7797 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7798 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7801 elem_type
= TREE_TYPE (vectype
);
7802 vec_mode
= TYPE_MODE (vectype
);
7804 if (!STMT_VINFO_DATA_REF (stmt_info
))
7807 vect_memory_access_type memory_access_type
;
7808 enum dr_alignment_support alignment_support_scheme
;
7811 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7812 ncopies
, &memory_access_type
, &poffset
,
7813 &alignment_support_scheme
, &misalignment
, &gs_info
))
7818 if (memory_access_type
== VMAT_CONTIGUOUS
)
7820 if (!VECTOR_MODE_P (vec_mode
)
7821 || !can_vec_mask_load_store_p (vec_mode
,
7822 TYPE_MODE (mask_vectype
), false))
7825 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7826 && (memory_access_type
!= VMAT_GATHER_SCATTER
7827 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7829 if (dump_enabled_p ())
7830 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7831 "unsupported access type for masked store.\n");
7834 else if (memory_access_type
== VMAT_GATHER_SCATTER
7835 && gs_info
.ifn
== IFN_LAST
7838 if (dump_enabled_p ())
7839 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7840 "unsupported masked emulated scatter.\n");
7846 /* FORNOW. In some cases can vectorize even if data-type not supported
7847 (e.g. - array initialization with 0). */
7848 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7852 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7853 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7854 && memory_access_type
!= VMAT_GATHER_SCATTER
7855 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7858 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7859 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7860 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7864 first_stmt_info
= stmt_info
;
7865 first_dr_info
= dr_info
;
7866 group_size
= vec_num
= 1;
7869 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7871 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7872 memory_access_type
))
7876 if (!vec_stmt
) /* transformation not required. */
7878 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7881 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7882 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
7883 vls_type
, group_size
,
7884 memory_access_type
, &gs_info
,
7888 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7891 if (dump_enabled_p ())
7892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7893 "incompatible vector types for invariants\n");
7897 if (dump_enabled_p ()
7898 && memory_access_type
!= VMAT_ELEMENTWISE
7899 && memory_access_type
!= VMAT_GATHER_SCATTER
7900 && alignment_support_scheme
!= dr_aligned
)
7901 dump_printf_loc (MSG_NOTE
, vect_location
,
7902 "Vectorizing an unaligned access.\n");
7904 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7905 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7906 memory_access_type
, &gs_info
,
7907 alignment_support_scheme
,
7908 misalignment
, vls_type
, slp_node
, cost_vec
);
7911 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7915 ensure_base_align (dr_info
);
7917 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7919 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7920 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7921 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7922 tree ptr
, var
, scale
, vec_mask
;
7923 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7924 tree mask_halfvectype
= mask_vectype
;
7925 edge pe
= loop_preheader_edge (loop
);
7928 enum { NARROW
, NONE
, WIDEN
} modifier
;
7929 poly_uint64 scatter_off_nunits
7930 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7932 if (known_eq (nunits
, scatter_off_nunits
))
7934 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7938 /* Currently gathers and scatters are only supported for
7939 fixed-length vectors. */
7940 unsigned int count
= scatter_off_nunits
.to_constant ();
7941 vec_perm_builder
sel (count
, count
, 1);
7942 for (i
= 0; i
< (unsigned int) count
; ++i
)
7943 sel
.quick_push (i
| (count
/ 2));
7945 vec_perm_indices
indices (sel
, 1, count
);
7946 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7948 gcc_assert (perm_mask
!= NULL_TREE
);
7950 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7954 /* Currently gathers and scatters are only supported for
7955 fixed-length vectors. */
7956 unsigned int count
= nunits
.to_constant ();
7957 vec_perm_builder
sel (count
, count
, 1);
7958 for (i
= 0; i
< (unsigned int) count
; ++i
)
7959 sel
.quick_push (i
| (count
/ 2));
7961 vec_perm_indices
indices (sel
, 2, count
);
7962 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7963 gcc_assert (perm_mask
!= NULL_TREE
);
7967 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7972 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7973 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7974 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7975 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7976 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7977 scaletype
= TREE_VALUE (arglist
);
7979 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7980 && TREE_CODE (rettype
) == VOID_TYPE
);
7982 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7983 if (!is_gimple_min_invariant (ptr
))
7985 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7986 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7987 gcc_assert (!new_bb
);
7990 if (mask
== NULL_TREE
)
7992 mask_arg
= build_int_cst (masktype
, -1);
7993 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7994 mask_arg
, masktype
, NULL
);
7997 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7999 auto_vec
<tree
> vec_oprnds0
;
8000 auto_vec
<tree
> vec_oprnds1
;
8001 auto_vec
<tree
> vec_masks
;
8004 tree mask_vectype
= truth_type_for (vectype
);
8005 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
8007 ? ncopies
/ 2 : ncopies
,
8008 mask
, &vec_masks
, mask_vectype
);
8010 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
8012 ? ncopies
/ 2 : ncopies
,
8013 gs_info
.offset
, &vec_oprnds0
);
8014 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
8016 ? ncopies
/ 2 : ncopies
,
8018 for (j
= 0; j
< ncopies
; ++j
)
8020 if (modifier
== WIDEN
)
8023 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
8024 perm_mask
, stmt_info
, gsi
);
8026 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
8027 src
= vec_oprnd1
= vec_oprnds1
[j
];
8029 mask_op
= vec_mask
= vec_masks
[j
];
8031 else if (modifier
== NARROW
)
8034 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
8035 perm_mask
, stmt_info
, gsi
);
8037 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
8038 op
= vec_oprnd0
= vec_oprnds0
[j
];
8040 mask_op
= vec_mask
= vec_masks
[j
/ 2];
8044 op
= vec_oprnd0
= vec_oprnds0
[j
];
8045 src
= vec_oprnd1
= vec_oprnds1
[j
];
8047 mask_op
= vec_mask
= vec_masks
[j
];
8050 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
8052 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
8053 TYPE_VECTOR_SUBPARTS (srctype
)));
8054 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
8055 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
8057 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
8058 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8062 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
8064 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
8065 TYPE_VECTOR_SUBPARTS (idxtype
)));
8066 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
8067 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
8069 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
8070 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8078 if (modifier
== NARROW
)
8080 var
= vect_get_new_ssa_name (mask_halfvectype
,
8083 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
8084 : VEC_UNPACK_LO_EXPR
,
8086 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8089 tree optype
= TREE_TYPE (mask_arg
);
8090 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
8093 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
8094 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
8095 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
8097 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
8098 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8100 if (!useless_type_conversion_p (masktype
, utype
))
8102 gcc_assert (TYPE_PRECISION (utype
)
8103 <= TYPE_PRECISION (masktype
));
8104 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
8105 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
8106 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8112 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
8113 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8115 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8117 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
8120 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
8121 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
8123 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8124 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
8129 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
8131 /* We vectorize all the stmts of the interleaving group when we
8132 reach the last stmt in the group. */
8133 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
8134 < DR_GROUP_SIZE (first_stmt_info
)
8143 grouped_store
= false;
8144 /* VEC_NUM is the number of vect stmts to be created for this
8146 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8147 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8148 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
8149 == first_stmt_info
);
8150 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8151 op
= vect_get_store_rhs (first_stmt_info
);
8154 /* VEC_NUM is the number of vect stmts to be created for this
8156 vec_num
= group_size
;
8158 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8161 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8163 if (dump_enabled_p ())
8164 dump_printf_loc (MSG_NOTE
, vect_location
,
8165 "transform store. ncopies = %d\n", ncopies
);
8167 if (memory_access_type
== VMAT_ELEMENTWISE
8168 || memory_access_type
== VMAT_STRIDED_SLP
)
8170 gimple_stmt_iterator incr_gsi
;
8176 tree stride_base
, stride_step
, alias_off
;
8180 /* Checked by get_load_store_type. */
8181 unsigned int const_nunits
= nunits
.to_constant ();
8183 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8184 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
8186 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8188 = fold_build_pointer_plus
8189 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8190 size_binop (PLUS_EXPR
,
8191 convert_to_ptrofftype (dr_offset
),
8192 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8193 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8195 /* For a store with loop-invariant (but other than power-of-2)
8196 stride (i.e. not a grouped access) like so:
8198 for (i = 0; i < n; i += stride)
8201 we generate a new induction variable and new stores from
8202 the components of the (vectorized) rhs:
8204 for (j = 0; ; j += VF*stride)
8209 array[j + stride] = tmp2;
8213 unsigned nstores
= const_nunits
;
8215 tree ltype
= elem_type
;
8216 tree lvectype
= vectype
;
8219 if (group_size
< const_nunits
8220 && const_nunits
% group_size
== 0)
8222 nstores
= const_nunits
/ group_size
;
8224 ltype
= build_vector_type (elem_type
, group_size
);
8227 /* First check if vec_extract optab doesn't support extraction
8228 of vector elts directly. */
8229 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
8231 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8232 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
8233 group_size
).exists (&vmode
)
8234 || (convert_optab_handler (vec_extract_optab
,
8235 TYPE_MODE (vectype
), vmode
)
8236 == CODE_FOR_nothing
))
8238 /* Try to avoid emitting an extract of vector elements
8239 by performing the extracts using an integer type of the
8240 same size, extracting from a vector of those and then
8241 re-interpreting it as the original vector type if
8244 = group_size
* GET_MODE_BITSIZE (elmode
);
8245 unsigned int lnunits
= const_nunits
/ group_size
;
8246 /* If we can't construct such a vector fall back to
8247 element extracts from the original vector type and
8248 element size stores. */
8249 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8250 && VECTOR_MODE_P (TYPE_MODE (vectype
))
8251 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8252 lnunits
).exists (&vmode
)
8253 && (convert_optab_handler (vec_extract_optab
,
8255 != CODE_FOR_nothing
))
8259 ltype
= build_nonstandard_integer_type (lsize
, 1);
8260 lvectype
= build_vector_type (ltype
, nstores
);
8262 /* Else fall back to vector extraction anyway.
8263 Fewer stores are more important than avoiding spilling
8264 of the vector we extract from. Compared to the
8265 construction case in vectorizable_load no store-forwarding
8266 issue exists here for reasonable archs. */
8269 else if (group_size
>= const_nunits
8270 && group_size
% const_nunits
== 0)
8273 lnel
= const_nunits
;
8277 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
8278 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8281 ivstep
= stride_step
;
8282 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8283 build_int_cst (TREE_TYPE (ivstep
), vf
));
8285 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8287 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8288 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8289 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
,
8290 loop
, &incr_gsi
, insert_after
,
8292 incr
= gsi_stmt (incr_gsi
);
8294 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8296 alias_off
= build_int_cst (ref_type
, 0);
8297 stmt_vec_info next_stmt_info
= first_stmt_info
;
8298 for (g
= 0; g
< group_size
; g
++)
8300 running_off
= offvar
;
8303 tree size
= TYPE_SIZE_UNIT (ltype
);
8304 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
8306 tree newoff
= copy_ssa_name (running_off
, NULL
);
8307 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8309 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8310 running_off
= newoff
;
8313 op
= vect_get_store_rhs (next_stmt_info
);
8314 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
8316 unsigned int group_el
= 0;
8317 unsigned HOST_WIDE_INT
8318 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8319 for (j
= 0; j
< ncopies
; j
++)
8321 vec_oprnd
= vec_oprnds
[j
];
8322 /* Pun the vector to extract from if necessary. */
8323 if (lvectype
!= vectype
)
8325 tree tem
= make_ssa_name (lvectype
);
8327 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
8328 lvectype
, vec_oprnd
));
8329 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8332 for (i
= 0; i
< nstores
; i
++)
8334 tree newref
, newoff
;
8335 gimple
*incr
, *assign
;
8336 tree size
= TYPE_SIZE (ltype
);
8337 /* Extract the i'th component. */
8338 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8339 bitsize_int (i
), size
);
8340 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8343 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8347 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8349 newref
= build2 (MEM_REF
, ltype
,
8350 running_off
, this_off
);
8351 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8353 /* And store it to *running_off. */
8354 assign
= gimple_build_assign (newref
, elem
);
8355 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8359 || group_el
== group_size
)
8361 newoff
= copy_ssa_name (running_off
, NULL
);
8362 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8363 running_off
, stride_step
);
8364 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8366 running_off
= newoff
;
8369 if (g
== group_size
- 1
8372 if (j
== 0 && i
== 0)
8374 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8378 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8379 vec_oprnds
.release ();
8387 auto_vec
<tree
> dr_chain (group_size
);
8388 oprnds
.create (group_size
);
8390 gcc_assert (alignment_support_scheme
);
8391 vec_loop_masks
*loop_masks
8392 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8393 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8395 vec_loop_lens
*loop_lens
8396 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8397 ? &LOOP_VINFO_LENS (loop_vinfo
)
8400 /* Shouldn't go with length-based approach if fully masked. */
8401 gcc_assert (!loop_lens
|| !loop_masks
);
8403 /* Targets with store-lane instructions must not require explicit
8404 realignment. vect_supportable_dr_alignment always returns either
8405 dr_aligned or dr_unaligned_supported for masked operations. */
8406 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8409 || alignment_support_scheme
== dr_aligned
8410 || alignment_support_scheme
== dr_unaligned_supported
);
8412 tree offset
= NULL_TREE
;
8413 if (!known_eq (poffset
, 0))
8414 offset
= size_int (poffset
);
8417 tree vec_offset
= NULL_TREE
;
8418 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8420 aggr_type
= NULL_TREE
;
8423 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8425 aggr_type
= elem_type
;
8426 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8427 &bump
, &vec_offset
);
8431 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8432 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8434 aggr_type
= vectype
;
8435 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8436 memory_access_type
);
8440 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8442 /* In case the vectorization factor (VF) is bigger than the number
8443 of elements that we can fit in a vectype (nunits), we have to generate
8444 more than one vector stmt - i.e - we need to "unroll" the
8445 vector stmt by a factor VF/nunits. */
8447 /* In case of interleaving (non-unit grouped access):
8454 We create vectorized stores starting from base address (the access of the
8455 first stmt in the chain (S2 in the above example), when the last store stmt
8456 of the chain (S4) is reached:
8459 VS2: &base + vec_size*1 = vx0
8460 VS3: &base + vec_size*2 = vx1
8461 VS4: &base + vec_size*3 = vx3
8463 Then permutation statements are generated:
8465 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8466 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8469 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8470 (the order of the data-refs in the output of vect_permute_store_chain
8471 corresponds to the order of scalar stmts in the interleaving chain - see
8472 the documentation of vect_permute_store_chain()).
8474 In case of both multiple types and interleaving, above vector stores and
8475 permutation stmts are created for every copy. The result vector stmts are
8476 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8477 STMT_VINFO_RELATED_STMT for the next copies.
8480 auto_vec
<tree
> vec_masks
;
8481 tree vec_mask
= NULL
;
8482 auto_vec
<tree
> vec_offsets
;
8483 auto_vec
<vec
<tree
> > gvec_oprnds
;
8484 gvec_oprnds
.safe_grow_cleared (group_size
, true);
8485 for (j
= 0; j
< ncopies
; j
++)
8492 /* Get vectorized arguments for SLP_NODE. */
8493 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
8495 vec_oprnd
= vec_oprnds
[0];
8499 /* For interleaved stores we collect vectorized defs for all the
8500 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8501 used as an input to vect_permute_store_chain().
8503 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8504 and OPRNDS are of size 1. */
8505 stmt_vec_info next_stmt_info
= first_stmt_info
;
8506 for (i
= 0; i
< group_size
; i
++)
8508 /* Since gaps are not supported for interleaved stores,
8509 DR_GROUP_SIZE is the exact number of stmts in the chain.
8510 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8511 that there is no interleaving, DR_GROUP_SIZE is 1,
8512 and only one iteration of the loop will be executed. */
8513 op
= vect_get_store_rhs (next_stmt_info
);
8514 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8515 ncopies
, op
, &gvec_oprnds
[i
]);
8516 vec_oprnd
= gvec_oprnds
[i
][0];
8517 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
8518 oprnds
.quick_push (gvec_oprnds
[i
][0]);
8519 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8523 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8524 mask
, &vec_masks
, mask_vectype
);
8525 vec_mask
= vec_masks
[0];
8529 /* We should have caught mismatched types earlier. */
8530 gcc_assert (useless_type_conversion_p (vectype
,
8531 TREE_TYPE (vec_oprnd
)));
8532 bool simd_lane_access_p
8533 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8534 if (simd_lane_access_p
8536 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8537 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8538 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8539 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8540 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8541 get_alias_set (TREE_TYPE (ref_type
))))
8543 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8544 dataref_offset
= build_int_cst (ref_type
, 0);
8546 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8547 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8548 slp_node
, &gs_info
, &dataref_ptr
,
8552 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8553 simd_lane_access_p
? loop
: NULL
,
8554 offset
, &dummy
, gsi
, &ptr_incr
,
8555 simd_lane_access_p
, bump
);
8559 /* For interleaved stores we created vectorized defs for all the
8560 defs stored in OPRNDS in the previous iteration (previous copy).
8561 DR_CHAIN is then used as an input to vect_permute_store_chain().
8562 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8563 OPRNDS are of size 1. */
8564 for (i
= 0; i
< group_size
; i
++)
8566 vec_oprnd
= gvec_oprnds
[i
][j
];
8567 dr_chain
[i
] = gvec_oprnds
[i
][j
];
8568 oprnds
[i
] = gvec_oprnds
[i
][j
];
8571 vec_mask
= vec_masks
[j
];
8574 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8575 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8576 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8580 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8584 /* Get an array into which we can store the individual vectors. */
8585 vec_array
= create_vector_array (vectype
, vec_num
);
8587 /* Invalidate the current contents of VEC_ARRAY. This should
8588 become an RTL clobber too, which prevents the vector registers
8589 from being upward-exposed. */
8590 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8592 /* Store the individual vectors into the array. */
8593 for (i
= 0; i
< vec_num
; i
++)
8595 vec_oprnd
= dr_chain
[i
];
8596 write_vector_array (vinfo
, stmt_info
,
8597 gsi
, vec_oprnd
, vec_array
, i
);
8600 tree final_mask
= NULL
;
8602 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8605 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8606 final_mask
, vec_mask
, gsi
);
8612 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8614 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8615 tree alias_ptr
= build_int_cst (ref_type
, align
);
8616 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8617 dataref_ptr
, alias_ptr
,
8618 final_mask
, vec_array
);
8623 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8624 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8625 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8627 gimple_call_set_lhs (call
, data_ref
);
8629 gimple_call_set_nothrow (call
, true);
8630 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8633 /* Record that VEC_ARRAY is now dead. */
8634 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8642 result_chain
.create (group_size
);
8644 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8645 gsi
, &result_chain
);
8648 stmt_vec_info next_stmt_info
= first_stmt_info
;
8649 for (i
= 0; i
< vec_num
; i
++)
8652 unsigned HOST_WIDE_INT align
;
8654 tree final_mask
= NULL_TREE
;
8656 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8658 vectype
, vec_num
* j
+ i
);
8660 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8661 final_mask
, vec_mask
, gsi
);
8663 if (memory_access_type
== VMAT_GATHER_SCATTER
8664 && gs_info
.ifn
!= IFN_LAST
)
8666 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8667 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
8668 tree scale
= size_int (gs_info
.scale
);
8671 call
= gimple_build_call_internal
8672 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8673 scale
, vec_oprnd
, final_mask
);
8675 call
= gimple_build_call_internal
8676 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8678 gimple_call_set_nothrow (call
, true);
8679 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8683 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8685 /* Emulated scatter. */
8686 gcc_assert (!final_mask
);
8687 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
8688 unsigned HOST_WIDE_INT const_offset_nunits
8689 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
8691 vec
<constructor_elt
, va_gc
> *ctor_elts
;
8692 vec_alloc (ctor_elts
, const_nunits
);
8693 gimple_seq stmts
= NULL
;
8694 tree elt_type
= TREE_TYPE (vectype
);
8695 unsigned HOST_WIDE_INT elt_size
8696 = tree_to_uhwi (TYPE_SIZE (elt_type
));
8697 /* We support offset vectors with more elements
8698 than the data vector for now. */
8699 unsigned HOST_WIDE_INT factor
8700 = const_offset_nunits
/ const_nunits
;
8701 vec_offset
= vec_offsets
[j
/ factor
];
8702 unsigned elt_offset
= (j
% factor
) * const_nunits
;
8703 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
8704 tree scale
= size_int (gs_info
.scale
);
8705 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
8706 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
8707 for (unsigned k
= 0; k
< const_nunits
; ++k
)
8709 /* Compute the offsetted pointer. */
8710 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
8711 bitsize_int (k
+ elt_offset
));
8712 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
8713 idx_type
, vec_offset
,
8714 TYPE_SIZE (idx_type
), boff
);
8715 idx
= gimple_convert (&stmts
, sizetype
, idx
);
8716 idx
= gimple_build (&stmts
, MULT_EXPR
,
8717 sizetype
, idx
, scale
);
8718 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
8719 TREE_TYPE (dataref_ptr
),
8721 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
8722 /* Extract the element to be stored. */
8723 tree elt
= gimple_build (&stmts
, BIT_FIELD_REF
,
8724 TREE_TYPE (vectype
), vec_oprnd
,
8725 TYPE_SIZE (elt_type
),
8726 bitsize_int (k
* elt_size
));
8727 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
8729 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
8730 build_int_cst (ref_type
, 0));
8731 new_stmt
= gimple_build_assign (ref
, elt
);
8732 vect_finish_stmt_generation (vinfo
, stmt_info
,
8739 /* Bump the vector pointer. */
8740 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8741 gsi
, stmt_info
, bump
);
8744 vec_oprnd
= vec_oprnds
[i
];
8745 else if (grouped_store
)
8746 /* For grouped stores vectorized defs are interleaved in
8747 vect_permute_store_chain(). */
8748 vec_oprnd
= result_chain
[i
];
8750 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8751 if (alignment_support_scheme
== dr_aligned
)
8753 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
8755 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8759 misalign
= misalignment
;
8760 if (dataref_offset
== NULL_TREE
8761 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8762 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8764 align
= least_bit_hwi (misalign
| align
);
8766 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8768 tree perm_mask
= perm_mask_for_reverse (vectype
);
8769 tree perm_dest
= vect_create_destination_var
8770 (vect_get_store_rhs (stmt_info
), vectype
);
8771 tree new_temp
= make_ssa_name (perm_dest
);
8773 /* Generate the permute statement. */
8775 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8776 vec_oprnd
, perm_mask
);
8777 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8779 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8780 vec_oprnd
= new_temp
;
8783 /* Arguments are ready. Create the new vector stmt. */
8786 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8788 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8790 final_mask
, vec_oprnd
);
8791 gimple_call_set_nothrow (call
, true);
8792 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8798 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8799 vec_num
* ncopies
, vec_num
* j
+ i
);
8800 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8801 machine_mode vmode
= TYPE_MODE (vectype
);
8802 opt_machine_mode new_ovmode
8803 = get_len_load_store_mode (vmode
, false);
8804 machine_mode new_vmode
= new_ovmode
.require ();
8805 /* Need conversion if it's wrapped with VnQI. */
8806 if (vmode
!= new_vmode
)
8809 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8812 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8814 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8816 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8818 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8823 signed char biasval
=
8824 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8826 tree bias
= build_int_cst (intQI_type_node
, biasval
);
8828 = gimple_build_call_internal (IFN_LEN_STORE
, 5, dataref_ptr
,
8829 ptr
, final_len
, vec_oprnd
,
8831 gimple_call_set_nothrow (call
, true);
8832 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8837 data_ref
= fold_build2 (MEM_REF
, vectype
,
8841 : build_int_cst (ref_type
, 0));
8842 if (alignment_support_scheme
== dr_aligned
)
8845 TREE_TYPE (data_ref
)
8846 = build_aligned_type (TREE_TYPE (data_ref
),
8847 align
* BITS_PER_UNIT
);
8848 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8849 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8850 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8856 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8857 if (!next_stmt_info
)
8864 *vec_stmt
= new_stmt
;
8865 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8869 for (i
= 0; i
< group_size
; ++i
)
8871 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8875 result_chain
.release ();
8876 vec_oprnds
.release ();
8881 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8882 VECTOR_CST mask. No checks are made that the target platform supports the
8883 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8884 vect_gen_perm_mask_checked. */
8887 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8891 poly_uint64 nunits
= sel
.length ();
8892 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8894 mask_type
= build_vector_type (ssizetype
, nunits
);
8895 return vec_perm_indices_to_tree (mask_type
, sel
);
8898 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8899 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8902 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8904 machine_mode vmode
= TYPE_MODE (vectype
);
8905 gcc_assert (can_vec_perm_const_p (vmode
, vmode
, sel
));
8906 return vect_gen_perm_mask_any (vectype
, sel
);
8909 /* Given a vector variable X and Y, that was generated for the scalar
8910 STMT_INFO, generate instructions to permute the vector elements of X and Y
8911 using permutation mask MASK_VEC, insert them at *GSI and return the
8912 permuted vector variable. */
8915 permute_vec_elements (vec_info
*vinfo
,
8916 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8917 gimple_stmt_iterator
*gsi
)
8919 tree vectype
= TREE_TYPE (x
);
8920 tree perm_dest
, data_ref
;
8923 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8924 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8925 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8927 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8928 data_ref
= make_ssa_name (perm_dest
);
8930 /* Generate the permute statement. */
8931 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8932 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8937 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8938 inserting them on the loops preheader edge. Returns true if we
8939 were successful in doing so (and thus STMT_INFO can be moved then),
8940 otherwise returns false. */
8943 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8949 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8951 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8952 if (!gimple_nop_p (def_stmt
)
8953 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8955 /* Make sure we don't need to recurse. While we could do
8956 so in simple cases when there are more complex use webs
8957 we don't have an easy way to preserve stmt order to fulfil
8958 dependencies within them. */
8961 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8963 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8965 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8966 if (!gimple_nop_p (def_stmt2
)
8967 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8977 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8979 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8980 if (!gimple_nop_p (def_stmt
)
8981 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8983 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8984 gsi_remove (&gsi
, false);
8985 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
8992 /* vectorizable_load.
8994 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8995 that can be vectorized.
8996 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8997 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8998 Return true if STMT_INFO is vectorizable in this way. */
9001 vectorizable_load (vec_info
*vinfo
,
9002 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9003 gimple
**vec_stmt
, slp_tree slp_node
,
9004 stmt_vector_for_cost
*cost_vec
)
9007 tree vec_dest
= NULL
;
9008 tree data_ref
= NULL
;
9009 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
9010 class loop
*loop
= NULL
;
9011 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
9012 bool nested_in_vect_loop
= false;
9017 tree dataref_ptr
= NULL_TREE
;
9018 tree dataref_offset
= NULL_TREE
;
9019 gimple
*ptr_incr
= NULL
;
9022 unsigned int group_size
;
9023 poly_uint64 group_gap_adj
;
9024 tree msq
= NULL_TREE
, lsq
;
9025 tree realignment_token
= NULL_TREE
;
9027 vec
<tree
> dr_chain
= vNULL
;
9028 bool grouped_load
= false;
9029 stmt_vec_info first_stmt_info
;
9030 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
9031 bool compute_in_loop
= false;
9032 class loop
*at_loop
;
9034 bool slp
= (slp_node
!= NULL
);
9035 bool slp_perm
= false;
9036 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
9039 gather_scatter_info gs_info
;
9041 enum vect_def_type mask_dt
= vect_unknown_def_type
;
9043 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9046 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9050 if (!STMT_VINFO_DATA_REF (stmt_info
))
9053 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
9054 int mask_index
= -1;
9055 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
9057 scalar_dest
= gimple_assign_lhs (assign
);
9058 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
9061 tree_code code
= gimple_assign_rhs_code (assign
);
9062 if (code
!= ARRAY_REF
9063 && code
!= BIT_FIELD_REF
9064 && code
!= INDIRECT_REF
9065 && code
!= COMPONENT_REF
9066 && code
!= IMAGPART_EXPR
9067 && code
!= REALPART_EXPR
9069 && TREE_CODE_CLASS (code
) != tcc_declaration
)
9074 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
9075 if (!call
|| !gimple_call_internal_p (call
))
9078 internal_fn ifn
= gimple_call_internal_fn (call
);
9079 if (!internal_load_fn_p (ifn
))
9082 scalar_dest
= gimple_call_lhs (call
);
9086 mask_index
= internal_fn_mask_index (ifn
);
9087 /* ??? For SLP the mask operand is always last. */
9088 if (mask_index
>= 0 && slp_node
)
9089 mask_index
= SLP_TREE_CHILDREN (slp_node
).length () - 1;
9091 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
9092 &mask
, NULL
, &mask_dt
, &mask_vectype
))
9096 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9097 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
9101 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
9102 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
9103 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
9108 /* Multiple types in SLP are handled by creating the appropriate number of
9109 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9114 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9116 gcc_assert (ncopies
>= 1);
9118 /* FORNOW. This restriction should be relaxed. */
9119 if (nested_in_vect_loop
&& ncopies
> 1)
9121 if (dump_enabled_p ())
9122 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9123 "multiple types in nested loop.\n");
9127 /* Invalidate assumptions made by dependence analysis when vectorization
9128 on the unrolled body effectively re-orders stmts. */
9130 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9131 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9132 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9134 if (dump_enabled_p ())
9135 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9136 "cannot perform implicit CSE when unrolling "
9137 "with negative dependence distance\n");
9141 elem_type
= TREE_TYPE (vectype
);
9142 mode
= TYPE_MODE (vectype
);
9144 /* FORNOW. In some cases can vectorize even if data-type not supported
9145 (e.g. - data copies). */
9146 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
9148 if (dump_enabled_p ())
9149 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9150 "Aligned load, but unsupported type.\n");
9154 /* Check if the load is a part of an interleaving chain. */
9155 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
9157 grouped_load
= true;
9159 gcc_assert (!nested_in_vect_loop
);
9160 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
9162 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9163 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9165 /* Refuse non-SLP vectorization of SLP-only groups. */
9166 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
9168 if (dump_enabled_p ())
9169 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9170 "cannot vectorize load in non-SLP mode.\n");
9174 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9180 /* In BB vectorization we may not actually use a loaded vector
9181 accessing elements in excess of DR_GROUP_SIZE. */
9182 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9183 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
9184 unsigned HOST_WIDE_INT nunits
;
9185 unsigned j
, k
, maxk
= 0;
9186 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
9189 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
9190 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
9191 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
9193 if (dump_enabled_p ())
9194 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9195 "BB vectorization with gaps at the end of "
9196 "a load is not supported\n");
9203 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
9206 if (dump_enabled_p ())
9207 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
9209 "unsupported load permutation\n");
9214 /* Invalidate assumptions made by dependence analysis when vectorization
9215 on the unrolled body effectively re-orders stmts. */
9216 if (!PURE_SLP_STMT (stmt_info
)
9217 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9218 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9219 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9221 if (dump_enabled_p ())
9222 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9223 "cannot perform implicit CSE when performing "
9224 "group loads with negative dependence distance\n");
9231 vect_memory_access_type memory_access_type
;
9232 enum dr_alignment_support alignment_support_scheme
;
9235 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
9236 ncopies
, &memory_access_type
, &poffset
,
9237 &alignment_support_scheme
, &misalignment
, &gs_info
))
9242 if (memory_access_type
== VMAT_CONTIGUOUS
)
9244 machine_mode vec_mode
= TYPE_MODE (vectype
);
9245 if (!VECTOR_MODE_P (vec_mode
)
9246 || !can_vec_mask_load_store_p (vec_mode
,
9247 TYPE_MODE (mask_vectype
), true))
9250 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
9251 && memory_access_type
!= VMAT_GATHER_SCATTER
)
9253 if (dump_enabled_p ())
9254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9255 "unsupported access type for masked load.\n");
9258 else if (memory_access_type
== VMAT_GATHER_SCATTER
9259 && gs_info
.ifn
== IFN_LAST
9262 if (dump_enabled_p ())
9263 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9264 "unsupported masked emulated gather.\n");
9269 if (!vec_stmt
) /* transformation not required. */
9273 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
9276 if (dump_enabled_p ())
9277 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9278 "incompatible vector types for invariants\n");
9283 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
9286 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
9287 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
9288 VLS_LOAD
, group_size
,
9289 memory_access_type
, &gs_info
,
9292 if (dump_enabled_p ()
9293 && memory_access_type
!= VMAT_ELEMENTWISE
9294 && memory_access_type
!= VMAT_GATHER_SCATTER
9295 && alignment_support_scheme
!= dr_aligned
)
9296 dump_printf_loc (MSG_NOTE
, vect_location
,
9297 "Vectorizing an unaligned access.\n");
9299 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9300 vinfo
->any_known_not_updated_vssa
= true;
9302 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
9303 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
9304 alignment_support_scheme
, misalignment
,
9305 &gs_info
, slp_node
, cost_vec
);
9310 gcc_assert (memory_access_type
9311 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
9313 if (dump_enabled_p ())
9314 dump_printf_loc (MSG_NOTE
, vect_location
,
9315 "transform load. ncopies = %d\n", ncopies
);
9319 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
9320 ensure_base_align (dr_info
);
9322 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
9324 vect_build_gather_load_calls (vinfo
,
9325 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
9329 if (memory_access_type
== VMAT_INVARIANT
)
9331 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
9332 /* If we have versioned for aliasing or the loop doesn't
9333 have any data dependencies that would preclude this,
9334 then we are sure this is a loop invariant load and
9335 thus we can insert it on the preheader edge. */
9336 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
9337 && !nested_in_vect_loop
9338 && hoist_defs_of_uses (stmt_info
, loop
));
9341 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
9342 if (dump_enabled_p ())
9343 dump_printf_loc (MSG_NOTE
, vect_location
,
9344 "hoisting out of the vectorized loop: %G",
9346 scalar_dest
= copy_ssa_name (scalar_dest
);
9347 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
9348 edge pe
= loop_preheader_edge (loop
);
9349 gphi
*vphi
= get_virtual_phi (loop
->header
);
9352 vuse
= PHI_ARG_DEF_FROM_EDGE (vphi
, pe
);
9354 vuse
= gimple_vuse (gsi_stmt (*gsi
));
9355 gimple
*new_stmt
= gimple_build_assign (scalar_dest
, rhs
);
9356 gimple_set_vuse (new_stmt
, vuse
);
9357 gsi_insert_on_edge_immediate (pe
, new_stmt
);
9359 /* These copies are all equivalent, but currently the representation
9360 requires a separate STMT_VINFO_VEC_STMT for each one. */
9361 gimple_stmt_iterator gsi2
= *gsi
;
9363 for (j
= 0; j
< ncopies
; j
++)
9366 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
9369 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
9371 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9373 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9377 *vec_stmt
= new_stmt
;
9378 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9384 if (memory_access_type
== VMAT_ELEMENTWISE
9385 || memory_access_type
== VMAT_STRIDED_SLP
)
9387 gimple_stmt_iterator incr_gsi
;
9392 vec
<constructor_elt
, va_gc
> *v
= NULL
;
9393 tree stride_base
, stride_step
, alias_off
;
9394 /* Checked by get_load_store_type. */
9395 unsigned int const_nunits
= nunits
.to_constant ();
9396 unsigned HOST_WIDE_INT cst_offset
= 0;
9399 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
9400 gcc_assert (!nested_in_vect_loop
);
9404 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9405 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9409 first_stmt_info
= stmt_info
;
9410 first_dr_info
= dr_info
;
9412 if (slp
&& grouped_load
)
9414 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9415 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9421 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
9422 * vect_get_place_in_interleaving_chain (stmt_info
,
9425 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
9428 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
9430 = fold_build_pointer_plus
9431 (DR_BASE_ADDRESS (first_dr_info
->dr
),
9432 size_binop (PLUS_EXPR
,
9433 convert_to_ptrofftype (dr_offset
),
9434 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
9435 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
9437 /* For a load with loop-invariant (but other than power-of-2)
9438 stride (i.e. not a grouped access) like so:
9440 for (i = 0; i < n; i += stride)
9443 we generate a new induction variable and new accesses to
9444 form a new vector (or vectors, depending on ncopies):
9446 for (j = 0; ; j += VF*stride)
9448 tmp2 = array[j + stride];
9450 vectemp = {tmp1, tmp2, ...}
9453 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
9454 build_int_cst (TREE_TYPE (stride_step
), vf
));
9456 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
9458 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
9459 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
9460 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
,
9461 loop
, &incr_gsi
, insert_after
,
9464 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9466 running_off
= offvar
;
9467 alias_off
= build_int_cst (ref_type
, 0);
9468 int nloads
= const_nunits
;
9470 tree ltype
= TREE_TYPE (vectype
);
9471 tree lvectype
= vectype
;
9472 auto_vec
<tree
> dr_chain
;
9473 if (memory_access_type
== VMAT_STRIDED_SLP
)
9475 if (group_size
< const_nunits
)
9477 /* First check if vec_init optab supports construction from vector
9478 elts directly. Otherwise avoid emitting a constructor of
9479 vector elements by performing the loads using an integer type
9480 of the same size, constructing a vector of those and then
9481 re-interpreting it as the original vector type. This avoids a
9482 huge runtime penalty due to the general inability to perform
9483 store forwarding from smaller stores to a larger load. */
9486 = vector_vector_composition_type (vectype
,
9487 const_nunits
/ group_size
,
9489 if (vtype
!= NULL_TREE
)
9491 nloads
= const_nunits
/ group_size
;
9500 lnel
= const_nunits
;
9503 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9505 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9506 else if (nloads
== 1)
9511 /* For SLP permutation support we need to load the whole group,
9512 not only the number of vector stmts the permutation result
9516 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9518 unsigned int const_vf
= vf
.to_constant ();
9519 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9520 dr_chain
.create (ncopies
);
9523 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9525 unsigned int group_el
= 0;
9526 unsigned HOST_WIDE_INT
9527 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9528 unsigned int n_groups
= 0;
9529 for (j
= 0; j
< ncopies
; j
++)
9532 vec_alloc (v
, nloads
);
9533 gimple
*new_stmt
= NULL
;
9534 for (i
= 0; i
< nloads
; i
++)
9536 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9537 group_el
* elsz
+ cst_offset
);
9538 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9539 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9540 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9541 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9543 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9544 gimple_assign_lhs (new_stmt
));
9548 || group_el
== group_size
)
9551 /* When doing SLP make sure to not load elements from
9552 the next vector iteration, those will not be accessed
9553 so just use the last element again. See PR107451. */
9554 if (!slp
|| known_lt (n_groups
, vf
))
9556 tree newoff
= copy_ssa_name (running_off
);
9558 = gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9559 running_off
, stride_step
);
9560 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9561 running_off
= newoff
;
9568 tree vec_inv
= build_constructor (lvectype
, v
);
9569 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9570 vec_inv
, lvectype
, gsi
);
9571 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9572 if (lvectype
!= vectype
)
9574 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
9576 build1 (VIEW_CONVERT_EXPR
,
9577 vectype
, new_temp
));
9578 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9585 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
9587 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9592 *vec_stmt
= new_stmt
;
9593 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9599 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9605 if (memory_access_type
== VMAT_GATHER_SCATTER
9606 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9607 grouped_load
= false;
9611 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9612 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9613 /* For SLP vectorization we directly vectorize a subchain
9614 without permutation. */
9615 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9616 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9617 /* For BB vectorization always use the first stmt to base
9618 the data ref pointer on. */
9620 first_stmt_info_for_drptr
9621 = vect_find_first_scalar_stmt_in_slp (slp_node
);
9623 /* Check if the chain of loads is already vectorized. */
9624 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
9625 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9626 ??? But we can only do so if there is exactly one
9627 as we have no way to get at the rest. Leave the CSE
9629 ??? With the group load eventually participating
9630 in multiple different permutations (having multiple
9631 slp nodes which refer to the same group) the CSE
9632 is even wrong code. See PR56270. */
9635 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9638 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9641 /* VEC_NUM is the number of vect stmts to be created for this group. */
9644 grouped_load
= false;
9645 /* If an SLP permutation is from N elements to N elements,
9646 and if one vector holds a whole number of N, we can load
9647 the inputs to the permutation in the same way as an
9648 unpermuted sequence. In other cases we need to load the
9649 whole group, not only the number of vector stmts the
9650 permutation result fits in. */
9651 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9653 && (group_size
!= scalar_lanes
9654 || !multiple_p (nunits
, group_size
)))
9656 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9657 variable VF; see vect_transform_slp_perm_load. */
9658 unsigned int const_vf
= vf
.to_constant ();
9659 unsigned int const_nunits
= nunits
.to_constant ();
9660 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9661 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9665 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9667 = group_size
- scalar_lanes
;
9671 vec_num
= group_size
;
9673 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9677 first_stmt_info
= stmt_info
;
9678 first_dr_info
= dr_info
;
9679 group_size
= vec_num
= 1;
9681 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9683 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9686 gcc_assert (alignment_support_scheme
);
9687 vec_loop_masks
*loop_masks
9688 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9689 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9691 vec_loop_lens
*loop_lens
9692 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
9693 ? &LOOP_VINFO_LENS (loop_vinfo
)
9696 /* Shouldn't go with length-based approach if fully masked. */
9697 gcc_assert (!loop_lens
|| !loop_masks
);
9699 /* Targets with store-lane instructions must not require explicit
9700 realignment. vect_supportable_dr_alignment always returns either
9701 dr_aligned or dr_unaligned_supported for masked operations. */
9702 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9705 || alignment_support_scheme
== dr_aligned
9706 || alignment_support_scheme
== dr_unaligned_supported
);
9708 /* In case the vectorization factor (VF) is bigger than the number
9709 of elements that we can fit in a vectype (nunits), we have to generate
9710 more than one vector stmt - i.e - we need to "unroll" the
9711 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9712 from one copy of the vector stmt to the next, in the field
9713 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9714 stages to find the correct vector defs to be used when vectorizing
9715 stmts that use the defs of the current stmt. The example below
9716 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9717 need to create 4 vectorized stmts):
9719 before vectorization:
9720 RELATED_STMT VEC_STMT
9724 step 1: vectorize stmt S1:
9725 We first create the vector stmt VS1_0, and, as usual, record a
9726 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9727 Next, we create the vector stmt VS1_1, and record a pointer to
9728 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9729 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9731 RELATED_STMT VEC_STMT
9732 VS1_0: vx0 = memref0 VS1_1 -
9733 VS1_1: vx1 = memref1 VS1_2 -
9734 VS1_2: vx2 = memref2 VS1_3 -
9735 VS1_3: vx3 = memref3 - -
9736 S1: x = load - VS1_0
9740 /* In case of interleaving (non-unit grouped access):
9747 Vectorized loads are created in the order of memory accesses
9748 starting from the access of the first stmt of the chain:
9751 VS2: vx1 = &base + vec_size*1
9752 VS3: vx3 = &base + vec_size*2
9753 VS4: vx4 = &base + vec_size*3
9755 Then permutation statements are generated:
9757 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9758 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9761 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9762 (the order of the data-refs in the output of vect_permute_load_chain
9763 corresponds to the order of scalar stmts in the interleaving chain - see
9764 the documentation of vect_permute_load_chain()).
9765 The generation of permutation stmts and recording them in
9766 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9768 In case of both multiple types and interleaving, the vector loads and
9769 permutation stmts above are created for every copy. The result vector
9770 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9771 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9773 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9774 on a target that supports unaligned accesses (dr_unaligned_supported)
9775 we generate the following code:
9779 p = p + indx * vectype_size;
9784 Otherwise, the data reference is potentially unaligned on a target that
9785 does not support unaligned accesses (dr_explicit_realign_optimized) -
9786 then generate the following code, in which the data in each iteration is
9787 obtained by two vector loads, one from the previous iteration, and one
9788 from the current iteration:
9790 msq_init = *(floor(p1))
9791 p2 = initial_addr + VS - 1;
9792 realignment_token = call target_builtin;
9795 p2 = p2 + indx * vectype_size
9797 vec_dest = realign_load (msq, lsq, realignment_token)
9802 /* If the misalignment remains the same throughout the execution of the
9803 loop, we can create the init_addr and permutation mask at the loop
9804 preheader. Otherwise, it needs to be created inside the loop.
9805 This can only occur when vectorizing memory accesses in the inner-loop
9806 nested within an outer-loop that is being vectorized. */
9808 if (nested_in_vect_loop
9809 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9810 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9812 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9813 compute_in_loop
= true;
9816 bool diff_first_stmt_info
9817 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9819 tree offset
= NULL_TREE
;
9820 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9821 || alignment_support_scheme
== dr_explicit_realign
)
9822 && !compute_in_loop
)
9824 /* If we have different first_stmt_info, we can't set up realignment
9825 here, since we can't guarantee first_stmt_info DR has been
9826 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9827 distance from first_stmt_info DR instead as below. */
9828 if (!diff_first_stmt_info
)
9829 msq
= vect_setup_realignment (vinfo
,
9830 first_stmt_info
, gsi
, &realignment_token
,
9831 alignment_support_scheme
, NULL_TREE
,
9833 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9835 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9836 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9838 gcc_assert (!first_stmt_info_for_drptr
);
9844 if (!known_eq (poffset
, 0))
9846 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
9847 : size_int (poffset
));
9850 tree vec_offset
= NULL_TREE
;
9851 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9853 aggr_type
= NULL_TREE
;
9856 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9858 aggr_type
= elem_type
;
9859 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9860 &bump
, &vec_offset
);
9864 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9865 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9867 aggr_type
= vectype
;
9868 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9869 memory_access_type
);
9872 auto_vec
<tree
> vec_offsets
;
9873 auto_vec
<tree
> vec_masks
;
9877 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
9880 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
9881 &vec_masks
, mask_vectype
);
9883 tree vec_mask
= NULL_TREE
;
9884 poly_uint64 group_elt
= 0;
9885 for (j
= 0; j
< ncopies
; j
++)
9887 /* 1. Create the vector or array pointer update chain. */
9890 bool simd_lane_access_p
9891 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9892 if (simd_lane_access_p
9893 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9894 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9895 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9896 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9897 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9898 get_alias_set (TREE_TYPE (ref_type
)))
9899 && (alignment_support_scheme
== dr_aligned
9900 || alignment_support_scheme
== dr_unaligned_supported
))
9902 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9903 dataref_offset
= build_int_cst (ref_type
, 0);
9905 else if (diff_first_stmt_info
)
9908 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9909 aggr_type
, at_loop
, offset
, &dummy
,
9910 gsi
, &ptr_incr
, simd_lane_access_p
,
9912 /* Adjust the pointer by the difference to first_stmt. */
9913 data_reference_p ptrdr
9914 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9916 = fold_convert (sizetype
,
9917 size_binop (MINUS_EXPR
,
9918 DR_INIT (first_dr_info
->dr
),
9920 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9922 if (alignment_support_scheme
== dr_explicit_realign
)
9924 msq
= vect_setup_realignment (vinfo
,
9925 first_stmt_info_for_drptr
, gsi
,
9927 alignment_support_scheme
,
9928 dataref_ptr
, &at_loop
);
9929 gcc_assert (!compute_in_loop
);
9932 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9934 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9935 slp_node
, &gs_info
, &dataref_ptr
,
9940 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9942 offset
, &dummy
, gsi
, &ptr_incr
,
9943 simd_lane_access_p
, bump
);
9945 vec_mask
= vec_masks
[0];
9950 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9952 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9953 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9956 vec_mask
= vec_masks
[j
];
9959 if (grouped_load
|| slp_perm
)
9960 dr_chain
.create (vec_num
);
9962 gimple
*new_stmt
= NULL
;
9963 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9967 vec_array
= create_vector_array (vectype
, vec_num
);
9969 tree final_mask
= NULL_TREE
;
9971 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9974 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9975 final_mask
, vec_mask
, gsi
);
9981 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9983 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9984 tree alias_ptr
= build_int_cst (ref_type
, align
);
9985 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9986 dataref_ptr
, alias_ptr
,
9992 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9993 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9994 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9996 gimple_call_set_lhs (call
, vec_array
);
9997 gimple_call_set_nothrow (call
, true);
9998 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
10001 /* Extract each vector into an SSA_NAME. */
10002 for (i
= 0; i
< vec_num
; i
++)
10004 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
10006 dr_chain
.quick_push (new_temp
);
10009 /* Record the mapping between SSA_NAMEs and statements. */
10010 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
10012 /* Record that VEC_ARRAY is now dead. */
10013 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
10017 for (i
= 0; i
< vec_num
; i
++)
10019 tree final_mask
= NULL_TREE
;
10021 && memory_access_type
!= VMAT_INVARIANT
)
10022 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
10024 vectype
, vec_num
* j
+ i
);
10026 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
10027 final_mask
, vec_mask
, gsi
);
10029 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10030 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10031 gsi
, stmt_info
, bump
);
10033 /* 2. Create the vector-load in the loop. */
10034 switch (alignment_support_scheme
)
10037 case dr_unaligned_supported
:
10039 unsigned int misalign
;
10040 unsigned HOST_WIDE_INT align
;
10042 if (memory_access_type
== VMAT_GATHER_SCATTER
10043 && gs_info
.ifn
!= IFN_LAST
)
10045 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10046 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
10047 tree zero
= build_zero_cst (vectype
);
10048 tree scale
= size_int (gs_info
.scale
);
10051 call
= gimple_build_call_internal
10052 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
10053 vec_offset
, scale
, zero
, final_mask
);
10055 call
= gimple_build_call_internal
10056 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
10057 vec_offset
, scale
, zero
);
10058 gimple_call_set_nothrow (call
, true);
10060 data_ref
= NULL_TREE
;
10063 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
10065 /* Emulated gather-scatter. */
10066 gcc_assert (!final_mask
);
10067 unsigned HOST_WIDE_INT const_nunits
10068 = nunits
.to_constant ();
10069 unsigned HOST_WIDE_INT const_offset_nunits
10070 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
10072 vec
<constructor_elt
, va_gc
> *ctor_elts
;
10073 vec_alloc (ctor_elts
, const_nunits
);
10074 gimple_seq stmts
= NULL
;
10075 /* We support offset vectors with more elements
10076 than the data vector for now. */
10077 unsigned HOST_WIDE_INT factor
10078 = const_offset_nunits
/ const_nunits
;
10079 vec_offset
= vec_offsets
[j
/ factor
];
10080 unsigned elt_offset
= (j
% factor
) * const_nunits
;
10081 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
10082 tree scale
= size_int (gs_info
.scale
);
10084 = get_object_alignment (DR_REF (first_dr_info
->dr
));
10085 tree ltype
= build_aligned_type (TREE_TYPE (vectype
),
10087 for (unsigned k
= 0; k
< const_nunits
; ++k
)
10089 tree boff
= size_binop (MULT_EXPR
,
10090 TYPE_SIZE (idx_type
),
10093 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
10094 idx_type
, vec_offset
,
10095 TYPE_SIZE (idx_type
),
10097 idx
= gimple_convert (&stmts
, sizetype
, idx
);
10098 idx
= gimple_build (&stmts
, MULT_EXPR
,
10099 sizetype
, idx
, scale
);
10100 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
10101 TREE_TYPE (dataref_ptr
),
10103 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
10104 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
10105 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
10106 build_int_cst (ref_type
, 0));
10107 new_stmt
= gimple_build_assign (elt
, ref
);
10108 gimple_set_vuse (new_stmt
,
10109 gimple_vuse (gsi_stmt (*gsi
)));
10110 gimple_seq_add_stmt (&stmts
, new_stmt
);
10111 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
10113 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
10114 new_stmt
= gimple_build_assign (NULL_TREE
,
10116 (vectype
, ctor_elts
));
10117 data_ref
= NULL_TREE
;
10122 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
10123 if (alignment_support_scheme
== dr_aligned
)
10125 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
10127 align
= dr_alignment
10128 (vect_dr_behavior (vinfo
, first_dr_info
));
10132 misalign
= misalignment
;
10133 if (dataref_offset
== NULL_TREE
10134 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
10135 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
10137 align
= least_bit_hwi (misalign
| align
);
10141 tree ptr
= build_int_cst (ref_type
,
10142 align
* BITS_PER_UNIT
);
10144 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
10147 gimple_call_set_nothrow (call
, true);
10149 data_ref
= NULL_TREE
;
10151 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
10154 = vect_get_loop_len (loop_vinfo
, loop_lens
,
10157 tree ptr
= build_int_cst (ref_type
,
10158 align
* BITS_PER_UNIT
);
10160 machine_mode vmode
= TYPE_MODE (vectype
);
10161 opt_machine_mode new_ovmode
10162 = get_len_load_store_mode (vmode
, true);
10163 machine_mode new_vmode
= new_ovmode
.require ();
10164 tree qi_type
= unsigned_intQI_type_node
;
10166 signed char biasval
=
10167 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10169 tree bias
= build_int_cst (intQI_type_node
, biasval
);
10172 = gimple_build_call_internal (IFN_LEN_LOAD
, 4,
10175 gimple_call_set_nothrow (call
, true);
10177 data_ref
= NULL_TREE
;
10179 /* Need conversion if it's wrapped with VnQI. */
10180 if (vmode
!= new_vmode
)
10183 = build_vector_type_for_mode (qi_type
, new_vmode
);
10184 tree var
= vect_get_new_ssa_name (new_vtype
,
10186 gimple_set_lhs (call
, var
);
10187 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
10189 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
10191 = gimple_build_assign (vec_dest
,
10192 VIEW_CONVERT_EXPR
, op
);
10197 tree ltype
= vectype
;
10198 tree new_vtype
= NULL_TREE
;
10199 unsigned HOST_WIDE_INT gap
10200 = DR_GROUP_GAP (first_stmt_info
);
10201 unsigned int vect_align
10202 = vect_known_alignment_in_bytes (first_dr_info
,
10204 unsigned int scalar_dr_size
10205 = vect_get_scalar_dr_size (first_dr_info
);
10206 /* If there's no peeling for gaps but we have a gap
10207 with slp loads then load the lower half of the
10208 vector only. See get_group_load_store_type for
10209 when we apply this optimization. */
10212 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
10214 && known_eq (nunits
, (group_size
- gap
) * 2)
10215 && known_eq (nunits
, group_size
)
10216 && gap
>= (vect_align
/ scalar_dr_size
))
10220 = vector_vector_composition_type (vectype
, 2,
10222 if (new_vtype
!= NULL_TREE
)
10223 ltype
= half_vtype
;
10226 = (dataref_offset
? dataref_offset
10227 : build_int_cst (ref_type
, 0));
10228 if (ltype
!= vectype
10229 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
10231 unsigned HOST_WIDE_INT gap_offset
10232 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
10233 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
10234 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
10237 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
10238 if (alignment_support_scheme
== dr_aligned
)
10241 TREE_TYPE (data_ref
)
10242 = build_aligned_type (TREE_TYPE (data_ref
),
10243 align
* BITS_PER_UNIT
);
10244 if (ltype
!= vectype
)
10246 vect_copy_ref_info (data_ref
,
10247 DR_REF (first_dr_info
->dr
));
10248 tree tem
= make_ssa_name (ltype
);
10249 new_stmt
= gimple_build_assign (tem
, data_ref
);
10250 vect_finish_stmt_generation (vinfo
, stmt_info
,
10253 vec
<constructor_elt
, va_gc
> *v
;
10255 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
10257 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
10258 build_zero_cst (ltype
));
10259 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
10263 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
10264 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
10265 build_zero_cst (ltype
));
10267 gcc_assert (new_vtype
!= NULL_TREE
);
10268 if (new_vtype
== vectype
)
10269 new_stmt
= gimple_build_assign (
10270 vec_dest
, build_constructor (vectype
, v
));
10273 tree new_vname
= make_ssa_name (new_vtype
);
10274 new_stmt
= gimple_build_assign (
10275 new_vname
, build_constructor (new_vtype
, v
));
10276 vect_finish_stmt_generation (vinfo
, stmt_info
,
10278 new_stmt
= gimple_build_assign (
10279 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
10286 case dr_explicit_realign
:
10290 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
10292 if (compute_in_loop
)
10293 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
10294 &realignment_token
,
10295 dr_explicit_realign
,
10296 dataref_ptr
, NULL
);
10298 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
10299 ptr
= copy_ssa_name (dataref_ptr
);
10301 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
10302 // For explicit realign the target alignment should be
10303 // known at compile time.
10304 unsigned HOST_WIDE_INT align
=
10305 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
10306 new_stmt
= gimple_build_assign
10307 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
10309 (TREE_TYPE (dataref_ptr
),
10310 -(HOST_WIDE_INT
) align
));
10311 vect_finish_stmt_generation (vinfo
, stmt_info
,
10314 = build2 (MEM_REF
, vectype
, ptr
,
10315 build_int_cst (ref_type
, 0));
10316 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10317 vec_dest
= vect_create_destination_var (scalar_dest
,
10319 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
10320 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10321 gimple_assign_set_lhs (new_stmt
, new_temp
);
10322 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
10323 vect_finish_stmt_generation (vinfo
, stmt_info
,
10327 bump
= size_binop (MULT_EXPR
, vs
,
10328 TYPE_SIZE_UNIT (elem_type
));
10329 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
10330 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
10332 new_stmt
= gimple_build_assign
10333 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
10335 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
10336 if (TREE_CODE (ptr
) == SSA_NAME
)
10337 ptr
= copy_ssa_name (ptr
, new_stmt
);
10339 ptr
= make_ssa_name (TREE_TYPE (ptr
), new_stmt
);
10340 gimple_assign_set_lhs (new_stmt
, ptr
);
10341 vect_finish_stmt_generation (vinfo
, stmt_info
,
10344 = build2 (MEM_REF
, vectype
, ptr
,
10345 build_int_cst (ref_type
, 0));
10348 case dr_explicit_realign_optimized
:
10350 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
10351 new_temp
= copy_ssa_name (dataref_ptr
);
10353 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
10354 // We should only be doing this if we know the target
10355 // alignment at compile time.
10356 unsigned HOST_WIDE_INT align
=
10357 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
10358 new_stmt
= gimple_build_assign
10359 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
10360 build_int_cst (TREE_TYPE (dataref_ptr
),
10361 -(HOST_WIDE_INT
) align
));
10362 vect_finish_stmt_generation (vinfo
, stmt_info
,
10365 = build2 (MEM_REF
, vectype
, new_temp
,
10366 build_int_cst (ref_type
, 0));
10370 gcc_unreachable ();
10372 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10373 /* DATA_REF is null if we've already built the statement. */
10376 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10377 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
10379 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10380 gimple_set_lhs (new_stmt
, new_temp
);
10381 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10383 /* 3. Handle explicit realignment if necessary/supported.
10385 vec_dest = realign_load (msq, lsq, realignment_token) */
10386 if (alignment_support_scheme
== dr_explicit_realign_optimized
10387 || alignment_support_scheme
== dr_explicit_realign
)
10389 lsq
= gimple_assign_lhs (new_stmt
);
10390 if (!realignment_token
)
10391 realignment_token
= dataref_ptr
;
10392 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10393 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
10394 msq
, lsq
, realignment_token
);
10395 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10396 gimple_assign_set_lhs (new_stmt
, new_temp
);
10397 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10399 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10402 if (i
== vec_num
- 1 && j
== ncopies
- 1)
10403 add_phi_arg (phi
, lsq
,
10404 loop_latch_edge (containing_loop
),
10410 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
10412 tree perm_mask
= perm_mask_for_reverse (vectype
);
10413 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
10414 perm_mask
, stmt_info
, gsi
);
10415 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10418 /* Collect vector loads and later create their permutation in
10419 vect_transform_grouped_load (). */
10420 if (grouped_load
|| slp_perm
)
10421 dr_chain
.quick_push (new_temp
);
10423 /* Store vector loads in the corresponding SLP_NODE. */
10424 if (slp
&& !slp_perm
)
10425 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10427 /* With SLP permutation we load the gaps as well, without
10428 we need to skip the gaps after we manage to fully load
10429 all elements. group_gap_adj is DR_GROUP_SIZE here. */
10430 group_elt
+= nunits
;
10431 if (maybe_ne (group_gap_adj
, 0U)
10433 && known_eq (group_elt
, group_size
- group_gap_adj
))
10435 poly_wide_int bump_val
10436 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10438 if (tree_int_cst_sgn
10439 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10440 bump_val
= -bump_val
;
10441 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10442 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10443 gsi
, stmt_info
, bump
);
10447 /* Bump the vector pointer to account for a gap or for excess
10448 elements loaded for a permuted SLP load. */
10449 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
10451 poly_wide_int bump_val
10452 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10454 if (tree_int_cst_sgn
10455 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10456 bump_val
= -bump_val
;
10457 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10458 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10463 if (slp
&& !slp_perm
)
10469 /* For SLP we know we've seen all possible uses of dr_chain so
10470 direct vect_transform_slp_perm_load to DCE the unused parts.
10471 ??? This is a hack to prevent compile-time issues as seen
10472 in PR101120 and friends. */
10473 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
10474 gsi
, vf
, false, &n_perms
,
10482 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
10483 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
10485 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10489 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10492 dr_chain
.release ();
10495 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10500 /* Function vect_is_simple_cond.
10503 LOOP - the loop that is being vectorized.
10504 COND - Condition that is checked for simple use.
10507 *COMP_VECTYPE - the vector type for the comparison.
10508 *DTS - The def types for the arguments of the comparison
10510 Returns whether a COND can be vectorized. Checks whether
10511 condition operands are supportable using vec_is_simple_use. */
10514 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
10515 slp_tree slp_node
, tree
*comp_vectype
,
10516 enum vect_def_type
*dts
, tree vectype
)
10519 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10523 if (TREE_CODE (cond
) == SSA_NAME
10524 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
10526 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
10527 &slp_op
, &dts
[0], comp_vectype
)
10529 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
10534 if (!COMPARISON_CLASS_P (cond
))
10537 lhs
= TREE_OPERAND (cond
, 0);
10538 rhs
= TREE_OPERAND (cond
, 1);
10540 if (TREE_CODE (lhs
) == SSA_NAME
)
10542 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
10543 &lhs
, &slp_op
, &dts
[0], &vectype1
))
10546 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10547 || TREE_CODE (lhs
) == FIXED_CST
)
10548 dts
[0] = vect_constant_def
;
10552 if (TREE_CODE (rhs
) == SSA_NAME
)
10554 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
10555 &rhs
, &slp_op
, &dts
[1], &vectype2
))
10558 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10559 || TREE_CODE (rhs
) == FIXED_CST
)
10560 dts
[1] = vect_constant_def
;
10564 if (vectype1
&& vectype2
10565 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10566 TYPE_VECTOR_SUBPARTS (vectype2
)))
10569 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10570 /* Invariant comparison. */
10571 if (! *comp_vectype
)
10573 tree scalar_type
= TREE_TYPE (lhs
);
10574 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10575 *comp_vectype
= truth_type_for (vectype
);
10578 /* If we can widen the comparison to match vectype do so. */
10579 if (INTEGRAL_TYPE_P (scalar_type
)
10581 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10582 TYPE_SIZE (TREE_TYPE (vectype
))))
10583 scalar_type
= build_nonstandard_integer_type
10584 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10585 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
10593 /* vectorizable_condition.
10595 Check if STMT_INFO is conditional modify expression that can be vectorized.
10596 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10597 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10600 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10602 Return true if STMT_INFO is vectorizable in this way. */
10605 vectorizable_condition (vec_info
*vinfo
,
10606 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10608 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10610 tree scalar_dest
= NULL_TREE
;
10611 tree vec_dest
= NULL_TREE
;
10612 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10613 tree then_clause
, else_clause
;
10614 tree comp_vectype
= NULL_TREE
;
10615 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10616 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10619 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10620 enum vect_def_type dts
[4]
10621 = {vect_unknown_def_type
, vect_unknown_def_type
,
10622 vect_unknown_def_type
, vect_unknown_def_type
};
10626 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10628 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10629 vec
<tree
> vec_oprnds0
= vNULL
;
10630 vec
<tree
> vec_oprnds1
= vNULL
;
10631 vec
<tree
> vec_oprnds2
= vNULL
;
10632 vec
<tree
> vec_oprnds3
= vNULL
;
10634 bool masked
= false;
10636 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10639 /* Is vectorizable conditional operation? */
10640 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10644 code
= gimple_assign_rhs_code (stmt
);
10645 if (code
!= COND_EXPR
)
10648 stmt_vec_info reduc_info
= NULL
;
10649 int reduc_index
= -1;
10650 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10652 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10657 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10658 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10659 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10660 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10661 || reduc_index
!= -1);
10665 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10669 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10670 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10675 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10679 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10683 gcc_assert (ncopies
>= 1);
10684 if (for_reduction
&& ncopies
> 1)
10685 return false; /* FORNOW */
10687 cond_expr
= gimple_assign_rhs1 (stmt
);
10689 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10690 &comp_vectype
, &dts
[0], vectype
)
10694 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10695 slp_tree then_slp_node
, else_slp_node
;
10696 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10697 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10699 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10700 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10703 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10706 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10709 masked
= !COMPARISON_CLASS_P (cond_expr
);
10710 vec_cmp_type
= truth_type_for (comp_vectype
);
10712 if (vec_cmp_type
== NULL_TREE
)
10715 cond_code
= TREE_CODE (cond_expr
);
10718 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10719 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10722 /* For conditional reductions, the "then" value needs to be the candidate
10723 value calculated by this iteration while the "else" value needs to be
10724 the result carried over from previous iterations. If the COND_EXPR
10725 is the other way around, we need to swap it. */
10726 bool must_invert_cmp_result
= false;
10727 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10730 must_invert_cmp_result
= true;
10733 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10734 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10735 if (new_code
== ERROR_MARK
)
10736 must_invert_cmp_result
= true;
10739 cond_code
= new_code
;
10740 /* Make sure we don't accidentally use the old condition. */
10741 cond_expr
= NULL_TREE
;
10744 std::swap (then_clause
, else_clause
);
10747 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
10749 /* Boolean values may have another representation in vectors
10750 and therefore we prefer bit operations over comparison for
10751 them (which also works for scalar masks). We store opcodes
10752 to use in bitop1 and bitop2. Statement is vectorized as
10753 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10754 depending on bitop1 and bitop2 arity. */
10758 bitop1
= BIT_NOT_EXPR
;
10759 bitop2
= BIT_AND_EXPR
;
10762 bitop1
= BIT_NOT_EXPR
;
10763 bitop2
= BIT_IOR_EXPR
;
10766 bitop1
= BIT_NOT_EXPR
;
10767 bitop2
= BIT_AND_EXPR
;
10768 std::swap (cond_expr0
, cond_expr1
);
10771 bitop1
= BIT_NOT_EXPR
;
10772 bitop2
= BIT_IOR_EXPR
;
10773 std::swap (cond_expr0
, cond_expr1
);
10776 bitop1
= BIT_XOR_EXPR
;
10779 bitop1
= BIT_XOR_EXPR
;
10780 bitop2
= BIT_NOT_EXPR
;
10785 cond_code
= SSA_NAME
;
10788 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10789 && reduction_type
== EXTRACT_LAST_REDUCTION
10790 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10792 if (dump_enabled_p ())
10793 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10794 "reduction comparison operation not supported.\n");
10800 if (bitop1
!= NOP_EXPR
)
10802 machine_mode mode
= TYPE_MODE (comp_vectype
);
10805 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10806 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10809 if (bitop2
!= NOP_EXPR
)
10811 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10813 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10818 vect_cost_for_stmt kind
= vector_stmt
;
10819 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10820 /* Count one reduction-like operation per vector. */
10821 kind
= vec_to_scalar
;
10822 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10826 && (!vect_maybe_update_slp_op_vectype
10827 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10829 && !vect_maybe_update_slp_op_vectype
10830 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10831 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10832 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10834 if (dump_enabled_p ())
10835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10836 "incompatible vector types for invariants\n");
10840 if (loop_vinfo
&& for_reduction
10841 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10843 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10844 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10845 ncopies
* vec_num
, vectype
, NULL
);
10846 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10847 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
10849 if (dump_enabled_p ())
10850 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10851 "conditional reduction prevents the use"
10852 " of partial vectors.\n");
10853 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
10857 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10858 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10866 scalar_dest
= gimple_assign_lhs (stmt
);
10867 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10868 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10870 bool swap_cond_operands
= false;
10872 /* See whether another part of the vectorized code applies a loop
10873 mask to the condition, or to its inverse. */
10875 vec_loop_masks
*masks
= NULL
;
10876 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10878 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10879 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10882 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10883 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10884 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10887 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10888 tree_code orig_code
= cond
.code
;
10889 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10890 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10892 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10893 cond_code
= cond
.code
;
10894 swap_cond_operands
= true;
10898 /* Try the inverse of the current mask. We check if the
10899 inverse mask is live and if so we generate a negate of
10900 the current mask such that we still honor NaNs. */
10901 cond
.inverted_p
= true;
10902 cond
.code
= orig_code
;
10903 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10905 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10906 cond_code
= cond
.code
;
10907 swap_cond_operands
= true;
10908 must_invert_cmp_result
= true;
10915 /* Handle cond expr. */
10917 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10918 cond_expr
, &vec_oprnds0
, comp_vectype
,
10919 then_clause
, &vec_oprnds2
, vectype
,
10920 reduction_type
!= EXTRACT_LAST_REDUCTION
10921 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10923 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10924 cond_expr0
, &vec_oprnds0
, comp_vectype
,
10925 cond_expr1
, &vec_oprnds1
, comp_vectype
,
10926 then_clause
, &vec_oprnds2
, vectype
,
10927 reduction_type
!= EXTRACT_LAST_REDUCTION
10928 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10930 /* Arguments are ready. Create the new vector stmt. */
10931 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10933 vec_then_clause
= vec_oprnds2
[i
];
10934 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10935 vec_else_clause
= vec_oprnds3
[i
];
10937 if (swap_cond_operands
)
10938 std::swap (vec_then_clause
, vec_else_clause
);
10941 vec_compare
= vec_cond_lhs
;
10944 vec_cond_rhs
= vec_oprnds1
[i
];
10945 if (bitop1
== NOP_EXPR
)
10947 gimple_seq stmts
= NULL
;
10948 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10949 vec_cond_lhs
, vec_cond_rhs
);
10950 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
10954 new_temp
= make_ssa_name (vec_cmp_type
);
10956 if (bitop1
== BIT_NOT_EXPR
)
10957 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10961 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10963 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10964 if (bitop2
== NOP_EXPR
)
10965 vec_compare
= new_temp
;
10966 else if (bitop2
== BIT_NOT_EXPR
10967 && reduction_type
!= EXTRACT_LAST_REDUCTION
)
10969 /* Instead of doing ~x ? y : z do x ? z : y. */
10970 vec_compare
= new_temp
;
10971 std::swap (vec_then_clause
, vec_else_clause
);
10975 vec_compare
= make_ssa_name (vec_cmp_type
);
10976 if (bitop2
== BIT_NOT_EXPR
)
10978 = gimple_build_assign (vec_compare
, bitop2
, new_temp
);
10981 = gimple_build_assign (vec_compare
, bitop2
,
10982 vec_cond_lhs
, new_temp
);
10983 vect_finish_stmt_generation (vinfo
, stmt_info
,
10989 /* If we decided to apply a loop mask to the result of the vector
10990 comparison, AND the comparison with the mask now. Later passes
10991 should then be able to reuse the AND results between multiple
10995 for (int i = 0; i < 100; ++i)
10996 x[i] = y[i] ? z[i] : 10;
10998 results in following optimized GIMPLE:
11000 mask__35.8_43 = vect__4.7_41 != { 0, ... };
11001 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
11002 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
11003 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
11004 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
11005 vect_iftmp.11_47, { 10, ... }>;
11007 instead of using masked and unmasked forms of
11008 vec != { 0, ... } (masked in the MASK_LOAD,
11009 unmasked in the VEC_COND_EXPR). */
11011 /* Force vec_compare to be an SSA_NAME rather than a comparison,
11012 in cases where that's necessary. */
11014 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
11016 if (!is_gimple_val (vec_compare
))
11018 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
11019 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
11021 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11022 vec_compare
= vec_compare_name
;
11025 if (must_invert_cmp_result
)
11027 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
11028 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
11031 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11032 vec_compare
= vec_compare_name
;
11038 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
11040 tree tmp2
= make_ssa_name (vec_cmp_type
);
11042 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
11044 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
11045 vec_compare
= tmp2
;
11050 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
11052 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
11053 tree lhs
= gimple_get_lhs (old_stmt
);
11054 new_stmt
= gimple_build_call_internal
11055 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
11057 gimple_call_set_lhs (new_stmt
, lhs
);
11058 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
11059 if (old_stmt
== gsi_stmt (*gsi
))
11060 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
11063 /* In this case we're moving the definition to later in the
11064 block. That doesn't matter because the only uses of the
11065 lhs are in phi statements. */
11066 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
11067 gsi_remove (&old_gsi
, true);
11068 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11073 new_temp
= make_ssa_name (vec_dest
);
11074 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
11075 vec_then_clause
, vec_else_clause
);
11076 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11079 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
11081 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11085 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11087 vec_oprnds0
.release ();
11088 vec_oprnds1
.release ();
11089 vec_oprnds2
.release ();
11090 vec_oprnds3
.release ();
11095 /* vectorizable_comparison.
11097 Check if STMT_INFO is comparison expression that can be vectorized.
11098 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11099 comparison, put it in VEC_STMT, and insert it at GSI.
11101 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): this extract is line-garbled; gaps in the embedded original
   line numbering (e.g. 11151 -> 11154) show that lines such as early
   "return false;" exits and braces were dropped.  Comments below annotate
   only what is visible here.  */
11104 vectorizable_comparison (vec_info
*vinfo
,
11105 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11107 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
11109 tree lhs
, rhs1
, rhs2
;
11110 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
11111 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11112 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
11114 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
11115 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
11117 poly_uint64 nunits
;
11119 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
11121 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
11122 vec
<tree
> vec_oprnds0
= vNULL
;
11123 vec
<tree
> vec_oprnds1
= vNULL
;
/* Analysis: bail out for statements that are not relevant to loop
   vectorization, unless this is basic-block (SLP) vectorization.  */
11127 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
11130 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
11133 mask_type
= vectype
;
11134 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
11139 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
11141 gcc_assert (ncopies
>= 1);
11142 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
11145 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
11149 code
= gimple_assign_rhs_code (stmt
);
11151 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
11154 slp_tree slp_rhs1
, slp_rhs2
;
11155 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
11156 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
11159 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
11160 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
11163 if (vectype1
&& vectype2
11164 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
11165 TYPE_VECTOR_SUBPARTS (vectype2
)))
11168 vectype
= vectype1
? vectype1
: vectype2
;
11170 /* Invariant comparison. */
11173 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
11174 vectype
= mask_type
;
11176 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
11178 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
11181 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
11184 /* Can't compare mask and non-mask types. */
11185 if (vectype1
&& vectype2
11186 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
11189 /* Boolean values may have another representation in vectors
11190 and therefore we prefer bit operations over comparison for
11191 them (which also works for scalar masks). We store opcodes
11192 to use in bitop1 and bitop2. Statement is vectorized as
11193 BITOP2 (rhs1 BITOP1 rhs2) or
11194 rhs1 BITOP2 (BITOP1 rhs2)
11195 depending on bitop1 and bitop2 arity. */
11196 bool swap_p
= false;
11197 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
11199 if (code
== GT_EXPR
)
11201 bitop1
= BIT_NOT_EXPR
;
11202 bitop2
= BIT_AND_EXPR
;
11204 else if (code
== GE_EXPR
)
11206 bitop1
= BIT_NOT_EXPR
;
11207 bitop2
= BIT_IOR_EXPR
;
11209 else if (code
== LT_EXPR
)
11211 bitop1
= BIT_NOT_EXPR
;
11212 bitop2
= BIT_AND_EXPR
;
11215 else if (code
== LE_EXPR
)
11217 bitop1
= BIT_NOT_EXPR
;
11218 bitop2
= BIT_IOR_EXPR
;
11223 bitop1
= BIT_XOR_EXPR
;
11224 if (code
== EQ_EXPR
)
11225 bitop2
= BIT_NOT_EXPR
;
11231 if (bitop1
== NOP_EXPR
)
11233 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
/* When bit operations are used instead of a direct comparison, check
   that the target provides the required optabs for this vector mode.  */
11238 machine_mode mode
= TYPE_MODE (vectype
);
11241 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
11242 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
11245 if (bitop2
!= NOP_EXPR
)
11247 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
11248 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
11253 /* Put types on constant and invariant SLP children. */
11255 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
11256 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
11258 if (dump_enabled_p ())
11259 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11260 "incompatible vector types for invariants\n");
11264 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
11265 vect_model_simple_cost (vinfo
, stmt_info
,
11266 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
11267 dts
, ndts
, slp_node
, cost_vec
);
/* Transform phase: create the destination mask variable and build the
   vectorized comparison statements.  */
11274 lhs
= gimple_assign_lhs (stmt
);
11275 mask
= vect_create_destination_var (lhs
, mask_type
);
11277 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
11278 rhs1
, &vec_oprnds0
, vectype
,
11279 rhs2
, &vec_oprnds1
, vectype
);
11281 std::swap (vec_oprnds0
, vec_oprnds1
);
11283 /* Arguments are ready. Create the new vector stmt. */
11284 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
11287 vec_rhs2
= vec_oprnds1
[i
];
11289 new_temp
= make_ssa_name (mask
);
11290 if (bitop1
== NOP_EXPR
)
11292 new_stmt
= gimple_build_assign (new_temp
, code
,
11293 vec_rhs1
, vec_rhs2
);
11294 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11298 if (bitop1
== BIT_NOT_EXPR
)
11299 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
11301 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
11303 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11304 if (bitop2
!= NOP_EXPR
)
11306 tree res
= make_ssa_name (mask
);
11307 if (bitop2
== BIT_NOT_EXPR
)
11308 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
11310 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
11312 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Record the generated statement on the SLP node, or on the stmt_info
   for non-SLP vectorization.  */
11316 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
11318 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11322 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11324 vec_oprnds0
.release ();
11325 vec_oprnds1
.release ();
11330 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11331 can handle all live statements in the node. Otherwise return true
11332 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11333 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
/* NOTE(review): garbled extract -- the return statements implied by the
   gaps in the embedded line numbering are missing from this view.  */
11336 can_vectorize_live_stmts (vec_info
*vinfo
,
11337 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11338 slp_tree slp_node
, slp_instance slp_node_instance
,
11340 stmt_vector_for_cost
*cost_vec
)
11344 stmt_vec_info slp_stmt_info
;
/* SLP case: every scalar statement in the node that is live must be
   handled by vectorizable_live_operation.  */
11346 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
11348 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
11349 && !vectorizable_live_operation (vinfo
,
11350 slp_stmt_info
, gsi
, slp_node
,
11351 slp_node_instance
, i
,
11352 vec_stmt_p
, cost_vec
))
/* Non-SLP case: only STMT_INFO itself is checked, with slot index -1.  */
11356 else if (STMT_VINFO_LIVE_P (stmt_info
)
11357 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
11358 slp_node
, slp_node_instance
, -1,
11359 vec_stmt_p
, cost_vec
))
11365 /* Make sure the statement is vectorizable. */
/* NOTE(review): garbled extract -- the embedded original line numbers jump
   (e.g. 11379 -> 11382), so dump arguments, returns and braces are missing
   from this view; annotations below cover only the visible logic.  */
11368 vect_analyze_stmt (vec_info
*vinfo
,
11369 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
11370 slp_tree node
, slp_instance node_instance
,
11371 stmt_vector_for_cost
*cost_vec
)
11373 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
11374 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
11376 gimple_seq pattern_def_seq
;
11378 if (dump_enabled_p ())
11379 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
/* Statements with volatile operands can never be vectorized.  */
11382 if (gimple_has_volatile_ops (stmt_info
->stmt
))
11383 return opt_result::failure_at (stmt_info
->stmt
,
11385 " stmt has volatile operands: %G\n",
/* Recurse into a pattern's definition sequence and analyze any of its
   statements that are themselves relevant or live.  */
11388 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11390 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
11392 gimple_stmt_iterator si
;
11394 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
11396 stmt_vec_info pattern_def_stmt_info
11397 = vinfo
->lookup_stmt (gsi_stmt (si
));
11398 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
11399 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
11401 /* Analyze def stmt of STMT if it's a pattern stmt. */
11402 if (dump_enabled_p ())
11403 dump_printf_loc (MSG_NOTE
, vect_location
,
11404 "==> examining pattern def statement: %G",
11405 pattern_def_stmt_info
->stmt
);
11408 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
11409 need_to_vectorize
, node
, node_instance
,
11417 /* Skip stmts that do not need to be vectorized. In loops this is expected
11419 - the COND_EXPR which is the loop exit condition
11420 - any LABEL_EXPRs in the loop
11421 - computations that are used only for array indexing or loop control.
11422 In basic blocks we only analyze statements that are a part of some SLP
11423 instance, therefore, all the statements are relevant.
11425 Pattern statement needs to be analyzed instead of the original statement
11426 if the original statement is not relevant. Otherwise, we analyze both
11427 statements. In basic blocks we are called from some SLP instance
11428 traversal, don't analyze pattern stmts instead, the pattern stmts
11429 already will be part of SLP instance. */
11431 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
11432 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
11433 && !STMT_VINFO_LIVE_P (stmt_info
))
11435 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11436 && pattern_stmt_info
11437 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11438 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11440 /* Analyze PATTERN_STMT instead of the original stmt. */
11441 stmt_info
= pattern_stmt_info
;
11442 if (dump_enabled_p ())
11443 dump_printf_loc (MSG_NOTE
, vect_location
,
11444 "==> examining pattern statement: %G",
11449 if (dump_enabled_p ())
11450 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
11452 return opt_result::success ();
11455 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11457 && pattern_stmt_info
11458 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11459 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11461 /* Analyze PATTERN_STMT too. */
11462 if (dump_enabled_p ())
11463 dump_printf_loc (MSG_NOTE
, vect_location
,
11464 "==> examining pattern statement: %G",
11465 pattern_stmt_info
->stmt
);
11468 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
11469 node_instance
, cost_vec
);
/* Internal-consistency checks: the def type must be compatible with the
   statement's relevance classification.  */
11474 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
11476 case vect_internal_def
:
11479 case vect_reduction_def
:
11480 case vect_nested_cycle
:
11481 gcc_assert (!bb_vinfo
11482 && (relevance
== vect_used_in_outer
11483 || relevance
== vect_used_in_outer_by_reduction
11484 || relevance
== vect_used_by_reduction
11485 || relevance
== vect_unused_in_scope
11486 || relevance
== vect_used_only_live
));
11489 case vect_induction_def
:
11490 case vect_first_order_recurrence
:
11491 gcc_assert (!bb_vinfo
);
11494 case vect_constant_def
:
11495 case vect_external_def
:
11496 case vect_unknown_def_type
:
11498 gcc_unreachable ();
/* Temporarily install the SLP node's vectype for the duration of the
   vectorizable_* checks below; restored before returning (line 11578).  */
11501 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11503 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
)
;
11505 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11507 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11508 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11509 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11510 *need_to_vectorize
= true;
11513 if (PURE_SLP_STMT (stmt_info
) && !node
)
11515 if (dump_enabled_p ())
11516 dump_printf_loc (MSG_NOTE
, vect_location
,
11517 "handled only by SLP analysis\n");
11518 return opt_result::success ();
11523 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11524 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
11525 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11526 -mveclibabi= takes preference over library functions with
11527 the simd attribute. */
11528 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11529 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11531 || vectorizable_conversion (vinfo
, stmt_info
,
11532 NULL
, NULL
, node
, cost_vec
)
11533 || vectorizable_operation (vinfo
, stmt_info
,
11534 NULL
, NULL
, node
, cost_vec
)
11535 || vectorizable_assignment (vinfo
, stmt_info
,
11536 NULL
, NULL
, node
, cost_vec
)
11537 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11538 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11539 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11540 node
, node_instance
, cost_vec
)
11541 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11542 NULL
, node
, cost_vec
)
11543 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11544 || vectorizable_condition (vinfo
, stmt_info
,
11545 NULL
, NULL
, node
, cost_vec
)
11546 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11548 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11549 stmt_info
, NULL
, node
)
11550 || vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
11551 stmt_info
, NULL
, node
, cost_vec
));
/* Presumably the basic-block (SLP) path -- it adds vectorizable_phi and
   omits the loop-only routines (reduction/induction); confirm against
   upstream.  */
11555 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11556 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11557 NULL
, NULL
, node
, cost_vec
)
11558 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11560 || vectorizable_shift (vinfo
, stmt_info
,
11561 NULL
, NULL
, node
, cost_vec
)
11562 || vectorizable_operation (vinfo
, stmt_info
,
11563 NULL
, NULL
, node
, cost_vec
)
11564 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11566 || vectorizable_load (vinfo
, stmt_info
,
11567 NULL
, NULL
, node
, cost_vec
)
11568 || vectorizable_store (vinfo
, stmt_info
,
11569 NULL
, NULL
, node
, cost_vec
)
11570 || vectorizable_condition (vinfo
, stmt_info
,
11571 NULL
, NULL
, node
, cost_vec
)
11572 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11574 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
11578 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11581 return opt_result::failure_at (stmt_info
->stmt
,
11583 " relevant stmt not supported: %G",
11586 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11587 need extra handling, except for vectorizable reductions. */
11589 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11590 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11591 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11592 stmt_info
, NULL
, node
, node_instance
,
11594 return opt_result::failure_at (stmt_info
->stmt
,
11596 " live stmt not supported: %G",
11599 return opt_result::success ();
11603 /* Function vect_transform_stmt.
11605 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
/* NOTE(review): garbled extract -- break statements, braces and some
   argument lines were dropped (visible as gaps in the embedded line
   numbering); annotations below cover only the visible logic.  */
11608 vect_transform_stmt (vec_info
*vinfo
,
11609 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11610 slp_tree slp_node
, slp_instance slp_node_instance
)
11612 bool is_store
= false;
11613 gimple
*vec_stmt
= NULL
;
11616 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
/* Temporarily install the SLP node's vectype; restored at the end
   (line 11756).  */
11618 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11620 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
/* Dispatch on the vectorization type determined during analysis.  */
11622 switch (STMT_VINFO_TYPE (stmt_info
))
11624 case type_demotion_vec_info_type
:
11625 case type_promotion_vec_info_type
:
11626 case type_conversion_vec_info_type
:
11627 done
= vectorizable_conversion (vinfo
, stmt_info
,
11628 gsi
, &vec_stmt
, slp_node
, NULL
);
11632 case induc_vec_info_type
:
11633 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11634 stmt_info
, &vec_stmt
, slp_node
,
11639 case shift_vec_info_type
:
11640 done
= vectorizable_shift (vinfo
, stmt_info
,
11641 gsi
, &vec_stmt
, slp_node
, NULL
);
11645 case op_vec_info_type
:
11646 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11651 case assignment_vec_info_type
:
11652 done
= vectorizable_assignment (vinfo
, stmt_info
,
11653 gsi
, &vec_stmt
, slp_node
, NULL
);
11657 case load_vec_info_type
:
11658 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11663 case store_vec_info_type
:
11664 done
= vectorizable_store (vinfo
, stmt_info
,
11665 gsi
, &vec_stmt
, slp_node
, NULL
);
11667 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
11669 /* In case of interleaving, the whole chain is vectorized when the
11670 last store in the chain is reached. Store stmts before the last
11671 one are skipped, and their vec_stmt_info shouldn't be freed
11673 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11674 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11681 case condition_vec_info_type
:
11682 done
= vectorizable_condition (vinfo
, stmt_info
,
11683 gsi
, &vec_stmt
, slp_node
, NULL
);
11687 case comparison_vec_info_type
:
11688 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11693 case call_vec_info_type
:
11694 done
= vectorizable_call (vinfo
, stmt_info
,
11695 gsi
, &vec_stmt
, slp_node
, NULL
);
11698 case call_simd_clone_vec_info_type
:
11699 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11703 case reduc_vec_info_type
:
11704 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11705 gsi
, &vec_stmt
, slp_node
);
11709 case cycle_phi_info_type
:
11710 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11711 &vec_stmt
, slp_node
, slp_node_instance
);
11715 case lc_phi_info_type
:
11716 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11717 stmt_info
, &vec_stmt
, slp_node
);
11721 case recurr_info_type
:
11722 done
= vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
11723 stmt_info
, &vec_stmt
, slp_node
, NULL
);
11727 case phi_info_type
:
11728 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
11733 if (!STMT_VINFO_LIVE_P (stmt_info
))
11735 if (dump_enabled_p ())
11736 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11737 "stmt not supported.\n");
11738 gcc_unreachable ();
11743 if (!slp_node
&& vec_stmt
)
11744 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
11746 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
11748 /* Handle stmts whose DEF is used outside the loop-nest that is
11749 being vectorized. */
11750 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
11751 slp_node_instance
, true, NULL
);
/* Restore the vectype saved before the SLP override above.  */
11756 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11762 /* Remove a group of stores (for SLP or interleaving), free their
/* (Header comment truncated in this extract -- upstream presumably
   continues "...stmt_vec_info."; TODO confirm against the original file.) */
11766 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11768 stmt_vec_info next_stmt_info
= first_stmt_info
;
/* Walk the DR group chain starting at FIRST_STMT_INFO, removing each
   (original, non-pattern) statement.  */
11770 while (next_stmt_info
)
11772 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11773 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11774 /* Free the attached stmt_vec_info and remove the stmt. */
11775 vinfo
->remove_stmt (next_stmt_info
);
11776 next_stmt_info
= tmp
;
11780 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11781 elements of type SCALAR_TYPE, or null if the target doesn't support
11784 If NUNITS is zero, return a vector type that contains elements of
11785 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11787 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11788 for this vectorization region and want to "autodetect" the best choice.
11789 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11790 and we want the new type to be interoperable with it. PREVAILING_MODE
11791 in this case can be a scalar integer mode or a vector mode; when it
11792 is a vector mode, the function acts like a tree-level version of
11793 related_vector_mode. */
/* NOTE(review): garbled extract -- several original lines (early returns,
   braces) are missing, as shown by gaps in the embedded line numbering.  */
11796 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11797 tree scalar_type
, poly_uint64 nunits
)
11799 tree orig_scalar_type
= scalar_type
;
11800 scalar_mode inner_mode
;
11801 machine_mode simd_mode
;
/* Only integer and float element modes are supported.  */
11804 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11805 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11808 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
11810 /* Interoperability between modes requires one to be a constant multiple
11811 of the other, so that the number of vectors required for each operation
11812 is a compile-time constant. */
11813 if (prevailing_mode
!= VOIDmode
11814 && !constant_multiple_p (nunits
* nbytes
,
11815 GET_MODE_SIZE (prevailing_mode
))
11816 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode
),
11820 /* For vector types of elements whose mode precision doesn't
11821 match their types precision we use an element type of mode
11822 precision. The vectorization routines will have to make sure
11823 they support the proper result truncation/extension.
11824 We also make sure to build vector types with INTEGER_TYPE
11825 component type only. */
11826 if (INTEGRAL_TYPE_P (scalar_type
)
11827 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11828 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11829 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11830 TYPE_UNSIGNED (scalar_type
));
11832 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11833 When the component mode passes the above test simply use a type
11834 corresponding to that mode. The theory is that any use that
11835 would cause problems with this will disable vectorization anyway. */
11836 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11837 && !INTEGRAL_TYPE_P (scalar_type
))
11838 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11840 /* We can't build a vector type of elements with alignment bigger than
11842 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11843 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11844 TYPE_UNSIGNED (scalar_type
));
11846 /* If we fell back to using the mode fail if there was
11847 no scalar type for it. */
11848 if (scalar_type
== NULL_TREE
)
11851 /* If no prevailing mode was supplied, use the mode the target prefers.
11852 Otherwise lookup a vector mode based on the prevailing mode. */
11853 if (prevailing_mode
== VOIDmode
)
11855 gcc_assert (known_eq (nunits
, 0U));
11856 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11857 if (SCALAR_INT_MODE_P (simd_mode
))
11859 /* Traditional behavior is not to take the integer mode
11860 literally, but simply to use it as a way of determining
11861 the vector size. It is up to mode_for_vector to decide
11862 what the TYPE_MODE should be.
11864 Note that nunits == 1 is allowed in order to support single
11865 element vector types. */
11866 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11867 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11871 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11872 || !related_vector_mode (prevailing_mode
,
11873 inner_mode
, nunits
).exists (&simd_mode
))
11875 /* Fall back to using mode_for_vector, mostly in the hope of being
11876 able to use an integer mode. */
11877 if (known_eq (nunits
, 0U)
11878 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11881 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11885 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11887 /* In cases where the mode was chosen by mode_for_vector, check that
11888 the target actually supports the chosen mode, or that it at least
11889 allows the vector mode to be replaced by a like-sized integer. */
11890 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11891 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11894 /* Re-attach the address-space qualifier if we canonicalized the scalar
11896 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11897 return build_qualified_type
11898 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
11903 /* Function get_vectype_for_scalar_type.
11905 Returns the vector type corresponding to SCALAR_TYPE as supported
11906 by the target. If GROUP_SIZE is nonzero and we're performing BB
11907 vectorization, make sure that the number of elements in the vector
11908 is no bigger than GROUP_SIZE. */
/* NOTE(review): garbled extract -- braces and the final return are missing
   from this view (gaps in the embedded line numbering).  */
11911 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11912 unsigned int group_size
)
11914 /* For BB vectorization, we should always have a group size once we've
11915 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11916 are tentative requests during things like early data reference
11917 analysis and pattern recognition. */
11918 if (is_a
<bb_vec_info
> (vinfo
))
11919 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11923 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
/* First successful query latches the region's vector mode.  */
11925 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11926 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11928 /* Register the natural choice of vector type, before the group size
11929 has been applied. */
11931 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11933 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11934 try again with an explicit number of elements. */
11937 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
11939 /* Start with the biggest number of units that fits within
11940 GROUP_SIZE and halve it until we find a valid vector type.
11941 Usually either the first attempt will succeed or all will
11942 fail (in the latter case because GROUP_SIZE is too small
11943 for the target), but it's possible that a target could have
11944 a hole between supported vector types.
11946 If GROUP_SIZE is not a power of 2, this has the effect of
11947 trying the largest power of 2 that fits within the group,
11948 even though the group is not a multiple of that vector size.
11949 The BB vectorizer will then try to carve up the group into
11951 unsigned int nunits
= 1 << floor_log2 (group_size
);
11954 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11955 scalar_type
, nunits
);
11958 while (nunits
> 1 && !vectype
);
11964 /* Return the vector type corresponding to SCALAR_TYPE as supported
11965 by the target. NODE, if nonnull, is the SLP tree node that will
11966 use the returned vector type. */
11969 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
/* Convenience overload: derive GROUP_SIZE from the SLP node's lane
   count (zero when there is no node) and forward to the main
   implementation above.  */
11971 unsigned int group_size
= 0;
11973 group_size
= SLP_TREE_LANES (node
);
11974 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11977 /* Function get_mask_type_for_scalar_type.
11979 Returns the mask type corresponding to a result of comparison
11980 of vectors of specified SCALAR_TYPE as supported by target.
11981 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11982 make sure that the number of elements in the vector is no bigger
11983 than GROUP_SIZE. */
11986 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11987 unsigned int group_size
)
/* Get the data vector type first, then convert it into the
   corresponding boolean/mask type via truth_type_for.  */
11989 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11994 return truth_type_for (vectype
);
11997 /* Function get_same_sized_vectype
11999 Returns a vector type corresponding to SCALAR_TYPE of size
12000 VECTOR_TYPE if supported by the target. */
12003 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
/* Scalar booleans map to the mask type of VECTOR_TYPE rather than to
   a same-sized data vector.  */
12005 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
12006 return truth_type_for (vector_type
);
/* NUNITS = size of VECTOR_TYPE / size of SCALAR_TYPE; fails if the
   division is not exact.  */
12008 poly_uint64 nunits
;
12009 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
12010 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
12013 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
12014 scalar_type
, nunits
);
12017 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
12018 would not change the chosen vector modes. */
12021 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
/* Walk every vector mode already used by this vinfo and check that it
   would be re-derived unchanged from VECTOR_MODE.  */
12023 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
12024 i
!= vinfo
->used_vector_modes
.end (); ++i
)
/* A used mode that is not a vector mode, or that related_vector_mode
   would map to something different, means the choice would change.  */
12025 if (!VECTOR_MODE_P (*i
)
12026 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
12031 /* Function vect_is_simple_use.
12034 VINFO - the vect info of the loop or basic block that is being vectorized.
12035 OPERAND - operand in the loop or bb.
12037 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
12038 case OPERAND is an SSA_NAME that is defined in the vectorizable region
12039 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
12040 the definition could be anywhere in the function
12041 DT - the type of definition
12043 Returns whether a stmt with OPERAND can be vectorized.
12044 For loops, supportable operands are constants, loop invariants, and operands
12045 that are defined by the current iteration of the loop. Unsupportable
12046 operands are those that are defined by a previous iteration of the loop (as
12047 is the case in reduction/induction computations).
12048 For basic blocks, supportable operands are constants and bb invariants.
12049 For now, operands defined outside the basic block are not supported. */
12052 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
12053 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
/* Initialize all outputs to the "unknown" state before classifying.  */
12055 if (def_stmt_info_out
)
12056 *def_stmt_info_out
= NULL
;
12058 *def_stmt_out
= NULL
;
12059 *dt
= vect_unknown_def_type
;
12061 if (dump_enabled_p ())
12063 dump_printf_loc (MSG_NOTE
, vect_location
,
12064 "vect_is_simple_use: operand ");
12065 if (TREE_CODE (operand
) == SSA_NAME
12066 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
12067 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
12069 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
/* Classify the operand: constants and invariants need no defining
   stmt inside the region; non-SSA operands are unsupported; default
   defs (e.g. function parameters) count as external.  */
12072 if (CONSTANT_CLASS_P (operand
))
12073 *dt
= vect_constant_def
;
12074 else if (is_gimple_min_invariant (operand
))
12075 *dt
= vect_external_def
;
12076 else if (TREE_CODE (operand
) != SSA_NAME
)
12077 *dt
= vect_unknown_def_type
;
12078 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
12079 *dt
= vect_external_def
;
/* SSA name defined by a real statement: look up its stmt_vec_info in
   the vectorizable region.  */
12082 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
12083 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
/* No stmt_vec_info means the def is outside the region, i.e. external.  */
12085 *dt
= vect_external_def
;
/* Skip over pattern stmts to the statement that will actually be
   vectorized, then report its def type and info to the caller.  */
12088 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
12089 def_stmt
= stmt_vinfo
->stmt
;
12090 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
12091 if (def_stmt_info_out
)
12092 *def_stmt_info_out
= stmt_vinfo
;
12095 *def_stmt_out
= def_stmt
;
/* Dump a human-readable name for the classification.  */
12098 if (dump_enabled_p ())
12100 dump_printf (MSG_NOTE
, ", type of def: ");
12103 case vect_uninitialized_def
:
12104 dump_printf (MSG_NOTE
, "uninitialized\n");
12106 case vect_constant_def
:
12107 dump_printf (MSG_NOTE
, "constant\n");
12109 case vect_external_def
:
12110 dump_printf (MSG_NOTE
, "external\n");
12112 case vect_internal_def
:
12113 dump_printf (MSG_NOTE
, "internal\n");
12115 case vect_induction_def
:
12116 dump_printf (MSG_NOTE
, "induction\n");
12118 case vect_reduction_def
:
12119 dump_printf (MSG_NOTE
, "reduction\n");
12121 case vect_double_reduction_def
:
12122 dump_printf (MSG_NOTE
, "double reduction\n");
12124 case vect_nested_cycle
:
12125 dump_printf (MSG_NOTE
, "nested cycle\n");
12127 case vect_first_order_recurrence
:
12128 dump_printf (MSG_NOTE
, "first order recurrence\n");
12130 case vect_unknown_def_type
:
12131 dump_printf (MSG_NOTE
, "unknown\n");
/* An unknown def type means the operand cannot be vectorized.  */
12136 if (*dt
== vect_unknown_def_type
)
12138 if (dump_enabled_p ())
12139 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12140 "Unsupported pattern.\n");
12147 /* Function vect_is_simple_use.
12149 Same as vect_is_simple_use but also determines the vector operand
12150 type of OPERAND and stores it to *VECTYPE. If the definition of
12151 OPERAND is vect_uninitialized_def, vect_constant_def or
12152 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
12153 is responsible to compute the best suited vector type for the
12157 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
12158 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
12159 gimple
**def_stmt_out
)
/* Delegate the classification to the basic overload above.  */
12161 stmt_vec_info def_stmt_info
;
12163 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
12167 *def_stmt_out
= def_stmt
;
12168 if (def_stmt_info_out
)
12169 *def_stmt_info_out
= def_stmt_info
;
12171 /* Now get a vector type if the def is internal, otherwise supply
12172 NULL_TREE and leave it up to the caller to figure out a proper
12173 type for the use stmt. */
12174 if (*dt
== vect_internal_def
12175 || *dt
== vect_induction_def
12176 || *dt
== vect_reduction_def
12177 || *dt
== vect_double_reduction_def
12178 || *dt
== vect_nested_cycle
12179 || *dt
== vect_first_order_recurrence
)
/* Defs computed inside the region must already have a vectype.  */
12181 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
12182 gcc_assert (*vectype
!= NULL_TREE
);
12183 if (dump_enabled_p ())
12184 dump_printf_loc (MSG_NOTE
, vect_location
,
12185 "vect_is_simple_use: vectype %T\n", *vectype
);
12187 else if (*dt
== vect_uninitialized_def
12188 || *dt
== vect_constant_def
12189 || *dt
== vect_external_def
)
12190 *vectype
= NULL_TREE
;
/* Any other def type at this point is a programming error.  */
12192 gcc_unreachable ();
12197 /* Function vect_is_simple_use.
12199 Same as vect_is_simple_use but determines the operand by operand
12200 position OPERAND from either STMT or SLP_NODE, filling in *OP
12201 and *SLP_DEF (when SLP_NODE is not NULL). */
12204 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
12205 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
12206 enum vect_def_type
*dt
,
12207 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
/* SLP path: the operand is the OPERAND'th child of SLP_NODE; its
   vectype is taken directly from the child node.  */
12211 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
12213 *vectype
= SLP_TREE_VECTYPE (child
);
12214 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
/* Internal SLP defs: use the representative stmt's lhs as *OP and
   classify it with the scalar overload.  */
12216 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
12217 return vect_is_simple_use (*op
, vinfo
, dt
, def_stmt_info_out
);
/* External/constant SLP defs: report the first scalar op and the
   child's def type; there is no defining stmt info.  */
12221 if (def_stmt_info_out
)
12222 *def_stmt_info_out
= NULL
;
12223 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
12224 *dt
= SLP_TREE_DEF_TYPE (child
);
/* Non-SLP path: extract the OPERAND'th operand from the stmt itself.
   COND_EXPRs with an embedded comparison and VIEW_CONVERT_EXPRs need
   special operand extraction.  */
12231 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
12233 if (gimple_assign_rhs_code (ass
) == COND_EXPR
12234 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
12237 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
12239 *op
= gimple_op (ass
, operand
);
12241 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
12242 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
/* Plain assigns: gimple_op 0 is the lhs, hence OPERAND + 1.  */
12244 *op
= gimple_op (ass
, operand
+ 1);
12246 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
12247 *op
= gimple_call_arg (call
, operand
);
12249 gcc_unreachable ();
12250 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
12254 /* If OP is not NULL and is external or constant update its vector
12255 type with VECTYPE. Returns true if successful or false if not,
12256 for example when conflicting vector types are present. */
12259 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
/* Internal defs (and a null OP) keep their own vectype; nothing to do.  */
12261 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
/* If a vectype was already recorded it must agree with VECTYPE.  */
12263 if (SLP_TREE_VECTYPE (op
))
12264 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
12265 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
12266 should be handled by patters. Allow vect_constant_def for now. */
12267 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
12268 && SLP_TREE_DEF_TYPE (op
) == vect_external_def
)
12270 SLP_TREE_VECTYPE (op
) = vectype
;
12274 /* Function supportable_widening_operation
12276 Check whether an operation represented by the code CODE is a
12277 widening operation that is supported by the target platform in
12278 vector form (i.e., when operating on arguments of type VECTYPE_IN
12279 producing a result of type VECTYPE_OUT).
12281 Widening operations we currently support are NOP (CONVERT), FLOAT,
12282 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
12283 are supported by the target platform either directly (via vector
12284 tree-codes), or via target builtins.
12287 - CODE1 and CODE2 are codes of vector operations to be used when
12288 vectorizing the operation, if available.
12289 - MULTI_STEP_CVT determines the number of required intermediate steps in
12290 case of multi-step conversion (like char->short->int - in that case
12291 MULTI_STEP_CVT will be 1).
12292 - INTERM_TYPES contains the intermediate type required to perform the
12293 widening operation (short in the above example). */
12296 supportable_widening_operation (vec_info
*vinfo
,
12297 enum tree_code code
, stmt_vec_info stmt_info
,
12298 tree vectype_out
, tree vectype_in
,
12299 enum tree_code
*code1
, enum tree_code
*code2
,
12300 int *multi_step_cvt
,
12301 vec
<tree
> *interm_types
)
12303 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
12304 class loop
*vect_loop
= NULL
;
12305 machine_mode vec_mode
;
12306 enum insn_code icode1
, icode2
;
12307 optab optab1
, optab2
;
12308 tree vectype
= vectype_in
;
12309 tree wide_vectype
= vectype_out
;
12310 enum tree_code c1
, c2
;
12312 tree prev_type
, intermediate_type
;
12313 machine_mode intermediate_mode
, prev_mode
;
12314 optab optab3
, optab4
;
12316 *multi_step_cvt
= 0;
12318 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
/* Map the scalar tree code to the pair of vector tree codes C1/C2
   (typically a LO/HI pair producing two wide vectors).  */
12322 case WIDEN_MULT_EXPR
:
12323 /* The result of a vectorized widening operation usually requires
12324 two vectors (because the widened results do not fit into one vector).
12325 The generated vector results would normally be expected to be
12326 generated in the same order as in the original scalar computation,
12327 i.e. if 8 results are generated in each vector iteration, they are
12328 to be organized as follows:
12329 vect1: [res1,res2,res3,res4],
12330 vect2: [res5,res6,res7,res8].
12332 However, in the special case that the result of the widening
12333 operation is used in a reduction computation only, the order doesn't
12334 matter (because when vectorizing a reduction we change the order of
12335 the computation). Some targets can take advantage of this and
12336 generate more efficient code. For example, targets like Altivec,
12337 that support widen_mult using a sequence of {mult_even,mult_odd}
12338 generate the following vectors:
12339 vect1: [res1,res3,res5,res7],
12340 vect2: [res2,res4,res6,res8].
12342 When vectorizing outer-loops, we execute the inner-loop sequentially
12343 (each vectorized inner-loop iteration contributes to VF outer-loop
12344 iterations in parallel). We therefore don't allow to change the
12345 order of the computation in the inner-loop during outer-loop
12347 /* TODO: Another case in which order doesn't *really* matter is when we
12348 widen and then contract again, e.g. (short)((int)x * y >> 8).
12349 Normally, pack_trunc performs an even/odd permute, whereas the
12350 repack from an even/odd expansion would be an interleave, which
12351 would be significantly simpler for e.g. AVX2. */
12352 /* In any case, in order to avoid duplicating the code below, recurse
12353 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
12354 are properly set up for the caller. If we fail, we'll continue with
12355 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
12357 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
12358 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
12359 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
12360 stmt_info
, vectype_out
,
12361 vectype_in
, code1
, code2
,
12362 multi_step_cvt
, interm_types
))
12364 /* Elements in a vector with vect_used_by_reduction property cannot
12365 be reordered if the use chain with this property does not have the
12366 same operation. One such an example is s += a * b, where elements
12367 in a and b cannot be reordered. Here we check if the vector defined
12368 by STMT is only directly used in the reduction statement. */
12369 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
12370 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
12372 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
12375 c1
= VEC_WIDEN_MULT_LO_EXPR
;
12376 c2
= VEC_WIDEN_MULT_HI_EXPR
;
12379 case DOT_PROD_EXPR
:
12380 c1
= DOT_PROD_EXPR
;
12381 c2
= DOT_PROD_EXPR
;
12389 case VEC_WIDEN_MULT_EVEN_EXPR
:
12390 /* Support the recursion induced just above. */
12391 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
12392 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
12395 case WIDEN_LSHIFT_EXPR
:
12396 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
12397 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
12400 case WIDEN_PLUS_EXPR
:
12401 c1
= VEC_WIDEN_PLUS_LO_EXPR
;
12402 c2
= VEC_WIDEN_PLUS_HI_EXPR
;
12405 case WIDEN_MINUS_EXPR
:
12406 c1
= VEC_WIDEN_MINUS_LO_EXPR
;
12407 c2
= VEC_WIDEN_MINUS_HI_EXPR
;
12411 c1
= VEC_UNPACK_LO_EXPR
;
12412 c2
= VEC_UNPACK_HI_EXPR
;
12416 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
12417 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
12420 case FIX_TRUNC_EXPR
:
12421 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
12422 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
12426 gcc_unreachable ();
/* On big-endian targets the LO/HI halves are swapped (EVEN/ODD pairs
   are endian-neutral and stay as-is).  */
12429 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
12430 std::swap (c1
, c2
);
12432 if (code
== FIX_TRUNC_EXPR
)
12434 /* The signedness is determined from output operand. */
12435 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
12436 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
12438 else if (CONVERT_EXPR_CODE_P (code
)
12439 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
12440 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12441 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
12442 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
12444 /* If the input and result modes are the same, a different optab
12445 is needed where we pass in the number of units in vectype. */
12446 optab1
= vec_unpacks_sbool_lo_optab
;
12447 optab2
= vec_unpacks_sbool_hi_optab
;
12451 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12452 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
12455 if (!optab1
|| !optab2
)
/* Both optabs must have an instruction for the input vector mode.  */
12458 vec_mode
= TYPE_MODE (vectype
);
12459 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
12460 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
/* Single-step case: both instructions already produce the wide mode.  */
12466 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12467 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12469 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12471 /* For scalar masks we may have different boolean
12472 vector types having the same QImode. Thus we
12473 add additional check for elements number. */
12474 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
12475 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12479 /* Check if it's a multi-step conversion that can be done using intermediate
12482 prev_type
= vectype
;
12483 prev_mode
= vec_mode
;
/* Multi-step sequences are only supported for plain conversions.  */
12485 if (!CONVERT_EXPR_CODE_P (code
))
12488 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12489 intermediate steps in promotion sequence. We try
12490 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
12492 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12493 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
/* The next intermediate mode is whatever the previous LO instruction
   produces.  */
12495 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12496 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12498 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
12499 else if (VECTOR_MODE_P (intermediate_mode
))
12501 tree intermediate_element_type
12502 = lang_hooks
.types
.type_for_mode (GET_MODE_INNER (intermediate_mode
),
12503 TYPE_UNSIGNED (prev_type
));
12505 = build_vector_type_for_mode (intermediate_element_type
,
12506 intermediate_mode
);
12510 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
12511 TYPE_UNSIGNED (prev_type
));
12513 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12514 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12515 && intermediate_mode
== prev_mode
12516 && SCALAR_INT_MODE_P (prev_mode
))
12518 /* If the input and result modes are the same, a different optab
12519 is needed where we pass in the number of units in vectype. */
12520 optab3
= vec_unpacks_sbool_lo_optab
;
12521 optab4
= vec_unpacks_sbool_hi_optab
;
12525 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12526 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
/* Each step needs a full set of instructions: prev -> intermediate
   (optab1/optab2) and intermediate -> next (optab3/optab4).  */
12529 if (!optab3
|| !optab4
12530 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
12531 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12532 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
12533 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
12534 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
12535 == CODE_FOR_nothing
)
12536 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
12537 == CODE_FOR_nothing
))
12540 interm_types
->quick_push (intermediate_type
);
12541 (*multi_step_cvt
)++;
/* Stop successfully once the instructions reach the wide mode.  */
12543 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12544 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12546 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12548 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
12549 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12553 prev_type
= intermediate_type
;
12554 prev_mode
= intermediate_mode
;
/* Failure: discard any partially-built intermediate type list.  */
12557 interm_types
->release ();
12562 /* Function supportable_narrowing_operation
12564 Check whether an operation represented by the code CODE is a
12565 narrowing operation that is supported by the target platform in
12566 vector form (i.e., when operating on arguments of type VECTYPE_IN
12567 and producing a result of type VECTYPE_OUT).
12569 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12570 and FLOAT. This function checks if these operations are supported by
12571 the target platform directly via vector tree-codes.
12574 - CODE1 is the code of a vector operation to be used when
12575 vectorizing the operation, if available.
12576 - MULTI_STEP_CVT determines the number of required intermediate steps in
12577 case of multi-step conversion (like int->short->char - in that case
12578 MULTI_STEP_CVT will be 1).
12579 - INTERM_TYPES contains the intermediate type required to perform the
12580 narrowing operation (short in the above example). */
12583 supportable_narrowing_operation (enum tree_code code
,
12584 tree vectype_out
, tree vectype_in
,
12585 enum tree_code
*code1
, int *multi_step_cvt
,
12586 vec
<tree
> *interm_types
)
12588 machine_mode vec_mode
;
12589 enum insn_code icode1
;
12590 optab optab1
, interm_optab
;
12591 tree vectype
= vectype_in
;
12592 tree narrow_vectype
= vectype_out
;
12594 tree intermediate_type
, prev_type
;
12595 machine_mode intermediate_mode
, prev_mode
;
12597 unsigned HOST_WIDE_INT n_elts
;
12600 *multi_step_cvt
= 0;
/* Map the scalar CODE to the single vector pack code C1 and look up
   its optab.  */
12604 c1
= VEC_PACK_TRUNC_EXPR
;
/* Sub-byte boolean vectors in an integer mode use the special sbool
   pack optab, which also takes the element count.  */
12605 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
12606 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12607 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
))
12608 && TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&n_elts
)
12609 && n_elts
< BITS_PER_UNIT
)
12610 optab1
= vec_pack_sbool_trunc_optab
;
12612 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12615 case FIX_TRUNC_EXPR
:
12616 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
12617 /* The signedness is determined from output operand. */
12618 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
12622 c1
= VEC_PACK_FLOAT_EXPR
;
12623 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12627 gcc_unreachable ();
/* The optab must have an instruction for the input vector mode.  */
12633 vec_mode
= TYPE_MODE (vectype
);
12634 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
/* Single-step case: the instruction already produces the narrow mode.  */
12639 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12641 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12643 /* For scalar masks we may have different boolean
12644 vector types having the same QImode. Thus we
12645 add additional check for elements number. */
12646 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
12647 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12651 if (code
== FLOAT_EXPR
)
12654 /* Check if it's a multi-step conversion that can be done using intermediate
12656 prev_mode
= vec_mode
;
12657 prev_type
= vectype
;
12658 if (code
== FIX_TRUNC_EXPR
)
12659 uns
= TYPE_UNSIGNED (vectype_out
);
12661 uns
= TYPE_UNSIGNED (vectype
);
12663 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12664 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12665 costly than signed. */
12666 if (code
== FIX_TRUNC_EXPR
&& uns
)
12668 enum insn_code icode2
;
12671 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
12673 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
/* Switch to the signed variant only if it has the same result mode.  */
12674 if (interm_optab
!= unknown_optab
12675 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
12676 && insn_data
[icode1
].operand
[0].mode
12677 == insn_data
[icode2
].operand
[0].mode
)
12680 optab1
= interm_optab
;
12685 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12686 intermediate steps in promotion sequence. We try
12687 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12688 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12689 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
/* The next intermediate mode is whatever the previous pack
   instruction produces.  */
12691 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12692 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12694 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
12697 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
12698 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12699 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12700 && SCALAR_INT_MODE_P (prev_mode
)
12701 && TYPE_VECTOR_SUBPARTS (intermediate_type
).is_constant (&n_elts
)
12702 && n_elts
< BITS_PER_UNIT
)
12703 interm_optab
= vec_pack_sbool_trunc_optab
;
12706 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
/* Each step needs instructions from prev mode to intermediate and
   onward from the intermediate mode.  */
12709 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
12710 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12711 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
12712 == CODE_FOR_nothing
))
12715 interm_types
->quick_push (intermediate_type
);
12716 (*multi_step_cvt
)++;
/* Stop successfully once the instruction reaches the narrow mode.  */
12718 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12720 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12722 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
12723 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12727 prev_mode
= intermediate_mode
;
12728 prev_type
= intermediate_type
;
12729 optab1
= interm_optab
;
/* Failure: discard any partially-built intermediate type list.  */
12732 interm_types
->release ();
12736 /* Generate and return a vector mask of MASK_TYPE such that
12737 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12738 Add the statements to SEQ. */
12741 vect_gen_while (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12742 tree end_index
, const char *name
)
/* The target must support IFN_WHILE_ULT for this comparison/mask type
   combination; callers are expected to have checked that already.  */
12744 tree cmp_type
= TREE_TYPE (start_index
);
12745 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
12746 cmp_type
, mask_type
,
12747 OPTIMIZE_FOR_SPEED
));
/* Build the internal-function call WHILE_ULT (START, END, 0-mask).  */
12748 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
12749 start_index
, end_index
,
12750 build_zero_cst (mask_type
));
/* Use NAME for the result SSA name when provided, for readable dumps.  */
12753 tmp
= make_temp_ssa_name (mask_type
, NULL
, name
);
12755 tmp
= make_ssa_name (mask_type
);
12756 gimple_call_set_lhs (call
, tmp
);
12757 gimple_seq_add_stmt (seq
, call
);
12761 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12762 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12765 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
/* Build the WHILE_ULT mask and invert it bitwise.  */
12768 tree tmp
= vect_gen_while (seq
, mask_type
, start_index
, end_index
);
12769 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
12772 /* Try to compute the vector types required to vectorize STMT_INFO,
12773 returning true on success and false if vectorization isn't possible.
12774 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12775 take sure that the number of elements in the vectors is no bigger
12780 - Set *STMT_VECTYPE_OUT to:
12781 - NULL_TREE if the statement doesn't need to be vectorized;
12782 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12784 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12785 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12786 statement does not help to determine the overall number of units. */
12789 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12790 tree
*stmt_vectype_out
,
12791 tree
*nunits_vectype_out
,
12792 unsigned int group_size
)
12794 gimple
*stmt
= stmt_info
->stmt
;
12796 /* For BB vectorization, we should always have a group size once we've
12797 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12798 are tentative requests during things like early data reference
12799 analysis and pattern recognition. */
12800 if (is_a
<bb_vec_info
> (vinfo
))
12801 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
12805 *stmt_vectype_out
= NULL_TREE
;
12806 *nunits_vectype_out
= NULL_TREE
;
/* Statements without an lhs cannot be vectorized, except for
   MASK_STORE (ok) and lhs-less calls (deferred to SIMD clone
   analysis).  */
12808 if (gimple_get_lhs (stmt
) == NULL_TREE
12809 /* MASK_STORE has no lhs, but is ok. */
12810 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12812 if (is_a
<gcall
*> (stmt
))
12814 /* Ignore calls with no lhs. These must be calls to
12815 #pragma omp simd functions, and what vectorization factor
12816 it really needs can't be determined until
12817 vectorizable_simd_clone_call. */
12818 if (dump_enabled_p ())
12819 dump_printf_loc (MSG_NOTE
, vect_location
,
12820 "defer to SIMD clone analysis.\n");
12821 return opt_result::success ();
12824 return opt_result::failure_at (stmt
,
12825 "not vectorized: irregular stmt.%G", stmt
);
/* Determine the statement's vector type: reuse a precomputed one,
   derive a mask type for mask-producing stmts, or compute it from the
   statement's scalar type.  */
12829 tree scalar_type
= NULL_TREE
;
12830 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
12832 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12833 if (dump_enabled_p ())
12834 dump_printf_loc (MSG_NOTE
, vect_location
,
12835 "precomputed vectype: %T\n", vectype
);
12837 else if (vect_use_mask_type_p (stmt_info
))
/* Mask-producing stmts: build an unsigned integer scalar of the
   recorded mask precision and get its mask vector type.  */
12839 unsigned int precision
= stmt_info
->mask_precision
;
12840 scalar_type
= build_nonstandard_integer_type (precision
, 1);
12841 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
12843 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
12844 " data-type %T\n", scalar_type
);
12845 if (dump_enabled_p ())
12846 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
/* Pick the scalar type: a data reference's referenced type, the
   stored value of a MASK_STORE, or the lhs type otherwise.  */
12850 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
12851 scalar_type
= TREE_TYPE (DR_REF (dr
));
12852 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12853 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
12855 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
12857 if (dump_enabled_p ())
12860 dump_printf_loc (MSG_NOTE
, vect_location
,
12861 "get vectype for scalar type (group size %d):"
12862 " %T\n", group_size
, scalar_type
);
12864 dump_printf_loc (MSG_NOTE
, vect_location
,
12865 "get vectype for scalar type: %T\n", scalar_type
);
12867 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
12869 return opt_result::failure_at (stmt
,
12871 " unsupported data-type %T\n",
12874 if (dump_enabled_p ())
12875 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
/* Statements that already operate on vectors cannot be re-vectorized.  */
12878 if (scalar_type
&& VECTOR_MODE_P (TYPE_MODE (scalar_type
)))
12879 return opt_result::failure_at (stmt
,
12880 "not vectorized: vector stmt in loop:%G",
12883 *stmt_vectype_out
= vectype
;
12885 /* Don't try to compute scalar types if the stmt produces a boolean
12886 vector; use the existing vector type instead. */
12887 tree nunits_vectype
= vectype
;
12888 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12890 /* The number of units is set according to the smallest scalar
12891 type (or the largest vector size, but we only support one
12892 vector size per vectorization). */
12893 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
12894 TREE_TYPE (vectype
));
12895 if (scalar_type
!= TREE_TYPE (vectype
))
12897 if (dump_enabled_p ())
12898 dump_printf_loc (MSG_NOTE
, vect_location
,
12899 "get vectype for smallest scalar type: %T\n",
12901 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
12903 if (!nunits_vectype
)
12904 return opt_result::failure_at
12905 (stmt
, "not vectorized: unsupported data-type %T\n",
12907 if (dump_enabled_p ())
12908 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
/* The nunits vectype must hold a whole multiple of the stmt
   vectype's element count.  */
12913 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
12914 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)))
12915 return opt_result::failure_at (stmt
,
12916 "Not vectorized: Incompatible number "
12917 "of vector subparts between %T and %T\n",
12918 nunits_vectype
, *stmt_vectype_out
);
12920 if (dump_enabled_p ())
12922 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
12923 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
12924 dump_printf (MSG_NOTE
, "\n");
12927 *nunits_vectype_out
= nunits_vectype
;
12928 return opt_result::success ();
12931 /* Generate and return statement sequence that sets vector length LEN that is:
12933 min_of_start_and_end = min (START_INDEX, END_INDEX);
12934 left_len = END_INDEX - min_of_start_and_end;
12935 rhs = min (left_len, LEN_LIMIT);
12938 Note: the cost of the code generated by this function is modeled
12939 by vect_estimate_min_profitable_iters, so changes here may need
12940 corresponding changes there. */
12943 vect_gen_len (tree len
, tree start_index
, tree end_index
, tree len_limit
)
12945 gimple_seq stmts
= NULL
;
12946 tree len_type
= TREE_TYPE (len
);
12947 gcc_assert (TREE_TYPE (start_index
) == len_type
);
12949 tree min
= gimple_build (&stmts
, MIN_EXPR
, len_type
, start_index
, end_index
);
12950 tree left_len
= gimple_build (&stmts
, MINUS_EXPR
, len_type
, end_index
, min
);
12951 tree rhs
= gimple_build (&stmts
, MIN_EXPR
, len_type
, left_len
, len_limit
);
12952 gimple
* stmt
= gimple_build_assign (len
, rhs
);
12953 gimple_seq_add_stmt (&stmts
, stmt
);