1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 tree vectype
, int misalign
,
96 enum vect_cost_model_location where
)
98 if ((kind
== vector_load
|| kind
== unaligned_load
)
99 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
100 kind
= vector_gather_load
;
101 if ((kind
== vector_store
|| kind
== unaligned_store
)
102 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
103 kind
= vector_scatter_store
;
105 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, vectype
, misalign
};
106 body_cost_vec
->safe_push (si
);
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (vec_info
*vinfo
,
128 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
129 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
154 write_vector_array (vec_info
*vinfo
,
155 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
156 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
161 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
162 build_int_cst (size_type_node
, n
),
163 NULL_TREE
, NULL_TREE
);
165 new_stmt
= gimple_build_assign (array_ref
, vect
);
166 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
174 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
178 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
188 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
189 gimple_stmt_iterator
*gsi
, tree var
)
191 tree clobber
= build_clobber (TREE_TYPE (var
));
192 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
193 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
203 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
204 enum vect_relevant relevant
, bool live_p
)
206 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
207 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE
, vect_location
,
211 "mark relevant %d, live %d: %G", relevant
, live_p
,
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE
, vect_location
,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info
= stmt_info
;
230 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
232 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
233 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
236 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
237 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
238 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
240 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "already marked relevant/live.\n");
249 worklist
->safe_push (stmt_info
);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
259 loop_vec_info loop_vinfo
)
264 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info
->stmt
)
313 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
314 *relevant
= vect_used_in_scope
;
316 /* changing memory. */
317 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
318 if (gimple_vdef (stmt_info
->stmt
)
319 && !gimple_clobber_p (stmt_info
->stmt
))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE
, vect_location
,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant
= vect_used_in_scope
;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
330 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
332 basic_block bb
= gimple_bb (USE_STMT (use_p
));
333 if (!flow_bb_inside_loop_p (loop
, bb
))
335 if (is_gimple_debug (USE_STMT (use_p
)))
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE
, vect_location
,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
345 gcc_assert (bb
== single_exit (loop
)->dest
);
352 if (*live_p
&& *relevant
== vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE
, vect_location
,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant
= vect_used_only_live
;
361 return (*live_p
|| *relevant
);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
371 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info
))
381 /* STMT has a data_ref. FORNOW this means that its of one of
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
395 if (!assign
|| !gimple_assign_copy_p (assign
))
397 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
398 if (call
&& gimple_call_internal_p (call
))
400 internal_fn ifn
= gimple_call_internal_fn (call
);
401 int mask_index
= internal_fn_mask_index (ifn
);
403 && use
== gimple_call_arg (call
, mask_index
))
405 int stored_value_index
= internal_fn_stored_value_index (ifn
);
406 if (stored_value_index
>= 0
407 && use
== gimple_call_arg (call
, stored_value_index
))
409 if (internal_gather_scatter_fn_p (ifn
)
410 && use
== gimple_call_arg (call
, 1))
416 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
418 operand
= gimple_assign_rhs1 (assign
);
419 if (TREE_CODE (operand
) != SSA_NAME
)
430 Function process_use.
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT cause it had already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
457 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
458 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
461 stmt_vec_info dstmt_vinfo
;
462 enum vect_def_type dt
;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
470 return opt_result::failure_at (stmt_vinfo
->stmt
,
472 " unsupported use in stmt.\n");
475 return opt_result::success ();
477 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
478 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
485 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
487 && bb
->loop_father
== def_bb
->loop_father
)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
503 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
511 case vect_unused_in_scope
:
512 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
513 vect_used_in_scope
: vect_unused_in_scope
;
516 case vect_used_in_outer_by_reduction
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_by_reduction
;
521 case vect_used_in_outer
:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
523 relevant
= vect_used_in_scope
;
526 case vect_used_in_scope
:
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
541 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE
, vect_location
,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
549 case vect_unused_in_scope
:
550 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
552 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
555 case vect_used_by_reduction
:
556 case vect_used_only_live
:
557 relevant
= vect_used_in_outer_by_reduction
;
560 case vect_used_in_scope
:
561 relevant
= vect_used_in_outer
;
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
572 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
576 loop_latch_edge (bb
->loop_father
))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE
, vect_location
,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
610 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
612 unsigned int nbbs
= loop
->num_nodes
;
613 gimple_stmt_iterator si
;
617 enum vect_relevant relevant
;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec
<stmt_vec_info
, 64> worklist
;
623 /* 1. Init worklist. */
624 for (i
= 0; i
< nbbs
; i
++)
627 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
629 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
634 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 if (is_gimple_debug (gsi_stmt (si
)))
641 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE
, vect_location
,
644 "init: stmt relevant? %G", stmt_info
->stmt
);
646 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
647 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
651 /* 2. Process_worklist */
652 while (worklist
.length () > 0)
657 stmt_vec_info stmt_vinfo
= worklist
.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE
, vect_location
,
660 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
665 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
680 case vect_reduction_def
:
681 gcc_assert (relevant
!= vect_unused_in_scope
);
682 if (relevant
!= vect_unused_in_scope
683 && relevant
!= vect_used_in_scope
684 && relevant
!= vect_used_by_reduction
685 && relevant
!= vect_used_only_live
)
686 return opt_result::failure_at
687 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
690 case vect_nested_cycle
:
691 if (relevant
!= vect_unused_in_scope
692 && relevant
!= vect_used_in_outer_by_reduction
693 && relevant
!= vect_used_in_outer
)
694 return opt_result::failure_at
695 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
698 case vect_double_reduction_def
:
699 if (relevant
!= vect_unused_in_scope
700 && relevant
!= vect_used_by_reduction
701 && relevant
!= vect_used_only_live
)
702 return opt_result::failure_at
703 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
710 if (is_pattern_stmt_p (stmt_vinfo
))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
717 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
718 tree op
= gimple_assign_rhs1 (assign
);
721 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
724 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
725 loop_vinfo
, relevant
, &worklist
, false);
728 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
729 loop_vinfo
, relevant
, &worklist
, false);
734 for (; i
< gimple_num_ops (assign
); i
++)
736 op
= gimple_op (assign
, i
);
737 if (TREE_CODE (op
) == SSA_NAME
)
740 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
747 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
749 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
751 tree arg
= gimple_call_arg (call
, i
);
753 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
761 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
763 tree op
= USE_FROM_PTR (use_p
);
765 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
773 gather_scatter_info gs_info
;
774 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
777 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
786 } /* while worklist */
788 return opt_result::success ();
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
798 vect_model_simple_cost (vec_info
*,
799 stmt_vec_info stmt_info
, int ncopies
,
800 enum vect_def_type
*dt
,
803 stmt_vector_for_cost
*cost_vec
,
804 vect_cost_for_stmt kind
= vector_stmt
)
806 int inside_cost
= 0, prologue_cost
= 0;
808 gcc_assert (cost_vec
!= NULL
);
810 /* ??? Somehow we need to fix this at the callers. */
812 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
815 /* Cost the "broadcast" of a scalar operand in to a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
818 for (int i
= 0; i
< ndts
; i
++)
819 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
820 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
821 stmt_info
, 0, vect_prologue
);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
825 stmt_info
, 0, vect_body
);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE
, vect_location
,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. If WIDEN_ARITH
840 is true the stmt is doing widening arithmetic. */
843 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
844 enum vect_def_type
*dt
,
845 unsigned int ncopies
, int pwr
,
846 stmt_vector_for_cost
*cost_vec
,
850 int inside_cost
= 0, prologue_cost
= 0;
852 for (i
= 0; i
< pwr
+ 1; i
++)
854 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
,
856 ? vector_stmt
: vec_promote_demote
,
857 stmt_info
, 0, vect_body
);
861 /* FORNOW: Assuming maximum 2 args per stmts. */
862 for (i
= 0; i
< 2; i
++)
863 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
864 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
865 stmt_info
, 0, vect_prologue
);
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE
, vect_location
,
869 "vect_model_promotion_demotion_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
873 /* Returns true if the current function returns DECL. */
876 cfun_returns (tree decl
)
880 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
882 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
885 if (gimple_return_retval (ret
) == decl
)
887 /* We often end up with an aggregate copy to the result decl,
888 handle that case as well. First skip intermediate clobbers
893 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
895 while (gimple_clobber_p (def
));
896 if (is_a
<gassign
*> (def
)
897 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
898 && gimple_assign_rhs1 (def
) == decl
)
904 /* Function vect_model_store_cost
906 Models cost for stores. In the case of grouped accesses, one access
907 has the overhead of the grouped access attributed to it. */
910 vect_model_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
911 vect_memory_access_type memory_access_type
,
912 dr_alignment_support alignment_support_scheme
,
914 vec_load_store_type vls_type
, slp_tree slp_node
,
915 stmt_vector_for_cost
*cost_vec
)
917 unsigned int inside_cost
= 0, prologue_cost
= 0;
918 stmt_vec_info first_stmt_info
= stmt_info
;
919 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
921 /* ??? Somehow we need to fix this at the callers. */
923 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
925 if (vls_type
== VLS_STORE_INVARIANT
)
928 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
929 stmt_info
, 0, vect_prologue
);
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node
&& grouped_access_p
)
935 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
947 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
949 /* Uses a high and low interleave or shuffle operations for each
951 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
952 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
953 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
954 stmt_info
, 0, vect_body
);
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE
, vect_location
,
958 "vect_model_store_cost: strided group_size = %d .\n",
962 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
963 /* Costs of the stores. */
964 if (memory_access_type
== VMAT_ELEMENTWISE
965 || memory_access_type
== VMAT_GATHER_SCATTER
)
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
969 inside_cost
+= record_stmt_cost (cost_vec
,
970 ncopies
* assumed_nunits
,
971 scalar_store
, stmt_info
, 0, vect_body
);
974 vect_get_store_cost (vinfo
, stmt_info
, ncopies
, alignment_support_scheme
,
975 misalignment
, &inside_cost
, cost_vec
);
977 if (memory_access_type
== VMAT_ELEMENTWISE
978 || memory_access_type
== VMAT_STRIDED_SLP
)
980 /* N scalar stores plus extracting the elements. */
981 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
982 inside_cost
+= record_stmt_cost (cost_vec
,
983 ncopies
* assumed_nunits
,
984 vec_to_scalar
, stmt_info
, 0, vect_body
);
987 /* When vectorizing a store into the function result assign
988 a penalty if the function returns in a multi-register location.
989 In this case we assume we'll end up with having to spill the
990 vector result and do piecewise loads as a conservative estimate. */
991 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
993 && (TREE_CODE (base
) == RESULT_DECL
994 || (DECL_P (base
) && cfun_returns (base
)))
995 && !aggregate_value_p (base
, cfun
->decl
))
997 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
998 /* ??? Handle PARALLEL in some way. */
1001 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1002 /* Assume that a single reg-reg move is possible and cheap,
1003 do not account for vector to gp register move cost. */
1007 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1009 stmt_info
, 0, vect_epilogue
);
1011 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1013 stmt_info
, 0, vect_epilogue
);
1018 if (dump_enabled_p ())
1019 dump_printf_loc (MSG_NOTE
, vect_location
,
1020 "vect_model_store_cost: inside_cost = %d, "
1021 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1025 /* Calculate cost of DR's memory access. */
1027 vect_get_store_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1028 dr_alignment_support alignment_support_scheme
,
1030 unsigned int *inside_cost
,
1031 stmt_vector_for_cost
*body_cost_vec
)
1033 switch (alignment_support_scheme
)
1037 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1038 vector_store
, stmt_info
, 0,
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_NOTE
, vect_location
,
1043 "vect_model_store_cost: aligned.\n");
1047 case dr_unaligned_supported
:
1049 /* Here, we assign an additional cost for the unaligned store. */
1050 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1051 unaligned_store
, stmt_info
,
1052 misalignment
, vect_body
);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE
, vect_location
,
1055 "vect_model_store_cost: unaligned supported by "
1060 case dr_unaligned_unsupported
:
1062 *inside_cost
= VECT_MAX_COST
;
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1066 "vect_model_store_cost: unsupported access.\n");
1076 /* Function vect_model_load_cost
1078 Models cost for loads. In the case of grouped accesses, one access has
1079 the overhead of the grouped access attributed to it. Since unaligned
1080 accesses are supported for loads, we also account for the costs of the
1081 access scheme chosen. */
1084 vect_model_load_cost (vec_info
*vinfo
,
1085 stmt_vec_info stmt_info
, unsigned ncopies
, poly_uint64 vf
,
1086 vect_memory_access_type memory_access_type
,
1087 dr_alignment_support alignment_support_scheme
,
1089 gather_scatter_info
*gs_info
,
1091 stmt_vector_for_cost
*cost_vec
)
1093 unsigned int inside_cost
= 0, prologue_cost
= 0;
1094 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1096 gcc_assert (cost_vec
);
1098 /* ??? Somehow we need to fix this at the callers. */
1100 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1102 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1104 /* If the load is permuted then the alignment is determined by
1105 the first group element not by the first scalar stmt DR. */
1106 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1107 /* Record the cost for the permutation. */
1108 unsigned n_perms
, n_loads
;
1109 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1110 vf
, true, &n_perms
, &n_loads
);
1111 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1112 first_stmt_info
, 0, vect_body
);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1119 /* Grouped loads read all elements in the group at once,
1120 so we want the DR for the first statement. */
1121 stmt_vec_info first_stmt_info
= stmt_info
;
1122 if (!slp_node
&& grouped_access_p
)
1123 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1125 /* True if we should include any once-per-group costs as well as
1126 the cost of the statement itself. For SLP we only get called
1127 once per group anyhow. */
1128 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1130 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1131 ones we actually need. Account for the cost of unused results. */
1132 if (first_stmt_p
&& !slp_node
&& memory_access_type
== VMAT_LOAD_STORE_LANES
)
1134 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
1135 stmt_vec_info next_stmt_info
= first_stmt_info
;
1139 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
1141 while (next_stmt_info
);
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE
, vect_location
,
1146 "vect_model_load_cost: %d unused vectors.\n",
1148 vect_get_load_cost (vinfo
, stmt_info
, ncopies
* gaps
,
1149 alignment_support_scheme
, misalignment
, false,
1150 &inside_cost
, &prologue_cost
,
1151 cost_vec
, cost_vec
, true);
1155 /* We assume that the cost of a single load-lanes instruction is
1156 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1157 access is instead being provided by a load-and-permute operation,
1158 include the cost of the permutes. */
1160 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1162 /* Uses an even and odd extract operations or shuffle operations
1163 for each needed permute. */
1164 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1165 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1166 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1167 stmt_info
, 0, vect_body
);
1169 if (dump_enabled_p ())
1170 dump_printf_loc (MSG_NOTE
, vect_location
,
1171 "vect_model_load_cost: strided group_size = %d .\n",
1175 /* The loads themselves. */
1176 if (memory_access_type
== VMAT_ELEMENTWISE
1177 || memory_access_type
== VMAT_GATHER_SCATTER
)
1179 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1180 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1181 if (memory_access_type
== VMAT_GATHER_SCATTER
1182 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
1183 /* For emulated gathers N offset vector element extracts
1184 (we assume the scalar scaling and ptr + offset add is consumed by
1186 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
* assumed_nunits
,
1187 vec_to_scalar
, stmt_info
, 0,
1189 /* N scalar loads plus gathering them into a vector. */
1190 inside_cost
+= record_stmt_cost (cost_vec
,
1191 ncopies
* assumed_nunits
,
1192 scalar_load
, stmt_info
, 0, vect_body
);
1194 else if (memory_access_type
== VMAT_INVARIANT
)
1196 /* Invariant loads will ideally be hoisted and splat to a vector. */
1197 prologue_cost
+= record_stmt_cost (cost_vec
, 1,
1198 scalar_load
, stmt_info
, 0,
1200 prologue_cost
+= record_stmt_cost (cost_vec
, 1,
1201 scalar_to_vec
, stmt_info
, 0,
1205 vect_get_load_cost (vinfo
, stmt_info
, ncopies
,
1206 alignment_support_scheme
, misalignment
, first_stmt_p
,
1207 &inside_cost
, &prologue_cost
,
1208 cost_vec
, cost_vec
, true);
1209 if (memory_access_type
== VMAT_ELEMENTWISE
1210 || memory_access_type
== VMAT_STRIDED_SLP
1211 || (memory_access_type
== VMAT_GATHER_SCATTER
1212 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
))
1213 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1214 stmt_info
, 0, vect_body
);
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_NOTE
, vect_location
,
1218 "vect_model_load_cost: inside_cost = %d, "
1219 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1223 /* Calculate cost of DR's memory access. */
1225 vect_get_load_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1226 dr_alignment_support alignment_support_scheme
,
1228 bool add_realign_cost
, unsigned int *inside_cost
,
1229 unsigned int *prologue_cost
,
1230 stmt_vector_for_cost
*prologue_cost_vec
,
1231 stmt_vector_for_cost
*body_cost_vec
,
1232 bool record_prologue_costs
)
1234 switch (alignment_support_scheme
)
1238 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1239 stmt_info
, 0, vect_body
);
1241 if (dump_enabled_p ())
1242 dump_printf_loc (MSG_NOTE
, vect_location
,
1243 "vect_model_load_cost: aligned.\n");
1247 case dr_unaligned_supported
:
1249 /* Here, we assign an additional cost for the unaligned load. */
1250 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1251 unaligned_load
, stmt_info
,
1252 misalignment
, vect_body
);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE
, vect_location
,
1256 "vect_model_load_cost: unaligned supported by "
1261 case dr_explicit_realign
:
1263 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1264 vector_load
, stmt_info
, 0, vect_body
);
1265 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1266 vec_perm
, stmt_info
, 0, vect_body
);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1271 if (targetm
.vectorize
.builtin_mask_for_load
)
1272 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1273 stmt_info
, 0, vect_body
);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE
, vect_location
,
1277 "vect_model_load_cost: explicit realign\n");
1281 case dr_explicit_realign_optimized
:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE
, vect_location
,
1285 "vect_model_load_cost: unaligned software "
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost
&& record_prologue_costs
)
1297 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1298 vector_stmt
, stmt_info
,
1300 if (targetm
.vectorize
.builtin_mask_for_load
)
1301 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1302 vector_stmt
, stmt_info
,
1306 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1307 stmt_info
, 0, vect_body
);
1308 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1309 stmt_info
, 0, vect_body
);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE
, vect_location
,
1313 "vect_model_load_cost: explicit realign optimized"
1319 case dr_unaligned_unsupported
:
1321 *inside_cost
= VECT_MAX_COST
;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1325 "vect_model_load_cost: unsupported access.\n");
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1338 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1339 gimple_stmt_iterator
*gsi
)
1342 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1344 vinfo
->insert_on_entry (stmt_vinfo
, new_stmt
);
1346 if (dump_enabled_p ())
1347 dump_printf_loc (MSG_NOTE
, vect_location
,
1348 "created new init_stmt: %G", new_stmt
);
1351 /* Function vect_init_vector.
1353 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1354 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1355 vector type a vector with all elements equal to VAL is created first.
1356 Place the initialization at GSI if it is not NULL. Otherwise, place the
1357 initialization at the loop preheader.
1358 Return the DEF of INIT_STMT.
1359 It will be used in the vectorization of STMT_INFO. */
1362 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1363 gimple_stmt_iterator
*gsi
)
1368 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1369 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1371 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1372 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1374 /* Scalar boolean value should be transformed into
1375 all zeros or all ones value before building a vector. */
1376 if (VECTOR_BOOLEAN_TYPE_P (type
))
1378 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1379 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1381 if (CONSTANT_CLASS_P (val
))
1382 val
= integer_zerop (val
) ? false_val
: true_val
;
1385 new_temp
= make_ssa_name (TREE_TYPE (type
));
1386 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1387 val
, true_val
, false_val
);
1388 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1394 gimple_seq stmts
= NULL
;
1395 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1396 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1397 TREE_TYPE (type
), val
);
1399 /* ??? Condition vectorization expects us to do
1400 promotion of invariant/external defs. */
1401 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1402 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1403 !gsi_end_p (gsi2
); )
1405 init_stmt
= gsi_stmt (gsi2
);
1406 gsi_remove (&gsi2
, false);
1407 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1411 val
= build_vector_from_val (type
, val
);
1414 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1415 init_stmt
= gimple_build_assign (new_temp
, val
);
1416 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1421 /* Function vect_get_vec_defs_for_operand.
1423 OP is an operand in STMT_VINFO. This function returns a vector of
1424 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1426 In the case that OP is an SSA_NAME which is defined in the loop, then
1427 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1429 In case OP is an invariant or constant, a new stmt that creates a vector def
1430 needs to be introduced. VECTYPE may be used to specify a required type for
1431 vector invariant. */
1434 vect_get_vec_defs_for_operand (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1436 tree op
, vec
<tree
> *vec_oprnds
, tree vectype
)
1439 enum vect_def_type dt
;
1441 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE
, vect_location
,
1445 "vect_get_vec_defs_for_operand: %T\n", op
);
1447 stmt_vec_info def_stmt_info
;
1448 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1449 &def_stmt_info
, &def_stmt
);
1450 gcc_assert (is_simple_use
);
1451 if (def_stmt
&& dump_enabled_p ())
1452 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1454 vec_oprnds
->create (ncopies
);
1455 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1457 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1461 vector_type
= vectype
;
1462 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1463 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1464 vector_type
= truth_type_for (stmt_vectype
);
1466 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1468 gcc_assert (vector_type
);
1469 tree vop
= vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1471 vec_oprnds
->quick_push (vop
);
1475 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
1476 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info
).length () == ncopies
);
1477 for (unsigned i
= 0; i
< ncopies
; ++i
)
1478 vec_oprnds
->quick_push (gimple_get_lhs
1479 (STMT_VINFO_VEC_STMTS (def_stmt_info
)[i
]));
1484 /* Get vectorized definitions for OP0 and OP1. */
1487 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1489 tree op0
, vec
<tree
> *vec_oprnds0
, tree vectype0
,
1490 tree op1
, vec
<tree
> *vec_oprnds1
, tree vectype1
,
1491 tree op2
, vec
<tree
> *vec_oprnds2
, tree vectype2
,
1492 tree op3
, vec
<tree
> *vec_oprnds3
, tree vectype3
)
1497 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_oprnds0
);
1499 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[1], vec_oprnds1
);
1501 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[2], vec_oprnds2
);
1503 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[3], vec_oprnds3
);
1508 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1509 op0
, vec_oprnds0
, vectype0
);
1511 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1512 op1
, vec_oprnds1
, vectype1
);
1514 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1515 op2
, vec_oprnds2
, vectype2
);
1517 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1518 op3
, vec_oprnds3
, vectype3
);
1523 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1525 tree op0
, vec
<tree
> *vec_oprnds0
,
1526 tree op1
, vec
<tree
> *vec_oprnds1
,
1527 tree op2
, vec
<tree
> *vec_oprnds2
,
1528 tree op3
, vec
<tree
> *vec_oprnds3
)
1530 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
1531 op0
, vec_oprnds0
, NULL_TREE
,
1532 op1
, vec_oprnds1
, NULL_TREE
,
1533 op2
, vec_oprnds2
, NULL_TREE
,
1534 op3
, vec_oprnds3
, NULL_TREE
);
1537 /* Helper function called by vect_finish_replace_stmt and
1538 vect_finish_stmt_generation. Set the location of the new
1539 statement and create and return a stmt_vec_info for it. */
1542 vect_finish_stmt_generation_1 (vec_info
*,
1543 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1545 if (dump_enabled_p ())
1546 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1550 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1552 /* While EH edges will generally prevent vectorization, stmt might
1553 e.g. be in a must-not-throw region. Ensure newly created stmts
1554 that could throw are part of the same region. */
1555 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1556 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1557 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1560 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
1563 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1564 which sets the same scalar result as STMT_INFO did. Create and return a
1565 stmt_vec_info for VEC_STMT. */
1568 vect_finish_replace_stmt (vec_info
*vinfo
,
1569 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1571 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1572 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1574 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1575 gsi_replace (&gsi
, vec_stmt
, true);
1577 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1580 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1581 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1584 vect_finish_stmt_generation (vec_info
*vinfo
,
1585 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1586 gimple_stmt_iterator
*gsi
)
1588 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1590 if (!gsi_end_p (*gsi
)
1591 && gimple_has_mem_ops (vec_stmt
))
1593 gimple
*at_stmt
= gsi_stmt (*gsi
);
1594 tree vuse
= gimple_vuse (at_stmt
);
1595 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1597 tree vdef
= gimple_vdef (at_stmt
);
1598 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1599 gimple_set_modified (vec_stmt
, true);
1600 /* If we have an SSA vuse and insert a store, update virtual
1601 SSA form to avoid triggering the renamer. Do so only
1602 if we can easily see all uses - which is what almost always
1603 happens with the way vectorized stmts are inserted. */
1604 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1605 && ((is_gimple_assign (vec_stmt
)
1606 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1607 || (is_gimple_call (vec_stmt
)
1608 && !(gimple_call_flags (vec_stmt
)
1609 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1611 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1612 gimple_set_vdef (vec_stmt
, new_vdef
);
1613 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1617 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1618 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1621 /* We want to vectorize a call to combined function CFN with function
1622 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1623 as the types of all inputs. Check whether this is possible using
1624 an internal function, returning its code if so or IFN_LAST if not. */
1627 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1628 tree vectype_out
, tree vectype_in
)
1631 if (internal_fn_p (cfn
))
1632 ifn
= as_internal_fn (cfn
);
1634 ifn
= associated_internal_fn (fndecl
);
1635 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1637 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1638 if (info
.vectorizable
)
1640 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1641 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1642 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1643 OPTIMIZE_FOR_SPEED
))
1651 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1652 gimple_stmt_iterator
*);
1654 /* Check whether a load or store statement in the loop described by
1655 LOOP_VINFO is possible in a loop using partial vectors. This is
1656 testing whether the vectorizer pass has the appropriate support,
1657 as well as whether the target does.
1659 VLS_TYPE says whether the statement is a load or store and VECTYPE
1660 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1661 says how the load or store is going to be implemented and GROUP_SIZE
1662 is the number of load or store statements in the containing group.
1663 If the access is a gather load or scatter store, GS_INFO describes
1664 its arguments. If the load or store is conditional, SCALAR_MASK is the
1665 condition under which it occurs.
1667 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1668 vectors is not supported, otherwise record the required rgroup control
1672 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo
, tree vectype
,
1673 vec_load_store_type vls_type
,
1675 vect_memory_access_type
1677 unsigned int ncopies
,
1678 gather_scatter_info
*gs_info
,
1681 /* Invariant loads need no special support. */
1682 if (memory_access_type
== VMAT_INVARIANT
)
1685 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1686 machine_mode vecmode
= TYPE_MODE (vectype
);
1687 bool is_load
= (vls_type
== VLS_LOAD
);
1688 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1691 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1692 : !vect_store_lanes_supported (vectype
, group_size
, true))
1694 if (dump_enabled_p ())
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1696 "can't operate on partial vectors because"
1697 " the target doesn't have an appropriate"
1698 " load/store-lanes instruction.\n");
1699 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1702 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1706 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1708 internal_fn ifn
= (is_load
1709 ? IFN_MASK_GATHER_LOAD
1710 : IFN_MASK_SCATTER_STORE
);
1711 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1712 gs_info
->memory_type
,
1713 gs_info
->offset_vectype
,
1716 if (dump_enabled_p ())
1717 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1718 "can't operate on partial vectors because"
1719 " the target doesn't have an appropriate"
1720 " gather load or scatter store instruction.\n");
1721 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1724 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1728 if (memory_access_type
!= VMAT_CONTIGUOUS
1729 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1731 /* Element X of the data must come from iteration i * VF + X of the
1732 scalar loop. We need more work to support other mappings. */
1733 if (dump_enabled_p ())
1734 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1735 "can't operate on partial vectors because an"
1736 " access isn't contiguous.\n");
1737 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1741 if (!VECTOR_MODE_P (vecmode
))
1743 if (dump_enabled_p ())
1744 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1745 "can't operate on partial vectors when emulating"
1746 " vector operations.\n");
1747 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1751 /* We might load more scalars than we need for permuting SLP loads.
1752 We checked in get_group_load_store_type that the extra elements
1753 don't leak into a new vector. */
1754 auto get_valid_nvectors
= [] (poly_uint64 size
, poly_uint64 nunits
)
1756 unsigned int nvectors
;
1757 if (can_div_away_from_zero_p (size
, nunits
, &nvectors
))
1762 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1763 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1764 machine_mode mask_mode
;
1765 bool using_partial_vectors_p
= false;
1766 if (targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1767 && can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1769 unsigned int nvectors
= get_valid_nvectors (group_size
* vf
, nunits
);
1770 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1771 using_partial_vectors_p
= true;
1775 if (get_len_load_store_mode (vecmode
, is_load
).exists (&vmode
))
1777 unsigned int nvectors
= get_valid_nvectors (group_size
* vf
, nunits
);
1778 vec_loop_lens
*lens
= &LOOP_VINFO_LENS (loop_vinfo
);
1779 unsigned factor
= (vecmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vecmode
);
1780 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, factor
);
1781 using_partial_vectors_p
= true;
1784 if (!using_partial_vectors_p
)
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1788 "can't operate on partial vectors because the"
1789 " target doesn't have the appropriate partial"
1790 " vectorization load or store.\n");
1791 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1795 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1796 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1797 that needs to be applied to all loads and stores in a vectorized loop.
1798 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1800 MASK_TYPE is the type of both masks. If new statements are needed,
1801 insert them before GSI. */
1804 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1805 gimple_stmt_iterator
*gsi
)
1807 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1811 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1812 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1813 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1814 vec_mask
, loop_mask
);
1815 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1819 /* Determine whether we can use a gather load or scatter store to vectorize
1820 strided load or store STMT_INFO by truncating the current offset to a
1821 smaller width. We need to be able to construct an offset vector:
1823 { 0, X, X*2, X*3, ... }
1825 without loss of precision, where X is STMT_INFO's DR_STEP.
1827 Return true if this is possible, describing the gather load or scatter
1828 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1831 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1832 loop_vec_info loop_vinfo
, bool masked_p
,
1833 gather_scatter_info
*gs_info
)
1835 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1836 data_reference
*dr
= dr_info
->dr
;
1837 tree step
= DR_STEP (dr
);
1838 if (TREE_CODE (step
) != INTEGER_CST
)
1840 /* ??? Perhaps we could use range information here? */
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_NOTE
, vect_location
,
1843 "cannot truncate variable step.\n");
1847 /* Get the number of bits in an element. */
1848 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1849 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1850 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1852 /* Set COUNT to the upper limit on the number of elements - 1.
1853 Start with the maximum vectorization factor. */
1854 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1856 /* Try lowering COUNT to the number of scalar latch iterations. */
1857 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1858 widest_int max_iters
;
1859 if (max_loop_iterations (loop
, &max_iters
)
1860 && max_iters
< count
)
1861 count
= max_iters
.to_shwi ();
1863 /* Try scales of 1 and the element size. */
1864 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1865 wi::overflow_type overflow
= wi::OVF_NONE
;
1866 for (int i
= 0; i
< 2; ++i
)
1868 int scale
= scales
[i
];
1870 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1873 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1874 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1877 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1878 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1880 /* Find the narrowest viable offset type. */
1881 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1882 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1885 /* See whether the target supports the operation with an offset
1886 no narrower than OFFSET_TYPE. */
1887 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1888 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1889 vectype
, memory_type
, offset_type
, scale
,
1890 &gs_info
->ifn
, &gs_info
->offset_vectype
)
1891 || gs_info
->ifn
== IFN_LAST
)
1894 gs_info
->decl
= NULL_TREE
;
1895 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1896 but we don't need to store that here. */
1897 gs_info
->base
= NULL_TREE
;
1898 gs_info
->element_type
= TREE_TYPE (vectype
);
1899 gs_info
->offset
= fold_convert (offset_type
, step
);
1900 gs_info
->offset_dt
= vect_constant_def
;
1901 gs_info
->scale
= scale
;
1902 gs_info
->memory_type
= memory_type
;
1906 if (overflow
&& dump_enabled_p ())
1907 dump_printf_loc (MSG_NOTE
, vect_location
,
1908 "truncating gather/scatter offset to %d bits"
1909 " might change its value.\n", element_bits
);
1914 /* Return true if we can use gather/scatter internal functions to
1915 vectorize STMT_INFO, which is a grouped or strided load or store.
1916 MASKED_P is true if load or store is conditional. When returning
1917 true, fill in GS_INFO with the information required to perform the
1921 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1922 loop_vec_info loop_vinfo
, bool masked_p
,
1923 gather_scatter_info
*gs_info
)
1925 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1926 || gs_info
->ifn
== IFN_LAST
)
1927 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
1930 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1931 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
1933 gcc_assert (TYPE_PRECISION (new_offset_type
)
1934 >= TYPE_PRECISION (old_offset_type
));
1935 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1937 if (dump_enabled_p ())
1938 dump_printf_loc (MSG_NOTE
, vect_location
,
1939 "using gather/scatter for strided/grouped access,"
1940 " scale = %d\n", gs_info
->scale
);
1945 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1946 elements with a known constant step. Return -1 if that step
1947 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1950 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
1952 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1953 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
1957 /* If the target supports a permute mask that reverses the elements in
1958 a vector of type VECTYPE, return that mask, otherwise return null. */
1961 perm_mask_for_reverse (tree vectype
)
1963 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1965 /* The encoding has a single stepped pattern. */
1966 vec_perm_builder
sel (nunits
, 1, 3);
1967 for (int i
= 0; i
< 3; ++i
)
1968 sel
.quick_push (nunits
- 1 - i
);
1970 vec_perm_indices
indices (sel
, 1, nunits
);
1971 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
1973 return vect_gen_perm_mask_checked (vectype
, indices
);
1976 /* A subroutine of get_load_store_type, with a subset of the same
1977 arguments. Handle the case where STMT_INFO is a load or store that
1978 accesses consecutive elements with a negative step. Sets *POFFSET
1979 to the offset to be applied to the DR for the first access. */
1981 static vect_memory_access_type
1982 get_negative_load_store_type (vec_info
*vinfo
,
1983 stmt_vec_info stmt_info
, tree vectype
,
1984 vec_load_store_type vls_type
,
1985 unsigned int ncopies
, poly_int64
*poffset
)
1987 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1988 dr_alignment_support alignment_support_scheme
;
1992 if (dump_enabled_p ())
1993 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1994 "multiple types with negative step.\n");
1995 return VMAT_ELEMENTWISE
;
1998 /* For backward running DRs the first access in vectype actually is
1999 N-1 elements before the address of the DR. */
2000 *poffset
= ((-TYPE_VECTOR_SUBPARTS (vectype
) + 1)
2001 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype
))));
2003 int misalignment
= dr_misalignment (dr_info
, vectype
, *poffset
);
2004 alignment_support_scheme
2005 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
2006 if (alignment_support_scheme
!= dr_aligned
2007 && alignment_support_scheme
!= dr_unaligned_supported
)
2009 if (dump_enabled_p ())
2010 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2011 "negative step but alignment required.\n");
2013 return VMAT_ELEMENTWISE
;
2016 if (vls_type
== VLS_STORE_INVARIANT
)
2018 if (dump_enabled_p ())
2019 dump_printf_loc (MSG_NOTE
, vect_location
,
2020 "negative step with invariant source;"
2021 " no permute needed.\n");
2022 return VMAT_CONTIGUOUS_DOWN
;
2025 if (!perm_mask_for_reverse (vectype
))
2027 if (dump_enabled_p ())
2028 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2029 "negative step and reversing not supported.\n");
2031 return VMAT_ELEMENTWISE
;
2034 return VMAT_CONTIGUOUS_REVERSE
;
2037 /* STMT_INFO is either a masked or unconditional store. Return the value
2041 vect_get_store_rhs (stmt_vec_info stmt_info
)
2043 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2045 gcc_assert (gimple_assign_single_p (assign
));
2046 return gimple_assign_rhs1 (assign
);
2048 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2050 internal_fn ifn
= gimple_call_internal_fn (call
);
2051 int index
= internal_fn_stored_value_index (ifn
);
2052 gcc_assert (index
>= 0);
2053 return gimple_call_arg (call
, index
);
2058 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2060 This function returns a vector type which can be composed with NETLS pieces,
2061 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2062 same vector size as the return vector. It checks target whether supports
2063 pieces-size vector mode for construction firstly, if target fails to, check
2064 pieces-size scalar mode for construction further. It returns NULL_TREE if
2065 fails to find the available composition.
2067 For example, for (vtype=V16QI, nelts=4), we can probably get:
2068 - V16QI with PTYPE V4QI.
2069 - V4SI with PTYPE SI.
2073 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2075 gcc_assert (VECTOR_TYPE_P (vtype
));
2076 gcc_assert (known_gt (nelts
, 0U));
2078 machine_mode vmode
= TYPE_MODE (vtype
);
2079 if (!VECTOR_MODE_P (vmode
))
2082 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2083 unsigned int pbsize
;
2084 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2086 /* First check if vec_init optab supports construction from
2087 vector pieces directly. */
2088 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2089 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2091 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2092 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2093 != CODE_FOR_nothing
))
2095 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
2099 /* Otherwise check if exists an integer type of the same piece size and
2100 if vec_init optab supports construction from it directly. */
2101 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2102 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2103 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2104 != CODE_FOR_nothing
))
2106 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2107 return build_vector_type (*ptype
, nelts
);
2114 /* A subroutine of get_load_store_type, with a subset of the same
2115 arguments. Handle the case where STMT_INFO is part of a grouped load
2118 For stores, the statements in the group are all consecutive
2119 and there is no gap at the end. For loads, the statements in the
2120 group might not be consecutive; there can be gaps between statements
2121 as well as at the end. */
2124 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2125 tree vectype
, slp_tree slp_node
,
2126 bool masked_p
, vec_load_store_type vls_type
,
2127 vect_memory_access_type
*memory_access_type
,
2128 poly_int64
*poffset
,
2129 dr_alignment_support
*alignment_support_scheme
,
2131 gather_scatter_info
*gs_info
)
2133 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2134 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2135 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2136 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2137 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2138 bool single_element_p
= (stmt_info
== first_stmt_info
2139 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2140 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2141 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2143 /* True if the vectorized statements would access beyond the last
2144 statement in the group. */
2145 bool overrun_p
= false;
2147 /* True if we can cope with such overrun by peeling for gaps, so that
2148 there is at least one final scalar iteration after the vector loop. */
2149 bool can_overrun_p
= (!masked_p
2150 && vls_type
== VLS_LOAD
2154 /* There can only be a gap at the end of the group if the stride is
2155 known at compile time. */
2156 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2158 /* Stores can't yet have gaps. */
2159 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2163 /* For SLP vectorization we directly vectorize a subchain
2164 without permutation. */
2165 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2167 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2168 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2170 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2171 separated by the stride, until we have a complete vector.
2172 Fall back to scalar accesses if that isn't possible. */
2173 if (multiple_p (nunits
, group_size
))
2174 *memory_access_type
= VMAT_STRIDED_SLP
;
2176 *memory_access_type
= VMAT_ELEMENTWISE
;
2180 overrun_p
= loop_vinfo
&& gap
!= 0;
2181 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2183 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2184 "Grouped store with gaps requires"
2185 " non-consecutive accesses\n");
2188 /* An overrun is fine if the trailing elements are smaller
2189 than the alignment boundary B. Every vector access will
2190 be a multiple of B and so we are guaranteed to access a
2191 non-gap element in the same B-sized block. */
2193 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2195 / vect_get_scalar_dr_size (first_dr_info
)))
2198 /* If the gap splits the vector in half and the target
2199 can do half-vector operations avoid the epilogue peeling
2200 by simply loading half of the vector only. Usually
2201 the construction with an upper zero half will be elided. */
2202 dr_alignment_support alss
;
2203 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2207 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2208 vectype
, misalign
)))
2210 || alss
== dr_unaligned_supported
)
2211 && known_eq (nunits
, (group_size
- gap
) * 2)
2212 && known_eq (nunits
, group_size
)
2213 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2217 if (overrun_p
&& !can_overrun_p
)
2219 if (dump_enabled_p ())
2220 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2221 "Peeling for outer loop is not supported\n");
2224 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2227 if (single_element_p
)
2228 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2229 only correct for single element "interleaving" SLP. */
2230 *memory_access_type
= get_negative_load_store_type
2231 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2234 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2235 separated by the stride, until we have a complete vector.
2236 Fall back to scalar accesses if that isn't possible. */
2237 if (multiple_p (nunits
, group_size
))
2238 *memory_access_type
= VMAT_STRIDED_SLP
;
2240 *memory_access_type
= VMAT_ELEMENTWISE
;
2245 gcc_assert (!loop_vinfo
|| cmp
> 0);
2246 *memory_access_type
= VMAT_CONTIGUOUS
;
2252 /* We can always handle this case using elementwise accesses,
2253 but see if something more efficient is available. */
2254 *memory_access_type
= VMAT_ELEMENTWISE
;
2256 /* If there is a gap at the end of the group then these optimizations
2257 would access excess elements in the last iteration. */
2258 bool would_overrun_p
= (gap
!= 0);
2259 /* An overrun is fine if the trailing elements are smaller than the
2260 alignment boundary B. Every vector access will be a multiple of B
2261 and so we are guaranteed to access a non-gap element in the
2262 same B-sized block. */
2265 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2266 / vect_get_scalar_dr_size (first_dr_info
)))
2267 would_overrun_p
= false;
2269 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2270 && (can_overrun_p
|| !would_overrun_p
)
2271 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2273 /* First cope with the degenerate case of a single-element
2275 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2278 /* Otherwise try using LOAD/STORE_LANES. */
2279 else if (vls_type
== VLS_LOAD
2280 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2281 : vect_store_lanes_supported (vectype
, group_size
,
2284 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2285 overrun_p
= would_overrun_p
;
2288 /* If that fails, try using permuting loads. */
2289 else if (vls_type
== VLS_LOAD
2290 ? vect_grouped_load_supported (vectype
, single_element_p
,
2292 : vect_grouped_store_supported (vectype
, group_size
))
2294 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2295 overrun_p
= would_overrun_p
;
2299 /* As a last resort, trying using a gather load or scatter store.
2301 ??? Although the code can handle all group sizes correctly,
2302 it probably isn't a win to use separate strided accesses based
2303 on nearby locations. Or, even if it's a win over scalar code,
2304 it might not be a win over vectorizing at a lower VF, if that
2305 allows us to use contiguous accesses. */
2306 if (*memory_access_type
== VMAT_ELEMENTWISE
2309 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2311 *memory_access_type
= VMAT_GATHER_SCATTER
;
2314 if (*memory_access_type
== VMAT_GATHER_SCATTER
2315 || *memory_access_type
== VMAT_ELEMENTWISE
)
2317 *alignment_support_scheme
= dr_unaligned_supported
;
2318 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2322 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2323 *alignment_support_scheme
2324 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2328 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2330 /* STMT is the leader of the group. Check the operands of all the
2331 stmts of the group. */
2332 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2333 while (next_stmt_info
)
2335 tree op
= vect_get_store_rhs (next_stmt_info
);
2336 enum vect_def_type dt
;
2337 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2339 if (dump_enabled_p ())
2340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2341 "use not simple.\n");
2344 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2350 gcc_assert (can_overrun_p
);
2351 if (dump_enabled_p ())
2352 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2353 "Data access with gaps requires scalar "
2355 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2361 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2362 if there is a memory access type that the vectorized form can use,
2363 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2364 or scatters, fill in GS_INFO accordingly. In addition
2365 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2366 the target does not support the alignment scheme. *MISALIGNMENT
2367 is set according to the alignment of the access (including
2368 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2370 SLP says whether we're performing SLP rather than loop vectorization.
2371 MASKED_P is true if the statement is conditional on a vectorized mask.
2372 VECTYPE is the vector type that the vectorized statements will use.
2373 NCOPIES is the number of vector statements that will be needed. */
2376 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2377 tree vectype
, slp_tree slp_node
,
2378 bool masked_p
, vec_load_store_type vls_type
,
2379 unsigned int ncopies
,
2380 vect_memory_access_type
*memory_access_type
,
2381 poly_int64
*poffset
,
2382 dr_alignment_support
*alignment_support_scheme
,
2384 gather_scatter_info
*gs_info
)
2386 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2387 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2388 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2390 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2392 *memory_access_type
= VMAT_GATHER_SCATTER
;
2393 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2395 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2396 &gs_info
->offset_dt
,
2397 &gs_info
->offset_vectype
))
2399 if (dump_enabled_p ())
2400 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2401 "%s index use not simple.\n",
2402 vls_type
== VLS_LOAD
? "gather" : "scatter");
2405 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2407 if (vls_type
!= VLS_LOAD
)
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2411 "unsupported emulated scatter.\n");
2414 else if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2415 || !TYPE_VECTOR_SUBPARTS
2416 (gs_info
->offset_vectype
).is_constant ()
2417 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2418 (gs_info
->offset_vectype
),
2419 TYPE_VECTOR_SUBPARTS (vectype
)))
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2423 "unsupported vector types for emulated "
2428 /* Gather-scatter accesses perform only component accesses, alignment
2429 is irrelevant for them. */
2430 *alignment_support_scheme
= dr_unaligned_supported
;
2432 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2434 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2436 vls_type
, memory_access_type
, poffset
,
2437 alignment_support_scheme
,
2438 misalignment
, gs_info
))
2441 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2443 gcc_assert (!slp_node
);
2445 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2447 *memory_access_type
= VMAT_GATHER_SCATTER
;
2449 *memory_access_type
= VMAT_ELEMENTWISE
;
2450 /* Alignment is irrelevant here. */
2451 *alignment_support_scheme
= dr_unaligned_supported
;
2455 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2458 gcc_assert (vls_type
== VLS_LOAD
);
2459 *memory_access_type
= VMAT_INVARIANT
;
2460 /* Invariant accesses perform only component accesses, alignment
2461 is irrelevant for them. */
2462 *alignment_support_scheme
= dr_unaligned_supported
;
2467 *memory_access_type
= get_negative_load_store_type
2468 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2470 *memory_access_type
= VMAT_CONTIGUOUS
;
2471 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2473 *alignment_support_scheme
2474 = vect_supportable_dr_alignment (vinfo
,
2475 STMT_VINFO_DR_INFO (stmt_info
),
2476 vectype
, *misalignment
);
2480 if ((*memory_access_type
== VMAT_ELEMENTWISE
2481 || *memory_access_type
== VMAT_STRIDED_SLP
)
2482 && !nunits
.is_constant ())
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2486 "Not using elementwise accesses due to variable "
2487 "vectorization factor.\n");
2491 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2493 if (dump_enabled_p ())
2494 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2495 "unsupported unaligned access\n");
2499 /* FIXME: At the moment the cost model seems to underestimate the
2500 cost of using elementwise accesses. This check preserves the
2501 traditional behavior until that can be fixed. */
2502 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2503 if (!first_stmt_info
)
2504 first_stmt_info
= stmt_info
;
2505 if (*memory_access_type
== VMAT_ELEMENTWISE
2506 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2507 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2508 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2509 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2511 if (dump_enabled_p ())
2512 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2513 "not falling back to elementwise accesses\n");
2519 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2520 conditional operation STMT_INFO. When returning true, store the mask
2521 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2522 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2523 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2526 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2527 slp_tree slp_node
, unsigned mask_index
,
2528 tree
*mask
, slp_tree
*mask_node
,
2529 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2531 enum vect_def_type mask_dt
;
2533 slp_tree mask_node_1
;
2534 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2535 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2537 if (dump_enabled_p ())
2538 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2539 "mask use not simple.\n");
2543 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2545 if (dump_enabled_p ())
2546 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2547 "mask argument is not a boolean.\n");
2551 /* If the caller is not prepared for adjusting an external/constant
2552 SLP mask vector type fail. */
2555 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2557 if (dump_enabled_p ())
2558 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2559 "SLP mask argument is not vectorized.\n");
2563 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2565 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2567 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2569 if (dump_enabled_p ())
2570 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2571 "could not find an appropriate vector mask type.\n");
2575 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2576 TYPE_VECTOR_SUBPARTS (vectype
)))
2578 if (dump_enabled_p ())
2579 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2580 "vector mask type %T"
2581 " does not match vector data type %T.\n",
2582 mask_vectype
, vectype
);
2587 *mask_dt_out
= mask_dt
;
2588 *mask_vectype_out
= mask_vectype
;
2590 *mask_node
= mask_node_1
;
2594 /* Return true if stored value RHS is suitable for vectorizing store
2595 statement STMT_INFO. When returning true, store the type of the
2596 definition in *RHS_DT_OUT, the type of the vectorized store value in
2597 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2600 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2601 slp_tree slp_node
, tree rhs
,
2602 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2603 vec_load_store_type
*vls_type_out
)
2605 /* In the case this is a store from a constant make sure
2606 native_encode_expr can handle it. */
2607 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2609 if (dump_enabled_p ())
2610 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2611 "cannot encode constant as a byte sequence.\n");
2616 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2618 if (gimple_call_internal_p (call
)
2619 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2620 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2623 enum vect_def_type rhs_dt
;
2626 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2627 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2629 if (dump_enabled_p ())
2630 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2631 "use not simple.\n");
2635 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2636 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2638 if (dump_enabled_p ())
2639 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2640 "incompatible vector types.\n");
2644 *rhs_dt_out
= rhs_dt
;
2645 *rhs_vectype_out
= rhs_vectype
;
2646 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2647 *vls_type_out
= VLS_STORE_INVARIANT
;
2649 *vls_type_out
= VLS_STORE
;
2653 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2654 Note that we support masks with floating-point type, in which case the
2655 floats are interpreted as a bitmask. */
2658 vect_build_all_ones_mask (vec_info
*vinfo
,
2659 stmt_vec_info stmt_info
, tree masktype
)
2661 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2662 return build_int_cst (masktype
, -1);
2663 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2665 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2666 mask
= build_vector_from_val (masktype
, mask
);
2667 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2669 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2673 for (int j
= 0; j
< 6; ++j
)
2675 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2676 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2677 mask
= build_vector_from_val (masktype
, mask
);
2678 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2683 /* Build an all-zero merge value of type VECTYPE while vectorizing
2684 STMT_INFO as a gather load. */
2687 vect_build_zero_merge_argument (vec_info
*vinfo
,
2688 stmt_vec_info stmt_info
, tree vectype
)
2691 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2692 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2693 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2697 for (int j
= 0; j
< 6; ++j
)
2699 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2700 merge
= build_real (TREE_TYPE (vectype
), r
);
2704 merge
= build_vector_from_val (vectype
, merge
);
2705 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2708 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2709 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2710 the gather load operation. If the load is conditional, MASK is the
2711 unvectorized condition and MASK_DT is its definition type, otherwise
2715 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2716 gimple_stmt_iterator
*gsi
,
2718 gather_scatter_info
*gs_info
,
2721 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2722 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2723 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2724 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2725 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2726 edge pe
= loop_preheader_edge (loop
);
2727 enum { NARROW
, NONE
, WIDEN
} modifier
;
2728 poly_uint64 gather_off_nunits
2729 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2731 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2732 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2733 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2734 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2735 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2736 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2737 tree scaletype
= TREE_VALUE (arglist
);
2738 tree real_masktype
= masktype
;
2739 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2741 || TREE_CODE (masktype
) == INTEGER_TYPE
2742 || types_compatible_p (srctype
, masktype
)));
2743 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2744 masktype
= truth_type_for (srctype
);
2746 tree mask_halftype
= masktype
;
2747 tree perm_mask
= NULL_TREE
;
2748 tree mask_perm_mask
= NULL_TREE
;
2749 if (known_eq (nunits
, gather_off_nunits
))
2751 else if (known_eq (nunits
* 2, gather_off_nunits
))
2755 /* Currently widening gathers and scatters are only supported for
2756 fixed-length vectors. */
2757 int count
= gather_off_nunits
.to_constant ();
2758 vec_perm_builder
sel (count
, count
, 1);
2759 for (int i
= 0; i
< count
; ++i
)
2760 sel
.quick_push (i
| (count
/ 2));
2762 vec_perm_indices
indices (sel
, 1, count
);
2763 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2766 else if (known_eq (nunits
, gather_off_nunits
* 2))
2770 /* Currently narrowing gathers and scatters are only supported for
2771 fixed-length vectors. */
2772 int count
= nunits
.to_constant ();
2773 vec_perm_builder
sel (count
, count
, 1);
2774 sel
.quick_grow (count
);
2775 for (int i
= 0; i
< count
; ++i
)
2776 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2777 vec_perm_indices
indices (sel
, 2, count
);
2778 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2782 if (mask
&& masktype
== real_masktype
)
2784 for (int i
= 0; i
< count
; ++i
)
2785 sel
[i
] = i
| (count
/ 2);
2786 indices
.new_vector (sel
, 2, count
);
2787 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2790 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2795 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2796 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2798 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2799 if (!is_gimple_min_invariant (ptr
))
2802 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2803 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2804 gcc_assert (!new_bb
);
2807 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2809 tree vec_oprnd0
= NULL_TREE
;
2810 tree vec_mask
= NULL_TREE
;
2811 tree src_op
= NULL_TREE
;
2812 tree mask_op
= NULL_TREE
;
2813 tree prev_res
= NULL_TREE
;
2817 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2818 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2821 auto_vec
<tree
> vec_oprnds0
;
2822 auto_vec
<tree
> vec_masks
;
2823 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2824 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2825 gs_info
->offset
, &vec_oprnds0
);
2827 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2828 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2829 mask
, &vec_masks
, masktype
);
2830 for (int j
= 0; j
< ncopies
; ++j
)
2833 if (modifier
== WIDEN
&& (j
& 1))
2834 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2835 perm_mask
, stmt_info
, gsi
);
2837 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2839 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2841 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2842 TYPE_VECTOR_SUBPARTS (idxtype
)));
2843 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2844 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2845 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2846 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2852 if (mask_perm_mask
&& (j
& 1))
2853 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2854 mask_perm_mask
, stmt_info
, gsi
);
2857 if (modifier
== NARROW
)
2860 vec_mask
= vec_masks
[j
/ 2];
2863 vec_mask
= vec_masks
[j
];
2866 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2868 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2869 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2870 gcc_assert (known_eq (sub1
, sub2
));
2871 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2872 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2874 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2875 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2879 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2881 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2883 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2884 : VEC_UNPACK_LO_EXPR
,
2886 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2892 tree mask_arg
= mask_op
;
2893 if (masktype
!= real_masktype
)
2895 tree utype
, optype
= TREE_TYPE (mask_op
);
2896 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2897 utype
= real_masktype
;
2899 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2900 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2901 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2903 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2904 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2906 if (!useless_type_conversion_p (real_masktype
, utype
))
2908 gcc_assert (TYPE_PRECISION (utype
)
2909 <= TYPE_PRECISION (real_masktype
));
2910 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2911 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2912 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2915 src_op
= build_zero_cst (srctype
);
2917 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2920 if (!useless_type_conversion_p (vectype
, rettype
))
2922 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2923 TYPE_VECTOR_SUBPARTS (rettype
)));
2924 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2925 gimple_call_set_lhs (new_stmt
, op
);
2926 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2927 var
= make_ssa_name (vec_dest
);
2928 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2929 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2930 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2934 var
= make_ssa_name (vec_dest
, new_stmt
);
2935 gimple_call_set_lhs (new_stmt
, var
);
2936 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2939 if (modifier
== NARROW
)
2946 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2948 new_stmt
= SSA_NAME_DEF_STMT (var
);
2951 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
2953 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
2956 /* Prepare the base and offset in GS_INFO for vectorization.
2957 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2958 to the vectorized offset argument for the first copy of STMT_INFO.
2959 STMT_INFO is the statement described by GS_INFO and LOOP is the
2963 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
2964 class loop
*loop
, stmt_vec_info stmt_info
,
2965 slp_tree slp_node
, gather_scatter_info
*gs_info
,
2966 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
2968 gimple_seq stmts
= NULL
;
2969 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2973 edge pe
= loop_preheader_edge (loop
);
2974 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2975 gcc_assert (!new_bb
);
2978 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
);
2982 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
2983 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
2984 gs_info
->offset
, vec_offset
,
2985 gs_info
->offset_vectype
);
2989 /* Prepare to implement a grouped or strided load or store using
2990 the gather load or scatter store operation described by GS_INFO.
2991 STMT_INFO is the load or store statement.
2993 Set *DATAREF_BUMP to the amount that should be added to the base
2994 address after each copy of the vectorized statement. Set *VEC_OFFSET
2995 to an invariant offset vector in which element I has the value
2996 I * DR_STEP / SCALE. */
2999 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
3000 loop_vec_info loop_vinfo
,
3001 gather_scatter_info
*gs_info
,
3002 tree
*dataref_bump
, tree
*vec_offset
)
3004 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3005 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3007 tree bump
= size_binop (MULT_EXPR
,
3008 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
3009 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3010 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
);
3012 /* The offset given in GS_INFO can have pointer type, so use the element
3013 type of the vector instead. */
3014 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
3016 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3017 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
3018 ssize_int (gs_info
->scale
));
3019 step
= fold_convert (offset_type
, step
);
3021 /* Create {0, X, X*2, X*3, ...}. */
3022 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3023 build_zero_cst (offset_type
), step
);
3024 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
);
3027 /* Return the amount that should be added to a vector pointer to move
3028 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3029 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3033 vect_get_data_ptr_increment (vec_info
*vinfo
,
3034 dr_vec_info
*dr_info
, tree aggr_type
,
3035 vect_memory_access_type memory_access_type
)
3037 if (memory_access_type
== VMAT_INVARIANT
)
3038 return size_zero_node
;
3040 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3041 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3042 if (tree_int_cst_sgn (step
) == -1)
3043 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3047 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3050 vectorizable_bswap (vec_info
*vinfo
,
3051 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3052 gimple
**vec_stmt
, slp_tree slp_node
,
3054 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3057 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3058 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3061 op
= gimple_call_arg (stmt
, 0);
3062 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3063 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3065 /* Multiple types in SLP are handled by creating the appropriate number of
3066 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3071 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3073 gcc_assert (ncopies
>= 1);
3075 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3079 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3080 unsigned word_bytes
;
3081 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3084 /* The encoding uses one stepped pattern for each byte in the word. */
3085 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3086 for (unsigned i
= 0; i
< 3; ++i
)
3087 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3088 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3090 vec_perm_indices
indices (elts
, 1, num_bytes
);
3091 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3097 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3099 if (dump_enabled_p ())
3100 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3101 "incompatible vector types for invariants\n");
3105 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3106 DUMP_VECT_SCOPE ("vectorizable_bswap");
3107 record_stmt_cost (cost_vec
,
3108 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3109 record_stmt_cost (cost_vec
,
3111 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3112 vec_perm
, stmt_info
, 0, vect_body
);
3116 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3119 vec
<tree
> vec_oprnds
= vNULL
;
3120 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3122 /* Arguments are ready. create the new vector stmt. */
3125 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3128 tree tem
= make_ssa_name (char_vectype
);
3129 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3130 char_vectype
, vop
));
3131 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3132 tree tem2
= make_ssa_name (char_vectype
);
3133 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3134 tem
, tem
, bswap_vconst
);
3135 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3136 tem
= make_ssa_name (vectype
);
3137 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3139 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3141 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3143 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3147 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3149 vec_oprnds
.release ();
3153 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3154 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3155 in a single step. On success, store the binary pack code in
3159 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3160 tree_code
*convert_code
)
3162 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3163 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3167 int multi_step_cvt
= 0;
3168 auto_vec
<tree
, 8> interm_types
;
3169 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3170 &code
, &multi_step_cvt
, &interm_types
)
3174 *convert_code
= code
;
3178 /* Function vectorizable_call.
3180 Check if STMT_INFO performs a function call that can be vectorized.
3181 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3182 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3183 Return true if STMT_INFO is vectorizable in this way. */
3186 vectorizable_call (vec_info
*vinfo
,
3187 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3188 gimple
**vec_stmt
, slp_tree slp_node
,
3189 stmt_vector_for_cost
*cost_vec
)
3195 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3196 tree vectype_out
, vectype_in
;
3197 poly_uint64 nunits_in
;
3198 poly_uint64 nunits_out
;
3199 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3200 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3201 tree fndecl
, new_temp
, rhs_type
;
3202 enum vect_def_type dt
[4]
3203 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3204 vect_unknown_def_type
};
3205 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3206 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3207 int ndts
= ARRAY_SIZE (dt
);
3209 auto_vec
<tree
, 8> vargs
;
3210 enum { NARROW
, NONE
, WIDEN
} modifier
;
3214 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3217 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3221 /* Is STMT_INFO a vectorizable call? */
3222 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3226 if (gimple_call_internal_p (stmt
)
3227 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3228 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3229 /* Handled by vectorizable_load and vectorizable_store. */
3232 if (gimple_call_lhs (stmt
) == NULL_TREE
3233 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3236 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3238 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3240 /* Process function arguments. */
3241 rhs_type
= NULL_TREE
;
3242 vectype_in
= NULL_TREE
;
3243 nargs
= gimple_call_num_args (stmt
);
3245 /* Bail out if the function has more than four arguments, we do not have
3246 interesting builtin functions to vectorize with more than two arguments
3247 except for fma. No arguments is also not good. */
3248 if (nargs
== 0 || nargs
> 4)
3251 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3252 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3253 if (cfn
== CFN_GOMP_SIMD_LANE
)
3256 rhs_type
= unsigned_type_node
;
3260 if (internal_fn_p (cfn
))
3261 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3263 for (i
= 0; i
< nargs
; i
++)
3265 if ((int) i
== mask_opno
)
3267 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3268 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3273 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3274 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3276 if (dump_enabled_p ())
3277 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3278 "use not simple.\n");
3282 /* We can only handle calls with arguments of the same type. */
3284 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3286 if (dump_enabled_p ())
3287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3288 "argument types differ.\n");
3292 rhs_type
= TREE_TYPE (op
);
3295 vectype_in
= vectypes
[i
];
3296 else if (vectypes
[i
]
3297 && !types_compatible_p (vectypes
[i
], vectype_in
))
3299 if (dump_enabled_p ())
3300 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3301 "argument vector types differ.\n");
3305 /* If all arguments are external or constant defs, infer the vector type
3306 from the scalar type. */
3308 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3310 gcc_assert (vectype_in
);
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3315 "no vectype for scalar type %T\n", rhs_type
);
3319 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3320 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3321 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3322 by a pack of the two vectors into an SI vector. We would need
3323 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3324 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3326 if (dump_enabled_p ())
3327 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3328 "mismatched vector sizes %T and %T\n",
3329 vectype_in
, vectype_out
);
3333 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3334 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3336 if (dump_enabled_p ())
3337 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3338 "mixed mask and nonmask vector types\n");
3343 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3344 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3345 if (known_eq (nunits_in
* 2, nunits_out
))
3347 else if (known_eq (nunits_out
, nunits_in
))
3349 else if (known_eq (nunits_out
* 2, nunits_in
))
3354 /* We only handle functions that do not read or clobber memory. */
3355 if (gimple_vuse (stmt
))
3357 if (dump_enabled_p ())
3358 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3359 "function reads from or writes to memory.\n");
3363 /* For now, we only vectorize functions if a target specific builtin
3364 is available. TODO -- in some cases, it might be profitable to
3365 insert the calls for pieces of the vector, in order to be able
3366 to vectorize other operations in the loop. */
3368 internal_fn ifn
= IFN_LAST
;
3369 tree callee
= gimple_call_fndecl (stmt
);
3371 /* First try using an internal function. */
3372 tree_code convert_code
= ERROR_MARK
;
3374 && (modifier
== NONE
3375 || (modifier
== NARROW
3376 && simple_integer_narrowing (vectype_out
, vectype_in
,
3378 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3381 /* If that fails, try asking for a target-specific built-in function. */
3382 if (ifn
== IFN_LAST
)
3384 if (cfn
!= CFN_LAST
)
3385 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3386 (cfn
, vectype_out
, vectype_in
);
3387 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3388 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3389 (callee
, vectype_out
, vectype_in
);
3392 if (ifn
== IFN_LAST
&& !fndecl
)
3394 if (cfn
== CFN_GOMP_SIMD_LANE
3397 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3398 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3399 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3400 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3402 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3403 { 0, 1, 2, ... vf - 1 } vector. */
3404 gcc_assert (nargs
== 0);
3406 else if (modifier
== NONE
3407 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3408 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3409 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3410 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3411 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3412 slp_op
, vectype_in
, cost_vec
);
3415 if (dump_enabled_p ())
3416 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3417 "function is not vectorizable.\n");
3424 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3425 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3427 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3429 /* Sanity check: make sure that at least one copy of the vectorized stmt
3430 needs to be generated. */
3431 gcc_assert (ncopies
>= 1);
3433 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3434 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3435 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3436 if (!vec_stmt
) /* transformation not required. */
3439 for (i
= 0; i
< nargs
; ++i
)
3440 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3442 if (dump_enabled_p ())
3443 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3444 "incompatible vector types for invariants\n");
3447 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3448 DUMP_VECT_SCOPE ("vectorizable_call");
3449 vect_model_simple_cost (vinfo
, stmt_info
,
3450 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3451 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3452 record_stmt_cost (cost_vec
, ncopies
/ 2,
3453 vec_promote_demote
, stmt_info
, 0, vect_body
);
3456 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3457 && (reduc_idx
>= 0 || mask_opno
>= 0))
3460 && (cond_fn
== IFN_LAST
3461 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3462 OPTIMIZE_FOR_SPEED
)))
3464 if (dump_enabled_p ())
3465 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3466 "can't use a fully-masked loop because no"
3467 " conditional operation is available.\n");
3468 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3472 unsigned int nvectors
3474 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3476 tree scalar_mask
= NULL_TREE
;
3478 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3479 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3480 vectype_out
, scalar_mask
);
3488 if (dump_enabled_p ())
3489 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3492 scalar_dest
= gimple_call_lhs (stmt
);
3493 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3495 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3496 unsigned int vect_nargs
= nargs
;
3497 if (masked_loop_p
&& reduc_idx
>= 0)
3503 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3505 tree prev_res
= NULL_TREE
;
3506 vargs
.safe_grow (vect_nargs
, true);
3507 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3508 for (j
= 0; j
< ncopies
; ++j
)
3510 /* Build argument list for the vectorized call. */
3513 vec
<tree
> vec_oprnds0
;
3515 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3516 vec_oprnds0
= vec_defs
[0];
3518 /* Arguments are ready. Create the new vector stmt. */
3519 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3522 if (masked_loop_p
&& reduc_idx
>= 0)
3524 unsigned int vec_num
= vec_oprnds0
.length ();
3525 /* Always true for SLP. */
3526 gcc_assert (ncopies
== 1);
3527 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, vec_num
,
3531 for (k
= 0; k
< nargs
; k
++)
3533 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3534 vargs
[varg
++] = vec_oprndsk
[i
];
3536 if (masked_loop_p
&& reduc_idx
>= 0)
3537 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3539 if (modifier
== NARROW
)
3541 /* We don't define any narrowing conditional functions
3543 gcc_assert (mask_opno
< 0);
3544 tree half_res
= make_ssa_name (vectype_in
);
3546 = gimple_build_call_internal_vec (ifn
, vargs
);
3547 gimple_call_set_lhs (call
, half_res
);
3548 gimple_call_set_nothrow (call
, true);
3549 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3552 prev_res
= half_res
;
3555 new_temp
= make_ssa_name (vec_dest
);
3556 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3557 prev_res
, half_res
);
3558 vect_finish_stmt_generation (vinfo
, stmt_info
,
3563 if (mask_opno
>= 0 && masked_loop_p
)
3565 unsigned int vec_num
= vec_oprnds0
.length ();
3566 /* Always true for SLP. */
3567 gcc_assert (ncopies
== 1);
3568 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3570 vargs
[mask_opno
] = prepare_load_store_mask
3571 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3575 if (ifn
!= IFN_LAST
)
3576 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3578 call
= gimple_build_call_vec (fndecl
, vargs
);
3579 new_temp
= make_ssa_name (vec_dest
, call
);
3580 gimple_call_set_lhs (call
, new_temp
);
3581 gimple_call_set_nothrow (call
, true);
3582 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3585 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3591 if (masked_loop_p
&& reduc_idx
>= 0)
3592 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, ncopies
,
3594 for (i
= 0; i
< nargs
; i
++)
3596 op
= gimple_call_arg (stmt
, i
);
3599 vec_defs
.quick_push (vNULL
);
3600 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3604 vargs
[varg
++] = vec_defs
[i
][j
];
3606 if (masked_loop_p
&& reduc_idx
>= 0)
3607 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3609 if (mask_opno
>= 0 && masked_loop_p
)
3611 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3614 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3615 vargs
[mask_opno
], gsi
);
3619 if (cfn
== CFN_GOMP_SIMD_LANE
)
3621 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3623 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3624 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3625 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3626 new_temp
= make_ssa_name (vec_dest
);
3627 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3628 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3630 else if (modifier
== NARROW
)
3632 /* We don't define any narrowing conditional functions at
3634 gcc_assert (mask_opno
< 0);
3635 tree half_res
= make_ssa_name (vectype_in
);
3636 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3637 gimple_call_set_lhs (call
, half_res
);
3638 gimple_call_set_nothrow (call
, true);
3639 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3642 prev_res
= half_res
;
3645 new_temp
= make_ssa_name (vec_dest
);
3646 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3647 prev_res
, half_res
);
3648 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3653 if (ifn
!= IFN_LAST
)
3654 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3656 call
= gimple_build_call_vec (fndecl
, vargs
);
3657 new_temp
= make_ssa_name (vec_dest
, call
);
3658 gimple_call_set_lhs (call
, new_temp
);
3659 gimple_call_set_nothrow (call
, true);
3660 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3664 if (j
== (modifier
== NARROW
? 1 : 0))
3665 *vec_stmt
= new_stmt
;
3666 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3668 for (i
= 0; i
< nargs
; i
++)
3670 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3671 vec_oprndsi
.release ();
3674 else if (modifier
== NARROW
)
3676 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3677 /* We don't define any narrowing conditional functions at present. */
3678 gcc_assert (mask_opno
< 0);
3679 for (j
= 0; j
< ncopies
; ++j
)
3681 /* Build argument list for the vectorized call. */
3683 vargs
.create (nargs
* 2);
3689 vec
<tree
> vec_oprnds0
;
3691 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3692 vec_oprnds0
= vec_defs
[0];
3694 /* Arguments are ready. Create the new vector stmt. */
3695 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3699 for (k
= 0; k
< nargs
; k
++)
3701 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3702 vargs
.quick_push (vec_oprndsk
[i
]);
3703 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3706 if (ifn
!= IFN_LAST
)
3707 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3709 call
= gimple_build_call_vec (fndecl
, vargs
);
3710 new_temp
= make_ssa_name (vec_dest
, call
);
3711 gimple_call_set_lhs (call
, new_temp
);
3712 gimple_call_set_nothrow (call
, true);
3713 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3714 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3719 for (i
= 0; i
< nargs
; i
++)
3721 op
= gimple_call_arg (stmt
, i
);
3724 vec_defs
.quick_push (vNULL
);
3725 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3726 op
, &vec_defs
[i
], vectypes
[i
]);
3728 vec_oprnd0
= vec_defs
[i
][2*j
];
3729 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3731 vargs
.quick_push (vec_oprnd0
);
3732 vargs
.quick_push (vec_oprnd1
);
3735 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3736 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3737 gimple_call_set_lhs (new_stmt
, new_temp
);
3738 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3740 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3744 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3746 for (i
= 0; i
< nargs
; i
++)
3748 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3749 vec_oprndsi
.release ();
3753 /* No current target implements this case. */
3758 /* The call in STMT might prevent it from being removed in dce.
3759 We however cannot remove it here, due to the way the ssa name
3760 it defines is mapped to the new definition. So just replace
3761 rhs of the statement with something harmless. */
3766 stmt_info
= vect_orig_stmt (stmt_info
);
3767 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3770 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3771 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3777 struct simd_call_arg_info
3781 HOST_WIDE_INT linear_step
;
3782 enum vect_def_type dt
;
3784 bool simd_lane_linear
;
3787 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3788 is linear within simd lane (but not within whole loop), note it in
3792 vect_simd_lane_linear (tree op
, class loop
*loop
,
3793 struct simd_call_arg_info
*arginfo
)
3795 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3797 if (!is_gimple_assign (def_stmt
)
3798 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3799 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3802 tree base
= gimple_assign_rhs1 (def_stmt
);
3803 HOST_WIDE_INT linear_step
= 0;
3804 tree v
= gimple_assign_rhs2 (def_stmt
);
3805 while (TREE_CODE (v
) == SSA_NAME
)
3808 def_stmt
= SSA_NAME_DEF_STMT (v
);
3809 if (is_gimple_assign (def_stmt
))
3810 switch (gimple_assign_rhs_code (def_stmt
))
3813 t
= gimple_assign_rhs2 (def_stmt
);
3814 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3816 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3817 v
= gimple_assign_rhs1 (def_stmt
);
3820 t
= gimple_assign_rhs2 (def_stmt
);
3821 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3823 linear_step
= tree_to_shwi (t
);
3824 v
= gimple_assign_rhs1 (def_stmt
);
3827 t
= gimple_assign_rhs1 (def_stmt
);
3828 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3829 || (TYPE_PRECISION (TREE_TYPE (v
))
3830 < TYPE_PRECISION (TREE_TYPE (t
))))
3839 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3841 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3842 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3847 arginfo
->linear_step
= linear_step
;
3849 arginfo
->simd_lane_linear
= true;
3855 /* Return the number of elements in vector type VECTYPE, which is associated
3856 with a SIMD clone. At present these vectors always have a constant
3859 static unsigned HOST_WIDE_INT
3860 simd_clone_subparts (tree vectype
)
3862 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3865 /* Function vectorizable_simd_clone_call.
3867 Check if STMT_INFO performs a function call that can be vectorized
3868 by calling a simd clone of the function.
3869 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3870 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3871 Return true if STMT_INFO is vectorizable in this way. */
3874 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3875 gimple_stmt_iterator
*gsi
,
3876 gimple
**vec_stmt
, slp_tree slp_node
,
3877 stmt_vector_for_cost
*)
3882 tree vec_oprnd0
= NULL_TREE
;
3885 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3886 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3887 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3888 tree fndecl
, new_temp
;
3890 auto_vec
<simd_call_arg_info
> arginfo
;
3891 vec
<tree
> vargs
= vNULL
;
3893 tree lhs
, rtype
, ratype
;
3894 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3896 /* Is STMT a vectorizable call? */
3897 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3901 fndecl
= gimple_call_fndecl (stmt
);
3902 if (fndecl
== NULL_TREE
)
3905 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3906 if (node
== NULL
|| node
->simd_clones
== NULL
)
3909 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3912 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3916 if (gimple_call_lhs (stmt
)
3917 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3920 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3922 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3924 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3931 /* Process function arguments. */
3932 nargs
= gimple_call_num_args (stmt
);
3934 /* Bail out if the function has zero arguments. */
3938 arginfo
.reserve (nargs
, true);
3940 for (i
= 0; i
< nargs
; i
++)
3942 simd_call_arg_info thisarginfo
;
3945 thisarginfo
.linear_step
= 0;
3946 thisarginfo
.align
= 0;
3947 thisarginfo
.op
= NULL_TREE
;
3948 thisarginfo
.simd_lane_linear
= false;
3950 op
= gimple_call_arg (stmt
, i
);
3951 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3952 &thisarginfo
.vectype
)
3953 || thisarginfo
.dt
== vect_uninitialized_def
)
3955 if (dump_enabled_p ())
3956 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3957 "use not simple.\n");
3961 if (thisarginfo
.dt
== vect_constant_def
3962 || thisarginfo
.dt
== vect_external_def
)
3963 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3966 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3967 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3969 if (dump_enabled_p ())
3970 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3971 "vector mask arguments are not supported\n");
3976 /* For linear arguments, the analyze phase should have saved
3977 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3978 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3979 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3981 gcc_assert (vec_stmt
);
3982 thisarginfo
.linear_step
3983 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3985 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3986 thisarginfo
.simd_lane_linear
3987 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3988 == boolean_true_node
);
3989 /* If loop has been peeled for alignment, we need to adjust it. */
3990 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3991 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3992 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3994 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3995 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3996 tree opt
= TREE_TYPE (thisarginfo
.op
);
3997 bias
= fold_convert (TREE_TYPE (step
), bias
);
3998 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4000 = fold_build2 (POINTER_TYPE_P (opt
)
4001 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4002 thisarginfo
.op
, bias
);
4006 && thisarginfo
.dt
!= vect_constant_def
4007 && thisarginfo
.dt
!= vect_external_def
4009 && TREE_CODE (op
) == SSA_NAME
4010 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4012 && tree_fits_shwi_p (iv
.step
))
4014 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4015 thisarginfo
.op
= iv
.base
;
4017 else if ((thisarginfo
.dt
== vect_constant_def
4018 || thisarginfo
.dt
== vect_external_def
)
4019 && POINTER_TYPE_P (TREE_TYPE (op
)))
4020 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4021 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4023 if (POINTER_TYPE_P (TREE_TYPE (op
))
4024 && !thisarginfo
.linear_step
4026 && thisarginfo
.dt
!= vect_constant_def
4027 && thisarginfo
.dt
!= vect_external_def
4030 && TREE_CODE (op
) == SSA_NAME
)
4031 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4033 arginfo
.quick_push (thisarginfo
);
4036 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4037 if (!vf
.is_constant ())
4039 if (dump_enabled_p ())
4040 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4041 "not considering SIMD clones; not yet supported"
4042 " for variable-width vectors.\n");
4046 unsigned int badness
= 0;
4047 struct cgraph_node
*bestn
= NULL
;
4048 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4049 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4051 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4052 n
= n
->simdclone
->next_clone
)
4054 unsigned int this_badness
= 0;
4055 unsigned int num_calls
;
4056 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
4057 || n
->simdclone
->nargs
!= nargs
)
4060 this_badness
+= exact_log2 (num_calls
) * 4096;
4061 if (n
->simdclone
->inbranch
)
4062 this_badness
+= 8192;
4063 int target_badness
= targetm
.simd_clone
.usable (n
);
4064 if (target_badness
< 0)
4066 this_badness
+= target_badness
* 512;
4067 /* FORNOW: Have to add code to add the mask argument. */
4068 if (n
->simdclone
->inbranch
)
4070 for (i
= 0; i
< nargs
; i
++)
4072 switch (n
->simdclone
->args
[i
].arg_type
)
4074 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4075 if (!useless_type_conversion_p
4076 (n
->simdclone
->args
[i
].orig_type
,
4077 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4079 else if (arginfo
[i
].dt
== vect_constant_def
4080 || arginfo
[i
].dt
== vect_external_def
4081 || arginfo
[i
].linear_step
)
4084 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4085 if (arginfo
[i
].dt
!= vect_constant_def
4086 && arginfo
[i
].dt
!= vect_external_def
)
4089 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4090 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4091 if (arginfo
[i
].dt
== vect_constant_def
4092 || arginfo
[i
].dt
== vect_external_def
4093 || (arginfo
[i
].linear_step
4094 != n
->simdclone
->args
[i
].linear_step
))
4097 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4098 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4099 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4100 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4101 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4102 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4106 case SIMD_CLONE_ARG_TYPE_MASK
:
4109 if (i
== (size_t) -1)
4111 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4116 if (arginfo
[i
].align
)
4117 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4118 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4120 if (i
== (size_t) -1)
4122 if (bestn
== NULL
|| this_badness
< badness
)
4125 badness
= this_badness
;
4132 for (i
= 0; i
< nargs
; i
++)
4133 if ((arginfo
[i
].dt
== vect_constant_def
4134 || arginfo
[i
].dt
== vect_external_def
)
4135 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4137 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4138 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4140 if (arginfo
[i
].vectype
== NULL
4141 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4142 simd_clone_subparts (arginfo
[i
].vectype
)))
4146 fndecl
= bestn
->decl
;
4147 nunits
= bestn
->simdclone
->simdlen
;
4148 ncopies
= vector_unroll_factor (vf
, nunits
);
4150 /* If the function isn't const, only allow it in simd loops where user
4151 has asserted that at least nunits consecutive iterations can be
4152 performed using SIMD instructions. */
4153 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4154 && gimple_vuse (stmt
))
4157 /* Sanity check: make sure that at least one copy of the vectorized stmt
4158 needs to be generated. */
4159 gcc_assert (ncopies
>= 1);
4161 if (!vec_stmt
) /* transformation not required. */
4163 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4164 for (i
= 0; i
< nargs
; i
++)
4165 if ((bestn
->simdclone
->args
[i
].arg_type
4166 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4167 || (bestn
->simdclone
->args
[i
].arg_type
4168 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4170 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4173 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4174 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4175 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4176 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4177 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4178 tree sll
= arginfo
[i
].simd_lane_linear
4179 ? boolean_true_node
: boolean_false_node
;
4180 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4182 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4183 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4184 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4185 dt, slp_node, cost_vec); */
4191 if (dump_enabled_p ())
4192 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4195 scalar_dest
= gimple_call_lhs (stmt
);
4196 vec_dest
= NULL_TREE
;
4201 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4202 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4203 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4206 rtype
= TREE_TYPE (ratype
);
4210 auto_vec
<vec
<tree
> > vec_oprnds
;
4211 auto_vec
<unsigned> vec_oprnds_i
;
4212 vec_oprnds
.safe_grow_cleared (nargs
, true);
4213 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4214 for (j
= 0; j
< ncopies
; ++j
)
4216 /* Build argument list for the vectorized call. */
4218 vargs
.create (nargs
);
4222 for (i
= 0; i
< nargs
; i
++)
4224 unsigned int k
, l
, m
, o
;
4226 op
= gimple_call_arg (stmt
, i
);
4227 switch (bestn
->simdclone
->args
[i
].arg_type
)
4229 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4230 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4231 o
= vector_unroll_factor (nunits
,
4232 simd_clone_subparts (atype
));
4233 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4235 if (simd_clone_subparts (atype
)
4236 < simd_clone_subparts (arginfo
[i
].vectype
))
4238 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4239 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4240 / simd_clone_subparts (atype
));
4241 gcc_assert ((k
& (k
- 1)) == 0);
4244 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4245 ncopies
* o
/ k
, op
,
4247 vec_oprnds_i
[i
] = 0;
4248 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4252 vec_oprnd0
= arginfo
[i
].op
;
4253 if ((m
& (k
- 1)) == 0)
4254 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4256 arginfo
[i
].op
= vec_oprnd0
;
4258 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4260 bitsize_int ((m
& (k
- 1)) * prec
));
4262 = gimple_build_assign (make_ssa_name (atype
),
4264 vect_finish_stmt_generation (vinfo
, stmt_info
,
4266 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4270 k
= (simd_clone_subparts (atype
)
4271 / simd_clone_subparts (arginfo
[i
].vectype
));
4272 gcc_assert ((k
& (k
- 1)) == 0);
4273 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4275 vec_alloc (ctor_elts
, k
);
4278 for (l
= 0; l
< k
; l
++)
4280 if (m
== 0 && l
== 0)
4282 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4286 vec_oprnds_i
[i
] = 0;
4287 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4290 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4291 arginfo
[i
].op
= vec_oprnd0
;
4294 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4298 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4302 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4304 = gimple_build_assign (make_ssa_name (atype
),
4306 vect_finish_stmt_generation (vinfo
, stmt_info
,
4308 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4311 vargs
.safe_push (vec_oprnd0
);
4314 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4316 = gimple_build_assign (make_ssa_name (atype
),
4318 vect_finish_stmt_generation (vinfo
, stmt_info
,
4320 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4325 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4326 vargs
.safe_push (op
);
4328 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4329 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4334 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4335 &stmts
, true, NULL_TREE
);
4339 edge pe
= loop_preheader_edge (loop
);
4340 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4341 gcc_assert (!new_bb
);
4343 if (arginfo
[i
].simd_lane_linear
)
4345 vargs
.safe_push (arginfo
[i
].op
);
4348 tree phi_res
= copy_ssa_name (op
);
4349 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4350 add_phi_arg (new_phi
, arginfo
[i
].op
,
4351 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4353 = POINTER_TYPE_P (TREE_TYPE (op
))
4354 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4355 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4356 ? sizetype
: TREE_TYPE (op
);
4358 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4360 tree tcst
= wide_int_to_tree (type
, cst
);
4361 tree phi_arg
= copy_ssa_name (op
);
4363 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4364 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4365 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4366 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4368 arginfo
[i
].op
= phi_res
;
4369 vargs
.safe_push (phi_res
);
4374 = POINTER_TYPE_P (TREE_TYPE (op
))
4375 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4376 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4377 ? sizetype
: TREE_TYPE (op
);
4379 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4381 tree tcst
= wide_int_to_tree (type
, cst
);
4382 new_temp
= make_ssa_name (TREE_TYPE (op
));
4384 = gimple_build_assign (new_temp
, code
,
4385 arginfo
[i
].op
, tcst
);
4386 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4387 vargs
.safe_push (new_temp
);
4390 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4391 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4392 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4393 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4394 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4395 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4401 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4405 || known_eq (simd_clone_subparts (rtype
), nunits
));
4407 new_temp
= create_tmp_var (ratype
);
4408 else if (useless_type_conversion_p (vectype
, rtype
))
4409 new_temp
= make_ssa_name (vec_dest
, new_call
);
4411 new_temp
= make_ssa_name (rtype
, new_call
);
4412 gimple_call_set_lhs (new_call
, new_temp
);
4414 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4415 gimple
*new_stmt
= new_call
;
4419 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4422 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4423 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4424 k
= vector_unroll_factor (nunits
,
4425 simd_clone_subparts (vectype
));
4426 gcc_assert ((k
& (k
- 1)) == 0);
4427 for (l
= 0; l
< k
; l
++)
4432 t
= build_fold_addr_expr (new_temp
);
4433 t
= build2 (MEM_REF
, vectype
, t
,
4434 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4437 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4438 bitsize_int (prec
), bitsize_int (l
* prec
));
4439 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4440 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4442 if (j
== 0 && l
== 0)
4443 *vec_stmt
= new_stmt
;
4444 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4448 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4451 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4453 unsigned int k
= (simd_clone_subparts (vectype
)
4454 / simd_clone_subparts (rtype
));
4455 gcc_assert ((k
& (k
- 1)) == 0);
4456 if ((j
& (k
- 1)) == 0)
4457 vec_alloc (ret_ctor_elts
, k
);
4461 o
= vector_unroll_factor (nunits
,
4462 simd_clone_subparts (rtype
));
4463 for (m
= 0; m
< o
; m
++)
4465 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4466 size_int (m
), NULL_TREE
, NULL_TREE
);
4467 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4469 vect_finish_stmt_generation (vinfo
, stmt_info
,
4471 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4472 gimple_assign_lhs (new_stmt
));
4474 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4477 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4478 if ((j
& (k
- 1)) != k
- 1)
4480 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4482 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4483 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4485 if ((unsigned) j
== k
- 1)
4486 *vec_stmt
= new_stmt
;
4487 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4492 tree t
= build_fold_addr_expr (new_temp
);
4493 t
= build2 (MEM_REF
, vectype
, t
,
4494 build_int_cst (TREE_TYPE (t
), 0));
4495 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4496 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4497 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4499 else if (!useless_type_conversion_p (vectype
, rtype
))
4501 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4503 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4504 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4509 *vec_stmt
= new_stmt
;
4510 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4513 for (i
= 0; i
< nargs
; ++i
)
4515 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4520 /* The call in STMT might prevent it from being removed in dce.
4521 We however cannot remove it here, due to the way the ssa name
4522 it defines is mapped to the new definition. So just replace
4523 rhs of the statement with something harmless. */
4531 type
= TREE_TYPE (scalar_dest
);
4532 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4533 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4536 new_stmt
= gimple_build_nop ();
4537 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4538 unlink_stmt_vdef (stmt
);
4544 /* Function vect_gen_widened_results_half
4546 Create a vector stmt whose code, type, number of arguments, and result
4547 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4548 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4549 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4550 needs to be created (DECL is a function-decl of a target-builtin).
4551 STMT_INFO is the original scalar stmt that we are vectorizing. */
4554 vect_gen_widened_results_half (vec_info
*vinfo
, enum tree_code code
,
4555 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4556 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4557 stmt_vec_info stmt_info
)
4562 /* Generate half of the widened result: */
4563 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4564 if (op_type
!= binary_op
)
4566 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4567 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4568 gimple_assign_set_lhs (new_stmt
, new_temp
);
4569 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4575 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4576 For multi-step conversions store the resulting vectors and call the function
4580 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4582 stmt_vec_info stmt_info
,
4583 vec
<tree
> &vec_dsts
,
4584 gimple_stmt_iterator
*gsi
,
4585 slp_tree slp_node
, enum tree_code code
)
4588 tree vop0
, vop1
, new_tmp
, vec_dest
;
4590 vec_dest
= vec_dsts
.pop ();
4592 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4594 /* Create demotion operation. */
4595 vop0
= (*vec_oprnds
)[i
];
4596 vop1
= (*vec_oprnds
)[i
+ 1];
4597 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4598 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4599 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4600 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4603 /* Store the resulting vector for next recursive call. */
4604 (*vec_oprnds
)[i
/2] = new_tmp
;
4607 /* This is the last step of the conversion sequence. Store the
4608 vectors in SLP_NODE or in vector info of the scalar statement
4609 (or in STMT_VINFO_RELATED_STMT chain). */
4611 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4613 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4617 /* For multi-step demotion operations we first generate demotion operations
4618 from the source type to the intermediate types, and then combine the
4619 results (stored in VEC_OPRNDS) in demotion operation to the destination
4623 /* At each level of recursion we have half of the operands we had at the
4625 vec_oprnds
->truncate ((i
+1)/2);
4626 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4628 stmt_info
, vec_dsts
, gsi
,
4629 slp_node
, VEC_PACK_TRUNC_EXPR
);
4632 vec_dsts
.quick_push (vec_dest
);
4636 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4637 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4638 STMT_INFO. For multi-step conversions store the resulting vectors and
4639 call the function recursively. */
4642 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
4643 vec
<tree
> *vec_oprnds0
,
4644 vec
<tree
> *vec_oprnds1
,
4645 stmt_vec_info stmt_info
, tree vec_dest
,
4646 gimple_stmt_iterator
*gsi
,
4647 enum tree_code code1
,
4648 enum tree_code code2
, int op_type
)
4651 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4652 gimple
*new_stmt1
, *new_stmt2
;
4653 vec
<tree
> vec_tmp
= vNULL
;
4655 vec_tmp
.create (vec_oprnds0
->length () * 2);
4656 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4658 if (op_type
== binary_op
)
4659 vop1
= (*vec_oprnds1
)[i
];
4663 /* Generate the two halves of promotion operation. */
4664 new_stmt1
= vect_gen_widened_results_half (vinfo
, code1
, vop0
, vop1
,
4665 op_type
, vec_dest
, gsi
,
4667 new_stmt2
= vect_gen_widened_results_half (vinfo
, code2
, vop0
, vop1
,
4668 op_type
, vec_dest
, gsi
,
4670 if (is_gimple_call (new_stmt1
))
4672 new_tmp1
= gimple_call_lhs (new_stmt1
);
4673 new_tmp2
= gimple_call_lhs (new_stmt2
);
4677 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4678 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4681 /* Store the results for the next step. */
4682 vec_tmp
.quick_push (new_tmp1
);
4683 vec_tmp
.quick_push (new_tmp2
);
4686 vec_oprnds0
->release ();
4687 *vec_oprnds0
= vec_tmp
;
4690 /* Create vectorized promotion stmts for widening stmts using only half the
4691 potential vector size for input. */
4693 vect_create_half_widening_stmts (vec_info
*vinfo
,
4694 vec
<tree
> *vec_oprnds0
,
4695 vec
<tree
> *vec_oprnds1
,
4696 stmt_vec_info stmt_info
, tree vec_dest
,
4697 gimple_stmt_iterator
*gsi
,
4698 enum tree_code code1
,
4706 vec
<tree
> vec_tmp
= vNULL
;
4708 vec_tmp
.create (vec_oprnds0
->length ());
4709 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4711 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
4713 gcc_assert (op_type
== binary_op
);
4714 vop1
= (*vec_oprnds1
)[i
];
4716 /* Widen the first vector input. */
4717 out_type
= TREE_TYPE (vec_dest
);
4718 new_tmp1
= make_ssa_name (out_type
);
4719 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
4720 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
4721 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
4723 /* Widen the second vector input. */
4724 new_tmp2
= make_ssa_name (out_type
);
4725 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
4726 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
4727 /* Perform the operation. With both vector inputs widened. */
4728 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, new_tmp2
);
4732 /* Perform the operation. With the single vector input widened. */
4733 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, vop1
);
4736 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
4737 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
4738 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
4740 /* Store the results for the next step. */
4741 vec_tmp
.quick_push (new_tmp3
);
4744 vec_oprnds0
->release ();
4745 *vec_oprnds0
= vec_tmp
;
4749 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4750 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4751 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4752 Return true if STMT_INFO is vectorizable in this way. */
4755 vectorizable_conversion (vec_info
*vinfo
,
4756 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4757 gimple
**vec_stmt
, slp_tree slp_node
,
4758 stmt_vector_for_cost
*cost_vec
)
4762 tree op0
, op1
= NULL_TREE
;
4763 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4764 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4765 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4767 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4769 poly_uint64 nunits_in
;
4770 poly_uint64 nunits_out
;
4771 tree vectype_out
, vectype_in
;
4773 tree lhs_type
, rhs_type
;
4774 enum { NARROW
, NONE
, WIDEN
} modifier
;
4775 vec
<tree
> vec_oprnds0
= vNULL
;
4776 vec
<tree
> vec_oprnds1
= vNULL
;
4778 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4779 int multi_step_cvt
= 0;
4780 vec
<tree
> interm_types
= vNULL
;
4781 tree intermediate_type
, cvt_type
= NULL_TREE
;
4783 unsigned short fltsz
;
4785 /* Is STMT a vectorizable conversion? */
4787 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4790 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4794 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4798 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4801 code
= gimple_assign_rhs_code (stmt
);
4802 if (!CONVERT_EXPR_CODE_P (code
)
4803 && code
!= FIX_TRUNC_EXPR
4804 && code
!= FLOAT_EXPR
4805 && code
!= WIDEN_PLUS_EXPR
4806 && code
!= WIDEN_MINUS_EXPR
4807 && code
!= WIDEN_MULT_EXPR
4808 && code
!= WIDEN_LSHIFT_EXPR
)
4811 bool widen_arith
= (code
== WIDEN_PLUS_EXPR
4812 || code
== WIDEN_MINUS_EXPR
4813 || code
== WIDEN_MULT_EXPR
4814 || code
== WIDEN_LSHIFT_EXPR
);
4815 op_type
= TREE_CODE_LENGTH (code
);
4817 /* Check types of lhs and rhs. */
4818 scalar_dest
= gimple_assign_lhs (stmt
);
4819 lhs_type
= TREE_TYPE (scalar_dest
);
4820 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4822 /* Check the operands of the operation. */
4823 slp_tree slp_op0
, slp_op1
= NULL
;
4824 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4825 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4827 if (dump_enabled_p ())
4828 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4829 "use not simple.\n");
4833 rhs_type
= TREE_TYPE (op0
);
4834 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4835 && !((INTEGRAL_TYPE_P (lhs_type
)
4836 && INTEGRAL_TYPE_P (rhs_type
))
4837 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4838 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4841 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4842 && ((INTEGRAL_TYPE_P (lhs_type
)
4843 && !type_has_mode_precision_p (lhs_type
))
4844 || (INTEGRAL_TYPE_P (rhs_type
)
4845 && !type_has_mode_precision_p (rhs_type
))))
4847 if (dump_enabled_p ())
4848 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4849 "type conversion to/from bit-precision unsupported."
4854 if (op_type
== binary_op
)
4856 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
4857 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
4859 op1
= gimple_assign_rhs2 (stmt
);
4861 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4862 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4864 if (dump_enabled_p ())
4865 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4866 "use not simple.\n");
4869 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4872 vectype_in
= vectype1_in
;
4875 /* If op0 is an external or constant def, infer the vector type
4876 from the scalar type. */
4878 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4880 gcc_assert (vectype_in
);
4883 if (dump_enabled_p ())
4884 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4885 "no vectype for scalar type %T\n", rhs_type
);
4890 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4891 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4893 if (dump_enabled_p ())
4894 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4895 "can't convert between boolean and non "
4896 "boolean vectors %T\n", rhs_type
);
4901 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4902 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4903 if (known_eq (nunits_out
, nunits_in
))
4908 else if (multiple_p (nunits_out
, nunits_in
))
4912 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4916 /* Multiple types in SLP are handled by creating the appropriate number of
4917 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4921 else if (modifier
== NARROW
)
4922 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4924 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4926 /* Sanity check: make sure that at least one copy of the vectorized stmt
4927 needs to be generated. */
4928 gcc_assert (ncopies
>= 1);
4930 bool found_mode
= false;
4931 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4932 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4933 opt_scalar_mode rhs_mode_iter
;
4935 /* Supportable by target? */
4939 if (code
!= FIX_TRUNC_EXPR
4940 && code
!= FLOAT_EXPR
4941 && !CONVERT_EXPR_CODE_P (code
))
4943 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4947 if (dump_enabled_p ())
4948 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4949 "conversion not supported by target.\n");
4953 if (known_eq (nunits_in
, nunits_out
))
4955 if (!supportable_half_widening_operation (code
, vectype_out
,
4956 vectype_in
, &code1
))
4958 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4961 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
4962 vectype_out
, vectype_in
, &code1
,
4963 &code2
, &multi_step_cvt
,
4966 /* Binary widening operation can only be supported directly by the
4968 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4972 if (code
!= FLOAT_EXPR
4973 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4976 fltsz
= GET_MODE_SIZE (lhs_mode
);
4977 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4979 rhs_mode
= rhs_mode_iter
.require ();
4980 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4984 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4985 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4986 if (cvt_type
== NULL_TREE
)
4989 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4991 if (!supportable_convert_operation (code
, vectype_out
,
4992 cvt_type
, &codecvt1
))
4995 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4996 vectype_out
, cvt_type
,
4997 &codecvt1
, &codecvt2
,
5002 gcc_assert (multi_step_cvt
== 0);
5004 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5006 vectype_in
, &code1
, &code2
,
5007 &multi_step_cvt
, &interm_types
))
5017 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5018 codecvt2
= ERROR_MARK
;
5022 interm_types
.safe_push (cvt_type
);
5023 cvt_type
= NULL_TREE
;
5028 gcc_assert (op_type
== unary_op
);
5029 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5030 &code1
, &multi_step_cvt
,
5034 if (code
!= FIX_TRUNC_EXPR
5035 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5039 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5040 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5041 if (cvt_type
== NULL_TREE
)
5043 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
5046 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5047 &code1
, &multi_step_cvt
,
5056 if (!vec_stmt
) /* transformation not required. */
5059 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5060 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5062 if (dump_enabled_p ())
5063 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5064 "incompatible vector types for invariants\n");
5067 DUMP_VECT_SCOPE ("vectorizable_conversion");
5068 if (modifier
== NONE
)
5070 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5071 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5074 else if (modifier
== NARROW
)
5076 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5077 /* The final packing step produces one vector result per copy. */
5078 unsigned int nvectors
5079 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5080 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5081 multi_step_cvt
, cost_vec
,
5086 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5087 /* The initial unpacking step produces two vector results
5088 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5089 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5090 unsigned int nvectors
5092 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5094 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5095 multi_step_cvt
, cost_vec
,
5098 interm_types
.release ();
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_NOTE
, vect_location
,
5105 "transform conversion. ncopies = %d.\n", ncopies
);
5107 if (op_type
== binary_op
)
5109 if (CONSTANT_CLASS_P (op0
))
5110 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5111 else if (CONSTANT_CLASS_P (op1
))
5112 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5115 /* In case of multi-step conversion, we first generate conversion operations
5116 to the intermediate types, and then from that types to the final one.
5117 We create vector destinations for the intermediate type (TYPES) received
5118 from supportable_*_operation, and store them in the correct order
5119 for future use in vect_create_vectorized_*_stmts (). */
5120 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5121 vec_dest
= vect_create_destination_var (scalar_dest
,
5122 (cvt_type
&& modifier
== WIDEN
)
5123 ? cvt_type
: vectype_out
);
5124 vec_dsts
.quick_push (vec_dest
);
5128 for (i
= interm_types
.length () - 1;
5129 interm_types
.iterate (i
, &intermediate_type
); i
--)
5131 vec_dest
= vect_create_destination_var (scalar_dest
,
5133 vec_dsts
.quick_push (vec_dest
);
5138 vec_dest
= vect_create_destination_var (scalar_dest
,
5140 ? vectype_out
: cvt_type
);
5145 if (modifier
== WIDEN
)
5147 else if (modifier
== NARROW
)
5150 ninputs
= vect_pow2 (multi_step_cvt
);
5158 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5160 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5162 /* Arguments are ready, create the new vector stmt. */
5163 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5164 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5165 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5166 gimple_assign_set_lhs (new_stmt
, new_temp
);
5167 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5170 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5172 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5177 /* In case the vectorization factor (VF) is bigger than the number
5178 of elements that we can fit in a vectype (nunits), we have to
5179 generate more than one vector stmt - i.e - we need to "unroll"
5180 the vector stmt by a factor VF/nunits. */
5181 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5183 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5185 if (code
== WIDEN_LSHIFT_EXPR
)
5187 int oprnds_size
= vec_oprnds0
.length ();
5188 vec_oprnds1
.create (oprnds_size
);
5189 for (i
= 0; i
< oprnds_size
; ++i
)
5190 vec_oprnds1
.quick_push (op1
);
5192 /* Arguments are ready. Create the new vector stmts. */
5193 for (i
= multi_step_cvt
; i
>= 0; i
--)
5195 tree this_dest
= vec_dsts
[i
];
5196 enum tree_code c1
= code1
, c2
= code2
;
5197 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5202 if (known_eq (nunits_out
, nunits_in
))
5203 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
,
5204 &vec_oprnds1
, stmt_info
,
5208 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5209 &vec_oprnds1
, stmt_info
,
5214 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5219 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5220 new_temp
= make_ssa_name (vec_dest
);
5221 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
5222 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5225 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5228 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5230 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5235 /* In case the vectorization factor (VF) is bigger than the number
5236 of elements that we can fit in a vectype (nunits), we have to
5237 generate more than one vector stmt - i.e - we need to "unroll"
5238 the vector stmt by a factor VF/nunits. */
5239 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5241 /* Arguments are ready. Create the new vector stmts. */
5243 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5245 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5246 new_temp
= make_ssa_name (vec_dest
);
5248 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5249 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5250 vec_oprnds0
[i
] = new_temp
;
5253 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5255 stmt_info
, vec_dsts
, gsi
,
5260 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5262 vec_oprnds0
.release ();
5263 vec_oprnds1
.release ();
5264 interm_types
.release ();
5269 /* Return true if we can assume from the scalar form of STMT_INFO that
5270 neither the scalar nor the vector forms will generate code. STMT_INFO
5271 is known not to involve a data reference. */
5274 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5276 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5280 tree lhs
= gimple_assign_lhs (stmt
);
5281 tree_code code
= gimple_assign_rhs_code (stmt
);
5282 tree rhs
= gimple_assign_rhs1 (stmt
);
5284 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5287 if (CONVERT_EXPR_CODE_P (code
))
5288 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5293 /* Function vectorizable_assignment.
5295 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5296 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5297 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5298 Return true if STMT_INFO is vectorizable in this way. */
5301 vectorizable_assignment (vec_info
*vinfo
,
5302 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5303 gimple
**vec_stmt
, slp_tree slp_node
,
5304 stmt_vector_for_cost
*cost_vec
)
5309 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5311 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5315 vec
<tree
> vec_oprnds
= vNULL
;
5317 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5318 enum tree_code code
;
5321 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5324 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5328 /* Is vectorizable assignment? */
5329 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5333 scalar_dest
= gimple_assign_lhs (stmt
);
5334 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5337 if (STMT_VINFO_DATA_REF (stmt_info
))
5340 code
= gimple_assign_rhs_code (stmt
);
5341 if (!(gimple_assign_single_p (stmt
)
5342 || code
== PAREN_EXPR
5343 || CONVERT_EXPR_CODE_P (code
)))
5346 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5347 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5349 /* Multiple types in SLP are handled by creating the appropriate number of
5350 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5355 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5357 gcc_assert (ncopies
>= 1);
5360 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5361 &dt
[0], &vectype_in
))
5363 if (dump_enabled_p ())
5364 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5365 "use not simple.\n");
5369 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5371 /* We can handle NOP_EXPR conversions that do not change the number
5372 of elements or the vector size. */
5373 if ((CONVERT_EXPR_CODE_P (code
)
5374 || code
== VIEW_CONVERT_EXPR
)
5376 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5377 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5378 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5381 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
5382 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5384 if (dump_enabled_p ())
5385 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5386 "can't convert between boolean and non "
5387 "boolean vectors %T\n", TREE_TYPE (op
));
5392 /* We do not handle bit-precision changes. */
5393 if ((CONVERT_EXPR_CODE_P (code
)
5394 || code
== VIEW_CONVERT_EXPR
)
5395 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5396 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5397 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5398 /* But a conversion that does not change the bit-pattern is ok. */
5399 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5400 > TYPE_PRECISION (TREE_TYPE (op
)))
5401 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5405 "type conversion to/from bit-precision "
5410 if (!vec_stmt
) /* transformation not required. */
5413 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5417 "incompatible vector types for invariants\n");
5420 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5421 DUMP_VECT_SCOPE ("vectorizable_assignment");
5422 if (!vect_nop_conversion_p (stmt_info
))
5423 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5429 if (dump_enabled_p ())
5430 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5433 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5436 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5438 /* Arguments are ready. create the new vector stmt. */
5439 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5441 if (CONVERT_EXPR_CODE_P (code
)
5442 || code
== VIEW_CONVERT_EXPR
)
5443 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5444 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5445 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5446 gimple_assign_set_lhs (new_stmt
, new_temp
);
5447 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5449 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5451 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5454 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5456 vec_oprnds
.release ();
5461 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5462 either as shift by a scalar or by a vector. */
/* NOTE(review): this chunk is a garbled text extraction of GCC's
   tree-vect-stmts.c -- logical source lines are split across physical
   lines and some statements (braces, returns, early-exit conditions)
   are missing.  Comments added below describe only what the visible
   tokens establish; confirm against the upstream file before editing.  */
5465 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5468 machine_mode vec_mode
;
/* Map SCALAR_TYPE to the vector type the target would use for it.  */
5473 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
/* First try the vector-shifted-by-scalar optab ...  */
5477 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5479 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
/* ... and fall back to the vector-shifted-by-vector optab when the
   scalar form has no insn for this mode.  */
5481 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5483 || (optab_handler (optab
, TYPE_MODE (vectype
))
5484 == CODE_FOR_nothing
))
/* Final check: the chosen optab must have an insn code for the
   vector mode; CODE_FOR_nothing means the shift is unsupported.  */
5488 vec_mode
= TYPE_MODE (vectype
);
5489 icode
= (int) optab_handler (optab
, vec_mode
);
5490 if (icode
== CODE_FOR_nothing
)
5497 /* Function vectorizable_shift.
5499 Check if STMT_INFO performs a shift operation that can be vectorized.
5500 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5501 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5502 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): garbled extraction -- logical lines are split across
   physical lines and several original lines (braces, returns, parts of
   conditions) are missing from this chunk.  The added comments describe
   only what the visible tokens establish.  */
5505 vectorizable_shift (vec_info
*vinfo
,
5506 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5507 gimple
**vec_stmt
, slp_tree slp_node
,
5508 stmt_vector_for_cost
*cost_vec
)
/* Local state: operands, def-types for the two operands, operand
   vectors for code generation, and the scalar-vs-vector shift-amount
   classification.  */
5512 tree op0
, op1
= NULL
;
5513 tree vec_oprnd1
= NULL_TREE
;
5515 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5516 enum tree_code code
;
5517 machine_mode vec_mode
;
5521 machine_mode optab_op2_mode
;
5522 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5524 poly_uint64 nunits_in
;
5525 poly_uint64 nunits_out
;
5530 vec
<tree
> vec_oprnds0
= vNULL
;
5531 vec
<tree
> vec_oprnds1
= vNULL
;
5534 bool scalar_shift_arg
= true;
5535 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5536 bool incompatible_op1_vectype_p
= false;
/* Early-out checks: statement relevance and def-type (the bail-out
   statements themselves are among the lines missing from this chunk).  */
5538 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5541 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5542 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5546 /* Is STMT a vectorizable binary/unary operation? */
5547 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5551 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* Only shift/rotate tree codes are handled here.  */
5554 code
= gimple_assign_rhs_code (stmt
);
5556 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5557 || code
== RROTATE_EXPR
))
5560 scalar_dest
= gimple_assign_lhs (stmt
);
5561 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5562 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5564 if (dump_enabled_p ())
5565 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5566 "bit-precision shifts not supported.\n");
/* Analyze operand 0 (the shifted value) and derive its vector type.  */
5571 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5572 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5574 if (dump_enabled_p ())
5575 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5576 "use not simple.\n");
5579 /* If op0 is an external or constant def, infer the vector type
5580 from the scalar type. */
5582 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5584 gcc_assert (vectype
);
5587 if (dump_enabled_p ())
5588 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5589 "no vectype for scalar type\n");
/* Input and output vector types must have the same element count.  */
5593 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5594 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5595 if (maybe_ne (nunits_out
, nunits_in
))
/* Analyze operand 1 (the shift amount).  */
5598 stmt_vec_info op1_def_stmt_info
;
5600 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5601 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5603 if (dump_enabled_p ())
5604 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5605 "use not simple.\n");
5609 /* Multiple types in SLP are handled by creating the appropriate number of
5610 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5615 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5617 gcc_assert (ncopies
>= 1);
5619 /* Determine whether the shift amount is a vector, or scalar. If the
5620 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5622 if ((dt
[1] == vect_internal_def
5623 || dt
[1] == vect_induction_def
5624 || dt
[1] == vect_nested_cycle
)
5626 scalar_shift_arg
= false;
5627 else if (dt
[1] == vect_constant_def
5628 || dt
[1] == vect_external_def
5629 || dt
[1] == vect_internal_def
)
5631 /* In SLP, need to check whether the shift count is the same,
5632 in loops if it is a constant or invariant, it is always
5636 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5637 stmt_vec_info slpstmt_info
;
/* A scalar shift amount is only usable if every SLP lane shifts by
   the same value.  */
5639 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5641 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5642 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5643 scalar_shift_arg
= false;
5646 /* For internal SLP defs we have to make sure we see scalar stmts
5647 for all vector elements.
5648 ??? For different vectors we could resort to a different
5649 scalar shift operand but code-generation below simply always
5651 if (dt
[1] == vect_internal_def
5652 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5654 scalar_shift_arg
= false;
5657 /* If the shift amount is computed by a pattern stmt we cannot
5658 use the scalar amount directly thus give up and use a vector
5660 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5661 scalar_shift_arg
= false;
5665 if (dump_enabled_p ())
5666 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5667 "operand mode requires invariant argument.\n");
5671 /* Vector shifted by vector. */
5672 bool was_scalar_shift_arg
= scalar_shift_arg
;
5673 if (!scalar_shift_arg
)
5675 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5676 if (dump_enabled_p ())
5677 dump_printf_loc (MSG_NOTE
, vect_location
,
5678 "vector/vector shift/rotate found.\n");
/* Check the shift amount's vector type is usable: same lane count and
   same mode as the shifted value's vector type.  */
5681 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5683 incompatible_op1_vectype_p
5684 = (op1_vectype
== NULL_TREE
5685 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5686 TYPE_VECTOR_SUBPARTS (vectype
))
5687 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5688 if (incompatible_op1_vectype_p
5690 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5691 || slp_op1
->refcnt
!= 1))
5693 if (dump_enabled_p ())
5694 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5695 "unusable type for last operand in"
5696 " vector/vector shift/rotate.\n");
5700 /* See if the machine has a vector shifted by scalar insn and if not
5701 then see if it has a vector shifted by vector insn. */
5704 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5706 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5708 if (dump_enabled_p ())
5709 dump_printf_loc (MSG_NOTE
, vect_location
,
5710 "vector/scalar shift/rotate found.\n");
5714 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5716 && (optab_handler (optab
, TYPE_MODE (vectype
))
5717 != CODE_FOR_nothing
))
/* Only a vector/vector insn exists: demote the scalar shift amount
   to a vector one.  */
5719 scalar_shift_arg
= false;
5721 if (dump_enabled_p ())
5722 dump_printf_loc (MSG_NOTE
, vect_location
,
5723 "vector/vector shift/rotate found.\n");
5726 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5730 /* Unlike the other binary operators, shifts/rotates have
5731 the rhs being int, instead of the same type as the lhs,
5732 so make sure the scalar is the right type if we are
5733 dealing with vectors of long long/long/short/char. */
5734 incompatible_op1_vectype_p
5736 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5738 if (incompatible_op1_vectype_p
5739 && dt
[1] == vect_internal_def
)
5741 if (dump_enabled_p ())
5742 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5743 "unusable type for last operand in"
5744 " vector/vector shift/rotate.\n");
5751 /* Supportable by target? */
5754 if (dump_enabled_p ())
5755 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5759 vec_mode
= TYPE_MODE (vectype
);
5760 icode
= (int) optab_handler (optab
, vec_mode
);
5761 if (icode
== CODE_FOR_nothing
)
5763 if (dump_enabled_p ())
5764 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5765 "op not supported by target.\n");
5768 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5769 if (vect_emulated_vector_p (vectype
))
/* Analysis-only path: record vectypes on invariant SLP operands, set
   the stmt kind and cost the statement, then return (the returns are
   among the missing lines).  */
5772 if (!vec_stmt
) /* transformation not required. */
5775 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5776 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
5777 && (!incompatible_op1_vectype_p
5778 || dt
[1] == vect_constant_def
)
5779 && !vect_maybe_update_slp_op_vectype
5781 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5783 if (dump_enabled_p ())
5784 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5785 "incompatible vector types for invariants\n");
5788 /* Now adjust the constant shift amount in place. */
5790 && incompatible_op1_vectype_p
5791 && dt
[1] == vect_constant_def
)
5793 for (unsigned i
= 0;
5794 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5796 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5797 = fold_convert (TREE_TYPE (vectype
),
5798 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5799 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5803 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5804 DUMP_VECT_SCOPE ("vectorizable_shift");
5805 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5806 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
/* Transformation path: build the vectorized shift statements.  */
5812 if (dump_enabled_p ())
5813 dump_printf_loc (MSG_NOTE
, vect_location
,
5814 "transform binary/unary operation.\n");
5816 if (incompatible_op1_vectype_p
&& !slp_node
)
5818 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
5819 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5820 if (dt
[1] != vect_constant_def
)
5821 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5822 TREE_TYPE (vectype
), NULL
);
5826 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5828 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
5830 /* Vector shl and shr insn patterns can be defined with scalar
5831 operand 2 (shift operand). In this case, use constant or loop
5832 invariant op1 directly, without extending it to vector mode
5834 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5835 if (!VECTOR_MODE_P (optab_op2_mode
))
5837 if (dump_enabled_p ())
5838 dump_printf_loc (MSG_NOTE
, vect_location
,
5839 "operand 1 using scalar mode.\n");
5841 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
5842 vec_oprnds1
.quick_push (vec_oprnd1
);
5843 /* Store vec_oprnd1 for every vector stmt to be created.
5844 We check during the analysis that all the shift arguments
5846 TODO: Allow different constants for different vector
5847 stmts generated for an SLP instance. */
5849 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
5850 vec_oprnds1
.quick_push (vec_oprnd1
);
5853 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
5855 if (was_scalar_shift_arg
)
5857 /* If the argument was the same in all lanes create
5858 the correctly typed vector shift amount directly. */
5859 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5860 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
5861 !loop_vinfo
? gsi
: NULL
);
5862 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5863 !loop_vinfo
? gsi
: NULL
);
5864 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5865 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5866 vec_oprnds1
.quick_push (vec_oprnd1
);
5868 else if (dt
[1] == vect_constant_def
)
5869 /* The constant shift amount has been adjusted in place. */
5872 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5875 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5876 (a special case for certain kind of vector shifts); otherwise,
5877 operand 1 should be of a vector type (the usual case). */
5878 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5880 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
5882 /* Arguments are ready. Create the new vector stmt. */
5883 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5885 /* For internal defs where we need to use a scalar shift arg
5886 extract the first lane. */
5887 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
5889 vop1
= vec_oprnds1
[0];
5890 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
5892 = gimple_build_assign (new_temp
,
5893 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
5895 TYPE_SIZE (TREE_TYPE (new_temp
)),
5896 bitsize_zero_node
));
5897 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5901 vop1
= vec_oprnds1
[i
];
5902 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5903 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5904 gimple_assign_set_lhs (new_stmt
, new_temp
);
5905 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Record the generated stmt on the SLP node or the stmt-info.  */
5907 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5909 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5913 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5915 vec_oprnds0
.release ();
5916 vec_oprnds1
.release ();
5922 /* Function vectorizable_operation.
5924 Check if STMT_INFO performs a binary, unary or ternary operation that can
5926 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5927 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5928 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): garbled extraction -- logical lines are split across
   physical lines and several original lines (braces, returns, parts of
   conditions) are missing from this chunk.  The added comments describe
   only what the visible tokens establish.  */
5931 vectorizable_operation (vec_info
*vinfo
,
5932 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5933 gimple
**vec_stmt
, slp_tree slp_node
,
5934 stmt_vector_for_cost
*cost_vec
)
/* Local state: up to three operands, their def-types, and per-operand
   vectors of vectorized defs used during code generation.  */
5938 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5940 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5941 enum tree_code code
, orig_code
;
5942 machine_mode vec_mode
;
5946 bool target_support_p
;
5947 enum vect_def_type dt
[3]
5948 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5950 poly_uint64 nunits_in
;
5951 poly_uint64 nunits_out
;
5953 int ncopies
, vec_num
;
5955 vec
<tree
> vec_oprnds0
= vNULL
;
5956 vec
<tree
> vec_oprnds1
= vNULL
;
5957 vec
<tree
> vec_oprnds2
= vNULL
;
5958 tree vop0
, vop1
, vop2
;
5959 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5961 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5964 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5968 /* Is STMT a vectorizable binary/unary operation? */
5969 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5973 /* Loads and stores are handled in vectorizable_{load,store}. */
5974 if (STMT_VINFO_DATA_REF (stmt_info
))
/* ORIG_CODE keeps the original tree code (e.g. POINTER_DIFF_EXPR)
   while CODE may be replaced with the equivalent vector operation.  */
5977 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5979 /* Shifts are handled in vectorizable_shift. */
5980 if (code
== LSHIFT_EXPR
5981 || code
== RSHIFT_EXPR
5982 || code
== LROTATE_EXPR
5983 || code
== RROTATE_EXPR
)
5986 /* Comparisons are handled in vectorizable_comparison. */
5987 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
5990 /* Conditions are handled in vectorizable_condition. */
5991 if (code
== COND_EXPR
)
5994 /* For pointer addition and subtraction, we should use the normal
5995 plus and minus for the vector operation. */
5996 if (code
== POINTER_PLUS_EXPR
)
5998 if (code
== POINTER_DIFF_EXPR
)
6001 /* Support only unary or binary operations. */
6002 op_type
= TREE_CODE_LENGTH (code
);
6003 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6005 if (dump_enabled_p ())
6006 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6007 "num. args = %d (not unary/binary/ternary op).\n",
6012 scalar_dest
= gimple_assign_lhs (stmt
);
6013 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6015 /* Most operations cannot handle bit-precision types without extra
6017 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6019 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6020 /* Exception are bitwise binary operations. */
6021 && code
!= BIT_IOR_EXPR
6022 && code
!= BIT_XOR_EXPR
6023 && code
!= BIT_AND_EXPR
)
6025 if (dump_enabled_p ())
6026 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6027 "bit-precision arithmetic not supported.\n");
/* Analyze operand 0 and determine its vector type.  */
6032 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6033 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6035 if (dump_enabled_p ())
6036 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6037 "use not simple.\n");
6040 /* If op0 is an external or constant def, infer the vector type
6041 from the scalar type. */
6044 /* For boolean type we cannot determine vectype by
6045 invariant value (don't know whether it is a vector
6046 of booleans or vector of integers). We use output
6047 vectype because operations on boolean don't change
6049 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6051 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6053 if (dump_enabled_p ())
6054 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6055 "not supported operation on bool value.\n");
6058 vectype
= vectype_out
;
6061 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6065 gcc_assert (vectype
);
6068 if (dump_enabled_p ())
6069 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6070 "no vectype for scalar type %T\n",
/* Input and output vector types must have the same element count.  */
6076 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6077 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6078 if (maybe_ne (nunits_out
, nunits_in
))
/* Analyze the remaining operands (binary / ternary only).  */
6081 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6082 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6083 if (op_type
== binary_op
|| op_type
== ternary_op
)
6085 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6086 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6088 if (dump_enabled_p ())
6089 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6090 "use not simple.\n");
6094 if (op_type
== ternary_op
)
6096 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6097 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6099 if (dump_enabled_p ())
6100 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6101 "use not simple.\n");
6106 /* Multiple types in SLP are handled by creating the appropriate number of
6107 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6112 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6116 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6120 gcc_assert (ncopies
>= 1);
6122 /* Reject attempts to combine mask types with nonmask types, e.g. if
6123 we have an AND between a (nonmask) boolean loaded from memory and
6124 a (mask) boolean result of a comparison.
6126 TODO: We could easily fix these cases up using pattern statements. */
6127 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6128 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6129 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6131 if (dump_enabled_p ())
6132 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6133 "mixed mask and nonmask vector types\n");
6137 /* Supportable by target? */
6139 vec_mode
= TYPE_MODE (vectype
);
6140 if (code
== MULT_HIGHPART_EXPR
)
6141 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6144 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6147 if (dump_enabled_p ())
6148 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6152 target_support_p
= (optab_handler (optab
, vec_mode
)
6153 != CODE_FOR_nothing
);
/* Fall back to word-mode emulation when the target has no insn, where
   vect_can_vectorize_without_simd_p allows it.  */
6156 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6157 if (!target_support_p
)
6159 if (dump_enabled_p ())
6160 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6161 "op not supported by target.\n");
6162 /* Check only during analysis. */
6163 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6164 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6166 if (dump_enabled_p ())
6167 dump_printf_loc (MSG_NOTE
, vect_location
,
6168 "proceeding using word mode.\n");
6169 using_emulated_vectors_p
= true;
6172 if (using_emulated_vectors_p
6173 && !vect_can_vectorize_without_simd_p (code
))
6175 if (dump_enabled_p ())
6176 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
/* Masked-loop support: a conditional internal function is needed when
   the operation is part of a reduction in a partially-vectorized loop.  */
6180 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6181 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6182 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6184 if (!vec_stmt
) /* transformation not required. */
6186 /* If this operation is part of a reduction, a fully-masked loop
6187 should only change the active lanes of the reduction chain,
6188 keeping the inactive lanes as-is. */
6190 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6193 if (cond_fn
== IFN_LAST
6194 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6195 OPTIMIZE_FOR_SPEED
))
6197 if (dump_enabled_p ())
6198 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6199 "can't use a fully-masked loop because no"
6200 " conditional operation is available.\n");
6201 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6204 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6208 /* Put types on constant and invariant SLP children. */
6210 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6211 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6212 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6214 if (dump_enabled_p ())
6215 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6216 "incompatible vector types for invariants\n");
6220 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6221 DUMP_VECT_SCOPE ("vectorizable_operation");
6222 vect_model_simple_cost (vinfo
, stmt_info
,
6223 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6224 if (using_emulated_vectors_p
)
6226 /* The above vect_model_simple_cost call handles constants
6227 in the prologue and (mis-)costs one of the stmts as
6228 vector stmt. See tree-vect-generic.c:do_plus_minus/do_negate
6229 for the actual lowering that will be applied. */
6231 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6245 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
, 0, vect_body
);
/* Transformation path.  */
6252 if (dump_enabled_p ())
6253 dump_printf_loc (MSG_NOTE
, vect_location
,
6254 "transform binary/unary operation.\n");
6256 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6258 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6259 vectors with unsigned elements, but the result is signed. So, we
6260 need to compute the MINUS_EXPR into vectype temporary and
6261 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6262 tree vec_cvt_dest
= NULL_TREE
;
6263 if (orig_code
== POINTER_DIFF_EXPR
)
6265 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6266 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6270 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6272 /* In case the vectorization factor (VF) is bigger than the number
6273 of elements that we can fit in a vectype (nunits), we have to generate
6274 more than one vector stmt - i.e - we need to "unroll" the
6275 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6276 from one copy of the vector stmt to the next, in the field
6277 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6278 stages to find the correct vector defs to be used when vectorizing
6279 stmts that use the defs of the current stmt. The example below
6280 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6281 we need to create 4 vectorized stmts):
6283 before vectorization:
6284 RELATED_STMT VEC_STMT
6288 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6290 RELATED_STMT VEC_STMT
6291 VS1_0: vx0 = memref0 VS1_1 -
6292 VS1_1: vx1 = memref1 VS1_2 -
6293 VS1_2: vx2 = memref2 VS1_3 -
6294 VS1_3: vx3 = memref3 - -
6295 S1: x = load - VS1_0
6298 step2: vectorize stmt S2 (done here):
6299 To vectorize stmt S2 we first need to find the relevant vector
6300 def for the first operand 'x'. This is, as usual, obtained from
6301 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6302 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6303 relevant vector def 'vx0'. Having found 'vx0' we can generate
6304 the vector stmt VS2_0, and as usual, record it in the
6305 STMT_VINFO_VEC_STMT of stmt S2.
6306 When creating the second copy (VS2_1), we obtain the relevant vector
6307 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6308 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6309 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6310 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6311 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6312 chain of stmts and pointers:
6313 RELATED_STMT VEC_STMT
6314 VS1_0: vx0 = memref0 VS1_1 -
6315 VS1_1: vx1 = memref1 VS1_2 -
6316 VS1_2: vx2 = memref2 VS1_3 -
6317 VS1_3: vx3 = memref3 - -
6318 S1: x = load - VS1_0
6319 VS2_0: vz0 = vx0 + v1 VS2_1 -
6320 VS2_1: vz1 = vx1 + v1 VS2_2 -
6321 VS2_2: vz2 = vx2 + v1 VS2_3 -
6322 VS2_3: vz3 = vx3 + v1 - -
6323 S2: z = x + 1 - VS2_0 */
6325 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6326 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6327 /* Arguments are ready. Create the new vector stmt. */
6328 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6330 gimple
*new_stmt
= NULL
;
6331 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6332 ? vec_oprnds1
[i
] : NULL_TREE
);
6333 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6334 if (masked_loop_p
&& reduc_idx
>= 0)
6336 /* Perform the operation on active elements only and take
6337 inactive elements from the reduction chain input. */
6339 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6340 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6342 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6344 new_temp
= make_ssa_name (vec_dest
, call
);
6345 gimple_call_set_lhs (call
, new_temp
);
6346 gimple_call_set_nothrow (call
, true);
6347 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
/* Unmasked case: emit a plain vector assign.  */
6352 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6353 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6354 gimple_assign_set_lhs (new_stmt
, new_temp
);
6355 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* For POINTER_DIFF_EXPR, VIEW_CONVERT the unsigned result to the
   signed output vector type (see the comment at 6258 above).  */
6358 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6359 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6361 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6362 gimple_assign_set_lhs (new_stmt
, new_temp
);
6363 vect_finish_stmt_generation (vinfo
, stmt_info
,
6368 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6370 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6374 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6376 vec_oprnds0
.release ();
6377 vec_oprnds1
.release ();
6378 vec_oprnds2
.release ();
6383 /* A helper function to ensure data reference DR_INFO's base alignment. */
/* NOTE(review): garbled extraction -- lines are split/missing; comments
   below describe only the visible logic.  */
6386 ensure_base_align (dr_vec_info
*dr_info
)
6388 /* Alignment is only analyzed for the first element of a DR group,
6389 use that to look at base alignment we need to enforce. */
6390 if (STMT_VINFO_GROUPED_ACCESS (dr_info
->stmt
))
6391 dr_info
= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info
->stmt
));
6393 gcc_assert (dr_info
->misalignment
!= DR_MISALIGNMENT_UNINITIALIZED
);
/* Only act when analysis flagged the base object as under-aligned.  */
6395 if (dr_info
->base_misaligned
)
6397 tree base_decl
= dr_info
->base_decl
;
6399 // We should only be able to increase the alignment of a base object if
6400 // we know what its new alignment should be at compile time.
6401 unsigned HOST_WIDE_INT align_base_to
=
6402 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
/* Symtab-visible decls are realigned via the symbol table; others get
   their DECL_ALIGN raised directly (marked user-aligned so later passes
   keep it).  */
6404 if (decl_in_symtab_p (base_decl
))
6405 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6406 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6408 SET_DECL_ALIGN (base_decl
, align_base_to
);
6409 DECL_USER_ALIGN (base_decl
) = 1;
/* Clear the flag so the base is not realigned again.  */
6411 dr_info
->base_misaligned
= false;
6416 /* Function get_group_alias_ptr_type.
6418 Return the alias type for the group starting at FIRST_STMT_INFO. */
/* NOTE(review): garbled extraction -- lines are split/missing; comments
   below describe only the visible logic.  */
6421 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6423 struct data_reference
*first_dr
, *next_dr
;
6425 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6426 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
/* Walk the DR group; if any member's alias set differs from the first
   member's, the group has no common alias type.  */
6427 while (next_stmt_info
)
6429 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6430 if (get_alias_set (DR_REF (first_dr
))
6431 != get_alias_set (DR_REF (next_dr
)))
6433 if (dump_enabled_p ())
6434 dump_printf_loc (MSG_NOTE
, vect_location
,
6435 "conflicting alias set types.\n");
/* Conflicting alias sets: fall back to ptr_type_node, which aliases
   everything.  */
6436 return ptr_type_node
;
6438 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
/* All members agree: use the first reference's alias pointer type.  */
6440 return reference_alias_ptr_type (DR_REF (first_dr
));
6444 /* Function scan_operand_equal_p.
6446 Helper function for check_scan_store. Compare two references
6447 with .GOMP_SIMD_LANE bases. */
/* NOTE(review): garbled extraction -- lines are split across physical
   lines and some statements (returns, braces) are missing.  Comments
   below describe only the visible logic.  */
6450 scan_operand_equal_p (tree ref1
, tree ref2
)
/* Decompose both references into base / offset / bit position+size.  */
6452 tree ref
[2] = { ref1
, ref2
};
6453 poly_int64 bitsize
[2], bitpos
[2];
6454 tree offset
[2], base
[2];
6455 for (int i
= 0; i
< 2; ++i
)
6458 int unsignedp
, reversep
, volatilep
= 0;
6459 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6460 &offset
[i
], &mode
, &unsignedp
,
6461 &reversep
, &volatilep
);
/* Reverse storage order, volatile, or a non-zero bit position make the
   references incomparable here.  */
6462 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
/* Strip a MEM_REF whose address is an ADDR_EXPR plus an SSA offset
   (pointer arithmetic), recovering the underlying decl and offset.  */
6464 if (TREE_CODE (base
[i
]) == MEM_REF
6465 && offset
[i
] == NULL_TREE
6466 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6468 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6469 if (is_gimple_assign (def_stmt
)
6470 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6471 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6472 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6474 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6476 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6477 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
/* Bases, bit sizes and offsets must all match.  */
6482 if (!operand_equal_p (base
[0], base
[1], 0))
6484 if (maybe_ne (bitsize
[0], bitsize
[1]))
6486 if (offset
[0] != offset
[1])
6488 if (!offset
[0] || !offset
[1])
6490 if (!operand_equal_p (offset
[0], offset
[1], 0))
/* Normalize each offset to (offset, step): peel off a multiplication
   by a step and any widening conversion before the final comparison.  */
6493 for (int i
= 0; i
< 2; ++i
)
6495 step
[i
] = integer_one_node
;
6496 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6498 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6499 if (is_gimple_assign (def_stmt
)
6500 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6501 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6504 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6505 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6508 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6510 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6511 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
/* Look through an integral widening cast of the offset.  */
6513 tree rhs1
= NULL_TREE
;
6514 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6516 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6517 if (gimple_assign_cast_p (def_stmt
))
6518 rhs1
= gimple_assign_rhs1 (def_stmt
);
6520 else if (CONVERT_EXPR_P (offset
[i
]))
6521 rhs1
= TREE_OPERAND (offset
[i
], 0);
6523 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6524 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6525 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6526 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
/* Finally both normalized offsets and both steps must be equal.  */
6529 if (!operand_equal_p (offset
[0], offset
[1], 0)
6530 || !operand_equal_p (step
[0], step
[1], 0))
6538 enum scan_store_kind
{
6539 /* Normal permutation. */
6540 scan_store_kind_perm
,
6542 /* Whole vector left shift permutation with zero init. */
6543 scan_store_kind_lshift_zero
,
6545 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6546 scan_store_kind_lshift_cond
6549 /* Function check_scan_store.
6551 Verify if we can perform the needed permutations or whole vector shifts.
6552 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6553 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6554 to do at each step. */
6557 scan_store_can_perm_p (tree vectype
, tree init
,
6558 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
6560 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6561 unsigned HOST_WIDE_INT nunits
;
6562 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6564 int units_log2
= exact_log2 (nunits
);
6565 if (units_log2
<= 0)
6569 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6570 for (i
= 0; i
<= units_log2
; ++i
)
6572 unsigned HOST_WIDE_INT j
, k
;
6573 enum scan_store_kind kind
= scan_store_kind_perm
;
6574 vec_perm_builder
sel (nunits
, nunits
, 1);
6575 sel
.quick_grow (nunits
);
6576 if (i
== units_log2
)
6578 for (j
= 0; j
< nunits
; ++j
)
6579 sel
[j
] = nunits
- 1;
6583 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6585 for (k
= 0; j
< nunits
; ++j
, ++k
)
6586 sel
[j
] = nunits
+ k
;
6588 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6589 if (!can_vec_perm_const_p (vec_mode
, indices
))
6591 if (i
== units_log2
)
6594 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6596 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6598 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6599 /* Whole vector shifts shift in zeros, so if init is all zero
6600 constant, there is no need to do anything further. */
6601 if ((TREE_CODE (init
) != INTEGER_CST
6602 && TREE_CODE (init
) != REAL_CST
)
6603 || !initializer_zerop (init
))
6605 tree masktype
= truth_type_for (vectype
);
6606 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6608 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6611 kind
= whole_vector_shift_kind
;
6613 if (use_whole_vector
)
6615 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6616 use_whole_vector
->safe_grow_cleared (i
, true);
6617 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6618 use_whole_vector
->safe_push (kind
);
6626 /* Function check_scan_store.
6628 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6631 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
6632 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6633 vect_memory_access_type memory_access_type
)
6635 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6636 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6639 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6642 || memory_access_type
!= VMAT_CONTIGUOUS
6643 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6644 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6645 || loop_vinfo
== NULL
6646 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6647 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6648 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6649 || !integer_zerop (DR_INIT (dr_info
->dr
))
6650 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6651 || !alias_sets_conflict_p (get_alias_set (vectype
),
6652 get_alias_set (TREE_TYPE (ref_type
))))
6654 if (dump_enabled_p ())
6655 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6656 "unsupported OpenMP scan store.\n");
6660 /* We need to pattern match code built by OpenMP lowering and simplified
6661 by following optimizations into something we can handle.
6662 #pragma omp simd reduction(inscan,+:r)
6666 #pragma omp scan inclusive (r)
6669 shall have body with:
6670 // Initialization for input phase, store the reduction initializer:
6671 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6672 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6674 // Actual input phase:
6676 r.0_5 = D.2042[_20];
6679 // Initialization for scan phase:
6680 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6686 // Actual scan phase:
6688 r.1_8 = D.2042[_20];
6690 The "omp simd array" variable D.2042 holds the privatized copy used
6691 inside of the loop and D.2043 is another one that holds copies of
6692 the current original list item. The separate GOMP_SIMD_LANE ifn
6693 kinds are there in order to allow optimizing the initializer store
6694 and combiner sequence, e.g. if it is originally some C++ish user
6695 defined reduction, but allow the vectorizer to pattern recognize it
6696 and turn into the appropriate vectorized scan.
6698 For exclusive scan, this is slightly different:
6699 #pragma omp simd reduction(inscan,+:r)
6703 #pragma omp scan exclusive (r)
6706 shall have body with:
6707 // Initialization for input phase, store the reduction initializer:
6708 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6709 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6711 // Actual input phase:
6713 r.0_5 = D.2042[_20];
6716 // Initialization for scan phase:
6717 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6723 // Actual scan phase:
6725 r.1_8 = D.2044[_20];
6728 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6730 /* Match the D.2042[_21] = 0; store above. Just require that
6731 it is a constant or external definition store. */
6732 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6735 if (dump_enabled_p ())
6736 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6737 "unsupported OpenMP scan initializer store.\n");
6741 if (! loop_vinfo
->scan_map
)
6742 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6743 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6744 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6747 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6749 /* These stores can be vectorized normally. */
6753 if (rhs_dt
!= vect_internal_def
)
6756 if (dump_enabled_p ())
6757 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6758 "unsupported OpenMP scan combiner pattern.\n");
6762 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6763 tree rhs
= gimple_assign_rhs1 (stmt
);
6764 if (TREE_CODE (rhs
) != SSA_NAME
)
6767 gimple
*other_store_stmt
= NULL
;
6768 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6769 bool inscan_var_store
6770 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6772 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6774 if (!inscan_var_store
)
6776 use_operand_p use_p
;
6777 imm_use_iterator iter
;
6778 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6780 gimple
*use_stmt
= USE_STMT (use_p
);
6781 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6783 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6784 || !is_gimple_assign (use_stmt
)
6785 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6787 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6789 other_store_stmt
= use_stmt
;
6791 if (other_store_stmt
== NULL
)
6793 rhs
= gimple_assign_lhs (other_store_stmt
);
6794 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6798 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6800 use_operand_p use_p
;
6801 imm_use_iterator iter
;
6802 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6804 gimple
*use_stmt
= USE_STMT (use_p
);
6805 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6807 if (other_store_stmt
)
6809 other_store_stmt
= use_stmt
;
6815 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6816 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6817 || !is_gimple_assign (def_stmt
)
6818 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6821 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6822 /* For pointer addition, we should use the normal plus for the vector
6826 case POINTER_PLUS_EXPR
:
6829 case MULT_HIGHPART_EXPR
:
6834 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6837 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6838 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6839 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6842 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6843 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6844 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6845 || !gimple_assign_load_p (load1_stmt
)
6846 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6847 || !gimple_assign_load_p (load2_stmt
))
6850 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6851 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6852 if (load1_stmt_info
== NULL
6853 || load2_stmt_info
== NULL
6854 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6855 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6856 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6857 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6860 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6862 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6863 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6864 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6866 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6868 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6872 use_operand_p use_p
;
6873 imm_use_iterator iter
;
6874 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6876 gimple
*use_stmt
= USE_STMT (use_p
);
6877 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6879 if (other_store_stmt
)
6881 other_store_stmt
= use_stmt
;
6885 if (other_store_stmt
== NULL
)
6887 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6888 || !gimple_store_p (other_store_stmt
))
6891 stmt_vec_info other_store_stmt_info
6892 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6893 if (other_store_stmt_info
== NULL
6894 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6895 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6898 gimple
*stmt1
= stmt
;
6899 gimple
*stmt2
= other_store_stmt
;
6900 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6901 std::swap (stmt1
, stmt2
);
6902 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6903 gimple_assign_rhs1 (load2_stmt
)))
6905 std::swap (rhs1
, rhs2
);
6906 std::swap (load1_stmt
, load2_stmt
);
6907 std::swap (load1_stmt_info
, load2_stmt_info
);
6909 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6910 gimple_assign_rhs1 (load1_stmt
)))
6913 tree var3
= NULL_TREE
;
6914 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6915 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6916 gimple_assign_rhs1 (load2_stmt
)))
6918 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6920 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6921 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6922 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6924 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6925 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6926 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6927 || lookup_attribute ("omp simd inscan exclusive",
6928 DECL_ATTRIBUTES (var3
)))
6932 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6933 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6934 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6937 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6938 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6939 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6940 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6941 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6942 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6945 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6946 std::swap (var1
, var2
);
6948 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6950 if (!lookup_attribute ("omp simd inscan exclusive",
6951 DECL_ATTRIBUTES (var1
)))
6956 if (loop_vinfo
->scan_map
== NULL
)
6958 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
6962 /* The IL is as expected, now check if we can actually vectorize it.
6969 should be vectorized as (where _40 is the vectorized rhs
6970 from the D.2042[_21] = 0; store):
6971 _30 = MEM <vector(8) int> [(int *)&D.2043];
6972 _31 = MEM <vector(8) int> [(int *)&D.2042];
6973 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6975 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6976 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6978 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6979 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6980 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6982 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6983 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6985 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6986 MEM <vector(8) int> [(int *)&D.2043] = _39;
6987 MEM <vector(8) int> [(int *)&D.2042] = _38;
6994 should be vectorized as (where _40 is the vectorized rhs
6995 from the D.2042[_21] = 0; store):
6996 _30 = MEM <vector(8) int> [(int *)&D.2043];
6997 _31 = MEM <vector(8) int> [(int *)&D.2042];
6998 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6999 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7001 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7002 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7003 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7005 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7006 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7007 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7009 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7010 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7013 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7014 MEM <vector(8) int> [(int *)&D.2044] = _39;
7015 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7016 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7017 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7018 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7021 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7022 if (units_log2
== -1)
7029 /* Function vectorizable_scan_store.
7031 Helper of vectorizable_score, arguments like on vectorizable_store.
7032 Handle only the transformation, checking is done in check_scan_store. */
7035 vectorizable_scan_store (vec_info
*vinfo
,
7036 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7037 gimple
**vec_stmt
, int ncopies
)
7039 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7040 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7041 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7042 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7044 if (dump_enabled_p ())
7045 dump_printf_loc (MSG_NOTE
, vect_location
,
7046 "transform scan store. ncopies = %d\n", ncopies
);
7048 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7049 tree rhs
= gimple_assign_rhs1 (stmt
);
7050 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7052 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7053 bool inscan_var_store
7054 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7056 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7058 use_operand_p use_p
;
7059 imm_use_iterator iter
;
7060 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7062 gimple
*use_stmt
= USE_STMT (use_p
);
7063 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7065 rhs
= gimple_assign_lhs (use_stmt
);
7070 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7071 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7072 if (code
== POINTER_PLUS_EXPR
)
7074 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7075 && commutative_tree_code (code
));
7076 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7077 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7078 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7079 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7080 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7081 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7082 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7083 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7084 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7085 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7086 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7088 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7090 std::swap (rhs1
, rhs2
);
7091 std::swap (var1
, var2
);
7092 std::swap (load1_dr_info
, load2_dr_info
);
7095 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7098 unsigned HOST_WIDE_INT nunits
;
7099 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7101 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7102 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7103 gcc_assert (units_log2
> 0);
7104 auto_vec
<tree
, 16> perms
;
7105 perms
.quick_grow (units_log2
+ 1);
7106 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7107 for (int i
= 0; i
<= units_log2
; ++i
)
7109 unsigned HOST_WIDE_INT j
, k
;
7110 vec_perm_builder
sel (nunits
, nunits
, 1);
7111 sel
.quick_grow (nunits
);
7112 if (i
== units_log2
)
7113 for (j
= 0; j
< nunits
; ++j
)
7114 sel
[j
] = nunits
- 1;
7117 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7119 for (k
= 0; j
< nunits
; ++j
, ++k
)
7120 sel
[j
] = nunits
+ k
;
7122 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7123 if (!use_whole_vector
.is_empty ()
7124 && use_whole_vector
[i
] != scan_store_kind_perm
)
7126 if (zero_vec
== NULL_TREE
)
7127 zero_vec
= build_zero_cst (vectype
);
7128 if (masktype
== NULL_TREE
7129 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7130 masktype
= truth_type_for (vectype
);
7131 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7134 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7137 tree vec_oprnd1
= NULL_TREE
;
7138 tree vec_oprnd2
= NULL_TREE
;
7139 tree vec_oprnd3
= NULL_TREE
;
7140 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7141 tree dataref_offset
= build_int_cst (ref_type
, 0);
7142 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7143 vectype
, VMAT_CONTIGUOUS
);
7144 tree ldataref_ptr
= NULL_TREE
;
7145 tree orig
= NULL_TREE
;
7146 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7147 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7148 auto_vec
<tree
> vec_oprnds1
;
7149 auto_vec
<tree
> vec_oprnds2
;
7150 auto_vec
<tree
> vec_oprnds3
;
7151 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7152 *init
, &vec_oprnds1
,
7153 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7154 rhs2
, &vec_oprnds3
);
7155 for (int j
= 0; j
< ncopies
; j
++)
7157 vec_oprnd1
= vec_oprnds1
[j
];
7158 if (ldataref_ptr
== NULL
)
7159 vec_oprnd2
= vec_oprnds2
[j
];
7160 vec_oprnd3
= vec_oprnds3
[j
];
7163 else if (!inscan_var_store
)
7164 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7168 vec_oprnd2
= make_ssa_name (vectype
);
7169 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7170 unshare_expr (ldataref_ptr
),
7172 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7173 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7174 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7175 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7176 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7179 tree v
= vec_oprnd2
;
7180 for (int i
= 0; i
< units_log2
; ++i
)
7182 tree new_temp
= make_ssa_name (vectype
);
7183 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7185 && (use_whole_vector
[i
]
7186 != scan_store_kind_perm
))
7187 ? zero_vec
: vec_oprnd1
, v
,
7189 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7190 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7191 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7193 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7195 /* Whole vector shift shifted in zero bits, but if *init
7196 is not initializer_zerop, we need to replace those elements
7197 with elements from vec_oprnd1. */
7198 tree_vector_builder
vb (masktype
, nunits
, 1);
7199 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7200 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7201 ? boolean_false_node
: boolean_true_node
);
7203 tree new_temp2
= make_ssa_name (vectype
);
7204 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7205 new_temp
, vec_oprnd1
);
7206 vect_finish_stmt_generation (vinfo
, stmt_info
,
7208 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7209 new_temp
= new_temp2
;
7212 /* For exclusive scan, perform the perms[i] permutation once
7215 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7223 tree new_temp2
= make_ssa_name (vectype
);
7224 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7225 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7226 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7231 tree new_temp
= make_ssa_name (vectype
);
7232 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7233 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7234 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7236 tree last_perm_arg
= new_temp
;
7237 /* For exclusive scan, new_temp computed above is the exclusive scan
7238 prefix sum. Turn it into inclusive prefix sum for the broadcast
7239 of the last element into orig. */
7240 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7242 last_perm_arg
= make_ssa_name (vectype
);
7243 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7244 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7245 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7248 orig
= make_ssa_name (vectype
);
7249 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7250 last_perm_arg
, perms
[units_log2
]);
7251 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7252 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7254 if (!inscan_var_store
)
7256 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7257 unshare_expr (dataref_ptr
),
7259 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7260 g
= gimple_build_assign (data_ref
, new_temp
);
7261 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7262 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7266 if (inscan_var_store
)
7267 for (int j
= 0; j
< ncopies
; j
++)
7270 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7272 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7273 unshare_expr (dataref_ptr
),
7275 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7276 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7277 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7278 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7284 /* Function vectorizable_store.
7286 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7287 that can be vectorized.
7288 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7289 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7290 Return true if STMT_INFO is vectorizable in this way. */
7293 vectorizable_store (vec_info
*vinfo
,
7294 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7295 gimple
**vec_stmt
, slp_tree slp_node
,
7296 stmt_vector_for_cost
*cost_vec
)
7300 tree vec_oprnd
= NULL_TREE
;
7302 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7303 class loop
*loop
= NULL
;
7304 machine_mode vec_mode
;
7306 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7307 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7308 tree dataref_ptr
= NULL_TREE
;
7309 tree dataref_offset
= NULL_TREE
;
7310 gimple
*ptr_incr
= NULL
;
7313 stmt_vec_info first_stmt_info
;
7315 unsigned int group_size
, i
;
7316 vec
<tree
> oprnds
= vNULL
;
7317 vec
<tree
> result_chain
= vNULL
;
7318 vec
<tree
> vec_oprnds
= vNULL
;
7319 bool slp
= (slp_node
!= NULL
);
7320 unsigned int vec_num
;
7321 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7323 gather_scatter_info gs_info
;
7325 vec_load_store_type vls_type
;
7328 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7331 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7335 /* Is vectorizable store? */
7337 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7338 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7340 tree scalar_dest
= gimple_assign_lhs (assign
);
7341 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7342 && is_pattern_stmt_p (stmt_info
))
7343 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7344 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7345 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7346 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7347 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7348 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7349 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7350 && TREE_CODE (scalar_dest
) != MEM_REF
)
7355 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7356 if (!call
|| !gimple_call_internal_p (call
))
7359 internal_fn ifn
= gimple_call_internal_fn (call
);
7360 if (!internal_store_fn_p (ifn
))
7363 if (slp_node
!= NULL
)
7365 if (dump_enabled_p ())
7366 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7367 "SLP of masked stores not supported.\n");
7371 int mask_index
= internal_fn_mask_index (ifn
);
7373 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
7374 &mask
, NULL
, &mask_dt
, &mask_vectype
))
7378 op
= vect_get_store_rhs (stmt_info
);
7380 /* Cannot have hybrid store SLP -- that would mean storing to the
7381 same location twice. */
7382 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7384 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7385 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7389 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7390 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7395 /* Multiple types in SLP are handled by creating the appropriate number of
7396 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7401 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7403 gcc_assert (ncopies
>= 1);
7405 /* FORNOW. This restriction should be relaxed. */
7406 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7408 if (dump_enabled_p ())
7409 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7410 "multiple types in nested loop.\n");
7414 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7415 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7418 elem_type
= TREE_TYPE (vectype
);
7419 vec_mode
= TYPE_MODE (vectype
);
7421 if (!STMT_VINFO_DATA_REF (stmt_info
))
7424 vect_memory_access_type memory_access_type
;
7425 enum dr_alignment_support alignment_support_scheme
;
7428 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7429 ncopies
, &memory_access_type
, &poffset
,
7430 &alignment_support_scheme
, &misalignment
, &gs_info
))
7435 if (memory_access_type
== VMAT_CONTIGUOUS
)
7437 if (!VECTOR_MODE_P (vec_mode
)
7438 || !can_vec_mask_load_store_p (vec_mode
,
7439 TYPE_MODE (mask_vectype
), false))
7442 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7443 && (memory_access_type
!= VMAT_GATHER_SCATTER
7444 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7446 if (dump_enabled_p ())
7447 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7448 "unsupported access type for masked store.\n");
7454 /* FORNOW. In some cases can vectorize even if data-type not supported
7455 (e.g. - array initialization with 0). */
7456 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7460 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7461 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7462 && memory_access_type
!= VMAT_GATHER_SCATTER
7463 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7466 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7467 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7468 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7472 first_stmt_info
= stmt_info
;
7473 first_dr_info
= dr_info
;
7474 group_size
= vec_num
= 1;
7477 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7479 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7480 memory_access_type
))
7484 if (!vec_stmt
) /* transformation not required. */
7486 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7489 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7490 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, vls_type
,
7491 group_size
, memory_access_type
,
7492 ncopies
, &gs_info
, mask
);
7495 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7498 if (dump_enabled_p ())
7499 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7500 "incompatible vector types for invariants\n");
7504 if (dump_enabled_p ()
7505 && memory_access_type
!= VMAT_ELEMENTWISE
7506 && memory_access_type
!= VMAT_GATHER_SCATTER
7507 && alignment_support_scheme
!= dr_aligned
)
7508 dump_printf_loc (MSG_NOTE
, vect_location
,
7509 "Vectorizing an unaligned access.\n");
7511 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7512 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7513 memory_access_type
, alignment_support_scheme
,
7514 misalignment
, vls_type
, slp_node
, cost_vec
);
7517 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7521 ensure_base_align (dr_info
);
7523 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7525 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7526 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7527 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7528 tree ptr
, var
, scale
, vec_mask
;
7529 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7530 tree mask_halfvectype
= mask_vectype
;
7531 edge pe
= loop_preheader_edge (loop
);
7534 enum { NARROW
, NONE
, WIDEN
} modifier
;
7535 poly_uint64 scatter_off_nunits
7536 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7538 if (known_eq (nunits
, scatter_off_nunits
))
7540 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7544 /* Currently gathers and scatters are only supported for
7545 fixed-length vectors. */
7546 unsigned int count
= scatter_off_nunits
.to_constant ();
7547 vec_perm_builder
sel (count
, count
, 1);
7548 for (i
= 0; i
< (unsigned int) count
; ++i
)
7549 sel
.quick_push (i
| (count
/ 2));
7551 vec_perm_indices
indices (sel
, 1, count
);
7552 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7554 gcc_assert (perm_mask
!= NULL_TREE
);
7556 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7560 /* Currently gathers and scatters are only supported for
7561 fixed-length vectors. */
7562 unsigned int count
= nunits
.to_constant ();
7563 vec_perm_builder
sel (count
, count
, 1);
7564 for (i
= 0; i
< (unsigned int) count
; ++i
)
7565 sel
.quick_push (i
| (count
/ 2));
7567 vec_perm_indices
indices (sel
, 2, count
);
7568 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7569 gcc_assert (perm_mask
!= NULL_TREE
);
7573 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7578 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7579 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7580 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7581 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7582 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7583 scaletype
= TREE_VALUE (arglist
);
7585 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7586 && TREE_CODE (rettype
) == VOID_TYPE
);
7588 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7589 if (!is_gimple_min_invariant (ptr
))
7591 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7592 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7593 gcc_assert (!new_bb
);
7596 if (mask
== NULL_TREE
)
7598 mask_arg
= build_int_cst (masktype
, -1);
7599 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7600 mask_arg
, masktype
, NULL
);
7603 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7605 auto_vec
<tree
> vec_oprnds0
;
7606 auto_vec
<tree
> vec_oprnds1
;
7607 auto_vec
<tree
> vec_masks
;
7610 tree mask_vectype
= truth_type_for (vectype
);
7611 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7613 ? ncopies
/ 2 : ncopies
,
7614 mask
, &vec_masks
, mask_vectype
);
7616 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7618 ? ncopies
/ 2 : ncopies
,
7619 gs_info
.offset
, &vec_oprnds0
);
7620 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7622 ? ncopies
/ 2 : ncopies
,
7624 for (j
= 0; j
< ncopies
; ++j
)
7626 if (modifier
== WIDEN
)
7629 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7630 perm_mask
, stmt_info
, gsi
);
7632 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7633 src
= vec_oprnd1
= vec_oprnds1
[j
];
7635 mask_op
= vec_mask
= vec_masks
[j
];
7637 else if (modifier
== NARROW
)
7640 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7641 perm_mask
, stmt_info
, gsi
);
7643 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7644 op
= vec_oprnd0
= vec_oprnds0
[j
];
7646 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7650 op
= vec_oprnd0
= vec_oprnds0
[j
];
7651 src
= vec_oprnd1
= vec_oprnds1
[j
];
7653 mask_op
= vec_mask
= vec_masks
[j
];
7656 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7658 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7659 TYPE_VECTOR_SUBPARTS (srctype
)));
7660 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7661 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7663 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7664 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7668 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7670 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7671 TYPE_VECTOR_SUBPARTS (idxtype
)));
7672 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7673 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7675 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7676 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7684 if (modifier
== NARROW
)
7686 var
= vect_get_new_ssa_name (mask_halfvectype
,
7689 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7690 : VEC_UNPACK_LO_EXPR
,
7692 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7695 tree optype
= TREE_TYPE (mask_arg
);
7696 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7699 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7700 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7701 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7703 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7704 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7706 if (!useless_type_conversion_p (masktype
, utype
))
7708 gcc_assert (TYPE_PRECISION (utype
)
7709 <= TYPE_PRECISION (masktype
));
7710 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7711 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7712 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7718 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7719 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7721 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7723 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7726 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7727 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7729 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7730 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7735 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7737 /* We vectorize all the stmts of the interleaving group when we
7738 reach the last stmt in the group. */
7739 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7740 < DR_GROUP_SIZE (first_stmt_info
)
7749 grouped_store
= false;
7750 /* VEC_NUM is the number of vect stmts to be created for this
7752 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7753 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7754 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7755 == first_stmt_info
);
7756 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7757 op
= vect_get_store_rhs (first_stmt_info
);
7760 /* VEC_NUM is the number of vect stmts to be created for this
7762 vec_num
= group_size
;
7764 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7767 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7769 if (dump_enabled_p ())
7770 dump_printf_loc (MSG_NOTE
, vect_location
,
7771 "transform store. ncopies = %d\n", ncopies
);
7773 if (memory_access_type
== VMAT_ELEMENTWISE
7774 || memory_access_type
== VMAT_STRIDED_SLP
)
7776 gimple_stmt_iterator incr_gsi
;
7782 tree stride_base
, stride_step
, alias_off
;
7786 /* Checked by get_load_store_type. */
7787 unsigned int const_nunits
= nunits
.to_constant ();
7789 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7790 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7792 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7794 = fold_build_pointer_plus
7795 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7796 size_binop (PLUS_EXPR
,
7797 convert_to_ptrofftype (dr_offset
),
7798 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7799 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7801 /* For a store with loop-invariant (but other than power-of-2)
7802 stride (i.e. not a grouped access) like so:
7804 for (i = 0; i < n; i += stride)
7807 we generate a new induction variable and new stores from
7808 the components of the (vectorized) rhs:
7810 for (j = 0; ; j += VF*stride)
7815 array[j + stride] = tmp2;
7819 unsigned nstores
= const_nunits
;
7821 tree ltype
= elem_type
;
7822 tree lvectype
= vectype
;
7825 if (group_size
< const_nunits
7826 && const_nunits
% group_size
== 0)
7828 nstores
= const_nunits
/ group_size
;
7830 ltype
= build_vector_type (elem_type
, group_size
);
7833 /* First check if vec_extract optab doesn't support extraction
7834 of vector elts directly. */
7835 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7837 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7838 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7839 group_size
).exists (&vmode
)
7840 || (convert_optab_handler (vec_extract_optab
,
7841 TYPE_MODE (vectype
), vmode
)
7842 == CODE_FOR_nothing
))
7844 /* Try to avoid emitting an extract of vector elements
7845 by performing the extracts using an integer type of the
7846 same size, extracting from a vector of those and then
7847 re-interpreting it as the original vector type if
7850 = group_size
* GET_MODE_BITSIZE (elmode
);
7851 unsigned int lnunits
= const_nunits
/ group_size
;
7852 /* If we can't construct such a vector fall back to
7853 element extracts from the original vector type and
7854 element size stores. */
7855 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7856 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7857 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7858 lnunits
).exists (&vmode
)
7859 && (convert_optab_handler (vec_extract_optab
,
7861 != CODE_FOR_nothing
))
7865 ltype
= build_nonstandard_integer_type (lsize
, 1);
7866 lvectype
= build_vector_type (ltype
, nstores
);
7868 /* Else fall back to vector extraction anyway.
7869 Fewer stores are more important than avoiding spilling
7870 of the vector we extract from. Compared to the
7871 construction case in vectorizable_load no store-forwarding
7872 issue exists here for reasonable archs. */
7875 else if (group_size
>= const_nunits
7876 && group_size
% const_nunits
== 0)
7879 lnel
= const_nunits
;
7883 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7884 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7887 ivstep
= stride_step
;
7888 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7889 build_int_cst (TREE_TYPE (ivstep
), vf
));
7891 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7893 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7894 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7895 create_iv (stride_base
, ivstep
, NULL
,
7896 loop
, &incr_gsi
, insert_after
,
7898 incr
= gsi_stmt (incr_gsi
);
7900 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7902 alias_off
= build_int_cst (ref_type
, 0);
7903 stmt_vec_info next_stmt_info
= first_stmt_info
;
7904 for (g
= 0; g
< group_size
; g
++)
7906 running_off
= offvar
;
7909 tree size
= TYPE_SIZE_UNIT (ltype
);
7910 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7912 tree newoff
= copy_ssa_name (running_off
, NULL
);
7913 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7915 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7916 running_off
= newoff
;
7919 op
= vect_get_store_rhs (next_stmt_info
);
7920 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
7922 unsigned int group_el
= 0;
7923 unsigned HOST_WIDE_INT
7924 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7925 for (j
= 0; j
< ncopies
; j
++)
7927 vec_oprnd
= vec_oprnds
[j
];
7928 /* Pun the vector to extract from if necessary. */
7929 if (lvectype
!= vectype
)
7931 tree tem
= make_ssa_name (lvectype
);
7933 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7934 lvectype
, vec_oprnd
));
7935 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
7938 for (i
= 0; i
< nstores
; i
++)
7940 tree newref
, newoff
;
7941 gimple
*incr
, *assign
;
7942 tree size
= TYPE_SIZE (ltype
);
7943 /* Extract the i'th component. */
7944 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7945 bitsize_int (i
), size
);
7946 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7949 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7953 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7955 newref
= build2 (MEM_REF
, ltype
,
7956 running_off
, this_off
);
7957 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7959 /* And store it to *running_off. */
7960 assign
= gimple_build_assign (newref
, elem
);
7961 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
7965 || group_el
== group_size
)
7967 newoff
= copy_ssa_name (running_off
, NULL
);
7968 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7969 running_off
, stride_step
);
7970 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7972 running_off
= newoff
;
7975 if (g
== group_size
- 1
7978 if (j
== 0 && i
== 0)
7980 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
7984 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7985 vec_oprnds
.release ();
7993 auto_vec
<tree
> dr_chain (group_size
);
7994 oprnds
.create (group_size
);
7996 gcc_assert (alignment_support_scheme
);
7997 vec_loop_masks
*loop_masks
7998 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7999 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8001 vec_loop_lens
*loop_lens
8002 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8003 ? &LOOP_VINFO_LENS (loop_vinfo
)
8006 /* Shouldn't go with length-based approach if fully masked. */
8007 gcc_assert (!loop_lens
|| !loop_masks
);
8009 /* Targets with store-lane instructions must not require explicit
8010 realignment. vect_supportable_dr_alignment always returns either
8011 dr_aligned or dr_unaligned_supported for masked operations. */
8012 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8015 || alignment_support_scheme
== dr_aligned
8016 || alignment_support_scheme
== dr_unaligned_supported
);
8018 tree offset
= NULL_TREE
;
8019 if (!known_eq (poffset
, 0))
8020 offset
= size_int (poffset
);
8023 tree vec_offset
= NULL_TREE
;
8024 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8026 aggr_type
= NULL_TREE
;
8029 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8031 aggr_type
= elem_type
;
8032 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8033 &bump
, &vec_offset
);
8037 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8038 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8040 aggr_type
= vectype
;
8041 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8042 memory_access_type
);
8046 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8048 /* In case the vectorization factor (VF) is bigger than the number
8049 of elements that we can fit in a vectype (nunits), we have to generate
8050 more than one vector stmt - i.e - we need to "unroll" the
8051 vector stmt by a factor VF/nunits. */
8053 /* In case of interleaving (non-unit grouped access):
8060 We create vectorized stores starting from base address (the access of the
8061 first stmt in the chain (S2 in the above example), when the last store stmt
8062 of the chain (S4) is reached:
8065 VS2: &base + vec_size*1 = vx0
8066 VS3: &base + vec_size*2 = vx1
8067 VS4: &base + vec_size*3 = vx3
8069 Then permutation statements are generated:
8071 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8072 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8075 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8076 (the order of the data-refs in the output of vect_permute_store_chain
8077 corresponds to the order of scalar stmts in the interleaving chain - see
8078 the documentation of vect_permute_store_chain()).
8080 In case of both multiple types and interleaving, above vector stores and
8081 permutation stmts are created for every copy. The result vector stmts are
8082 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8083 STMT_VINFO_RELATED_STMT for the next copies.
8086 auto_vec
<tree
> vec_masks
;
8087 tree vec_mask
= NULL
;
8088 auto_vec
<tree
> vec_offsets
;
8089 auto_vec
<vec
<tree
> > gvec_oprnds
;
8090 gvec_oprnds
.safe_grow_cleared (group_size
, true);
8091 for (j
= 0; j
< ncopies
; j
++)
8098 /* Get vectorized arguments for SLP_NODE. */
8099 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
8101 vec_oprnd
= vec_oprnds
[0];
8105 /* For interleaved stores we collect vectorized defs for all the
8106 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8107 used as an input to vect_permute_store_chain().
8109 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8110 and OPRNDS are of size 1. */
8111 stmt_vec_info next_stmt_info
= first_stmt_info
;
8112 for (i
= 0; i
< group_size
; i
++)
8114 /* Since gaps are not supported for interleaved stores,
8115 DR_GROUP_SIZE is the exact number of stmts in the chain.
8116 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8117 that there is no interleaving, DR_GROUP_SIZE is 1,
8118 and only one iteration of the loop will be executed. */
8119 op
= vect_get_store_rhs (next_stmt_info
);
8120 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8121 ncopies
, op
, &gvec_oprnds
[i
]);
8122 vec_oprnd
= gvec_oprnds
[i
][0];
8123 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
8124 oprnds
.quick_push (gvec_oprnds
[i
][0]);
8125 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8129 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8130 mask
, &vec_masks
, mask_vectype
);
8131 vec_mask
= vec_masks
[0];
8135 /* We should have catched mismatched types earlier. */
8136 gcc_assert (useless_type_conversion_p (vectype
,
8137 TREE_TYPE (vec_oprnd
)));
8138 bool simd_lane_access_p
8139 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8140 if (simd_lane_access_p
8142 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8143 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8144 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8145 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8146 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8147 get_alias_set (TREE_TYPE (ref_type
))))
8149 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8150 dataref_offset
= build_int_cst (ref_type
, 0);
8152 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8154 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8155 slp_node
, &gs_info
, &dataref_ptr
,
8157 vec_offset
= vec_offsets
[0];
8161 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8162 simd_lane_access_p
? loop
: NULL
,
8163 offset
, &dummy
, gsi
, &ptr_incr
,
8164 simd_lane_access_p
, bump
);
8168 /* For interleaved stores we created vectorized defs for all the
8169 defs stored in OPRNDS in the previous iteration (previous copy).
8170 DR_CHAIN is then used as an input to vect_permute_store_chain().
8171 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8172 OPRNDS are of size 1. */
8173 for (i
= 0; i
< group_size
; i
++)
8175 vec_oprnd
= gvec_oprnds
[i
][j
];
8176 dr_chain
[i
] = gvec_oprnds
[i
][j
];
8177 oprnds
[i
] = gvec_oprnds
[i
][j
];
8180 vec_mask
= vec_masks
[j
];
8183 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8184 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8185 vec_offset
= vec_offsets
[j
];
8187 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8191 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8195 /* Get an array into which we can store the individual vectors. */
8196 vec_array
= create_vector_array (vectype
, vec_num
);
8198 /* Invalidate the current contents of VEC_ARRAY. This should
8199 become an RTL clobber too, which prevents the vector registers
8200 from being upward-exposed. */
8201 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8203 /* Store the individual vectors into the array. */
8204 for (i
= 0; i
< vec_num
; i
++)
8206 vec_oprnd
= dr_chain
[i
];
8207 write_vector_array (vinfo
, stmt_info
,
8208 gsi
, vec_oprnd
, vec_array
, i
);
8211 tree final_mask
= NULL
;
8213 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8216 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8223 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8225 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8226 tree alias_ptr
= build_int_cst (ref_type
, align
);
8227 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8228 dataref_ptr
, alias_ptr
,
8229 final_mask
, vec_array
);
8234 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8235 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8236 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8238 gimple_call_set_lhs (call
, data_ref
);
8240 gimple_call_set_nothrow (call
, true);
8241 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8244 /* Record that VEC_ARRAY is now dead. */
8245 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8253 result_chain
.create (group_size
);
8255 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8256 gsi
, &result_chain
);
8259 stmt_vec_info next_stmt_info
= first_stmt_info
;
8260 for (i
= 0; i
< vec_num
; i
++)
8263 unsigned HOST_WIDE_INT align
;
8265 tree final_mask
= NULL_TREE
;
8267 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8269 vectype
, vec_num
* j
+ i
);
8271 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8274 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8276 tree scale
= size_int (gs_info
.scale
);
8279 call
= gimple_build_call_internal
8280 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8281 scale
, vec_oprnd
, final_mask
);
8283 call
= gimple_build_call_internal
8284 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8286 gimple_call_set_nothrow (call
, true);
8287 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8293 /* Bump the vector pointer. */
8294 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8295 gsi
, stmt_info
, bump
);
8298 vec_oprnd
= vec_oprnds
[i
];
8299 else if (grouped_store
)
8300 /* For grouped stores vectorized defs are interleaved in
8301 vect_permute_store_chain(). */
8302 vec_oprnd
= result_chain
[i
];
8304 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8305 if (alignment_support_scheme
== dr_aligned
)
8307 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
8309 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8313 misalign
= misalignment
;
8314 if (dataref_offset
== NULL_TREE
8315 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8316 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8318 align
= least_bit_hwi (misalign
| align
);
8320 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8322 tree perm_mask
= perm_mask_for_reverse (vectype
);
8323 tree perm_dest
= vect_create_destination_var
8324 (vect_get_store_rhs (stmt_info
), vectype
);
8325 tree new_temp
= make_ssa_name (perm_dest
);
8327 /* Generate the permute statement. */
8329 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8330 vec_oprnd
, perm_mask
);
8331 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8333 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8334 vec_oprnd
= new_temp
;
8337 /* Arguments are ready. Create the new vector stmt. */
8340 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8342 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8344 final_mask
, vec_oprnd
);
8345 gimple_call_set_nothrow (call
, true);
8346 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8352 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8353 vec_num
* ncopies
, vec_num
* j
+ i
);
8354 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8355 machine_mode vmode
= TYPE_MODE (vectype
);
8356 opt_machine_mode new_ovmode
8357 = get_len_load_store_mode (vmode
, false);
8358 machine_mode new_vmode
= new_ovmode
.require ();
8359 /* Need conversion if it's wrapped with VnQI. */
8360 if (vmode
!= new_vmode
)
8363 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8366 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8368 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8370 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8372 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8377 = gimple_build_call_internal (IFN_LEN_STORE
, 4, dataref_ptr
,
8378 ptr
, final_len
, vec_oprnd
);
8379 gimple_call_set_nothrow (call
, true);
8380 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8385 data_ref
= fold_build2 (MEM_REF
, vectype
,
8389 : build_int_cst (ref_type
, 0));
8390 if (alignment_support_scheme
== dr_aligned
)
8393 TREE_TYPE (data_ref
)
8394 = build_aligned_type (TREE_TYPE (data_ref
),
8395 align
* BITS_PER_UNIT
);
8396 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8397 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8398 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8404 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8405 if (!next_stmt_info
)
8412 *vec_stmt
= new_stmt
;
8413 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8417 for (i
= 0; i
< group_size
; ++i
)
8419 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8423 result_chain
.release ();
8424 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.

   The returned mask is a vector of ssizetype elements (one per lane) whose
   element I selects input lane SEL[I] of a two-input VEC_PERM_EXPR.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  /* SEL must describe exactly one index per output lane of VECTYPE.  */
  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}
/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.

   Use this variant when the caller has not (and will not) verify target
   support itself; the assert turns an unsupported permutation into an ICE
   instead of silently generating unexpandable gimple.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
/* Given a vector variable X and Y, that was generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable (the SSA name holding the VEC_PERM_EXPR
   result).  */

static tree
permute_vec_elements (vec_info *vinfo,
		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  /* Base the name of the destination on the scalar LHS when there is one,
     for more readable dumps; otherwise make up a fresh vector variable.  */
  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loops preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.

   The function runs in two passes: the first pass only checks that every
   loop-defined use of STMT_INFO can legally be hoisted (rejecting PHI defs
   and defs that themselves have loop-defined uses, i.e. anything that would
   require recursive hoisting); only when the whole set is known movable does
   the second pass actually detach the defining statements and re-insert them
   on the preheader edge.  This check-then-commit split means the function
   never leaves the IL half-transformed on failure.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  /* Pass 1: verify every use defined inside LOOP is hoistable.  */
  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  /* Nothing defined inside the loop: STMT_INFO is trivially hoistable.  */
  if (!any)
    return true;

  /* Pass 2: all defs are movable; detach each one from its current
     position and re-insert it on the preheader edge of LOOP.  */
  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
8539 /* vectorizable_load.
8541 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8542 that can be vectorized.
8543 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8544 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8545 Return true if STMT_INFO is vectorizable in this way. */
8548 vectorizable_load (vec_info
*vinfo
,
8549 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8550 gimple
**vec_stmt
, slp_tree slp_node
,
8551 stmt_vector_for_cost
*cost_vec
)
8554 tree vec_dest
= NULL
;
8555 tree data_ref
= NULL
;
8556 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8557 class loop
*loop
= NULL
;
8558 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8559 bool nested_in_vect_loop
= false;
8564 tree dataref_ptr
= NULL_TREE
;
8565 tree dataref_offset
= NULL_TREE
;
8566 gimple
*ptr_incr
= NULL
;
8569 unsigned int group_size
;
8570 poly_uint64 group_gap_adj
;
8571 tree msq
= NULL_TREE
, lsq
;
8572 tree realignment_token
= NULL_TREE
;
8574 vec
<tree
> dr_chain
= vNULL
;
8575 bool grouped_load
= false;
8576 stmt_vec_info first_stmt_info
;
8577 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8578 bool compute_in_loop
= false;
8579 class loop
*at_loop
;
8581 bool slp
= (slp_node
!= NULL
);
8582 bool slp_perm
= false;
8583 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8586 gather_scatter_info gs_info
;
8588 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8590 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8593 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8597 if (!STMT_VINFO_DATA_REF (stmt_info
))
8600 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8601 int mask_index
= -1;
8602 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8604 scalar_dest
= gimple_assign_lhs (assign
);
8605 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8608 tree_code code
= gimple_assign_rhs_code (assign
);
8609 if (code
!= ARRAY_REF
8610 && code
!= BIT_FIELD_REF
8611 && code
!= INDIRECT_REF
8612 && code
!= COMPONENT_REF
8613 && code
!= IMAGPART_EXPR
8614 && code
!= REALPART_EXPR
8616 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8621 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8622 if (!call
|| !gimple_call_internal_p (call
))
8625 internal_fn ifn
= gimple_call_internal_fn (call
);
8626 if (!internal_load_fn_p (ifn
))
8629 scalar_dest
= gimple_call_lhs (call
);
8633 mask_index
= internal_fn_mask_index (ifn
);
8634 /* ??? For SLP the mask operand is always last. */
8635 if (mask_index
>= 0 && slp_node
)
8636 mask_index
= SLP_TREE_CHILDREN (slp_node
).length () - 1;
8638 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8639 &mask
, NULL
, &mask_dt
, &mask_vectype
))
8643 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8644 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8648 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8649 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8650 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8655 /* Multiple types in SLP are handled by creating the appropriate number of
8656 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8661 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8663 gcc_assert (ncopies
>= 1);
8665 /* FORNOW. This restriction should be relaxed. */
8666 if (nested_in_vect_loop
&& ncopies
> 1)
8668 if (dump_enabled_p ())
8669 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8670 "multiple types in nested loop.\n");
8674 /* Invalidate assumptions made by dependence analysis when vectorization
8675 on the unrolled body effectively re-orders stmts. */
8677 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8678 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8679 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8681 if (dump_enabled_p ())
8682 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8683 "cannot perform implicit CSE when unrolling "
8684 "with negative dependence distance\n");
8688 elem_type
= TREE_TYPE (vectype
);
8689 mode
= TYPE_MODE (vectype
);
8691 /* FORNOW. In some cases can vectorize even if data-type not supported
8692 (e.g. - data copies). */
8693 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8695 if (dump_enabled_p ())
8696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8697 "Aligned load, but unsupported type.\n");
8701 /* Check if the load is a part of an interleaving chain. */
8702 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8704 grouped_load
= true;
8706 gcc_assert (!nested_in_vect_loop
);
8707 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8709 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8710 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8712 /* Refuse non-SLP vectorization of SLP-only groups. */
8713 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8715 if (dump_enabled_p ())
8716 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8717 "cannot vectorize load in non-SLP mode.\n");
8721 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8727 /* In BB vectorization we may not actually use a loaded vector
8728 accessing elements in excess of DR_GROUP_SIZE. */
8729 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8730 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8731 unsigned HOST_WIDE_INT nunits
;
8732 unsigned j
, k
, maxk
= 0;
8733 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8736 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
8737 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8738 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8740 if (dump_enabled_p ())
8741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8742 "BB vectorization with gaps at the end of "
8743 "a load is not supported\n");
8750 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8753 if (dump_enabled_p ())
8754 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8756 "unsupported load permutation\n");
8761 /* Invalidate assumptions made by dependence analysis when vectorization
8762 on the unrolled body effectively re-orders stmts. */
8763 if (!PURE_SLP_STMT (stmt_info
)
8764 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8765 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8766 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8768 if (dump_enabled_p ())
8769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8770 "cannot perform implicit CSE when performing "
8771 "group loads with negative dependence distance\n");
8778 vect_memory_access_type memory_access_type
;
8779 enum dr_alignment_support alignment_support_scheme
;
8782 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
8783 ncopies
, &memory_access_type
, &poffset
,
8784 &alignment_support_scheme
, &misalignment
, &gs_info
))
8789 if (memory_access_type
== VMAT_CONTIGUOUS
)
8791 machine_mode vec_mode
= TYPE_MODE (vectype
);
8792 if (!VECTOR_MODE_P (vec_mode
)
8793 || !can_vec_mask_load_store_p (vec_mode
,
8794 TYPE_MODE (mask_vectype
), true))
8797 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8798 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8800 if (dump_enabled_p ())
8801 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8802 "unsupported access type for masked load.\n");
8805 else if (memory_access_type
== VMAT_GATHER_SCATTER
8806 && gs_info
.ifn
== IFN_LAST
8809 if (dump_enabled_p ())
8810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8811 "unsupported masked emulated gather.\n");
8816 if (!vec_stmt
) /* transformation not required. */
8820 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
8823 if (dump_enabled_p ())
8824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8825 "incompatible vector types for invariants\n");
8830 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8833 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8834 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, VLS_LOAD
,
8835 group_size
, memory_access_type
,
8836 ncopies
, &gs_info
, mask
);
8838 if (dump_enabled_p ()
8839 && memory_access_type
!= VMAT_ELEMENTWISE
8840 && memory_access_type
!= VMAT_GATHER_SCATTER
8841 && alignment_support_scheme
!= dr_aligned
)
8842 dump_printf_loc (MSG_NOTE
, vect_location
,
8843 "Vectorizing an unaligned access.\n");
8845 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8846 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8847 alignment_support_scheme
, misalignment
,
8848 &gs_info
, slp_node
, cost_vec
);
8853 gcc_assert (memory_access_type
8854 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8856 if (dump_enabled_p ())
8857 dump_printf_loc (MSG_NOTE
, vect_location
,
8858 "transform load. ncopies = %d\n", ncopies
);
8862 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8863 ensure_base_align (dr_info
);
8865 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8867 vect_build_gather_load_calls (vinfo
,
8868 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8872 if (memory_access_type
== VMAT_INVARIANT
)
8874 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8875 /* If we have versioned for aliasing or the loop doesn't
8876 have any data dependencies that would preclude this,
8877 then we are sure this is a loop invariant load and
8878 thus we can insert it on the preheader edge. */
8879 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8880 && !nested_in_vect_loop
8881 && hoist_defs_of_uses (stmt_info
, loop
));
8884 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8885 if (dump_enabled_p ())
8886 dump_printf_loc (MSG_NOTE
, vect_location
,
8887 "hoisting out of the vectorized loop: %G", stmt
);
8888 scalar_dest
= copy_ssa_name (scalar_dest
);
8889 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8890 gsi_insert_on_edge_immediate
8891 (loop_preheader_edge (loop
),
8892 gimple_build_assign (scalar_dest
, rhs
));
8894 /* These copies are all equivalent, but currently the representation
8895 requires a separate STMT_VINFO_VEC_STMT for each one. */
8896 gimple_stmt_iterator gsi2
= *gsi
;
8898 for (j
= 0; j
< ncopies
; j
++)
8901 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8904 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8906 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8908 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8912 *vec_stmt
= new_stmt
;
8913 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8919 if (memory_access_type
== VMAT_ELEMENTWISE
8920 || memory_access_type
== VMAT_STRIDED_SLP
)
8922 gimple_stmt_iterator incr_gsi
;
8927 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8928 tree stride_base
, stride_step
, alias_off
;
8929 /* Checked by get_load_store_type. */
8930 unsigned int const_nunits
= nunits
.to_constant ();
8931 unsigned HOST_WIDE_INT cst_offset
= 0;
8934 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
8935 gcc_assert (!nested_in_vect_loop
);
8939 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8940 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8944 first_stmt_info
= stmt_info
;
8945 first_dr_info
= dr_info
;
8947 if (slp
&& grouped_load
)
8949 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8950 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8956 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8957 * vect_get_place_in_interleaving_chain (stmt_info
,
8960 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8963 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8965 = fold_build_pointer_plus
8966 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8967 size_binop (PLUS_EXPR
,
8968 convert_to_ptrofftype (dr_offset
),
8969 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8970 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8972 /* For a load with loop-invariant (but other than power-of-2)
8973 stride (i.e. not a grouped access) like so:
8975 for (i = 0; i < n; i += stride)
8978 we generate a new induction variable and new accesses to
8979 form a new vector (or vectors, depending on ncopies):
8981 for (j = 0; ; j += VF*stride)
8983 tmp2 = array[j + stride];
8985 vectemp = {tmp1, tmp2, ...}
8988 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8989 build_int_cst (TREE_TYPE (stride_step
), vf
));
8991 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8993 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8994 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8995 create_iv (stride_base
, ivstep
, NULL
,
8996 loop
, &incr_gsi
, insert_after
,
8999 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9001 running_off
= offvar
;
9002 alias_off
= build_int_cst (ref_type
, 0);
9003 int nloads
= const_nunits
;
9005 tree ltype
= TREE_TYPE (vectype
);
9006 tree lvectype
= vectype
;
9007 auto_vec
<tree
> dr_chain
;
9008 if (memory_access_type
== VMAT_STRIDED_SLP
)
9010 if (group_size
< const_nunits
)
9012 /* First check if vec_init optab supports construction from vector
9013 elts directly. Otherwise avoid emitting a constructor of
9014 vector elements by performing the loads using an integer type
9015 of the same size, constructing a vector of those and then
9016 re-interpreting it as the original vector type. This avoids a
9017 huge runtime penalty due to the general inability to perform
9018 store forwarding from smaller stores to a larger load. */
9021 = vector_vector_composition_type (vectype
,
9022 const_nunits
/ group_size
,
9024 if (vtype
!= NULL_TREE
)
9026 nloads
= const_nunits
/ group_size
;
9035 lnel
= const_nunits
;
9038 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9040 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9041 else if (nloads
== 1)
9046 /* For SLP permutation support we need to load the whole group,
9047 not only the number of vector stmts the permutation result
9051 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9053 unsigned int const_vf
= vf
.to_constant ();
9054 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9055 dr_chain
.create (ncopies
);
9058 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9060 unsigned int group_el
= 0;
9061 unsigned HOST_WIDE_INT
9062 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9063 for (j
= 0; j
< ncopies
; j
++)
9066 vec_alloc (v
, nloads
);
9067 gimple
*new_stmt
= NULL
;
9068 for (i
= 0; i
< nloads
; i
++)
9070 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9071 group_el
* elsz
+ cst_offset
);
9072 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9073 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9074 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9075 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9077 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9078 gimple_assign_lhs (new_stmt
));
9082 || group_el
== group_size
)
9084 tree newoff
= copy_ssa_name (running_off
);
9085 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9086 running_off
, stride_step
);
9087 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9089 running_off
= newoff
;
9095 tree vec_inv
= build_constructor (lvectype
, v
);
9096 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9097 vec_inv
, lvectype
, gsi
);
9098 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9099 if (lvectype
!= vectype
)
9101 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
9103 build1 (VIEW_CONVERT_EXPR
,
9104 vectype
, new_temp
));
9105 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9112 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
9114 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9119 *vec_stmt
= new_stmt
;
9120 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9126 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9132 if (memory_access_type
== VMAT_GATHER_SCATTER
9133 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9134 grouped_load
= false;
9138 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9139 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9140 /* For SLP vectorization we directly vectorize a subchain
9141 without permutation. */
9142 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9143 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9144 /* For BB vectorization always use the first stmt to base
9145 the data ref pointer on. */
9147 first_stmt_info_for_drptr
9148 = vect_find_first_scalar_stmt_in_slp (slp_node
);
9150 /* Check if the chain of loads is already vectorized. */
9151 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
9152 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9153 ??? But we can only do so if there is exactly one
9154 as we have no way to get at the rest. Leave the CSE
9156 ??? With the group load eventually participating
9157 in multiple different permutations (having multiple
9158 slp nodes which refer to the same group) the CSE
9159 is even wrong code. See PR56270. */
9162 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9165 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9168 /* VEC_NUM is the number of vect stmts to be created for this group. */
9171 grouped_load
= false;
9172 /* If an SLP permutation is from N elements to N elements,
9173 and if one vector holds a whole number of N, we can load
9174 the inputs to the permutation in the same way as an
9175 unpermuted sequence. In other cases we need to load the
9176 whole group, not only the number of vector stmts the
9177 permutation result fits in. */
9178 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9180 && (group_size
!= scalar_lanes
9181 || !multiple_p (nunits
, group_size
)))
9183 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9184 variable VF; see vect_transform_slp_perm_load. */
9185 unsigned int const_vf
= vf
.to_constant ();
9186 unsigned int const_nunits
= nunits
.to_constant ();
9187 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9188 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9192 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9194 = group_size
- scalar_lanes
;
9198 vec_num
= group_size
;
9200 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9204 first_stmt_info
= stmt_info
;
9205 first_dr_info
= dr_info
;
9206 group_size
= vec_num
= 1;
9208 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9211 gcc_assert (alignment_support_scheme
);
9212 vec_loop_masks
*loop_masks
9213 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9214 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9216 vec_loop_lens
*loop_lens
9217 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
9218 ? &LOOP_VINFO_LENS (loop_vinfo
)
9221 /* Shouldn't go with length-based approach if fully masked. */
9222 gcc_assert (!loop_lens
|| !loop_masks
);
9224 /* Targets with store-lane instructions must not require explicit
9225 realignment. vect_supportable_dr_alignment always returns either
9226 dr_aligned or dr_unaligned_supported for masked operations. */
9227 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9230 || alignment_support_scheme
== dr_aligned
9231 || alignment_support_scheme
== dr_unaligned_supported
);
9233 /* In case the vectorization factor (VF) is bigger than the number
9234 of elements that we can fit in a vectype (nunits), we have to generate
9235 more than one vector stmt - i.e - we need to "unroll" the
9236 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9237 from one copy of the vector stmt to the next, in the field
9238 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9239 stages to find the correct vector defs to be used when vectorizing
9240 stmts that use the defs of the current stmt. The example below
9241 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9242 need to create 4 vectorized stmts):
9244 before vectorization:
9245 RELATED_STMT VEC_STMT
9249 step 1: vectorize stmt S1:
9250 We first create the vector stmt VS1_0, and, as usual, record a
9251 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9252 Next, we create the vector stmt VS1_1, and record a pointer to
9253 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9254 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9256 RELATED_STMT VEC_STMT
9257 VS1_0: vx0 = memref0 VS1_1 -
9258 VS1_1: vx1 = memref1 VS1_2 -
9259 VS1_2: vx2 = memref2 VS1_3 -
9260 VS1_3: vx3 = memref3 - -
9261 S1: x = load - VS1_0
9265 /* In case of interleaving (non-unit grouped access):
9272 Vectorized loads are created in the order of memory accesses
9273 starting from the access of the first stmt of the chain:
9276 VS2: vx1 = &base + vec_size*1
9277 VS3: vx3 = &base + vec_size*2
9278 VS4: vx4 = &base + vec_size*3
9280 Then permutation statements are generated:
9282 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9283 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9286 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9287 (the order of the data-refs in the output of vect_permute_load_chain
9288 corresponds to the order of scalar stmts in the interleaving chain - see
9289 the documentation of vect_permute_load_chain()).
9290 The generation of permutation stmts and recording them in
9291 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9293 In case of both multiple types and interleaving, the vector loads and
9294 permutation stmts above are created for every copy. The result vector
9295 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9296 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9298 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9299 on a target that supports unaligned accesses (dr_unaligned_supported)
9300 we generate the following code:
9304 p = p + indx * vectype_size;
9309 Otherwise, the data reference is potentially unaligned on a target that
9310 does not support unaligned accesses (dr_explicit_realign_optimized) -
9311 then generate the following code, in which the data in each iteration is
9312 obtained by two vector loads, one from the previous iteration, and one
9313 from the current iteration:
9315 msq_init = *(floor(p1))
9316 p2 = initial_addr + VS - 1;
9317 realignment_token = call target_builtin;
9320 p2 = p2 + indx * vectype_size
9322 vec_dest = realign_load (msq, lsq, realignment_token)
9327 /* If the misalignment remains the same throughout the execution of the
9328 loop, we can create the init_addr and permutation mask at the loop
9329 preheader. Otherwise, it needs to be created inside the loop.
9330 This can only occur when vectorizing memory accesses in the inner-loop
9331 nested within an outer-loop that is being vectorized. */
9333 if (nested_in_vect_loop
9334 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9335 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9337 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9338 compute_in_loop
= true;
9341 bool diff_first_stmt_info
9342 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9344 tree offset
= NULL_TREE
;
9345 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9346 || alignment_support_scheme
== dr_explicit_realign
)
9347 && !compute_in_loop
)
9349 /* If we have different first_stmt_info, we can't set up realignment
9350 here, since we can't guarantee first_stmt_info DR has been
9351 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9352 distance from first_stmt_info DR instead as below. */
9353 if (!diff_first_stmt_info
)
9354 msq
= vect_setup_realignment (vinfo
,
9355 first_stmt_info
, gsi
, &realignment_token
,
9356 alignment_support_scheme
, NULL_TREE
,
9358 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9360 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9361 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9363 gcc_assert (!first_stmt_info_for_drptr
);
9369 if (!known_eq (poffset
, 0))
9371 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
9372 : size_int (poffset
));
9375 tree vec_offset
= NULL_TREE
;
9376 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9378 aggr_type
= NULL_TREE
;
9381 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9383 aggr_type
= elem_type
;
9384 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9385 &bump
, &vec_offset
);
9389 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9390 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9392 aggr_type
= vectype
;
9393 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9394 memory_access_type
);
9397 vec
<tree
> vec_offsets
= vNULL
;
9398 auto_vec
<tree
> vec_masks
;
9402 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
9405 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
9406 &vec_masks
, mask_vectype
);
9408 tree vec_mask
= NULL_TREE
;
9409 poly_uint64 group_elt
= 0;
9410 for (j
= 0; j
< ncopies
; j
++)
9412 /* 1. Create the vector or array pointer update chain. */
9415 bool simd_lane_access_p
9416 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9417 if (simd_lane_access_p
9418 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9419 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9420 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9421 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9422 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9423 get_alias_set (TREE_TYPE (ref_type
)))
9424 && (alignment_support_scheme
== dr_aligned
9425 || alignment_support_scheme
== dr_unaligned_supported
))
9427 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9428 dataref_offset
= build_int_cst (ref_type
, 0);
9430 else if (diff_first_stmt_info
)
9433 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9434 aggr_type
, at_loop
, offset
, &dummy
,
9435 gsi
, &ptr_incr
, simd_lane_access_p
,
9437 /* Adjust the pointer by the difference to first_stmt. */
9438 data_reference_p ptrdr
9439 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9441 = fold_convert (sizetype
,
9442 size_binop (MINUS_EXPR
,
9443 DR_INIT (first_dr_info
->dr
),
9445 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9447 if (alignment_support_scheme
== dr_explicit_realign
)
9449 msq
= vect_setup_realignment (vinfo
,
9450 first_stmt_info_for_drptr
, gsi
,
9452 alignment_support_scheme
,
9453 dataref_ptr
, &at_loop
);
9454 gcc_assert (!compute_in_loop
);
9457 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9459 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9460 slp_node
, &gs_info
, &dataref_ptr
,
9465 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9467 offset
, &dummy
, gsi
, &ptr_incr
,
9468 simd_lane_access_p
, bump
);
9470 vec_mask
= vec_masks
[0];
9475 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9477 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9478 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9481 vec_mask
= vec_masks
[j
];
9484 if (grouped_load
|| slp_perm
)
9485 dr_chain
.create (vec_num
);
9487 gimple
*new_stmt
= NULL
;
9488 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9492 vec_array
= create_vector_array (vectype
, vec_num
);
9494 tree final_mask
= NULL_TREE
;
9496 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9499 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9506 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9508 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9509 tree alias_ptr
= build_int_cst (ref_type
, align
);
9510 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9511 dataref_ptr
, alias_ptr
,
9517 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9518 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9519 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9521 gimple_call_set_lhs (call
, vec_array
);
9522 gimple_call_set_nothrow (call
, true);
9523 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9526 /* Extract each vector into an SSA_NAME. */
9527 for (i
= 0; i
< vec_num
; i
++)
9529 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9531 dr_chain
.quick_push (new_temp
);
9534 /* Record the mapping between SSA_NAMEs and statements. */
9535 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9537 /* Record that VEC_ARRAY is now dead. */
9538 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9542 for (i
= 0; i
< vec_num
; i
++)
9544 tree final_mask
= NULL_TREE
;
9546 && memory_access_type
!= VMAT_INVARIANT
)
9547 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9549 vectype
, vec_num
* j
+ i
);
9551 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9555 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9556 gsi
, stmt_info
, bump
);
9558 /* 2. Create the vector-load in the loop. */
9559 switch (alignment_support_scheme
)
9562 case dr_unaligned_supported
:
9564 unsigned int misalign
;
9565 unsigned HOST_WIDE_INT align
;
9567 if (memory_access_type
== VMAT_GATHER_SCATTER
9568 && gs_info
.ifn
!= IFN_LAST
)
9570 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9571 vec_offset
= vec_offsets
[j
];
9572 tree zero
= build_zero_cst (vectype
);
9573 tree scale
= size_int (gs_info
.scale
);
9576 call
= gimple_build_call_internal
9577 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9578 vec_offset
, scale
, zero
, final_mask
);
9580 call
= gimple_build_call_internal
9581 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9582 vec_offset
, scale
, zero
);
9583 gimple_call_set_nothrow (call
, true);
9585 data_ref
= NULL_TREE
;
9588 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9590 /* Emulated gather-scatter. */
9591 gcc_assert (!final_mask
);
9592 unsigned HOST_WIDE_INT const_nunits
9593 = nunits
.to_constant ();
9594 unsigned HOST_WIDE_INT const_offset_nunits
9595 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
9597 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9598 vec_alloc (ctor_elts
, const_nunits
);
9599 gimple_seq stmts
= NULL
;
9600 /* We support offset vectors with more elements
9601 than the data vector for now. */
9602 unsigned HOST_WIDE_INT factor
9603 = const_offset_nunits
/ const_nunits
;
9604 vec_offset
= vec_offsets
[j
/ factor
];
9605 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9606 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9607 tree scale
= size_int (gs_info
.scale
);
9609 = get_object_alignment (DR_REF (first_dr_info
->dr
));
9610 tree ltype
= build_aligned_type (TREE_TYPE (vectype
),
9612 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9614 tree boff
= size_binop (MULT_EXPR
,
9615 TYPE_SIZE (idx_type
),
9618 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
9619 idx_type
, vec_offset
,
9620 TYPE_SIZE (idx_type
),
9622 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9623 idx
= gimple_build (&stmts
, MULT_EXPR
,
9624 sizetype
, idx
, scale
);
9625 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
9626 TREE_TYPE (dataref_ptr
),
9628 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9629 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
9630 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
9631 build_int_cst (ref_type
, 0));
9632 new_stmt
= gimple_build_assign (elt
, ref
);
9633 gimple_seq_add_stmt (&stmts
, new_stmt
);
9634 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
9636 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9637 new_stmt
= gimple_build_assign (NULL_TREE
,
9639 (vectype
, ctor_elts
));
9640 data_ref
= NULL_TREE
;
9645 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9646 if (alignment_support_scheme
== dr_aligned
)
9648 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9650 align
= dr_alignment
9651 (vect_dr_behavior (vinfo
, first_dr_info
));
9655 misalign
= misalignment
;
9656 if (dataref_offset
== NULL_TREE
9657 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9658 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9660 align
= least_bit_hwi (misalign
| align
);
9664 tree ptr
= build_int_cst (ref_type
,
9665 align
* BITS_PER_UNIT
);
9667 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9670 gimple_call_set_nothrow (call
, true);
9672 data_ref
= NULL_TREE
;
9674 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
9677 = vect_get_loop_len (loop_vinfo
, loop_lens
,
9680 tree ptr
= build_int_cst (ref_type
,
9681 align
* BITS_PER_UNIT
);
9683 = gimple_build_call_internal (IFN_LEN_LOAD
, 3,
9686 gimple_call_set_nothrow (call
, true);
9688 data_ref
= NULL_TREE
;
9690 /* Need conversion if it's wrapped with VnQI. */
9691 machine_mode vmode
= TYPE_MODE (vectype
);
9692 opt_machine_mode new_ovmode
9693 = get_len_load_store_mode (vmode
, true);
9694 machine_mode new_vmode
= new_ovmode
.require ();
9695 if (vmode
!= new_vmode
)
9697 tree qi_type
= unsigned_intQI_type_node
;
9699 = build_vector_type_for_mode (qi_type
, new_vmode
);
9700 tree var
= vect_get_new_ssa_name (new_vtype
,
9702 gimple_set_lhs (call
, var
);
9703 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
9705 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
9707 = gimple_build_assign (vec_dest
,
9708 VIEW_CONVERT_EXPR
, op
);
9713 tree ltype
= vectype
;
9714 tree new_vtype
= NULL_TREE
;
9715 unsigned HOST_WIDE_INT gap
9716 = DR_GROUP_GAP (first_stmt_info
);
9717 unsigned int vect_align
9718 = vect_known_alignment_in_bytes (first_dr_info
,
9720 unsigned int scalar_dr_size
9721 = vect_get_scalar_dr_size (first_dr_info
);
9722 /* If there's no peeling for gaps but we have a gap
9723 with slp loads then load the lower half of the
9724 vector only. See get_group_load_store_type for
9725 when we apply this optimization. */
9728 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9730 && known_eq (nunits
, (group_size
- gap
) * 2)
9731 && known_eq (nunits
, group_size
)
9732 && gap
>= (vect_align
/ scalar_dr_size
))
9736 = vector_vector_composition_type (vectype
, 2,
9738 if (new_vtype
!= NULL_TREE
)
9742 = (dataref_offset
? dataref_offset
9743 : build_int_cst (ref_type
, 0));
9744 if (ltype
!= vectype
9745 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9747 unsigned HOST_WIDE_INT gap_offset
9748 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9749 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9750 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9753 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9754 if (alignment_support_scheme
== dr_aligned
)
9757 TREE_TYPE (data_ref
)
9758 = build_aligned_type (TREE_TYPE (data_ref
),
9759 align
* BITS_PER_UNIT
);
9760 if (ltype
!= vectype
)
9762 vect_copy_ref_info (data_ref
,
9763 DR_REF (first_dr_info
->dr
));
9764 tree tem
= make_ssa_name (ltype
);
9765 new_stmt
= gimple_build_assign (tem
, data_ref
);
9766 vect_finish_stmt_generation (vinfo
, stmt_info
,
9769 vec
<constructor_elt
, va_gc
> *v
;
9771 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9773 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9774 build_zero_cst (ltype
));
9775 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9779 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9780 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9781 build_zero_cst (ltype
));
9783 gcc_assert (new_vtype
!= NULL_TREE
);
9784 if (new_vtype
== vectype
)
9785 new_stmt
= gimple_build_assign (
9786 vec_dest
, build_constructor (vectype
, v
));
9789 tree new_vname
= make_ssa_name (new_vtype
);
9790 new_stmt
= gimple_build_assign (
9791 new_vname
, build_constructor (new_vtype
, v
));
9792 vect_finish_stmt_generation (vinfo
, stmt_info
,
9794 new_stmt
= gimple_build_assign (
9795 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9802 case dr_explicit_realign
:
9806 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9808 if (compute_in_loop
)
9809 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9811 dr_explicit_realign
,
9814 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9815 ptr
= copy_ssa_name (dataref_ptr
);
9817 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9818 // For explicit realign the target alignment should be
9819 // known at compile time.
9820 unsigned HOST_WIDE_INT align
=
9821 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9822 new_stmt
= gimple_build_assign
9823 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9825 (TREE_TYPE (dataref_ptr
),
9826 -(HOST_WIDE_INT
) align
));
9827 vect_finish_stmt_generation (vinfo
, stmt_info
,
9830 = build2 (MEM_REF
, vectype
, ptr
,
9831 build_int_cst (ref_type
, 0));
9832 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9833 vec_dest
= vect_create_destination_var (scalar_dest
,
9835 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9836 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9837 gimple_assign_set_lhs (new_stmt
, new_temp
);
9838 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9839 vect_finish_stmt_generation (vinfo
, stmt_info
,
9843 bump
= size_binop (MULT_EXPR
, vs
,
9844 TYPE_SIZE_UNIT (elem_type
));
9845 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9846 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9848 new_stmt
= gimple_build_assign
9849 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9851 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9852 ptr
= copy_ssa_name (ptr
, new_stmt
);
9853 gimple_assign_set_lhs (new_stmt
, ptr
);
9854 vect_finish_stmt_generation (vinfo
, stmt_info
,
9857 = build2 (MEM_REF
, vectype
, ptr
,
9858 build_int_cst (ref_type
, 0));
9861 case dr_explicit_realign_optimized
:
9863 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9864 new_temp
= copy_ssa_name (dataref_ptr
);
9866 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9867 // We should only be doing this if we know the target
9868 // alignment at compile time.
9869 unsigned HOST_WIDE_INT align
=
9870 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9871 new_stmt
= gimple_build_assign
9872 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9873 build_int_cst (TREE_TYPE (dataref_ptr
),
9874 -(HOST_WIDE_INT
) align
));
9875 vect_finish_stmt_generation (vinfo
, stmt_info
,
9878 = build2 (MEM_REF
, vectype
, new_temp
,
9879 build_int_cst (ref_type
, 0));
9885 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9886 /* DATA_REF is null if we've already built the statement. */
9889 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9890 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9892 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9893 gimple_set_lhs (new_stmt
, new_temp
);
9894 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9896 /* 3. Handle explicit realignment if necessary/supported.
9898 vec_dest = realign_load (msq, lsq, realignment_token) */
9899 if (alignment_support_scheme
== dr_explicit_realign_optimized
9900 || alignment_support_scheme
== dr_explicit_realign
)
9902 lsq
= gimple_assign_lhs (new_stmt
);
9903 if (!realignment_token
)
9904 realignment_token
= dataref_ptr
;
9905 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9906 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9907 msq
, lsq
, realignment_token
);
9908 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9909 gimple_assign_set_lhs (new_stmt
, new_temp
);
9910 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9912 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9915 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9916 add_phi_arg (phi
, lsq
,
9917 loop_latch_edge (containing_loop
),
9923 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9925 tree perm_mask
= perm_mask_for_reverse (vectype
);
9926 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9927 perm_mask
, stmt_info
, gsi
);
9928 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9931 /* Collect vector loads and later create their permutation in
9932 vect_transform_grouped_load (). */
9933 if (grouped_load
|| slp_perm
)
9934 dr_chain
.quick_push (new_temp
);
9936 /* Store vector loads in the corresponding SLP_NODE. */
9937 if (slp
&& !slp_perm
)
9938 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9940 /* With SLP permutation we load the gaps as well, without
9941 we need to skip the gaps after we manage to fully load
9942 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9943 group_elt
+= nunits
;
9944 if (maybe_ne (group_gap_adj
, 0U)
9946 && known_eq (group_elt
, group_size
- group_gap_adj
))
9948 poly_wide_int bump_val
9949 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9951 if (tree_int_cst_sgn
9952 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
9953 bump_val
= -bump_val
;
9954 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9955 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9956 gsi
, stmt_info
, bump
);
9960 /* Bump the vector pointer to account for a gap or for excess
9961 elements loaded for a permuted SLP load. */
9962 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9964 poly_wide_int bump_val
9965 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9967 if (tree_int_cst_sgn
9968 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
9969 bump_val
= -bump_val
;
9970 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9971 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9976 if (slp
&& !slp_perm
)
9982 /* For SLP we know we've seen all possible uses of dr_chain so
9983 direct vect_transform_slp_perm_load to DCE the unused parts.
9984 ??? This is a hack to prevent compile-time issues as seen
9985 in PR101120 and friends. */
9986 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
9987 gsi
, vf
, false, &n_perms
,
9995 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9996 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
9998 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10002 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10005 dr_chain
.release ();
10008 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10013 /* Function vect_is_simple_cond.
10016 LOOP - the loop that is being vectorized.
10017 COND - Condition that is checked for simple use.
10020 *COMP_VECTYPE - the vector type for the comparison.
10021 *DTS - The def types for the arguments of the comparison
10023 Returns whether a COND can be vectorized. Checks whether
10024 condition operands are supportable using vec_is_simple_use. */
10027 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
10028 slp_tree slp_node
, tree
*comp_vectype
,
10029 enum vect_def_type
*dts
, tree vectype
)
10032 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10036 if (TREE_CODE (cond
) == SSA_NAME
10037 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
10039 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
10040 &slp_op
, &dts
[0], comp_vectype
)
10042 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
10047 if (!COMPARISON_CLASS_P (cond
))
10050 lhs
= TREE_OPERAND (cond
, 0);
10051 rhs
= TREE_OPERAND (cond
, 1);
10053 if (TREE_CODE (lhs
) == SSA_NAME
)
10055 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
10056 &lhs
, &slp_op
, &dts
[0], &vectype1
))
10059 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10060 || TREE_CODE (lhs
) == FIXED_CST
)
10061 dts
[0] = vect_constant_def
;
10065 if (TREE_CODE (rhs
) == SSA_NAME
)
10067 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
10068 &rhs
, &slp_op
, &dts
[1], &vectype2
))
10071 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10072 || TREE_CODE (rhs
) == FIXED_CST
)
10073 dts
[1] = vect_constant_def
;
10077 if (vectype1
&& vectype2
10078 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10079 TYPE_VECTOR_SUBPARTS (vectype2
)))
10082 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10083 /* Invariant comparison. */
10084 if (! *comp_vectype
)
10086 tree scalar_type
= TREE_TYPE (lhs
);
10087 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10088 *comp_vectype
= truth_type_for (vectype
);
10091 /* If we can widen the comparison to match vectype do so. */
10092 if (INTEGRAL_TYPE_P (scalar_type
)
10094 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10095 TYPE_SIZE (TREE_TYPE (vectype
))))
10096 scalar_type
= build_nonstandard_integer_type
10097 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10098 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
10106 /* vectorizable_condition.
10108 Check if STMT_INFO is conditional modify expression that can be vectorized.
10109 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10110 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10113 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10115 Return true if STMT_INFO is vectorizable in this way. */
10118 vectorizable_condition (vec_info
*vinfo
,
10119 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10121 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10123 tree scalar_dest
= NULL_TREE
;
10124 tree vec_dest
= NULL_TREE
;
10125 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10126 tree then_clause
, else_clause
;
10127 tree comp_vectype
= NULL_TREE
;
10128 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10129 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10132 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10133 enum vect_def_type dts
[4]
10134 = {vect_unknown_def_type
, vect_unknown_def_type
,
10135 vect_unknown_def_type
, vect_unknown_def_type
};
10139 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10141 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10142 vec
<tree
> vec_oprnds0
= vNULL
;
10143 vec
<tree
> vec_oprnds1
= vNULL
;
10144 vec
<tree
> vec_oprnds2
= vNULL
;
10145 vec
<tree
> vec_oprnds3
= vNULL
;
10147 bool masked
= false;
10149 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10152 /* Is vectorizable conditional operation? */
10153 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10157 code
= gimple_assign_rhs_code (stmt
);
10158 if (code
!= COND_EXPR
)
10161 stmt_vec_info reduc_info
= NULL
;
10162 int reduc_index
= -1;
10163 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10165 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10168 if (STMT_SLP_TYPE (stmt_info
))
10170 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10171 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10172 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10173 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10174 || reduc_index
!= -1);
10178 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10182 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10183 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10188 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10192 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10196 gcc_assert (ncopies
>= 1);
10197 if (for_reduction
&& ncopies
> 1)
10198 return false; /* FORNOW */
10200 cond_expr
= gimple_assign_rhs1 (stmt
);
10202 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10203 &comp_vectype
, &dts
[0], vectype
)
10207 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10208 slp_tree then_slp_node
, else_slp_node
;
10209 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10210 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10212 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10213 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10216 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10219 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10222 masked
= !COMPARISON_CLASS_P (cond_expr
);
10223 vec_cmp_type
= truth_type_for (comp_vectype
);
10225 if (vec_cmp_type
== NULL_TREE
)
10228 cond_code
= TREE_CODE (cond_expr
);
10231 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10232 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10235 /* For conditional reductions, the "then" value needs to be the candidate
10236 value calculated by this iteration while the "else" value needs to be
10237 the result carried over from previous iterations. If the COND_EXPR
10238 is the other way around, we need to swap it. */
10239 bool must_invert_cmp_result
= false;
10240 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10243 must_invert_cmp_result
= true;
10246 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10247 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10248 if (new_code
== ERROR_MARK
)
10249 must_invert_cmp_result
= true;
10252 cond_code
= new_code
;
10253 /* Make sure we don't accidentally use the old condition. */
10254 cond_expr
= NULL_TREE
;
10257 std::swap (then_clause
, else_clause
);
10260 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
10262 /* Boolean values may have another representation in vectors
10263 and therefore we prefer bit operations over comparison for
10264 them (which also works for scalar masks). We store opcodes
10265 to use in bitop1 and bitop2. Statement is vectorized as
10266 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10267 depending on bitop1 and bitop2 arity. */
10271 bitop1
= BIT_NOT_EXPR
;
10272 bitop2
= BIT_AND_EXPR
;
10275 bitop1
= BIT_NOT_EXPR
;
10276 bitop2
= BIT_IOR_EXPR
;
10279 bitop1
= BIT_NOT_EXPR
;
10280 bitop2
= BIT_AND_EXPR
;
10281 std::swap (cond_expr0
, cond_expr1
);
10284 bitop1
= BIT_NOT_EXPR
;
10285 bitop2
= BIT_IOR_EXPR
;
10286 std::swap (cond_expr0
, cond_expr1
);
10289 bitop1
= BIT_XOR_EXPR
;
10292 bitop1
= BIT_XOR_EXPR
;
10293 bitop2
= BIT_NOT_EXPR
;
10298 cond_code
= SSA_NAME
;
10301 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10302 && reduction_type
== EXTRACT_LAST_REDUCTION
10303 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10305 if (dump_enabled_p ())
10306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10307 "reduction comparison operation not supported.\n");
10313 if (bitop1
!= NOP_EXPR
)
10315 machine_mode mode
= TYPE_MODE (comp_vectype
);
10318 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10319 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10322 if (bitop2
!= NOP_EXPR
)
10324 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10326 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10331 vect_cost_for_stmt kind
= vector_stmt
;
10332 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10333 /* Count one reduction-like operation per vector. */
10334 kind
= vec_to_scalar
;
10335 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10339 && (!vect_maybe_update_slp_op_vectype
10340 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10342 && !vect_maybe_update_slp_op_vectype
10343 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10344 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10345 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10347 if (dump_enabled_p ())
10348 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10349 "incompatible vector types for invariants\n");
10353 if (loop_vinfo
&& for_reduction
10354 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10356 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10357 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10358 ncopies
* vec_num
, vectype
, NULL
);
10359 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10360 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
10362 if (dump_enabled_p ())
10363 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10364 "conditional reduction prevents the use"
10365 " of partial vectors.\n");
10366 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
10370 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10371 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10379 scalar_dest
= gimple_assign_lhs (stmt
);
10380 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10381 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10383 bool swap_cond_operands
= false;
10385 /* See whether another part of the vectorized code applies a loop
10386 mask to the condition, or to its inverse. */
10388 vec_loop_masks
*masks
= NULL
;
10389 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10391 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10392 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10395 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10396 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10397 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10400 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10401 tree_code orig_code
= cond
.code
;
10402 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10403 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10405 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10406 cond_code
= cond
.code
;
10407 swap_cond_operands
= true;
10411 /* Try the inverse of the current mask. We check if the
10412 inverse mask is live and if so we generate a negate of
10413 the current mask such that we still honor NaNs. */
10414 cond
.inverted_p
= true;
10415 cond
.code
= orig_code
;
10416 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10418 bitop1
= orig_code
;
10419 bitop2
= BIT_NOT_EXPR
;
10420 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10421 cond_code
= cond
.code
;
10422 swap_cond_operands
= true;
10429 /* Handle cond expr. */
10431 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10432 cond_expr
, &vec_oprnds0
, comp_vectype
,
10433 then_clause
, &vec_oprnds2
, vectype
,
10434 reduction_type
!= EXTRACT_LAST_REDUCTION
10435 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10437 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10438 cond_expr0
, &vec_oprnds0
, comp_vectype
,
10439 cond_expr1
, &vec_oprnds1
, comp_vectype
,
10440 then_clause
, &vec_oprnds2
, vectype
,
10441 reduction_type
!= EXTRACT_LAST_REDUCTION
10442 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10444 /* Arguments are ready. Create the new vector stmt. */
10445 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10447 vec_then_clause
= vec_oprnds2
[i
];
10448 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10449 vec_else_clause
= vec_oprnds3
[i
];
10451 if (swap_cond_operands
)
10452 std::swap (vec_then_clause
, vec_else_clause
);
10455 vec_compare
= vec_cond_lhs
;
10458 vec_cond_rhs
= vec_oprnds1
[i
];
10459 if (bitop1
== NOP_EXPR
)
10461 gimple_seq stmts
= NULL
;
10462 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10463 vec_cond_lhs
, vec_cond_rhs
);
10464 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
10468 new_temp
= make_ssa_name (vec_cmp_type
);
10470 if (bitop1
== BIT_NOT_EXPR
)
10471 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10475 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10477 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10478 if (bitop2
== NOP_EXPR
)
10479 vec_compare
= new_temp
;
10480 else if (bitop2
== BIT_NOT_EXPR
)
10482 /* Instead of doing ~x ? y : z do x ? z : y. */
10483 vec_compare
= new_temp
;
10484 std::swap (vec_then_clause
, vec_else_clause
);
10488 vec_compare
= make_ssa_name (vec_cmp_type
);
10490 = gimple_build_assign (vec_compare
, bitop2
,
10491 vec_cond_lhs
, new_temp
);
10492 vect_finish_stmt_generation (vinfo
, stmt_info
,
10498 /* If we decided to apply a loop mask to the result of the vector
10499 comparison, AND the comparison with the mask now. Later passes
10500 should then be able to reuse the AND results between mulitple
10504 for (int i = 0; i < 100; ++i)
10505 x[i] = y[i] ? z[i] : 10;
10507 results in following optimized GIMPLE:
10509 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10510 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10511 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10512 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10513 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10514 vect_iftmp.11_47, { 10, ... }>;
10516 instead of using a masked and unmasked forms of
10517 vec != { 0, ... } (masked in the MASK_LOAD,
10518 unmasked in the VEC_COND_EXPR). */
10520 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10521 in cases where that's necessary. */
10523 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10525 if (!is_gimple_val (vec_compare
))
10527 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10528 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10530 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10531 vec_compare
= vec_compare_name
;
10534 if (must_invert_cmp_result
)
10536 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10537 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10540 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10541 vec_compare
= vec_compare_name
;
10547 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10549 tree tmp2
= make_ssa_name (vec_cmp_type
);
10551 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10553 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10554 vec_compare
= tmp2
;
10559 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10561 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10562 tree lhs
= gimple_get_lhs (old_stmt
);
10563 new_stmt
= gimple_build_call_internal
10564 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10566 gimple_call_set_lhs (new_stmt
, lhs
);
10567 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10568 if (old_stmt
== gsi_stmt (*gsi
))
10569 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
10572 /* In this case we're moving the definition to later in the
10573 block. That doesn't matter because the only uses of the
10574 lhs are in phi statements. */
10575 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10576 gsi_remove (&old_gsi
, true);
10577 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10582 new_temp
= make_ssa_name (vec_dest
);
10583 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10584 vec_then_clause
, vec_else_clause
);
10585 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10588 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10590 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10594 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10596 vec_oprnds0
.release ();
10597 vec_oprnds1
.release ();
10598 vec_oprnds2
.release ();
10599 vec_oprnds3
.release ();
10604 /* vectorizable_comparison.
10606 Check if STMT_INFO is comparison expression that can be vectorized.
10607 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10608 comparison, put it in VEC_STMT, and insert it at GSI.
10610 Return true if STMT_INFO is vectorizable in this way. */
10613 vectorizable_comparison (vec_info
*vinfo
,
10614 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10616 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10618 tree lhs
, rhs1
, rhs2
;
10619 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10620 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10621 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10623 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10624 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10626 poly_uint64 nunits
;
10628 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10630 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10631 vec
<tree
> vec_oprnds0
= vNULL
;
10632 vec
<tree
> vec_oprnds1
= vNULL
;
10636 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10639 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10642 mask_type
= vectype
;
10643 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10648 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10650 gcc_assert (ncopies
>= 1);
10651 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10654 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10658 code
= gimple_assign_rhs_code (stmt
);
10660 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10663 slp_tree slp_rhs1
, slp_rhs2
;
10664 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10665 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10668 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10669 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10672 if (vectype1
&& vectype2
10673 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10674 TYPE_VECTOR_SUBPARTS (vectype2
)))
10677 vectype
= vectype1
? vectype1
: vectype2
;
10679 /* Invariant comparison. */
10682 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10683 vectype
= mask_type
;
10685 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10687 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10690 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10693 /* Can't compare mask and non-mask types. */
10694 if (vectype1
&& vectype2
10695 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10698 /* Boolean values may have another representation in vectors
10699 and therefore we prefer bit operations over comparison for
10700 them (which also works for scalar masks). We store opcodes
10701 to use in bitop1 and bitop2. Statement is vectorized as
10702 BITOP2 (rhs1 BITOP1 rhs2) or
10703 rhs1 BITOP2 (BITOP1 rhs2)
10704 depending on bitop1 and bitop2 arity. */
10705 bool swap_p
= false;
10706 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10708 if (code
== GT_EXPR
)
10710 bitop1
= BIT_NOT_EXPR
;
10711 bitop2
= BIT_AND_EXPR
;
10713 else if (code
== GE_EXPR
)
10715 bitop1
= BIT_NOT_EXPR
;
10716 bitop2
= BIT_IOR_EXPR
;
10718 else if (code
== LT_EXPR
)
10720 bitop1
= BIT_NOT_EXPR
;
10721 bitop2
= BIT_AND_EXPR
;
10724 else if (code
== LE_EXPR
)
10726 bitop1
= BIT_NOT_EXPR
;
10727 bitop2
= BIT_IOR_EXPR
;
10732 bitop1
= BIT_XOR_EXPR
;
10733 if (code
== EQ_EXPR
)
10734 bitop2
= BIT_NOT_EXPR
;
10740 if (bitop1
== NOP_EXPR
)
10742 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10747 machine_mode mode
= TYPE_MODE (vectype
);
10750 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10751 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10754 if (bitop2
!= NOP_EXPR
)
10756 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10757 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10762 /* Put types on constant and invariant SLP children. */
10764 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10765 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10767 if (dump_enabled_p ())
10768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10769 "incompatible vector types for invariants\n");
10773 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10774 vect_model_simple_cost (vinfo
, stmt_info
,
10775 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10776 dts
, ndts
, slp_node
, cost_vec
);
10783 lhs
= gimple_assign_lhs (stmt
);
10784 mask
= vect_create_destination_var (lhs
, mask_type
);
10786 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10787 rhs1
, &vec_oprnds0
, vectype
,
10788 rhs2
, &vec_oprnds1
, vectype
);
10790 std::swap (vec_oprnds0
, vec_oprnds1
);
10792 /* Arguments are ready. Create the new vector stmt. */
10793 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10796 vec_rhs2
= vec_oprnds1
[i
];
10798 new_temp
= make_ssa_name (mask
);
10799 if (bitop1
== NOP_EXPR
)
10801 new_stmt
= gimple_build_assign (new_temp
, code
,
10802 vec_rhs1
, vec_rhs2
);
10803 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10807 if (bitop1
== BIT_NOT_EXPR
)
10808 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10810 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10812 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10813 if (bitop2
!= NOP_EXPR
)
10815 tree res
= make_ssa_name (mask
);
10816 if (bitop2
== BIT_NOT_EXPR
)
10817 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10819 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10821 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10825 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10827 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10831 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10833 vec_oprnds0
.release ();
10834 vec_oprnds1
.release ();
10839 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10840 can handle all live statements in the node. Otherwise return true
10841 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10842 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10845 can_vectorize_live_stmts (vec_info
*vinfo
,
10846 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10847 slp_tree slp_node
, slp_instance slp_node_instance
,
10849 stmt_vector_for_cost
*cost_vec
)
10853 stmt_vec_info slp_stmt_info
;
10855 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10857 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10858 && !vectorizable_live_operation (vinfo
,
10859 slp_stmt_info
, gsi
, slp_node
,
10860 slp_node_instance
, i
,
10861 vec_stmt_p
, cost_vec
))
10865 else if (STMT_VINFO_LIVE_P (stmt_info
)
10866 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
10867 slp_node
, slp_node_instance
, -1,
10868 vec_stmt_p
, cost_vec
))
10874 /* Make sure the statement is vectorizable. */
10877 vect_analyze_stmt (vec_info
*vinfo
,
10878 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10879 slp_tree node
, slp_instance node_instance
,
10880 stmt_vector_for_cost
*cost_vec
)
10882 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10883 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10885 gimple_seq pattern_def_seq
;
10887 if (dump_enabled_p ())
10888 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10891 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10892 return opt_result::failure_at (stmt_info
->stmt
,
10894 " stmt has volatile operands: %G\n",
10897 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10899 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10901 gimple_stmt_iterator si
;
10903 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10905 stmt_vec_info pattern_def_stmt_info
10906 = vinfo
->lookup_stmt (gsi_stmt (si
));
10907 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10908 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10910 /* Analyze def stmt of STMT if it's a pattern stmt. */
10911 if (dump_enabled_p ())
10912 dump_printf_loc (MSG_NOTE
, vect_location
,
10913 "==> examining pattern def statement: %G",
10914 pattern_def_stmt_info
->stmt
);
10917 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10918 need_to_vectorize
, node
, node_instance
,
10926 /* Skip stmts that do not need to be vectorized. In loops this is expected
10928 - the COND_EXPR which is the loop exit condition
10929 - any LABEL_EXPRs in the loop
10930 - computations that are used only for array indexing or loop control.
10931 In basic blocks we only analyze statements that are a part of some SLP
10932 instance, therefore, all the statements are relevant.
10934 Pattern statement needs to be analyzed instead of the original statement
10935 if the original statement is not relevant. Otherwise, we analyze both
10936 statements. In basic blocks we are called from some SLP instance
10937 traversal, don't analyze pattern stmts instead, the pattern stmts
10938 already will be part of SLP instance. */
10940 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10941 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10942 && !STMT_VINFO_LIVE_P (stmt_info
))
10944 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10945 && pattern_stmt_info
10946 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10947 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10949 /* Analyze PATTERN_STMT instead of the original stmt. */
10950 stmt_info
= pattern_stmt_info
;
10951 if (dump_enabled_p ())
10952 dump_printf_loc (MSG_NOTE
, vect_location
,
10953 "==> examining pattern statement: %G",
10958 if (dump_enabled_p ())
10959 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
10961 return opt_result::success ();
10964 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10966 && pattern_stmt_info
10967 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10968 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10970 /* Analyze PATTERN_STMT too. */
10971 if (dump_enabled_p ())
10972 dump_printf_loc (MSG_NOTE
, vect_location
,
10973 "==> examining pattern statement: %G",
10974 pattern_stmt_info
->stmt
);
10977 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
10978 node_instance
, cost_vec
);
10983 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
10985 case vect_internal_def
:
10988 case vect_reduction_def
:
10989 case vect_nested_cycle
:
10990 gcc_assert (!bb_vinfo
10991 && (relevance
== vect_used_in_outer
10992 || relevance
== vect_used_in_outer_by_reduction
10993 || relevance
== vect_used_by_reduction
10994 || relevance
== vect_unused_in_scope
10995 || relevance
== vect_used_only_live
));
10998 case vect_induction_def
:
10999 gcc_assert (!bb_vinfo
);
11002 case vect_constant_def
:
11003 case vect_external_def
:
11004 case vect_unknown_def_type
:
11006 gcc_unreachable ();
11009 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11011 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
11013 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11015 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11016 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11017 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11018 *need_to_vectorize
= true;
11021 if (PURE_SLP_STMT (stmt_info
) && !node
)
11023 if (dump_enabled_p ())
11024 dump_printf_loc (MSG_NOTE
, vect_location
,
11025 "handled only by SLP analysis\n");
11026 return opt_result::success ();
11031 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11032 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
11033 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11034 -mveclibabi= takes preference over library functions with
11035 the simd attribute. */
11036 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11037 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11039 || vectorizable_conversion (vinfo
, stmt_info
,
11040 NULL
, NULL
, node
, cost_vec
)
11041 || vectorizable_operation (vinfo
, stmt_info
,
11042 NULL
, NULL
, node
, cost_vec
)
11043 || vectorizable_assignment (vinfo
, stmt_info
,
11044 NULL
, NULL
, node
, cost_vec
)
11045 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11046 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11047 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11048 node
, node_instance
, cost_vec
)
11049 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11050 NULL
, node
, cost_vec
)
11051 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11052 || vectorizable_condition (vinfo
, stmt_info
,
11053 NULL
, NULL
, node
, cost_vec
)
11054 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11056 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11057 stmt_info
, NULL
, node
));
11061 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11062 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11063 NULL
, NULL
, node
, cost_vec
)
11064 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11066 || vectorizable_shift (vinfo
, stmt_info
,
11067 NULL
, NULL
, node
, cost_vec
)
11068 || vectorizable_operation (vinfo
, stmt_info
,
11069 NULL
, NULL
, node
, cost_vec
)
11070 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11072 || vectorizable_load (vinfo
, stmt_info
,
11073 NULL
, NULL
, node
, cost_vec
)
11074 || vectorizable_store (vinfo
, stmt_info
,
11075 NULL
, NULL
, node
, cost_vec
)
11076 || vectorizable_condition (vinfo
, stmt_info
,
11077 NULL
, NULL
, node
, cost_vec
)
11078 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11080 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
11084 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11087 return opt_result::failure_at (stmt_info
->stmt
,
11089 " relevant stmt not supported: %G",
11092 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11093 need extra handling, except for vectorizable reductions. */
11095 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11096 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11097 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11098 stmt_info
, NULL
, node
, node_instance
,
11100 return opt_result::failure_at (stmt_info
->stmt
,
11102 " live stmt not supported: %G",
11105 return opt_result::success ();
/* NOTE(review): this chunk was extracted with hard line wrapping; original
   file line numbers are fused into the text and some lines (e.g. "break;",
   "is_store = true;", the final "return") appear to have been dropped.
   Comments below annotate only what is visible.  */
11109 /* Function vect_transform_stmt.
11111 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11114 vect_transform_stmt (vec_info
*vinfo
,
11115 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11116 slp_tree slp_node
, slp_instance slp_node_instance
)
/* IS_STORE tracks whether the caller must later remove a scalar store
   group; VEC_STMT receives the last generated vector stmt.  */
11118 bool is_store
= false;
11119 gimple
*vec_stmt
= NULL
;
/* Pure-SLP stmts must only be transformed via their SLP node.  */
11122 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
/* Temporarily install the SLP node's vector type; restored at the end.  */
11124 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11126 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
/* Dispatch on the kind of vectorization chosen during analysis.  */
11128 switch (STMT_VINFO_TYPE (stmt_info
))
11130 case type_demotion_vec_info_type
:
11131 case type_promotion_vec_info_type
:
11132 case type_conversion_vec_info_type
:
11133 done
= vectorizable_conversion (vinfo
, stmt_info
,
11134 gsi
, &vec_stmt
, slp_node
, NULL
);
11138 case induc_vec_info_type
:
11139 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11140 stmt_info
, &vec_stmt
, slp_node
,
11145 case shift_vec_info_type
:
11146 done
= vectorizable_shift (vinfo
, stmt_info
,
11147 gsi
, &vec_stmt
, slp_node
, NULL
);
11151 case op_vec_info_type
:
11152 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11157 case assignment_vec_info_type
:
11158 done
= vectorizable_assignment (vinfo
, stmt_info
,
11159 gsi
, &vec_stmt
, slp_node
, NULL
);
11163 case load_vec_info_type
:
11164 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11169 case store_vec_info_type
:
11170 done
= vectorizable_store (vinfo
, stmt_info
,
11171 gsi
, &vec_stmt
, slp_node
, NULL
);
/* For non-SLP grouped stores, the whole chain is emitted only once the
   last member has been seen (see comment below).  */
11173 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
11175 /* In case of interleaving, the whole chain is vectorized when the
11176 last store in the chain is reached. Store stmts before the last
11177 one are skipped, and there vec_stmt_info shouldn't be freed
11179 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11180 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11187 case condition_vec_info_type
:
11188 done
= vectorizable_condition (vinfo
, stmt_info
,
11189 gsi
, &vec_stmt
, slp_node
, NULL
);
11193 case comparison_vec_info_type
:
11194 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11199 case call_vec_info_type
:
11200 done
= vectorizable_call (vinfo
, stmt_info
,
11201 gsi
, &vec_stmt
, slp_node
, NULL
);
11204 case call_simd_clone_vec_info_type
:
11205 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
/* Reductions and their cycle PHIs are only valid in loop vectorization,
   hence the as_a <loop_vec_info> casts.  */
11209 case reduc_vec_info_type
:
11210 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11211 gsi
, &vec_stmt
, slp_node
);
11215 case cycle_phi_info_type
:
11216 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11217 &vec_stmt
, slp_node
, slp_node_instance
);
11221 case lc_phi_info_type
:
11222 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11223 stmt_info
, &vec_stmt
, slp_node
);
11227 case phi_info_type
:
11228 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
/* Default case: only "live" stmts may legitimately lack a transform;
   anything else reaching here is a bug.  */
11233 if (!STMT_VINFO_LIVE_P (stmt_info
))
11235 if (dump_enabled_p ())
11236 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11237 "stmt not supported.\n");
11238 gcc_unreachable ();
/* Non-SLP transforms that produced a vector stmt must have recorded it.  */
11243 if (!slp_node
&& vec_stmt
)
11244 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
11246 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
11248 /* Handle stmts whose DEF is used outside the loop-nest that is
11249 being vectorized. */
11250 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
11251 slp_node_instance
, true, NULL
);
/* Restore the vector type saved before the SLP override above.  */
11256 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11262 /* Remove a group of stores (for SLP or interleaving), free their
11266 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11268 stmt_vec_info next_stmt_info
= first_stmt_info
;
11270 while (next_stmt_info
)
11272 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11273 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11274 /* Free the attached stmt_vec_info and remove the stmt. */
11275 vinfo
->remove_stmt (next_stmt_info
);
11276 next_stmt_info
= tmp
;
/* NOTE(review): extraction artifacts below -- hard-wrapped lines with the
   original line numbers fused in, and some guard/"return NULL_TREE" lines
   dropped.  Comments annotate the visible logic only.  */
11280 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11281 elements of type SCALAR_TYPE, or null if the target doesn't support
11284 If NUNITS is zero, return a vector type that contains elements of
11285 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11287 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11288 for this vectorization region and want to "autodetect" the best choice.
11289 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11290 and we want the new type to be interoperable with it. PREVAILING_MODE
11291 in this case can be a scalar integer mode or a vector mode; when it
11292 is a vector mode, the function acts like a tree-level version of
11293 related_vector_mode. */
11296 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11297 tree scalar_type
, poly_uint64 nunits
)
/* ORIG_SCALAR_TYPE remembers the caller's type so its address space
   can be re-attached at the end.  */
11299 tree orig_scalar_type
= scalar_type
;
11300 scalar_mode inner_mode
;
11301 machine_mode simd_mode
;
/* Only integer and float element modes are vectorizable here.  */
11304 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11305 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11308 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
11310 /* For vector types of elements whose mode precision doesn't
11311 match their types precision we use a element type of mode
11312 precision. The vectorization routines will have to make sure
11313 they support the proper result truncation/extension.
11314 We also make sure to build vector types with INTEGER_TYPE
11315 component type only. */
11316 if (INTEGRAL_TYPE_P (scalar_type
)
11317 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11318 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11319 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11320 TYPE_UNSIGNED (scalar_type
));
11322 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11323 When the component mode passes the above test simply use a type
11324 corresponding to that mode. The theory is that any use that
11325 would cause problems with this will disable vectorization anyway. */
11326 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11327 && !INTEGRAL_TYPE_P (scalar_type
))
11328 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11330 /* We can't build a vector type of elements with alignment bigger than
11332 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11333 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11334 TYPE_UNSIGNED (scalar_type
));
11336 /* If we felt back to using the mode fail if there was
11337 no scalar type for it. */
11338 if (scalar_type
== NULL_TREE
)
11341 /* If no prevailing mode was supplied, use the mode the target prefers.
11342 Otherwise lookup a vector mode based on the prevailing mode. */
11343 if (prevailing_mode
== VOIDmode
)
/* Autodetect path: NUNITS must be unspecified (zero).  */
11345 gcc_assert (known_eq (nunits
, 0U));
11346 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11347 if (SCALAR_INT_MODE_P (simd_mode
))
11349 /* Traditional behavior is not to take the integer mode
11350 literally, but simply to use it as a way of determining
11351 the vector size. It is up to mode_for_vector to decide
11352 what the TYPE_MODE should be.
11354 Note that nunits == 1 is allowed in order to support single
11355 element vector types. */
11356 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11357 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11361 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11362 || !related_vector_mode (prevailing_mode
,
11363 inner_mode
, nunits
).exists (&simd_mode
))
11365 /* Fall back to using mode_for_vector, mostly in the hope of being
11366 able to use an integer mode. */
11367 if (known_eq (nunits
, 0U)
11368 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11371 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
/* Build the tree type for the chosen (possibly canonicalized) element
   type and vector mode.  */
11375 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11377 /* In cases where the mode was chosen by mode_for_vector, check that
11378 the target actually supports the chosen mode, or that it at least
11379 allows the vector mode to be replaced by a like-sized integer. */
11380 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11381 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11384 /* Re-attach the address-space qualifier if we canonicalized the scalar
11386 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11387 return build_qualified_type
11388 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
/* NOTE(review): hard-wrapped extraction with some scaffolding lines
   (do/while body braces) dropped; comments annotate the visible logic.  */
11393 /* Function get_vectype_for_scalar_type.
11395 Returns the vector type corresponding to SCALAR_TYPE as supported
11396 by the target. If GROUP_SIZE is nonzero and we're performing BB
11397 vectorization, make sure that the number of elements in the vector
11398 is no bigger than GROUP_SIZE. */
11401 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11402 unsigned int group_size
)
11404 /* For BB vectorization, we should always have a group size once we've
11405 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11406 are tentative requests during things like early data reference
11407 analysis and pattern recognition. */
11408 if (is_a
<bb_vec_info
> (vinfo
))
11409 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
/* First ask for the natural vector type in this region's vector mode.  */
11413 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
/* Autodetect: latch the first successfully chosen mode for the region.  */
11415 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11416 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11418 /* Register the natural choice of vector type, before the group size
11419 has been applied. */
11421 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11423 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11424 try again with an explicit number of elements. */
11427 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
11429 /* Start with the biggest number of units that fits within
11430 GROUP_SIZE and halve it until we find a valid vector type.
11431 Usually either the first attempt will succeed or all will
11432 fail (in the latter case because GROUP_SIZE is too small
11433 for the target), but it's possible that a target could have
11434 a hole between supported vector types.
11436 If GROUP_SIZE is not a power of 2, this has the effect of
11437 trying the largest power of 2 that fits within the group,
11438 even though the group is not a multiple of that vector size.
11439 The BB vectorizer will then try to carve up the group into
11441 unsigned int nunits
= 1 << floor_log2 (group_size
);
/* Retry with an explicit element count; loop halves NUNITS on failure.  */
11444 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11445 scalar_type
, nunits
);
11448 while (nunits
> 1 && !vectype
);
11454 /* Return the vector type corresponding to SCALAR_TYPE as supported
11455 by the target. NODE, if nonnull, is the SLP tree node that will
11456 use the returned vector type. */
11459 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
11461 unsigned int group_size
= 0;
11463 group_size
= SLP_TREE_LANES (node
);
11464 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11467 /* Function get_mask_type_for_scalar_type.
11469 Returns the mask type corresponding to a result of comparison
11470 of vectors of specified SCALAR_TYPE as supported by target.
11471 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11472 make sure that the number of elements in the vector is no bigger
11473 than GROUP_SIZE. */
11476 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11477 unsigned int group_size
)
11479 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11484 return truth_type_for (vectype
);
11487 /* Function get_same_sized_vectype
11489 Returns a vector type corresponding to SCALAR_TYPE of size
11490 VECTOR_TYPE if supported by the target. */
11493 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11495 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11496 return truth_type_for (vector_type
);
11498 poly_uint64 nunits
;
11499 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
11500 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
11503 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
11504 scalar_type
, nunits
);
11507 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11508 would not change the chosen vector modes. */
11511 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
11513 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
11514 i
!= vinfo
->used_vector_modes
.end (); ++i
)
11515 if (!VECTOR_MODE_P (*i
)
11516 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
/* NOTE(review): hard-wrapped extraction; the switch header, "break;"
   lines and the final "return" appear to have been dropped.  Comments
   annotate the visible logic only.  */
11521 /* Function vect_is_simple_use.
11524 VINFO - the vect info of the loop or basic block that is being vectorized.
11525 OPERAND - operand in the loop or bb.
11527 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11528 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11529 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11530 the definition could be anywhere in the function
11531 DT - the type of definition
11533 Returns whether a stmt with OPERAND can be vectorized.
11534 For loops, supportable operands are constants, loop invariants, and operands
11535 that are defined by the current iteration of the loop. Unsupportable
11536 operands are those that are defined by a previous iteration of the loop (as
11537 is the case in reduction/induction computations).
11538 For basic blocks, supportable operands are constants and bb invariants.
11539 For now, operands defined outside the basic block are not supported. */
11542 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11543 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
/* Initialize all outputs pessimistically before classification.  */
11545 if (def_stmt_info_out
)
11546 *def_stmt_info_out
= NULL
;
11548 *def_stmt_out
= NULL
;
11549 *dt
= vect_unknown_def_type
;
/* Dump the operand (or its defining stmt when it has one).  */
11551 if (dump_enabled_p ())
11553 dump_printf_loc (MSG_NOTE
, vect_location
,
11554 "vect_is_simple_use: operand ");
11555 if (TREE_CODE (operand
) == SSA_NAME
11556 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11557 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11559 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
/* Classify the operand: constants and invariants need no def stmt.  */
11562 if (CONSTANT_CLASS_P (operand
))
11563 *dt
= vect_constant_def
;
11564 else if (is_gimple_min_invariant (operand
))
11565 *dt
= vect_external_def
;
11566 else if (TREE_CODE (operand
) != SSA_NAME
)
11567 *dt
= vect_unknown_def_type
;
11568 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11569 *dt
= vect_external_def
;
/* SSA name defined inside the function: look the def up in VINFO.
   A def outside the vectorizable region counts as external.  */
11572 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11573 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11575 *dt
= vect_external_def
;
/* In-region def: follow pattern stmts to the stmt to vectorize.  */
11578 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11579 def_stmt
= stmt_vinfo
->stmt
;
11580 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11581 if (def_stmt_info_out
)
11582 *def_stmt_info_out
= stmt_vinfo
;
11585 *def_stmt_out
= def_stmt
;
/* Dump the classification result.  */
11588 if (dump_enabled_p ())
11590 dump_printf (MSG_NOTE
, ", type of def: ");
11593 case vect_uninitialized_def
:
11594 dump_printf (MSG_NOTE
, "uninitialized\n");
11596 case vect_constant_def
:
11597 dump_printf (MSG_NOTE
, "constant\n");
11599 case vect_external_def
:
11600 dump_printf (MSG_NOTE
, "external\n");
11602 case vect_internal_def
:
11603 dump_printf (MSG_NOTE
, "internal\n");
11605 case vect_induction_def
:
11606 dump_printf (MSG_NOTE
, "induction\n");
11608 case vect_reduction_def
:
11609 dump_printf (MSG_NOTE
, "reduction\n");
11611 case vect_double_reduction_def
:
11612 dump_printf (MSG_NOTE
, "double reduction\n");
11614 case vect_nested_cycle
:
11615 dump_printf (MSG_NOTE
, "nested cycle\n");
11617 case vect_unknown_def_type
:
11618 dump_printf (MSG_NOTE
, "unknown\n");
/* An unclassifiable operand means the use cannot be vectorized.  */
11623 if (*dt
== vect_unknown_def_type
)
11625 if (dump_enabled_p ())
11626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11627 "Unsupported pattern.\n");
/* NOTE(review): hard-wrapped extraction; early-return and final
   "return true;" lines appear dropped.  Comments annotate visible logic.  */
11634 /* Function vect_is_simple_use.
11636 Same as vect_is_simple_use but also determines the vector operand
11637 type of OPERAND and stores it to *VECTYPE. If the definition of
11638 OPERAND is vect_uninitialized_def, vect_constant_def or
11639 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11640 is responsible to compute the best suited vector type for the
11644 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11645 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11646 gimple
**def_stmt_out
)
11648 stmt_vec_info def_stmt_info
;
/* Delegate classification to the basic overload.  */
11650 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
/* Forward the optional outputs.  */
11654 *def_stmt_out
= def_stmt
;
11655 if (def_stmt_info_out
)
11656 *def_stmt_info_out
= def_stmt_info
;
11658 /* Now get a vector type if the def is internal, otherwise supply
11659 NULL_TREE and leave it up to the caller to figure out a proper
11660 type for the use stmt. */
11661 if (*dt
== vect_internal_def
11662 || *dt
== vect_induction_def
11663 || *dt
== vect_reduction_def
11664 || *dt
== vect_double_reduction_def
11665 || *dt
== vect_nested_cycle
)
/* In-region defs must already carry a vector type from analysis.  */
11667 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11668 gcc_assert (*vectype
!= NULL_TREE
);
11669 if (dump_enabled_p ())
11670 dump_printf_loc (MSG_NOTE
, vect_location
,
11671 "vect_is_simple_use: vectype %T\n", *vectype
);
11673 else if (*dt
== vect_uninitialized_def
11674 || *dt
== vect_constant_def
11675 || *dt
== vect_external_def
)
11676 *vectype
= NULL_TREE
;
/* Any other def type here indicates an analysis bug.  */
11678 gcc_unreachable ();
/* NOTE(review): hard-wrapped extraction; the "if (slp_node)" / "else"
   scaffolding around the two halves appears dropped.  Comments annotate
   only the visible logic.  */
11683 /* Function vect_is_simple_use.
11685 Same as vect_is_simple_use but determines the operand by operand
11686 position OPERAND from either STMT or SLP_NODE, filling in *OP
11687 and *SLP_DEF (when SLP_NODE is not NULL). */
11690 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
11691 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
11692 enum vect_def_type
*dt
,
11693 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
/* SLP path: the operand is the SLP child at index OPERAND.  */
11697 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
11699 *vectype
= SLP_TREE_VECTYPE (child
);
11700 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
/* Internal SLP def: take the representative stmt's LHS and classify it.  */
11702 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
11703 return vect_is_simple_use (*op
, vinfo
, dt
, def_stmt_info_out
);
/* External/constant SLP def: no defining stmt info to report.  */
11707 if (def_stmt_info_out
)
11708 *def_stmt_info_out
= NULL
;
11709 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
11710 *dt
= SLP_TREE_DEF_TYPE (child
);
/* Non-SLP path: pick the operand out of the scalar stmt.  */
11717 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
/* For a COND_EXPR with an embedded comparison, operands 0/1 are the
   comparison's own operands.  */
11719 if (gimple_assign_rhs_code (ass
) == COND_EXPR
11720 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
11723 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
11725 *op
= gimple_op (ass
, operand
);
/* VIEW_CONVERT_EXPR: look through to the converted operand.  */
11727 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
11728 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
/* Plain assign: gimple_op 0 is the LHS, hence OPERAND + 1.  */
11730 *op
= gimple_op (ass
, operand
+ 1);
11732 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
11733 *op
= gimple_call_arg (call
, operand
);
/* Only assigns and calls are expected here.  */
11735 gcc_unreachable ();
11736 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
11740 /* If OP is not NULL and is external or constant update its vector
11741 type with VECTYPE. Returns true if successful or false if not,
11742 for example when conflicting vector types are present. */
11745 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
11747 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
11749 if (SLP_TREE_VECTYPE (op
))
11750 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
11751 SLP_TREE_VECTYPE (op
) = vectype
;
11755 /* Function supportable_widening_operation
11757 Check whether an operation represented by the code CODE is a
11758 widening operation that is supported by the target platform in
11759 vector form (i.e., when operating on arguments of type VECTYPE_IN
11760 producing a result of type VECTYPE_OUT).
11762 Widening operations we currently support are NOP (CONVERT), FLOAT,
11763 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11764 are supported by the target platform either directly (via vector
11765 tree-codes), or via target builtins.
11768 - CODE1 and CODE2 are codes of vector operations to be used when
11769 vectorizing the operation, if available.
11770 - MULTI_STEP_CVT determines the number of required intermediate steps in
11771 case of multi-step conversion (like char->short->int - in that case
11772 MULTI_STEP_CVT will be 1).
11773 - INTERM_TYPES contains the intermediate type required to perform the
11774 widening operation (short in the above example). */
11777 supportable_widening_operation (vec_info
*vinfo
,
11778 enum tree_code code
, stmt_vec_info stmt_info
,
11779 tree vectype_out
, tree vectype_in
,
11780 enum tree_code
*code1
, enum tree_code
*code2
,
11781 int *multi_step_cvt
,
11782 vec
<tree
> *interm_types
)
11784 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
11785 class loop
*vect_loop
= NULL
;
11786 machine_mode vec_mode
;
11787 enum insn_code icode1
, icode2
;
11788 optab optab1
, optab2
;
11789 tree vectype
= vectype_in
;
11790 tree wide_vectype
= vectype_out
;
11791 enum tree_code c1
, c2
;
11793 tree prev_type
, intermediate_type
;
11794 machine_mode intermediate_mode
, prev_mode
;
11795 optab optab3
, optab4
;
11797 *multi_step_cvt
= 0;
11799 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11803 case WIDEN_MULT_EXPR
:
11804 /* The result of a vectorized widening operation usually requires
11805 two vectors (because the widened results do not fit into one vector).
11806 The generated vector results would normally be expected to be
11807 generated in the same order as in the original scalar computation,
11808 i.e. if 8 results are generated in each vector iteration, they are
11809 to be organized as follows:
11810 vect1: [res1,res2,res3,res4],
11811 vect2: [res5,res6,res7,res8].
11813 However, in the special case that the result of the widening
11814 operation is used in a reduction computation only, the order doesn't
11815 matter (because when vectorizing a reduction we change the order of
11816 the computation). Some targets can take advantage of this and
11817 generate more efficient code. For example, targets like Altivec,
11818 that support widen_mult using a sequence of {mult_even,mult_odd}
11819 generate the following vectors:
11820 vect1: [res1,res3,res5,res7],
11821 vect2: [res2,res4,res6,res8].
11823 When vectorizing outer-loops, we execute the inner-loop sequentially
11824 (each vectorized inner-loop iteration contributes to VF outer-loop
11825 iterations in parallel). We therefore don't allow to change the
11826 order of the computation in the inner-loop during outer-loop
11828 /* TODO: Another case in which order doesn't *really* matter is when we
11829 widen and then contract again, e.g. (short)((int)x * y >> 8).
11830 Normally, pack_trunc performs an even/odd permute, whereas the
11831 repack from an even/odd expansion would be an interleave, which
11832 would be significantly simpler for e.g. AVX2. */
11833 /* In any case, in order to avoid duplicating the code below, recurse
11834 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11835 are properly set up for the caller. If we fail, we'll continue with
11836 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11838 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11839 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11840 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
11841 stmt_info
, vectype_out
,
11842 vectype_in
, code1
, code2
,
11843 multi_step_cvt
, interm_types
))
11845 /* Elements in a vector with vect_used_by_reduction property cannot
11846 be reordered if the use chain with this property does not have the
11847 same operation. One such an example is s += a * b, where elements
11848 in a and b cannot be reordered. Here we check if the vector defined
11849 by STMT is only directly used in the reduction statement. */
11850 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
11851 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
11853 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
11856 c1
= VEC_WIDEN_MULT_LO_EXPR
;
11857 c2
= VEC_WIDEN_MULT_HI_EXPR
;
11860 case DOT_PROD_EXPR
:
11861 c1
= DOT_PROD_EXPR
;
11862 c2
= DOT_PROD_EXPR
;
11870 case VEC_WIDEN_MULT_EVEN_EXPR
:
11871 /* Support the recursion induced just above. */
11872 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
11873 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
11876 case WIDEN_LSHIFT_EXPR
:
11877 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
11878 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
11881 case WIDEN_PLUS_EXPR
:
11882 c1
= VEC_WIDEN_PLUS_LO_EXPR
;
11883 c2
= VEC_WIDEN_PLUS_HI_EXPR
;
11886 case WIDEN_MINUS_EXPR
:
11887 c1
= VEC_WIDEN_MINUS_LO_EXPR
;
11888 c2
= VEC_WIDEN_MINUS_HI_EXPR
;
11892 c1
= VEC_UNPACK_LO_EXPR
;
11893 c2
= VEC_UNPACK_HI_EXPR
;
11897 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
11898 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
11901 case FIX_TRUNC_EXPR
:
11902 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
11903 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
11907 gcc_unreachable ();
11910 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
11911 std::swap (c1
, c2
);
11913 if (code
== FIX_TRUNC_EXPR
)
11915 /* The signedness is determined from output operand. */
11916 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11917 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
11919 else if (CONVERT_EXPR_CODE_P (code
)
11920 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
11921 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11922 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
11923 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11925 /* If the input and result modes are the same, a different optab
11926 is needed where we pass in the number of units in vectype. */
11927 optab1
= vec_unpacks_sbool_lo_optab
;
11928 optab2
= vec_unpacks_sbool_hi_optab
;
11932 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11933 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
11936 if (!optab1
|| !optab2
)
11939 vec_mode
= TYPE_MODE (vectype
);
11940 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
11941 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
11947 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11948 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11950 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11952 /* For scalar masks we may have different boolean
11953 vector types having the same QImode. Thus we
11954 add additional check for elements number. */
11955 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
11956 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
11960 /* Check if it's a multi-step conversion that can be done using intermediate
11963 prev_type
= vectype
;
11964 prev_mode
= vec_mode
;
11966 if (!CONVERT_EXPR_CODE_P (code
))
11969 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11970 intermediate steps in promotion sequence. We try
11971 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
11973 interm_types
->create (MAX_INTERM_CVT_STEPS
);
11974 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
11976 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
11977 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
11979 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
11982 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
11983 TYPE_UNSIGNED (prev_type
));
11985 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
11986 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
11987 && intermediate_mode
== prev_mode
11988 && SCALAR_INT_MODE_P (prev_mode
))
11990 /* If the input and result modes are the same, a different optab
11991 is needed where we pass in the number of units in vectype. */
11992 optab3
= vec_unpacks_sbool_lo_optab
;
11993 optab4
= vec_unpacks_sbool_hi_optab
;
11997 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
11998 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
12001 if (!optab3
|| !optab4
12002 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
12003 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12004 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
12005 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
12006 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
12007 == CODE_FOR_nothing
)
12008 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
12009 == CODE_FOR_nothing
))
12012 interm_types
->quick_push (intermediate_type
);
12013 (*multi_step_cvt
)++;
12015 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12016 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12018 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12020 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
12021 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12025 prev_type
= intermediate_type
;
12026 prev_mode
= intermediate_mode
;
12029 interm_types
->release ();
12034 /* Function supportable_narrowing_operation
12036 Check whether an operation represented by the code CODE is a
12037 narrowing operation that is supported by the target platform in
12038 vector form (i.e., when operating on arguments of type VECTYPE_IN
12039 and producing a result of type VECTYPE_OUT).
12041 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12042 and FLOAT. This function checks if these operations are supported by
12043 the target platform directly via vector tree-codes.
12046 - CODE1 is the code of a vector operation to be used when
12047 vectorizing the operation, if available.
12048 - MULTI_STEP_CVT determines the number of required intermediate steps in
12049 case of multi-step conversion (like int->short->char - in that case
12050 MULTI_STEP_CVT will be 1).
12051 - INTERM_TYPES contains the intermediate type required to perform the
12052 narrowing operation (short in the above example). */
12055 supportable_narrowing_operation (enum tree_code code
,
12056 tree vectype_out
, tree vectype_in
,
12057 enum tree_code
*code1
, int *multi_step_cvt
,
12058 vec
<tree
> *interm_types
)
12060 machine_mode vec_mode
;
12061 enum insn_code icode1
;
12062 optab optab1
, interm_optab
;
12063 tree vectype
= vectype_in
;
12064 tree narrow_vectype
= vectype_out
;
12066 tree intermediate_type
, prev_type
;
12067 machine_mode intermediate_mode
, prev_mode
;
12071 *multi_step_cvt
= 0;
12075 c1
= VEC_PACK_TRUNC_EXPR
;
12076 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
12077 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12078 && TYPE_MODE (narrow_vectype
) == TYPE_MODE (vectype
)
12079 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
12080 optab1
= vec_pack_sbool_trunc_optab
;
12082 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12085 case FIX_TRUNC_EXPR
:
12086 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
12087 /* The signedness is determined from output operand. */
12088 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
12092 c1
= VEC_PACK_FLOAT_EXPR
;
12093 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12097 gcc_unreachable ();
12103 vec_mode
= TYPE_MODE (vectype
);
12104 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
12109 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12111 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12113 /* For scalar masks we may have different boolean
12114 vector types having the same QImode. Thus we
12115 add additional check for elements number. */
12116 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
12117 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12121 if (code
== FLOAT_EXPR
)
12124 /* Check if it's a multi-step conversion that can be done using intermediate
12126 prev_mode
= vec_mode
;
12127 prev_type
= vectype
;
12128 if (code
== FIX_TRUNC_EXPR
)
12129 uns
= TYPE_UNSIGNED (vectype_out
);
12131 uns
= TYPE_UNSIGNED (vectype
);
12133 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12134 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12135 costly than signed. */
12136 if (code
== FIX_TRUNC_EXPR
&& uns
)
12138 enum insn_code icode2
;
12141 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
12143 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12144 if (interm_optab
!= unknown_optab
12145 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
12146 && insn_data
[icode1
].operand
[0].mode
12147 == insn_data
[icode2
].operand
[0].mode
)
12150 optab1
= interm_optab
;
12155 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12156 intermediate steps in promotion sequence. We try
12157 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12158 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12159 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
12161 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12162 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12164 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
12167 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
12168 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12169 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12170 && intermediate_mode
== prev_mode
12171 && SCALAR_INT_MODE_P (prev_mode
))
12172 interm_optab
= vec_pack_sbool_trunc_optab
;
12175 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
12178 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
12179 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12180 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
12181 == CODE_FOR_nothing
))
12184 interm_types
->quick_push (intermediate_type
);
12185 (*multi_step_cvt
)++;
12187 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12189 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12191 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
12192 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12196 prev_mode
= intermediate_mode
;
12197 prev_type
= intermediate_type
;
12198 optab1
= interm_optab
;
12201 interm_types
->release ();
12205 /* Generate and return a vector mask of MASK_TYPE such that
12206 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12207 Add the statements to SEQ. */
12210 vect_gen_while (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12211 tree end_index
, const char *name
)
12213 tree cmp_type
= TREE_TYPE (start_index
);
12214 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
12215 cmp_type
, mask_type
,
12216 OPTIMIZE_FOR_SPEED
));
12217 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
12218 start_index
, end_index
,
12219 build_zero_cst (mask_type
));
12222 tmp
= make_temp_ssa_name (mask_type
, NULL
, name
);
12224 tmp
= make_ssa_name (mask_type
);
12225 gimple_call_set_lhs (call
, tmp
);
12226 gimple_seq_add_stmt (seq
, call
);
12230 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12231 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12234 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12237 tree tmp
= vect_gen_while (seq
, mask_type
, start_index
, end_index
);
12238 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
12241 /* Try to compute the vector types required to vectorize STMT_INFO,
12242 returning true on success and false if vectorization isn't possible.
12243 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12244 take sure that the number of elements in the vectors is no bigger
12249 - Set *STMT_VECTYPE_OUT to:
12250 - NULL_TREE if the statement doesn't need to be vectorized;
12251 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12253 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12254 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12255 statement does not help to determine the overall number of units. */
12258 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12259 tree
*stmt_vectype_out
,
12260 tree
*nunits_vectype_out
,
12261 unsigned int group_size
)
12263 gimple
*stmt
= stmt_info
->stmt
;
12265 /* For BB vectorization, we should always have a group size once we've
12266 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12267 are tentative requests during things like early data reference
12268 analysis and pattern recognition. */
12269 if (is_a
<bb_vec_info
> (vinfo
))
12270 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
12274 *stmt_vectype_out
= NULL_TREE
;
12275 *nunits_vectype_out
= NULL_TREE
;
12277 if (gimple_get_lhs (stmt
) == NULL_TREE
12278 /* MASK_STORE has no lhs, but is ok. */
12279 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12281 if (is_a
<gcall
*> (stmt
))
12283 /* Ignore calls with no lhs. These must be calls to
12284 #pragma omp simd functions, and what vectorization factor
12285 it really needs can't be determined until
12286 vectorizable_simd_clone_call. */
12287 if (dump_enabled_p ())
12288 dump_printf_loc (MSG_NOTE
, vect_location
,
12289 "defer to SIMD clone analysis.\n");
12290 return opt_result::success ();
12293 return opt_result::failure_at (stmt
,
12294 "not vectorized: irregular stmt.%G", stmt
);
12298 tree scalar_type
= NULL_TREE
;
12299 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
12301 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12302 if (dump_enabled_p ())
12303 dump_printf_loc (MSG_NOTE
, vect_location
,
12304 "precomputed vectype: %T\n", vectype
);
12306 else if (vect_use_mask_type_p (stmt_info
))
12308 unsigned int precision
= stmt_info
->mask_precision
;
12309 scalar_type
= build_nonstandard_integer_type (precision
, 1);
12310 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
12312 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
12313 " data-type %T\n", scalar_type
);
12314 if (dump_enabled_p ())
12315 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12319 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
12320 scalar_type
= TREE_TYPE (DR_REF (dr
));
12321 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12322 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
12324 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
12326 if (dump_enabled_p ())
12329 dump_printf_loc (MSG_NOTE
, vect_location
,
12330 "get vectype for scalar type (group size %d):"
12331 " %T\n", group_size
, scalar_type
);
12333 dump_printf_loc (MSG_NOTE
, vect_location
,
12334 "get vectype for scalar type: %T\n", scalar_type
);
12336 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
12338 return opt_result::failure_at (stmt
,
12340 " unsupported data-type %T\n",
12343 if (dump_enabled_p ())
12344 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12347 if (scalar_type
&& VECTOR_MODE_P (TYPE_MODE (scalar_type
)))
12348 return opt_result::failure_at (stmt
,
12349 "not vectorized: vector stmt in loop:%G",
12352 *stmt_vectype_out
= vectype
;
12354 /* Don't try to compute scalar types if the stmt produces a boolean
12355 vector; use the existing vector type instead. */
12356 tree nunits_vectype
= vectype
;
12357 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12359 /* The number of units is set according to the smallest scalar
12360 type (or the largest vector size, but we only support one
12361 vector size per vectorization). */
12362 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
12363 TREE_TYPE (vectype
));
12364 if (scalar_type
!= TREE_TYPE (vectype
))
12366 if (dump_enabled_p ())
12367 dump_printf_loc (MSG_NOTE
, vect_location
,
12368 "get vectype for smallest scalar type: %T\n",
12370 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
12372 if (!nunits_vectype
)
12373 return opt_result::failure_at
12374 (stmt
, "not vectorized: unsupported data-type %T\n",
12376 if (dump_enabled_p ())
12377 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
12382 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
12383 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)))
12384 return opt_result::failure_at (stmt
,
12385 "Not vectorized: Incompatible number "
12386 "of vector subparts between %T and %T\n",
12387 nunits_vectype
, *stmt_vectype_out
);
12389 if (dump_enabled_p ())
12391 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
12392 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
12393 dump_printf (MSG_NOTE
, "\n");
12396 *nunits_vectype_out
= nunits_vectype
;
12397 return opt_result::success ();
12400 /* Generate and return statement sequence that sets vector length LEN that is:
12402 min_of_start_and_end = min (START_INDEX, END_INDEX);
12403 left_len = END_INDEX - min_of_start_and_end;
12404 rhs = min (left_len, LEN_LIMIT);
12407 Note: the cost of the code generated by this function is modeled
12408 by vect_estimate_min_profitable_iters, so changes here may need
12409 corresponding changes there. */
12412 vect_gen_len (tree len
, tree start_index
, tree end_index
, tree len_limit
)
12414 gimple_seq stmts
= NULL
;
12415 tree len_type
= TREE_TYPE (len
);
12416 gcc_assert (TREE_TYPE (start_index
) == len_type
);
12418 tree min
= gimple_build (&stmts
, MIN_EXPR
, len_type
, start_index
, end_index
);
12419 tree left_len
= gimple_build (&stmts
, MINUS_EXPR
, len_type
, end_index
, min
);
12420 tree rhs
= gimple_build (&stmts
, MIN_EXPR
, len_type
, left_len
, len_limit
);
12421 gimple
* stmt
= gimple_build_assign (len
, rhs
);
12422 gimple_seq_add_stmt (&stmts
, stmt
);