gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
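/* A hedged sketch of a call matching the signature above (the variable
   names are illustrative, not taken from a real call site):

     unsigned est = record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                      stmt_info, vectype, 0, vect_body);

   The entry is only queued in COST_VEC for the target cost model to
   process later; EST is the preliminary builtin_vectorization_cost
   estimate, not the final cost.  */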
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
 122 Return an SSA_NAME for the vector at index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple *new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
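/* Illustrative GIMPLE produced by read_vector_array and write_vector_array
   (SSA names made up):

     vect_dest.3 = vect_array[2];    <-- read_vector_array
     vect_array[2] = vect_src.4;     <-- write_vector_array

   Both statements are emitted via vect_finish_stmt_generation, i.e. before
   *GSI as part of STMT_INFO's vectorization.  */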
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 tree mem_ref;
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
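/* The statement emitted above is a plain clobber assignment, e.g.

     vect_array = {CLOBBER};

   marking VAR (typically a temporary vector array) as dead so later passes
   can reuse its storage.  Dump syntax shown for illustration only.  */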
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
 216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt_info);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
261 tree op;
262 ssa_op_iter iter;
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
 294 - it is a control stmt in the loop (other than the loop exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
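/* A worked example of the criteria above (illustrative only):

     s = 0;
     for (i = 0; i < n; i++)
       {
         b[i] = a[i] + 1;   <-- has a vdef: *relevant = vect_used_in_scope
         s = s + a[i];      <-- S is used after the loop: *live_p = true
       }
     ... = s;

   The accumulation of S is not invariant, so it additionally gets
   *relevant = vect_used_only_live.  */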
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
373 tree operand;
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
 381 /* STMT has a data_ref. FORNOW this means that it's of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
 430 /* Function process_use.
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
 450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
474 if (!dstmt_vinfo)
475 return opt_result::success ();
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
706 default:
707 break;
710 if (is_pattern_stmt_p (stmt_vinfo))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
734 for (; i < gimple_num_ops (assign); i++)
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 for (i = 0; i < gimple_call_num_args (call); i++)
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
781 if (fatal)
782 *fatal = false;
783 return res;
786 } /* while worklist */
788 return opt_result::success ();
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
806 int inside_cost = 0, prologue_cost = 0;
808 gcc_assert (cost_vec != NULL);
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
814 if (!node)
 815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. If WIDEN_ARITH
840 is true the stmt is doing widening arithmetic. */
842 static void
843 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
844 enum vect_def_type *dt,
845 unsigned int ncopies, int pwr,
846 stmt_vector_for_cost *cost_vec,
847 bool widen_arith)
849 int i;
850 int inside_cost = 0, prologue_cost = 0;
852 for (i = 0; i < pwr + 1; i++)
854 inside_cost += record_stmt_cost (cost_vec, ncopies,
855 widen_arith
856 ? vector_stmt : vec_promote_demote,
857 stmt_info, 0, vect_body);
858 ncopies *= 2;
861 /* FORNOW: Assuming maximum 2 args per stmts. */
862 for (i = 0; i < 2; i++)
863 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
865 stmt_info, 0, vect_prologue);
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE, vect_location,
869 "vect_model_promotion_demotion_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 /* Returns true if the current function returns DECL. */
875 static bool
876 cfun_returns (tree decl)
878 edge_iterator ei;
879 edge e;
880 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
882 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
883 if (!ret)
884 continue;
885 if (gimple_return_retval (ret) == decl)
886 return true;
 887 /* We often end up with an aggregate copy to the result decl;
888 handle that case as well. First skip intermediate clobbers
889 though. */
890 gimple *def = ret;
893 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
895 while (gimple_clobber_p (def));
896 if (is_a <gassign *> (def)
897 && gimple_assign_lhs (def) == gimple_return_retval (ret)
898 && gimple_assign_rhs1 (def) == decl)
899 return true;
901 return false;
904 /* Function vect_model_store_cost
906 Models cost for stores. In the case of grouped accesses, one access
907 has the overhead of the grouped access attributed to it. */
909 static void
910 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
911 vect_memory_access_type memory_access_type,
912 dr_alignment_support alignment_support_scheme,
913 int misalignment,
914 vec_load_store_type vls_type, slp_tree slp_node,
915 stmt_vector_for_cost *cost_vec)
917 unsigned int inside_cost = 0, prologue_cost = 0;
918 stmt_vec_info first_stmt_info = stmt_info;
919 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
921 /* ??? Somehow we need to fix this at the callers. */
922 if (slp_node)
923 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
925 if (vls_type == VLS_STORE_INVARIANT)
927 if (!slp_node)
928 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
929 stmt_info, 0, vect_prologue);
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node && grouped_access_p)
935 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p = (first_stmt_info == stmt_info);
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
946 if (first_stmt_p
947 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 949 /* Uses high and low interleave or shuffle operations for each
950 needed permute. */
951 int group_size = DR_GROUP_SIZE (first_stmt_info);
952 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
953 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
954 stmt_info, 0, vect_body);
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE, vect_location,
958 "vect_model_store_cost: strided group_size = %d .\n",
959 group_size);
962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
963 /* Costs of the stores. */
964 if (memory_access_type == VMAT_ELEMENTWISE
965 || memory_access_type == VMAT_GATHER_SCATTER)
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
969 inside_cost += record_stmt_cost (cost_vec,
970 ncopies * assumed_nunits,
971 scalar_store, stmt_info, 0, vect_body);
973 else
974 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
975 misalignment, &inside_cost, cost_vec);
977 if (memory_access_type == VMAT_ELEMENTWISE
978 || memory_access_type == VMAT_STRIDED_SLP)
980 /* N scalar stores plus extracting the elements. */
981 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
982 inside_cost += record_stmt_cost (cost_vec,
983 ncopies * assumed_nunits,
984 vec_to_scalar, stmt_info, 0, vect_body);
 987 /* When vectorizing a store into the function result, assign
988 a penalty if the function returns in a multi-register location.
989 In this case we assume we'll end up with having to spill the
990 vector result and do piecewise loads as a conservative estimate. */
991 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
992 if (base
993 && (TREE_CODE (base) == RESULT_DECL
994 || (DECL_P (base) && cfun_returns (base)))
995 && !aggregate_value_p (base, cfun->decl))
997 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
998 /* ??? Handle PARALLEL in some way. */
999 if (REG_P (reg))
1001 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1002 /* Assume that a single reg-reg move is possible and cheap,
1003 do not account for vector to gp register move cost. */
1004 if (nregs > 1)
1006 /* Spill. */
1007 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1008 vector_store,
1009 stmt_info, 0, vect_epilogue);
1010 /* Loads. */
1011 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1012 scalar_load,
1013 stmt_info, 0, vect_epilogue);
1018 if (dump_enabled_p ())
1019 dump_printf_loc (MSG_NOTE, vect_location,
1020 "vect_model_store_cost: inside_cost = %d, "
1021 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1025 /* Calculate cost of DR's memory access. */
1026 void
1027 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1028 dr_alignment_support alignment_support_scheme,
1029 int misalignment,
1030 unsigned int *inside_cost,
1031 stmt_vector_for_cost *body_cost_vec)
1033 switch (alignment_support_scheme)
1035 case dr_aligned:
1037 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1038 vector_store, stmt_info, 0,
1039 vect_body);
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_NOTE, vect_location,
1043 "vect_model_store_cost: aligned.\n");
1044 break;
1047 case dr_unaligned_supported:
1049 /* Here, we assign an additional cost for the unaligned store. */
1050 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1051 unaligned_store, stmt_info,
1052 misalignment, vect_body);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE, vect_location,
1055 "vect_model_store_cost: unaligned supported by "
1056 "hardware.\n");
1057 break;
1060 case dr_unaligned_unsupported:
1062 *inside_cost = VECT_MAX_COST;
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1066 "vect_model_store_cost: unsupported access.\n");
1067 break;
1070 default:
1071 gcc_unreachable ();
1076 /* Function vect_model_load_cost
1078 Models cost for loads. In the case of grouped accesses, one access has
1079 the overhead of the grouped access attributed to it. Since unaligned
1080 accesses are supported for loads, we also account for the costs of the
1081 access scheme chosen. */
1083 static void
1084 vect_model_load_cost (vec_info *vinfo,
1085 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1086 vect_memory_access_type memory_access_type,
1087 dr_alignment_support alignment_support_scheme,
1088 int misalignment,
1089 gather_scatter_info *gs_info,
1090 slp_tree slp_node,
1091 stmt_vector_for_cost *cost_vec)
1093 unsigned int inside_cost = 0, prologue_cost = 0;
1094 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1096 gcc_assert (cost_vec);
1098 /* ??? Somehow we need to fix this at the callers. */
1099 if (slp_node)
1100 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1102 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1104 /* If the load is permuted then the alignment is determined by
1105 the first group element not by the first scalar stmt DR. */
1106 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1107 /* Record the cost for the permutation. */
1108 unsigned n_perms, n_loads;
1109 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1110 vf, true, &n_perms, &n_loads);
1111 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1112 first_stmt_info, 0, vect_body);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 ncopies = n_loads;
1119 /* Grouped loads read all elements in the group at once,
1120 so we want the DR for the first statement. */
1121 stmt_vec_info first_stmt_info = stmt_info;
1122 if (!slp_node && grouped_access_p)
1123 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1125 /* True if we should include any once-per-group costs as well as
1126 the cost of the statement itself. For SLP we only get called
1127 once per group anyhow. */
1128 bool first_stmt_p = (first_stmt_info == stmt_info);
1130 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1131 ones we actually need. Account for the cost of unused results. */
1132 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1134 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1135 stmt_vec_info next_stmt_info = first_stmt_info;
1138 gaps -= 1;
1139 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1141 while (next_stmt_info);
1142 if (gaps)
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE, vect_location,
1146 "vect_model_load_cost: %d unused vectors.\n",
1147 gaps);
1148 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1149 alignment_support_scheme, misalignment, false,
1150 &inside_cost, &prologue_cost,
1151 cost_vec, cost_vec, true);
1155 /* We assume that the cost of a single load-lanes instruction is
1156 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1157 access is instead being provided by a load-and-permute operation,
1158 include the cost of the permutes. */
1159 if (first_stmt_p
1160 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 1162 /* Uses even and odd extract operations or shuffle operations
1163 for each needed permute. */
1164 int group_size = DR_GROUP_SIZE (first_stmt_info);
1165 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1166 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1167 stmt_info, 0, vect_body);
1169 if (dump_enabled_p ())
1170 dump_printf_loc (MSG_NOTE, vect_location,
1171 "vect_model_load_cost: strided group_size = %d .\n",
1172 group_size);
1175 /* The loads themselves. */
1176 if (memory_access_type == VMAT_ELEMENTWISE
1177 || memory_access_type == VMAT_GATHER_SCATTER)
1179 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1180 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1181 if (memory_access_type == VMAT_GATHER_SCATTER
1182 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1183 /* For emulated gathers N offset vector element extracts
1184 (we assume the scalar scaling and ptr + offset add is consumed by
1185 the load). */
1186 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1187 vec_to_scalar, stmt_info, 0,
1188 vect_body);
1189 /* N scalar loads plus gathering them into a vector. */
1190 inside_cost += record_stmt_cost (cost_vec,
1191 ncopies * assumed_nunits,
1192 scalar_load, stmt_info, 0, vect_body);
1194 else if (memory_access_type == VMAT_INVARIANT)
1196 /* Invariant loads will ideally be hoisted and splat to a vector. */
1197 prologue_cost += record_stmt_cost (cost_vec, 1,
1198 scalar_load, stmt_info, 0,
1199 vect_prologue);
1200 prologue_cost += record_stmt_cost (cost_vec, 1,
1201 scalar_to_vec, stmt_info, 0,
1202 vect_prologue);
1204 else
1205 vect_get_load_cost (vinfo, stmt_info, ncopies,
1206 alignment_support_scheme, misalignment, first_stmt_p,
1207 &inside_cost, &prologue_cost,
1208 cost_vec, cost_vec, true);
1209 if (memory_access_type == VMAT_ELEMENTWISE
1210 || memory_access_type == VMAT_STRIDED_SLP
1211 || (memory_access_type == VMAT_GATHER_SCATTER
1212 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1213 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1214 stmt_info, 0, vect_body);
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_NOTE, vect_location,
1218 "vect_model_load_cost: inside_cost = %d, "
1219 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1223 /* Calculate cost of DR's memory access. */
1224 void
1225 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1226 dr_alignment_support alignment_support_scheme,
1227 int misalignment,
1228 bool add_realign_cost, unsigned int *inside_cost,
1229 unsigned int *prologue_cost,
1230 stmt_vector_for_cost *prologue_cost_vec,
1231 stmt_vector_for_cost *body_cost_vec,
1232 bool record_prologue_costs)
1234 switch (alignment_support_scheme)
1236 case dr_aligned:
1238 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1239 stmt_info, 0, vect_body);
1241 if (dump_enabled_p ())
1242 dump_printf_loc (MSG_NOTE, vect_location,
1243 "vect_model_load_cost: aligned.\n");
1245 break;
1247 case dr_unaligned_supported:
1249 /* Here, we assign an additional cost for the unaligned load. */
1250 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1251 unaligned_load, stmt_info,
1252 misalignment, vect_body);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: unaligned supported by "
1257 "hardware.\n");
1259 break;
1261 case dr_explicit_realign:
1263 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1264 vector_load, stmt_info, 0, vect_body);
1265 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1266 vec_perm, stmt_info, 0, vect_body);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1270 prologue costs. */
1271 if (targetm.vectorize.builtin_mask_for_load)
1272 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1273 stmt_info, 0, vect_body);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE, vect_location,
1277 "vect_model_load_cost: explicit realign\n");
1279 break;
1281 case dr_explicit_realign_optimized:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: unaligned software "
1286 "pipelined.\n");
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost && record_prologue_costs)
1297 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1298 vector_stmt, stmt_info,
1299 0, vect_prologue);
1300 if (targetm.vectorize.builtin_mask_for_load)
1301 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1302 vector_stmt, stmt_info,
1303 0, vect_prologue);
1306 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 stmt_info, 0, vect_body);
1308 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1309 stmt_info, 0, vect_body);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE, vect_location,
1313 "vect_model_load_cost: explicit realign optimized"
1314 "\n");
1316 break;
1319 case dr_unaligned_unsupported:
1321 *inside_cost = VECT_MAX_COST;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1325 "vect_model_load_cost: unsupported access.\n");
1326 break;
1329 default:
1330 gcc_unreachable ();
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1337 static void
1338 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1339 gimple_stmt_iterator *gsi)
1341 if (gsi)
1342 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1343 else
1344 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1346 if (dump_enabled_p ())
1347 dump_printf_loc (MSG_NOTE, vect_location,
1348 "created new init_stmt: %G", new_stmt);
1351 /* Function vect_init_vector.
1353 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1354 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
 1355 a vector type, a vector with all elements equal to VAL is created first.
1356 Place the initialization at GSI if it is not NULL. Otherwise, place the
1357 initialization at the loop preheader.
1358 Return the DEF of INIT_STMT.
1359 It will be used in the vectorization of STMT_INFO. */
1361 tree
1362 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1363 gimple_stmt_iterator *gsi)
1365 gimple *init_stmt;
1366 tree new_temp;
 1368 /* We abuse this function to push something to an SSA name with initial value 'val'. */
1369 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1371 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1372 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1374 /* Scalar boolean value should be transformed into
1375 all zeros or all ones value before building a vector. */
1376 if (VECTOR_BOOLEAN_TYPE_P (type))
1378 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1379 tree false_val = build_zero_cst (TREE_TYPE (type));
1381 if (CONSTANT_CLASS_P (val))
1382 val = integer_zerop (val) ? false_val : true_val;
1383 else
1385 new_temp = make_ssa_name (TREE_TYPE (type));
1386 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1387 val, true_val, false_val);
1388 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1389 val = new_temp;
1392 else
1394 gimple_seq stmts = NULL;
1395 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1396 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1397 TREE_TYPE (type), val);
1398 else
1399 /* ??? Condition vectorization expects us to do
1400 promotion of invariant/external defs. */
1401 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1402 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1403 !gsi_end_p (gsi2); )
1405 init_stmt = gsi_stmt (gsi2);
1406 gsi_remove (&gsi2, false);
1407 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1411 val = build_vector_from_val (type, val);
1414 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1415 init_stmt = gimple_build_assign (new_temp, val);
1416 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1417 return new_temp;
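/* For instance, splatting an invariant operand VAL for a four-element
   vector type would emit something like

     cst_5 = {val, val, val, val};

   in the loop preheader (when GSI is NULL) or right before *GSI otherwise,
   and return CST_5 as the vector definition.  Sketch only; the boolean and
   conversion special cases above may emit additional statements first.  */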
1421 /* Function vect_get_vec_defs_for_operand.
1423 OP is an operand in STMT_VINFO. This function returns a vector of
1424 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1426 In the case that OP is an SSA_NAME which is defined in the loop, then
1427 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1429 In case OP is an invariant or constant, a new stmt that creates a vector def
1430 needs to be introduced. VECTYPE may be used to specify a required type for
1431 vector invariant. */
1433 void
1434 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1435 unsigned ncopies,
1436 tree op, vec<tree> *vec_oprnds, tree vectype)
1438 gimple *def_stmt;
1439 enum vect_def_type dt;
1440 bool is_simple_use;
1441 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE, vect_location,
1445 "vect_get_vec_defs_for_operand: %T\n", op);
1447 stmt_vec_info def_stmt_info;
1448 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1449 &def_stmt_info, &def_stmt);
1450 gcc_assert (is_simple_use);
1451 if (def_stmt && dump_enabled_p ())
1452 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1454 vec_oprnds->create (ncopies);
1455 if (dt == vect_constant_def || dt == vect_external_def)
1457 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1458 tree vector_type;
1460 if (vectype)
1461 vector_type = vectype;
1462 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1463 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1464 vector_type = truth_type_for (stmt_vectype);
1465 else
1466 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1468 gcc_assert (vector_type);
1469 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1470 while (ncopies--)
1471 vec_oprnds->quick_push (vop);
1473 else
1475 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1476 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1477 for (unsigned i = 0; i < ncopies; ++i)
1478 vec_oprnds->quick_push (gimple_get_lhs
1479 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1484 /* Get vectorized definitions for OP0 and OP1. */
1486 void
1487 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1488 unsigned ncopies,
1489 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1490 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1491 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1492 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1494 if (slp_node)
1496 if (op0)
1497 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1498 if (op1)
1499 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1500 if (op2)
1501 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1502 if (op3)
1503 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1505 else
1507 if (op0)
1508 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1509 op0, vec_oprnds0, vectype0);
1510 if (op1)
1511 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1512 op1, vec_oprnds1, vectype1);
1513 if (op2)
1514 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1515 op2, vec_oprnds2, vectype2);
1516 if (op3)
1517 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1518 op3, vec_oprnds3, vectype3);
1522 void
1523 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1524 unsigned ncopies,
1525 tree op0, vec<tree> *vec_oprnds0,
1526 tree op1, vec<tree> *vec_oprnds1,
1527 tree op2, vec<tree> *vec_oprnds2,
1528 tree op3, vec<tree> *vec_oprnds3)
1530 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1531 op0, vec_oprnds0, NULL_TREE,
1532 op1, vec_oprnds1, NULL_TREE,
1533 op2, vec_oprnds2, NULL_TREE,
1534 op3, vec_oprnds3, NULL_TREE);
1537 /* Helper function called by vect_finish_replace_stmt and
1538 vect_finish_stmt_generation. Set the location of the new
 1539 statement and add it to STMT_INFO's EH region if it can throw. */
1541 static void
1542 vect_finish_stmt_generation_1 (vec_info *,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1545 if (dump_enabled_p ())
1546 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1548 if (stmt_info)
1550 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1552 /* While EH edges will generally prevent vectorization, stmt might
1553 e.g. be in a must-not-throw region. Ensure newly created stmts
1554 that could throw are part of the same region. */
1555 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1556 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1557 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1559 else
1560 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1563 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
 1564 which sets the same scalar result as STMT_INFO did, and set up VEC_STMT's
 1565 location and EH information. */
1567 void
1568 vect_finish_replace_stmt (vec_info *vinfo,
1569 stmt_vec_info stmt_info, gimple *vec_stmt)
1571 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1572 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1574 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1575 gsi_replace (&gsi, vec_stmt, true);
1577 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1580 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
 1581 before *GSI, setting up its location and EH information. */
1583 void
1584 vect_finish_stmt_generation (vec_info *vinfo,
1585 stmt_vec_info stmt_info, gimple *vec_stmt,
1586 gimple_stmt_iterator *gsi)
1588 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1590 if (!gsi_end_p (*gsi)
1591 && gimple_has_mem_ops (vec_stmt))
1593 gimple *at_stmt = gsi_stmt (*gsi);
1594 tree vuse = gimple_vuse (at_stmt);
1595 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1597 tree vdef = gimple_vdef (at_stmt);
1598 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1599 gimple_set_modified (vec_stmt, true);
1600 /* If we have an SSA vuse and insert a store, update virtual
1601 SSA form to avoid triggering the renamer. Do so only
1602 if we can easily see all uses - which is what almost always
1603 happens with the way vectorized stmts are inserted. */
1604 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1605 && ((is_gimple_assign (vec_stmt)
1606 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1607 || (is_gimple_call (vec_stmt)
1608 && !(gimple_call_flags (vec_stmt)
1609 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1611 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1612 gimple_set_vdef (vec_stmt, new_vdef);
1613 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1617 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1618 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1621 /* We want to vectorize a call to combined function CFN with function
1622 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1623 as the types of all inputs. Check whether this is possible using
1624 an internal function, returning its code if so or IFN_LAST if not. */
1626 static internal_fn
1627 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1628 tree vectype_out, tree vectype_in)
1630 internal_fn ifn;
1631 if (internal_fn_p (cfn))
1632 ifn = as_internal_fn (cfn);
1633 else
1634 ifn = associated_internal_fn (fndecl);
1635 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1637 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1638 if (info.vectorizable)
1640 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1641 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1642 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1643 OPTIMIZE_FOR_SPEED))
1644 return ifn;
1647 return IFN_LAST;
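/* For example, a scalar call that maps to CFN_SQRT is vectorizable through
   IFN_SQRT when direct_internal_fn_supported_p reports that the target has
   the corresponding vector optab for VECTYPE_IN; otherwise IFN_LAST is
   returned and the caller must fall back to some other strategy.  Hedged
   example: which functions qualify is entirely target-dependent.  */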
1651 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1652 gimple_stmt_iterator *);
1654 /* Check whether a load or store statement in the loop described by
1655 LOOP_VINFO is possible in a loop using partial vectors. This is
1656 testing whether the vectorizer pass has the appropriate support,
1657 as well as whether the target does.
1659 VLS_TYPE says whether the statement is a load or store and VECTYPE
1660 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1661 says how the load or store is going to be implemented and GROUP_SIZE
1662 is the number of load or store statements in the containing group.
1663 If the access is a gather load or scatter store, GS_INFO describes
1664 its arguments. If the load or store is conditional, SCALAR_MASK is the
1665 condition under which it occurs.
1667 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1668 vectors is not supported, otherwise record the required rgroup control
1669 types. */
1671 static void
1672 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1673 vec_load_store_type vls_type,
1674 int group_size,
1675 vect_memory_access_type
1676 memory_access_type,
1677 unsigned int ncopies,
1678 gather_scatter_info *gs_info,
1679 tree scalar_mask)
1681 /* Invariant loads need no special support. */
1682 if (memory_access_type == VMAT_INVARIANT)
1683 return;
1685 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1686 machine_mode vecmode = TYPE_MODE (vectype);
1687 bool is_load = (vls_type == VLS_LOAD);
1688 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1690 if (is_load
1691 ? !vect_load_lanes_supported (vectype, group_size, true)
1692 : !vect_store_lanes_supported (vectype, group_size, true))
1694 if (dump_enabled_p ())
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1696 "can't operate on partial vectors because"
1697 " the target doesn't have an appropriate"
1698 " load/store-lanes instruction.\n");
1699 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1700 return;
1702 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1703 return;
1706 if (memory_access_type == VMAT_GATHER_SCATTER)
1708 internal_fn ifn = (is_load
1709 ? IFN_MASK_GATHER_LOAD
1710 : IFN_MASK_SCATTER_STORE);
1711 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1712 gs_info->memory_type,
1713 gs_info->offset_vectype,
1714 gs_info->scale))
1716 if (dump_enabled_p ())
1717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1718 "can't operate on partial vectors because"
1719 " the target doesn't have an appropriate"
1720 " gather load or scatter store instruction.\n");
1721 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1722 return;
1724 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1725 return;
1728 if (memory_access_type != VMAT_CONTIGUOUS
1729 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1731 /* Element X of the data must come from iteration i * VF + X of the
1732 scalar loop. We need more work to support other mappings. */
1733 if (dump_enabled_p ())
1734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1735 "can't operate on partial vectors because an"
1736 " access isn't contiguous.\n");
1737 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1738 return;
1741 if (!VECTOR_MODE_P (vecmode))
1743 if (dump_enabled_p ())
1744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1745 "can't operate on partial vectors when emulating"
1746 " vector operations.\n");
1747 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1748 return;
1751 /* We might load more scalars than we need for permuting SLP loads.
1752 We checked in get_group_load_store_type that the extra elements
1753 don't leak into a new vector. */
1754 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1756 unsigned int nvectors;
1757 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1758 return nvectors;
1759 gcc_unreachable ();
1762 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1763 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1764 machine_mode mask_mode;
1765 bool using_partial_vectors_p = false;
1766 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1767 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1769 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1770 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1771 using_partial_vectors_p = true;
1774 machine_mode vmode;
1775 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1777 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1778 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1779 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1780 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1781 using_partial_vectors_p = true;
1784 if (!using_partial_vectors_p)
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1788 "can't operate on partial vectors because the"
1789 " target doesn't have the appropriate partial"
1790 " vectorization load or store.\n");
1791 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1795 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1796 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1797 that needs to be applied to all loads and stores in a vectorized loop.
1798 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1800 MASK_TYPE is the type of both masks. If new statements are needed,
1801 insert them before GSI. */
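/* For example (SSA names purely illustrative), when both masks are
   present this emits
       vec_mask_and_3 = vec_mask_2 & loop_mask_1;
   before GSI and returns vec_mask_and_3.  */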
1803 static tree
1804 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1805 gimple_stmt_iterator *gsi)
1807 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1808 if (!loop_mask)
1809 return vec_mask;
1811 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1812 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1813 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1814 vec_mask, loop_mask);
1815 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1816 return and_res;
1819 /* Determine whether we can use a gather load or scatter store to vectorize
1820 strided load or store STMT_INFO by truncating the current offset to a
1821 smaller width. We need to be able to construct an offset vector:
1823 { 0, X, X*2, X*3, ... }
1825 without loss of precision, where X is STMT_INFO's DR_STEP.
1827 Return true if this is possible, describing the gather load or scatter
1828 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
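/* Worked example (illustrative values): if COUNT ends up as 999, then
   with DR_STEP 4 and SCALE 4 the factor below is 1 and the range is
   999, which needs 10 bits; the loop therefore rounds up to a 16-bit
   unsigned offset type and asks the target whether a gather or scatter
   with that offset width is supported.  */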
1830 static bool
1831 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1832 loop_vec_info loop_vinfo, bool masked_p,
1833 gather_scatter_info *gs_info)
1835 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1836 data_reference *dr = dr_info->dr;
1837 tree step = DR_STEP (dr);
1838 if (TREE_CODE (step) != INTEGER_CST)
1840 /* ??? Perhaps we could use range information here? */
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_NOTE, vect_location,
1843 "cannot truncate variable step.\n");
1844 return false;
1847 /* Get the number of bits in an element. */
1848 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1849 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1850 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1852 /* Set COUNT to the upper limit on the number of elements - 1.
1853 Start with the maximum vectorization factor. */
1854 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1856 /* Try lowering COUNT to the number of scalar latch iterations. */
1857 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1858 widest_int max_iters;
1859 if (max_loop_iterations (loop, &max_iters)
1860 && max_iters < count)
1861 count = max_iters.to_shwi ();
1863 /* Try scales of 1 and the element size. */
1864 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1865 wi::overflow_type overflow = wi::OVF_NONE;
1866 for (int i = 0; i < 2; ++i)
1868 int scale = scales[i];
1869 widest_int factor;
1870 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1871 continue;
1873 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1874 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1875 if (overflow)
1876 continue;
1877 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1878 unsigned int min_offset_bits = wi::min_precision (range, sign);
1880 /* Find the narrowest viable offset type. */
1881 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1882 tree offset_type = build_nonstandard_integer_type (offset_bits,
1883 sign == UNSIGNED);
1885 /* See whether the target supports the operation with an offset
1886 no narrower than OFFSET_TYPE. */
1887 tree memory_type = TREE_TYPE (DR_REF (dr));
1888 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1889 vectype, memory_type, offset_type, scale,
1890 &gs_info->ifn, &gs_info->offset_vectype)
1891 || gs_info->ifn == IFN_LAST)
1892 continue;
1894 gs_info->decl = NULL_TREE;
1895 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1896 but we don't need to store that here. */
1897 gs_info->base = NULL_TREE;
1898 gs_info->element_type = TREE_TYPE (vectype);
1899 gs_info->offset = fold_convert (offset_type, step);
1900 gs_info->offset_dt = vect_constant_def;
1901 gs_info->scale = scale;
1902 gs_info->memory_type = memory_type;
1903 return true;
1906 if (overflow && dump_enabled_p ())
1907 dump_printf_loc (MSG_NOTE, vect_location,
1908 "truncating gather/scatter offset to %d bits"
1909 " might change its value.\n", element_bits);
1911 return false;
1914 /* Return true if we can use gather/scatter internal functions to
1915 vectorize STMT_INFO, which is a grouped or strided load or store.
1916 MASKED_P is true if the load or store is conditional. When returning
1917 true, fill in GS_INFO with the information required to perform the
1918 operation. */
1920 static bool
1921 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1922 loop_vec_info loop_vinfo, bool masked_p,
1923 gather_scatter_info *gs_info)
1925 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1926 || gs_info->ifn == IFN_LAST)
1927 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1928 masked_p, gs_info);
1930 tree old_offset_type = TREE_TYPE (gs_info->offset);
1931 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1933 gcc_assert (TYPE_PRECISION (new_offset_type)
1934 >= TYPE_PRECISION (old_offset_type));
1935 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1937 if (dump_enabled_p ())
1938 dump_printf_loc (MSG_NOTE, vect_location,
1939 "using gather/scatter for strided/grouped access,"
1940 " scale = %d\n", gs_info->scale);
1942 return true;
1945 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1946 elements with a known constant step. Return -1 if that step
1947 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1949 static int
1950 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1952 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1953 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1954 size_zero_node);
1957 /* If the target supports a permute mask that reverses the elements in
1958 a vector of type VECTYPE, return that mask, otherwise return null. */
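/* For example, for V4SI the required permutation selects elements
   { 3, 2, 1, 0 }.  The builder below encodes this with a single
   stepped pattern of three elements { nunits-1, nunits-2, nunits-3 },
   which also covers variable-length vectors.  */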
1960 static tree
1961 perm_mask_for_reverse (tree vectype)
1963 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1965 /* The encoding has a single stepped pattern. */
1966 vec_perm_builder sel (nunits, 1, 3);
1967 for (int i = 0; i < 3; ++i)
1968 sel.quick_push (nunits - 1 - i);
1970 vec_perm_indices indices (sel, 1, nunits);
1971 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1972 return NULL_TREE;
1973 return vect_gen_perm_mask_checked (vectype, indices);
1976 /* A subroutine of get_load_store_type, with a subset of the same
1977 arguments. Handle the case where STMT_INFO is a load or store that
1978 accesses consecutive elements with a negative step. Sets *POFFSET
1979 to the offset to be applied to the DR for the first access. */
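/* For instance, for a V4SI access with four 4-byte elements and a
   negative step, *POFFSET becomes (-4 + 1) * 4 = -12, i.e. the first
   vector access starts three elements before the DR address.  */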
1981 static vect_memory_access_type
1982 get_negative_load_store_type (vec_info *vinfo,
1983 stmt_vec_info stmt_info, tree vectype,
1984 vec_load_store_type vls_type,
1985 unsigned int ncopies, poly_int64 *poffset)
1987 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1988 dr_alignment_support alignment_support_scheme;
1990 if (ncopies > 1)
1992 if (dump_enabled_p ())
1993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1994 "multiple types with negative step.\n");
1995 return VMAT_ELEMENTWISE;
1998 /* For backward-running DRs the first access in VECTYPE is actually
1999 N-1 elements before the address of the DR. */
2000 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2001 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2003 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2004 alignment_support_scheme
2005 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2006 if (alignment_support_scheme != dr_aligned
2007 && alignment_support_scheme != dr_unaligned_supported)
2009 if (dump_enabled_p ())
2010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2011 "negative step but alignment required.\n");
2012 *poffset = 0;
2013 return VMAT_ELEMENTWISE;
2016 if (vls_type == VLS_STORE_INVARIANT)
2018 if (dump_enabled_p ())
2019 dump_printf_loc (MSG_NOTE, vect_location,
2020 "negative step with invariant source;"
2021 " no permute needed.\n");
2022 return VMAT_CONTIGUOUS_DOWN;
2025 if (!perm_mask_for_reverse (vectype))
2027 if (dump_enabled_p ())
2028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2029 "negative step and reversing not supported.\n");
2030 *poffset = 0;
2031 return VMAT_ELEMENTWISE;
2034 return VMAT_CONTIGUOUS_REVERSE;
2037 /* STMT_INFO is either a masked or unconditional store. Return the value
2038 being stored. */
2040 tree
2041 vect_get_store_rhs (stmt_vec_info stmt_info)
2043 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2045 gcc_assert (gimple_assign_single_p (assign));
2046 return gimple_assign_rhs1 (assign);
2048 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2050 internal_fn ifn = gimple_call_internal_fn (call);
2051 int index = internal_fn_stored_value_index (ifn);
2052 gcc_assert (index >= 0);
2053 return gimple_call_arg (call, index);
2055 gcc_unreachable ();
2058 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2060 This function returns a vector type which can be composed from NELTS
2061 pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
2062 and the result has the same vector size as VTYPE.  The function first
2063 checks whether the target supports construction from vectors of the
2064 piece size; if not, it then checks construction from a scalar integer
2065 mode of the piece size.  It returns NULL_TREE if neither is available.
2067 For example, for (vtype=V16QI, nelts=4), we can probably get:
2068 - V16QI with PTYPE V4QI.
2069 - V4SI with PTYPE SI.
2070 - NULL_TREE. */
2072 static tree
2073 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2075 gcc_assert (VECTOR_TYPE_P (vtype));
2076 gcc_assert (known_gt (nelts, 0U));
2078 machine_mode vmode = TYPE_MODE (vtype);
2079 if (!VECTOR_MODE_P (vmode))
2080 return NULL_TREE;
2082 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2083 unsigned int pbsize;
2084 if (constant_multiple_p (vbsize, nelts, &pbsize))
2086 /* First check if vec_init optab supports construction from
2087 vector pieces directly. */
2088 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2089 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2090 machine_mode rmode;
2091 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2092 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2093 != CODE_FOR_nothing))
2095 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2096 return vtype;
2099 /* Otherwise check whether an integer type of the same piece size exists
2100 and whether the vec_init optab supports construction from it directly. */
2101 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2102 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2103 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2104 != CODE_FOR_nothing))
2106 *ptype = build_nonstandard_integer_type (pbsize, 1);
2107 return build_vector_type (*ptype, nelts);
2111 return NULL_TREE;
2114 /* A subroutine of get_load_store_type, with a subset of the same
2115 arguments. Handle the case where STMT_INFO is part of a grouped load
2116 or store.
2118 For stores, the statements in the group are all consecutive
2119 and there is no gap at the end. For loads, the statements in the
2120 group might not be consecutive; there can be gaps between statements
2121 as well as at the end. */
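/* Concrete illustration of the overrun checks below: with a known
   alignment of 16 bytes and 4-byte elements, a trailing gap of up to
   3 elements is harmless, since every vector access starts on a
   16-byte boundary and is therefore guaranteed to cover a non-gap
   element within the same aligned block.  */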
2123 static bool
2124 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2125 tree vectype, slp_tree slp_node,
2126 bool masked_p, vec_load_store_type vls_type,
2127 vect_memory_access_type *memory_access_type,
2128 poly_int64 *poffset,
2129 dr_alignment_support *alignment_support_scheme,
2130 int *misalignment,
2131 gather_scatter_info *gs_info)
2133 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2134 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2135 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2136 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2137 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2138 bool single_element_p = (stmt_info == first_stmt_info
2139 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2140 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2141 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2143 /* True if the vectorized statements would access beyond the last
2144 statement in the group. */
2145 bool overrun_p = false;
2147 /* True if we can cope with such overrun by peeling for gaps, so that
2148 there is at least one final scalar iteration after the vector loop. */
2149 bool can_overrun_p = (!masked_p
2150 && vls_type == VLS_LOAD
2151 && loop_vinfo
2152 && !loop->inner);
2154 /* There can only be a gap at the end of the group if the stride is
2155 known at compile time. */
2156 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2158 /* Stores can't yet have gaps. */
2159 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2161 if (slp_node)
2163 /* For SLP vectorization we directly vectorize a subchain
2164 without permutation. */
2165 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2166 first_dr_info
2167 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2168 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2170 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2171 separated by the stride, until we have a complete vector.
2172 Fall back to scalar accesses if that isn't possible. */
2173 if (multiple_p (nunits, group_size))
2174 *memory_access_type = VMAT_STRIDED_SLP;
2175 else
2176 *memory_access_type = VMAT_ELEMENTWISE;
2178 else
2180 overrun_p = loop_vinfo && gap != 0;
2181 if (overrun_p && vls_type != VLS_LOAD)
2183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2184 "Grouped store with gaps requires"
2185 " non-consecutive accesses\n");
2186 return false;
2188 /* An overrun is fine if the trailing elements are smaller
2189 than the alignment boundary B. Every vector access will
2190 be a multiple of B and so we are guaranteed to access a
2191 non-gap element in the same B-sized block. */
2192 if (overrun_p
2193 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2194 vectype)
2195 / vect_get_scalar_dr_size (first_dr_info)))
2196 overrun_p = false;
2198 /* If the gap splits the vector in half and the target
2199 can do half-vector operations avoid the epilogue peeling
2200 by simply loading half of the vector only. Usually
2201 the construction with an upper zero half will be elided. */
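   /* For example (illustrative), a V8HI group of size 8 with a trailing
      gap of 4 satisfies the conditions below: only the low half of the
      vector is actually loaded, via a two-piece composition type, so no
      epilogue peeling for gaps is needed.  */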
2202 dr_alignment_support alss;
2203 int misalign = dr_misalignment (first_dr_info, vectype);
2204 tree half_vtype;
2205 if (overrun_p
2206 && !masked_p
2207 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2208 vectype, misalign)))
2209 == dr_aligned
2210 || alss == dr_unaligned_supported)
2211 && known_eq (nunits, (group_size - gap) * 2)
2212 && known_eq (nunits, group_size)
2213 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2214 != NULL_TREE))
2215 overrun_p = false;
2217 if (overrun_p && !can_overrun_p)
2219 if (dump_enabled_p ())
2220 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2221 "Peeling for outer loop is not supported\n");
2222 return false;
2224 int cmp = compare_step_with_zero (vinfo, stmt_info);
2225 if (cmp < 0)
2227 if (single_element_p)
2228 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2229 only correct for single element "interleaving" SLP. */
2230 *memory_access_type = get_negative_load_store_type
2231 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2232 else
2234 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2235 separated by the stride, until we have a complete vector.
2236 Fall back to scalar accesses if that isn't possible. */
2237 if (multiple_p (nunits, group_size))
2238 *memory_access_type = VMAT_STRIDED_SLP;
2239 else
2240 *memory_access_type = VMAT_ELEMENTWISE;
2243 else
2245 gcc_assert (!loop_vinfo || cmp > 0);
2246 *memory_access_type = VMAT_CONTIGUOUS;
2250 else
2252 /* We can always handle this case using elementwise accesses,
2253 but see if something more efficient is available. */
2254 *memory_access_type = VMAT_ELEMENTWISE;
2256 /* If there is a gap at the end of the group then these optimizations
2257 would access excess elements in the last iteration. */
2258 bool would_overrun_p = (gap != 0);
2259 /* An overrun is fine if the trailing elements are smaller than the
2260 alignment boundary B. Every vector access will be a multiple of B
2261 and so we are guaranteed to access a non-gap element in the
2262 same B-sized block. */
2263 if (would_overrun_p
2264 && !masked_p
2265 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2266 / vect_get_scalar_dr_size (first_dr_info)))
2267 would_overrun_p = false;
2269 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2270 && (can_overrun_p || !would_overrun_p)
2271 && compare_step_with_zero (vinfo, stmt_info) > 0)
2273 /* First cope with the degenerate case of a single-element
2274 vector. */
2275 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2278 /* Otherwise try using LOAD/STORE_LANES. */
2279 else if (vls_type == VLS_LOAD
2280 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2281 : vect_store_lanes_supported (vectype, group_size,
2282 masked_p))
2284 *memory_access_type = VMAT_LOAD_STORE_LANES;
2285 overrun_p = would_overrun_p;
2288 /* If that fails, try using permuting loads. */
2289 else if (vls_type == VLS_LOAD
2290 ? vect_grouped_load_supported (vectype, single_element_p,
2291 group_size)
2292 : vect_grouped_store_supported (vectype, group_size))
2294 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2295 overrun_p = would_overrun_p;
2299 /* As a last resort, try using a gather load or scatter store.
2301 ??? Although the code can handle all group sizes correctly,
2302 it probably isn't a win to use separate strided accesses based
2303 on nearby locations. Or, even if it's a win over scalar code,
2304 it might not be a win over vectorizing at a lower VF, if that
2305 allows us to use contiguous accesses. */
2306 if (*memory_access_type == VMAT_ELEMENTWISE
2307 && single_element_p
2308 && loop_vinfo
2309 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2310 masked_p, gs_info))
2311 *memory_access_type = VMAT_GATHER_SCATTER;
2314 if (*memory_access_type == VMAT_GATHER_SCATTER
2315 || *memory_access_type == VMAT_ELEMENTWISE)
2317 *alignment_support_scheme = dr_unaligned_supported;
2318 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2320 else
2322 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2323 *alignment_support_scheme
2324 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2325 *misalignment);
2328 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2330 /* STMT is the leader of the group. Check the operands of all the
2331 stmts of the group. */
2332 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2333 while (next_stmt_info)
2335 tree op = vect_get_store_rhs (next_stmt_info);
2336 enum vect_def_type dt;
2337 if (!vect_is_simple_use (op, vinfo, &dt))
2339 if (dump_enabled_p ())
2340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2341 "use not simple.\n");
2342 return false;
2344 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2348 if (overrun_p)
2350 gcc_assert (can_overrun_p);
2351 if (dump_enabled_p ())
2352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2353 "Data access with gaps requires scalar "
2354 "epilogue loop\n");
2355 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2358 return true;
2361 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2362 if there is a memory access type that the vectorized form can use,
2363 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2364 or scatters, fill in GS_INFO accordingly. In addition
2365 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2366 the target does not support the alignment scheme. *MISALIGNMENT
2367 is set according to the alignment of the access (including
2368 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2370 SLP says whether we're performing SLP rather than loop vectorization.
2371 MASKED_P is true if the statement is conditional on a vectorized mask.
2372 VECTYPE is the vector type that the vectorized statements will use.
2373 NCOPIES is the number of vector statements that will be needed. */
2375 static bool
2376 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2377 tree vectype, slp_tree slp_node,
2378 bool masked_p, vec_load_store_type vls_type,
2379 unsigned int ncopies,
2380 vect_memory_access_type *memory_access_type,
2381 poly_int64 *poffset,
2382 dr_alignment_support *alignment_support_scheme,
2383 int *misalignment,
2384 gather_scatter_info *gs_info)
2386 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2387 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2388 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2389 *poffset = 0;
2390 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2392 *memory_access_type = VMAT_GATHER_SCATTER;
2393 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2394 gcc_unreachable ();
2395 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2396 &gs_info->offset_dt,
2397 &gs_info->offset_vectype))
2399 if (dump_enabled_p ())
2400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2401 "%s index use not simple.\n",
2402 vls_type == VLS_LOAD ? "gather" : "scatter");
2403 return false;
2405 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2407 if (vls_type != VLS_LOAD)
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2411 "unsupported emulated scatter.\n");
2412 return false;
2414 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2415 || !TYPE_VECTOR_SUBPARTS
2416 (gs_info->offset_vectype).is_constant ()
2417 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2418 (gs_info->offset_vectype),
2419 TYPE_VECTOR_SUBPARTS (vectype)))
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2423 "unsupported vector types for emulated "
2424 "gather.\n");
2425 return false;
2428 /* Gather-scatter accesses perform only component accesses, alignment
2429 is irrelevant for them. */
2430 *alignment_support_scheme = dr_unaligned_supported;
2432 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2434 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2435 masked_p,
2436 vls_type, memory_access_type, poffset,
2437 alignment_support_scheme,
2438 misalignment, gs_info))
2439 return false;
2441 else if (STMT_VINFO_STRIDED_P (stmt_info))
2443 gcc_assert (!slp_node);
2444 if (loop_vinfo
2445 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2446 masked_p, gs_info))
2447 *memory_access_type = VMAT_GATHER_SCATTER;
2448 else
2449 *memory_access_type = VMAT_ELEMENTWISE;
2450 /* Alignment is irrelevant here. */
2451 *alignment_support_scheme = dr_unaligned_supported;
2453 else
2455 int cmp = compare_step_with_zero (vinfo, stmt_info);
2456 if (cmp == 0)
2458 gcc_assert (vls_type == VLS_LOAD);
2459 *memory_access_type = VMAT_INVARIANT;
2460 /* Invariant accesses perform only component accesses, alignment
2461 is irrelevant for them. */
2462 *alignment_support_scheme = dr_unaligned_supported;
2464 else
2466 if (cmp < 0)
2467 *memory_access_type = get_negative_load_store_type
2468 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2469 else
2470 *memory_access_type = VMAT_CONTIGUOUS;
2471 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2472 vectype, *poffset);
2473 *alignment_support_scheme
2474 = vect_supportable_dr_alignment (vinfo,
2475 STMT_VINFO_DR_INFO (stmt_info),
2476 vectype, *misalignment);
2480 if ((*memory_access_type == VMAT_ELEMENTWISE
2481 || *memory_access_type == VMAT_STRIDED_SLP)
2482 && !nunits.is_constant ())
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2486 "Not using elementwise accesses due to variable "
2487 "vectorization factor.\n");
2488 return false;
2491 if (*alignment_support_scheme == dr_unaligned_unsupported)
2493 if (dump_enabled_p ())
2494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2495 "unsupported unaligned access\n");
2496 return false;
2499 /* FIXME: At the moment the cost model seems to underestimate the
2500 cost of using elementwise accesses. This check preserves the
2501 traditional behavior until that can be fixed. */
2502 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2503 if (!first_stmt_info)
2504 first_stmt_info = stmt_info;
2505 if (*memory_access_type == VMAT_ELEMENTWISE
2506 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2507 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2508 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2509 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2511 if (dump_enabled_p ())
2512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2513 "not falling back to elementwise accesses\n");
2514 return false;
2516 return true;
2519 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2520 conditional operation STMT_INFO. When returning true, store the mask
2521 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2522 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2523 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2525 static bool
2526 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2527 slp_tree slp_node, unsigned mask_index,
2528 tree *mask, slp_tree *mask_node,
2529 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2531 enum vect_def_type mask_dt;
2532 tree mask_vectype;
2533 slp_tree mask_node_1;
2534 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2535 mask, &mask_node_1, &mask_dt, &mask_vectype))
2537 if (dump_enabled_p ())
2538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2539 "mask use not simple.\n");
2540 return false;
2543 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2545 if (dump_enabled_p ())
2546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2547 "mask argument is not a boolean.\n");
2548 return false;
2551 /* If the caller is not prepared to adjust an external/constant
2552 SLP mask vector type, fail. */
2553 if (slp_node
2554 && !mask_node
2555 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2557 if (dump_enabled_p ())
2558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2559 "SLP mask argument is not vectorized.\n");
2560 return false;
2563 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2564 if (!mask_vectype)
2565 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2567 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2569 if (dump_enabled_p ())
2570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2571 "could not find an appropriate vector mask type.\n");
2572 return false;
2575 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2576 TYPE_VECTOR_SUBPARTS (vectype)))
2578 if (dump_enabled_p ())
2579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2580 "vector mask type %T"
2581 " does not match vector data type %T.\n",
2582 mask_vectype, vectype);
2584 return false;
2587 *mask_dt_out = mask_dt;
2588 *mask_vectype_out = mask_vectype;
2589 if (mask_node)
2590 *mask_node = mask_node_1;
2591 return true;
2594 /* Return true if stored value RHS is suitable for vectorizing store
2595 statement STMT_INFO. When returning true, store the type of the
2596 definition in *RHS_DT_OUT, the type of the vectorized store value in
2597 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2599 static bool
2600 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2601 slp_tree slp_node, tree rhs,
2602 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2603 vec_load_store_type *vls_type_out)
2605 /* If this is a store from a constant, make sure
2606 native_encode_expr can handle it. */
2607 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2609 if (dump_enabled_p ())
2610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2611 "cannot encode constant as a byte sequence.\n");
2612 return false;
2615 unsigned op_no = 0;
2616 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2618 if (gimple_call_internal_p (call)
2619 && internal_store_fn_p (gimple_call_internal_fn (call)))
2620 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2623 enum vect_def_type rhs_dt;
2624 tree rhs_vectype;
2625 slp_tree slp_op;
2626 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2627 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2629 if (dump_enabled_p ())
2630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2631 "use not simple.\n");
2632 return false;
2635 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2636 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2638 if (dump_enabled_p ())
2639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2640 "incompatible vector types.\n");
2641 return false;
2644 *rhs_dt_out = rhs_dt;
2645 *rhs_vectype_out = rhs_vectype;
2646 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2647 *vls_type_out = VLS_STORE_INVARIANT;
2648 else
2649 *vls_type_out = VLS_STORE;
2650 return true;
2653 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2654 Note that we support masks with floating-point type, in which case the
2655 floats are interpreted as a bitmask. */
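/* For example, for a V4SF mask type this broadcasts the float whose
   representation has every bit set (built via real_from_target below);
   only the bit pattern of the mask is significant, not its numeric
   value.  */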
2657 static tree
2658 vect_build_all_ones_mask (vec_info *vinfo,
2659 stmt_vec_info stmt_info, tree masktype)
2661 if (TREE_CODE (masktype) == INTEGER_TYPE)
2662 return build_int_cst (masktype, -1);
2663 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2665 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2666 mask = build_vector_from_val (masktype, mask);
2667 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2669 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2671 REAL_VALUE_TYPE r;
2672 long tmp[6];
2673 for (int j = 0; j < 6; ++j)
2674 tmp[j] = -1;
2675 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2676 tree mask = build_real (TREE_TYPE (masktype), r);
2677 mask = build_vector_from_val (masktype, mask);
2678 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2680 gcc_unreachable ();
2683 /* Build an all-zero merge value of type VECTYPE while vectorizing
2684 STMT_INFO as a gather load. */
2686 static tree
2687 vect_build_zero_merge_argument (vec_info *vinfo,
2688 stmt_vec_info stmt_info, tree vectype)
2690 tree merge;
2691 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2692 merge = build_int_cst (TREE_TYPE (vectype), 0);
2693 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2695 REAL_VALUE_TYPE r;
2696 long tmp[6];
2697 for (int j = 0; j < 6; ++j)
2698 tmp[j] = 0;
2699 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2700 merge = build_real (TREE_TYPE (vectype), r);
2702 else
2703 gcc_unreachable ();
2704 merge = build_vector_from_val (vectype, merge);
2705 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2708 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2709 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2710 the gather load operation. If the load is conditional, MASK is the
2711 unvectorized condition and MASK_DT is its definition type, otherwise
2712 MASK is null. */
2714 static void
2715 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2716 gimple_stmt_iterator *gsi,
2717 gimple **vec_stmt,
2718 gather_scatter_info *gs_info,
2719 tree mask)
2721 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2722 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2723 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2724 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2725 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2726 edge pe = loop_preheader_edge (loop);
2727 enum { NARROW, NONE, WIDEN } modifier;
2728 poly_uint64 gather_off_nunits
2729 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2731 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2732 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2733 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2734 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2735 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2736 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2737 tree scaletype = TREE_VALUE (arglist);
2738 tree real_masktype = masktype;
2739 gcc_checking_assert (types_compatible_p (srctype, rettype)
2740 && (!mask
2741 || TREE_CODE (masktype) == INTEGER_TYPE
2742 || types_compatible_p (srctype, masktype)));
2743 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2744 masktype = truth_type_for (srctype);
2746 tree mask_halftype = masktype;
2747 tree perm_mask = NULL_TREE;
2748 tree mask_perm_mask = NULL_TREE;
2749 if (known_eq (nunits, gather_off_nunits))
2750 modifier = NONE;
2751 else if (known_eq (nunits * 2, gather_off_nunits))
2753 modifier = WIDEN;
2755 /* Currently widening gathers and scatters are only supported for
2756 fixed-length vectors. */
2757 int count = gather_off_nunits.to_constant ();
2758 vec_perm_builder sel (count, count, 1);
2759 for (int i = 0; i < count; ++i)
2760 sel.quick_push (i | (count / 2));
2762 vec_perm_indices indices (sel, 1, count);
2763 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2764 indices);
2766 else if (known_eq (nunits, gather_off_nunits * 2))
2768 modifier = NARROW;
2770 /* Currently narrowing gathers and scatters are only supported for
2771 fixed-length vectors. */
2772 int count = nunits.to_constant ();
2773 vec_perm_builder sel (count, count, 1);
2774 sel.quick_grow (count);
2775 for (int i = 0; i < count; ++i)
2776 sel[i] = i < count / 2 ? i : i + count / 2;
2777 vec_perm_indices indices (sel, 2, count);
2778 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2780 ncopies *= 2;
2782 if (mask && masktype == real_masktype)
2784 for (int i = 0; i < count; ++i)
2785 sel[i] = i | (count / 2);
2786 indices.new_vector (sel, 2, count);
2787 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2789 else if (mask)
2790 mask_halftype = truth_type_for (gs_info->offset_vectype);
2792 else
2793 gcc_unreachable ();
2795 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2796 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2798 tree ptr = fold_convert (ptrtype, gs_info->base);
2799 if (!is_gimple_min_invariant (ptr))
2801 gimple_seq seq;
2802 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2803 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2804 gcc_assert (!new_bb);
2807 tree scale = build_int_cst (scaletype, gs_info->scale);
2809 tree vec_oprnd0 = NULL_TREE;
2810 tree vec_mask = NULL_TREE;
2811 tree src_op = NULL_TREE;
2812 tree mask_op = NULL_TREE;
2813 tree prev_res = NULL_TREE;
2815 if (!mask)
2817 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2818 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2821 auto_vec<tree> vec_oprnds0;
2822 auto_vec<tree> vec_masks;
2823 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2824 modifier == WIDEN ? ncopies / 2 : ncopies,
2825 gs_info->offset, &vec_oprnds0);
2826 if (mask)
2827 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2828 modifier == NARROW ? ncopies / 2 : ncopies,
2829 mask, &vec_masks, masktype);
2830 for (int j = 0; j < ncopies; ++j)
2832 tree op, var;
2833 if (modifier == WIDEN && (j & 1))
2834 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2835 perm_mask, stmt_info, gsi);
2836 else
2837 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2839 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2841 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2842 TYPE_VECTOR_SUBPARTS (idxtype)));
2843 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2844 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2845 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2846 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2847 op = var;
2850 if (mask)
2852 if (mask_perm_mask && (j & 1))
2853 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2854 mask_perm_mask, stmt_info, gsi);
2855 else
2857 if (modifier == NARROW)
2859 if ((j & 1) == 0)
2860 vec_mask = vec_masks[j / 2];
2862 else
2863 vec_mask = vec_masks[j];
2865 mask_op = vec_mask;
2866 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2868 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2869 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2870 gcc_assert (known_eq (sub1, sub2));
2871 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2872 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2873 gassign *new_stmt
2874 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2875 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2876 mask_op = var;
2879 if (modifier == NARROW && masktype != real_masktype)
2881 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2882 gassign *new_stmt
2883 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2884 : VEC_UNPACK_LO_EXPR,
2885 mask_op);
2886 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2887 mask_op = var;
2889 src_op = mask_op;
2892 tree mask_arg = mask_op;
2893 if (masktype != real_masktype)
2895 tree utype, optype = TREE_TYPE (mask_op);
2896 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2897 utype = real_masktype;
2898 else
2899 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2900 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2901 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2902 gassign *new_stmt
2903 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2904 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2905 mask_arg = var;
2906 if (!useless_type_conversion_p (real_masktype, utype))
2908 gcc_assert (TYPE_PRECISION (utype)
2909 <= TYPE_PRECISION (real_masktype));
2910 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2911 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2912 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2913 mask_arg = var;
2915 src_op = build_zero_cst (srctype);
2917 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2918 mask_arg, scale);
2920 if (!useless_type_conversion_p (vectype, rettype))
2922 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2923 TYPE_VECTOR_SUBPARTS (rettype)));
2924 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2925 gimple_call_set_lhs (new_stmt, op);
2926 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2927 var = make_ssa_name (vec_dest);
2928 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2929 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2930 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2932 else
2934 var = make_ssa_name (vec_dest, new_stmt);
2935 gimple_call_set_lhs (new_stmt, var);
2936 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2939 if (modifier == NARROW)
2941 if ((j & 1) == 0)
2943 prev_res = var;
2944 continue;
2946 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2947 stmt_info, gsi);
2948 new_stmt = SSA_NAME_DEF_STMT (var);
2951 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2953 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2956 /* Prepare the base and offset in GS_INFO for vectorization.
2957 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2958 to the vectorized offset argument for the first copy of STMT_INFO.
2959 STMT_INFO is the statement described by GS_INFO and LOOP is the
2960 containing loop. */
2962 static void
2963 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2964 class loop *loop, stmt_vec_info stmt_info,
2965 slp_tree slp_node, gather_scatter_info *gs_info,
2966 tree *dataref_ptr, vec<tree> *vec_offset)
2968 gimple_seq stmts = NULL;
2969 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2970 if (stmts != NULL)
2972 basic_block new_bb;
2973 edge pe = loop_preheader_edge (loop);
2974 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2975 gcc_assert (!new_bb);
2977 if (slp_node)
2978 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2979 else
2981 unsigned ncopies
2982 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2983 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2984 gs_info->offset, vec_offset,
2985 gs_info->offset_vectype);
2989 /* Prepare to implement a grouped or strided load or store using
2990 the gather load or scatter store operation described by GS_INFO.
2991 STMT_INFO is the load or store statement.
2993 Set *DATAREF_BUMP to the amount that should be added to the base
2994 address after each copy of the vectorized statement. Set *VEC_OFFSET
2995 to an invariant offset vector in which element I has the value
2996 I * DR_STEP / SCALE. */
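/* Worked example (illustrative): for DR_STEP 8, a V4SI vector and
   SCALE 4, *DATAREF_BUMP is 8 * 4 = 32 bytes per copy and *VEC_OFFSET
   is the series { 0, 2, 4, 6, ... } in the offset vector type.  */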
2998 static void
2999 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3000 loop_vec_info loop_vinfo,
3001 gather_scatter_info *gs_info,
3002 tree *dataref_bump, tree *vec_offset)
3004 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3005 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3007 tree bump = size_binop (MULT_EXPR,
3008 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3009 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3010 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3012 /* The offset given in GS_INFO can have pointer type, so use the element
3013 type of the vector instead. */
3014 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3016 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3017 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3018 ssize_int (gs_info->scale));
3019 step = fold_convert (offset_type, step);
3021 /* Create {0, X, X*2, X*3, ...}. */
3022 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3023 build_zero_cst (offset_type), step);
3024 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3027 /* Return the amount that should be added to a vector pointer to move
3028 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3029 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3030 vectorization. */
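/* E.g. for a contiguous access with AGGR_TYPE V4SI the increment is
   16 bytes, negated below when the scalar step is negative so that the
   pointer walks backwards through memory.  */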
3032 static tree
3033 vect_get_data_ptr_increment (vec_info *vinfo,
3034 dr_vec_info *dr_info, tree aggr_type,
3035 vect_memory_access_type memory_access_type)
3037 if (memory_access_type == VMAT_INVARIANT)
3038 return size_zero_node;
3040 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3041 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3042 if (tree_int_cst_sgn (step) == -1)
3043 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3044 return iv_step;
3047 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3049 static bool
3050 vectorizable_bswap (vec_info *vinfo,
3051 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3052 gimple **vec_stmt, slp_tree slp_node,
3053 slp_tree *slp_op,
3054 tree vectype_in, stmt_vector_for_cost *cost_vec)
3056 tree op, vectype;
3057 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3058 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3059 unsigned ncopies;
3061 op = gimple_call_arg (stmt, 0);
3062 vectype = STMT_VINFO_VECTYPE (stmt_info);
3063 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3065 /* Multiple types in SLP are handled by creating the appropriate number of
3066 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3067 case of SLP. */
3068 if (slp_node)
3069 ncopies = 1;
3070 else
3071 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3073 gcc_assert (ncopies >= 1);
3075 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3076 if (! char_vectype)
3077 return false;
3079 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3080 unsigned word_bytes;
3081 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3082 return false;
3084 /* The encoding uses one stepped pattern for each byte in the word. */
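   /* E.g. for a 32-bit bswap (word_bytes == 4) the selected byte order is
      { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... }, i.e. the bytes of
      each word reversed; the stepped encoding extends this to the whole
      vector.  */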
3085 vec_perm_builder elts (num_bytes, word_bytes, 3);
3086 for (unsigned i = 0; i < 3; ++i)
3087 for (unsigned j = 0; j < word_bytes; ++j)
3088 elts.quick_push ((i + 1) * word_bytes - j - 1);
3090 vec_perm_indices indices (elts, 1, num_bytes);
3091 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3092 return false;
3094 if (! vec_stmt)
3096 if (slp_node
3097 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3099 if (dump_enabled_p ())
3100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3101 "incompatible vector types for invariants\n");
3102 return false;
3105 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3106 DUMP_VECT_SCOPE ("vectorizable_bswap");
3107 record_stmt_cost (cost_vec,
3108 1, vector_stmt, stmt_info, 0, vect_prologue);
3109 record_stmt_cost (cost_vec,
3110 slp_node
3111 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3112 vec_perm, stmt_info, 0, vect_body);
3113 return true;
3116 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3118 /* Transform. */
3119 vec<tree> vec_oprnds = vNULL;
3120 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3121 op, &vec_oprnds);
3122 /* Arguments are ready. Create the new vector stmt. */
3123 unsigned i;
3124 tree vop;
3125 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3127 gimple *new_stmt;
3128 tree tem = make_ssa_name (char_vectype);
3129 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3130 char_vectype, vop));
3131 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3132 tree tem2 = make_ssa_name (char_vectype);
3133 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3134 tem, tem, bswap_vconst);
3135 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3136 tem = make_ssa_name (vectype);
3137 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3138 vectype, tem2));
3139 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3140 if (slp_node)
3141 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3142 else
3143 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3146 if (!slp_node)
3147 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3149 vec_oprnds.release ();
3150 return true;
3153 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3154 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3155 in a single step. On success, store the binary pack code in
3156 *CONVERT_CODE. */
3158 static bool
3159 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3160 tree_code *convert_code)
3162 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3163 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3164 return false;
3166 tree_code code;
3167 int multi_step_cvt = 0;
3168 auto_vec <tree, 8> interm_types;
3169 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3170 &code, &multi_step_cvt, &interm_types)
3171 || multi_step_cvt)
3172 return false;
3174 *convert_code = code;
3175 return true;
3178 /* Function vectorizable_call.
3180 Check if STMT_INFO performs a function call that can be vectorized.
3181 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3182 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3183 Return true if STMT_INFO is vectorizable in this way. */
3185 static bool
3186 vectorizable_call (vec_info *vinfo,
3187 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3188 gimple **vec_stmt, slp_tree slp_node,
3189 stmt_vector_for_cost *cost_vec)
3191 gcall *stmt;
3192 tree vec_dest;
3193 tree scalar_dest;
3194 tree op;
3195 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3196 tree vectype_out, vectype_in;
3197 poly_uint64 nunits_in;
3198 poly_uint64 nunits_out;
3199 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3200 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3201 tree fndecl, new_temp, rhs_type;
3202 enum vect_def_type dt[4]
3203 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3204 vect_unknown_def_type };
3205 tree vectypes[ARRAY_SIZE (dt)] = {};
3206 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3207 int ndts = ARRAY_SIZE (dt);
3208 int ncopies, j;
3209 auto_vec<tree, 8> vargs;
3210 enum { NARROW, NONE, WIDEN } modifier;
3211 size_t i, nargs;
3212 tree lhs;
3214 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3215 return false;
3217 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3218 && ! vec_stmt)
3219 return false;
3221 /* Is STMT_INFO a vectorizable call? */
3222 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3223 if (!stmt)
3224 return false;
3226 if (gimple_call_internal_p (stmt)
3227 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3228 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3229 /* Handled by vectorizable_load and vectorizable_store. */
3230 return false;
3232 if (gimple_call_lhs (stmt) == NULL_TREE
3233 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3234 return false;
3236 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3238 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3240 /* Process function arguments. */
3241 rhs_type = NULL_TREE;
3242 vectype_in = NULL_TREE;
3243 nargs = gimple_call_num_args (stmt);
3245 /* Bail out if the function has more than four arguments; we do not have
3246 interesting builtin functions to vectorize with more than two arguments,
3247 except for fma. Calls with no arguments are not supported either. */
3248 if (nargs == 0 || nargs > 4)
3249 return false;
3251 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3252 combined_fn cfn = gimple_call_combined_fn (stmt);
3253 if (cfn == CFN_GOMP_SIMD_LANE)
3255 nargs = 0;
3256 rhs_type = unsigned_type_node;
3259 int mask_opno = -1;
3260 if (internal_fn_p (cfn))
3261 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3263 for (i = 0; i < nargs; i++)
3265 if ((int) i == mask_opno)
3267 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3268 &op, &slp_op[i], &dt[i], &vectypes[i]))
3269 return false;
3270 continue;
3273 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3274 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3276 if (dump_enabled_p ())
3277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3278 "use not simple.\n");
3279 return false;
3282 /* We can only handle calls with arguments of the same type. */
3283 if (rhs_type
3284 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3286 if (dump_enabled_p ())
3287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3288 "argument types differ.\n");
3289 return false;
3291 if (!rhs_type)
3292 rhs_type = TREE_TYPE (op);
3294 if (!vectype_in)
3295 vectype_in = vectypes[i];
3296 else if (vectypes[i]
3297 && !types_compatible_p (vectypes[i], vectype_in))
3299 if (dump_enabled_p ())
3300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3301 "argument vector types differ.\n");
3302 return false;
3305 /* If all arguments are external or constant defs, infer the vector type
3306 from the scalar type. */
3307 if (!vectype_in)
3308 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3309 if (vec_stmt)
3310 gcc_assert (vectype_in);
3311 if (!vectype_in)
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3315 "no vectype for scalar type %T\n", rhs_type);
3317 return false;
3319 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3320 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3321 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3322 by a pack of the two vectors into an SI vector. We would need
3323 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3324 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3326 if (dump_enabled_p ())
3327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3328 "mismatched vector sizes %T and %T\n",
3329 vectype_in, vectype_out);
3330 return false;
3333 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3334 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3336 if (dump_enabled_p ())
3337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3338 "mixed mask and nonmask vector types\n");
3339 return false;
3342 /* FORNOW */
3343 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3344 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3345 if (known_eq (nunits_in * 2, nunits_out))
3346 modifier = NARROW;
3347 else if (known_eq (nunits_out, nunits_in))
3348 modifier = NONE;
3349 else if (known_eq (nunits_out * 2, nunits_in))
3350 modifier = WIDEN;
3351 else
3352 return false;
3354 /* We only handle functions that do not read or clobber memory. */
3355 if (gimple_vuse (stmt))
3357 if (dump_enabled_p ())
3358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3359 "function reads from or writes to memory.\n");
3360 return false;
3363 /* For now, we only vectorize functions if a target specific builtin
3364 is available. TODO -- in some cases, it might be profitable to
3365 insert the calls for pieces of the vector, in order to be able
3366 to vectorize other operations in the loop. */
3367 fndecl = NULL_TREE;
3368 internal_fn ifn = IFN_LAST;
3369 tree callee = gimple_call_fndecl (stmt);
3371 /* First try using an internal function. */
3372 tree_code convert_code = ERROR_MARK;
3373 if (cfn != CFN_LAST
3374 && (modifier == NONE
3375 || (modifier == NARROW
3376 && simple_integer_narrowing (vectype_out, vectype_in,
3377 &convert_code))))
3378 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3379 vectype_in);
3381 /* If that fails, try asking for a target-specific built-in function. */
3382 if (ifn == IFN_LAST)
3384 if (cfn != CFN_LAST)
3385 fndecl = targetm.vectorize.builtin_vectorized_function
3386 (cfn, vectype_out, vectype_in);
3387 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3388 fndecl = targetm.vectorize.builtin_md_vectorized_function
3389 (callee, vectype_out, vectype_in);
3392 if (ifn == IFN_LAST && !fndecl)
3394 if (cfn == CFN_GOMP_SIMD_LANE
3395 && !slp_node
3396 && loop_vinfo
3397 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3398 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3399 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3400 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3402 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3403 { 0, 1, 2, ... vf - 1 } vector. */
3404 gcc_assert (nargs == 0);
3406 else if (modifier == NONE
3407 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3408 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3409 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3410 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3411 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3412 slp_op, vectype_in, cost_vec);
3413 else
3415 if (dump_enabled_p ())
3416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3417 "function is not vectorizable.\n");
3418 return false;
3422 if (slp_node)
3423 ncopies = 1;
3424 else if (modifier == NARROW && ifn == IFN_LAST)
3425 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3426 else
3427 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3429 /* Sanity check: make sure that at least one copy of the vectorized stmt
3430 needs to be generated. */
3431 gcc_assert (ncopies >= 1);
3433 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3434 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3435 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3436 if (!vec_stmt) /* transformation not required. */
3438 if (slp_node)
3439 for (i = 0; i < nargs; ++i)
3440 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3442 if (dump_enabled_p ())
3443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3444 "incompatible vector types for invariants\n");
3445 return false;
3447 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3448 DUMP_VECT_SCOPE ("vectorizable_call");
3449 vect_model_simple_cost (vinfo, stmt_info,
3450 ncopies, dt, ndts, slp_node, cost_vec);
3451 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3452 record_stmt_cost (cost_vec, ncopies / 2,
3453 vec_promote_demote, stmt_info, 0, vect_body);
3455 if (loop_vinfo
3456 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3457 && (reduc_idx >= 0 || mask_opno >= 0))
3459 if (reduc_idx >= 0
3460 && (cond_fn == IFN_LAST
3461 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3462 OPTIMIZE_FOR_SPEED)))
3464 if (dump_enabled_p ())
3465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3466 "can't use a fully-masked loop because no"
3467 " conditional operation is available.\n");
3468 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3470 else
3472 unsigned int nvectors
3473 = (slp_node
3474 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3475 : ncopies);
3476 tree scalar_mask = NULL_TREE;
3477 if (mask_opno >= 0)
3478 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3479 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3480 vectype_out, scalar_mask);
3483 return true;
3486 /* Transform. */
3488 if (dump_enabled_p ())
3489 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3491 /* Handle def. */
3492 scalar_dest = gimple_call_lhs (stmt);
3493 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3495 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3496 unsigned int vect_nargs = nargs;
3497 if (masked_loop_p && reduc_idx >= 0)
3499 ifn = cond_fn;
3500 vect_nargs += 2;
3503 if (modifier == NONE || ifn != IFN_LAST)
3505 tree prev_res = NULL_TREE;
3506 vargs.safe_grow (vect_nargs, true);
3507 auto_vec<vec<tree> > vec_defs (nargs);
3508 for (j = 0; j < ncopies; ++j)
3510 /* Build argument list for the vectorized call. */
3511 if (slp_node)
3513 vec<tree> vec_oprnds0;
3515 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3516 vec_oprnds0 = vec_defs[0];
3518 /* Arguments are ready. Create the new vector stmt. */
3519 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3521 int varg = 0;
3522 if (masked_loop_p && reduc_idx >= 0)
3524 unsigned int vec_num = vec_oprnds0.length ();
3525 /* Always true for SLP. */
3526 gcc_assert (ncopies == 1);
3527 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3528 vectype_out, i);
3530 size_t k;
3531 for (k = 0; k < nargs; k++)
3533 vec<tree> vec_oprndsk = vec_defs[k];
3534 vargs[varg++] = vec_oprndsk[i];
3536 if (masked_loop_p && reduc_idx >= 0)
3537 vargs[varg++] = vargs[reduc_idx + 1];
3538 gimple *new_stmt;
3539 if (modifier == NARROW)
3541 /* We don't define any narrowing conditional functions
3542 at present. */
3543 gcc_assert (mask_opno < 0);
3544 tree half_res = make_ssa_name (vectype_in);
3545 gcall *call
3546 = gimple_build_call_internal_vec (ifn, vargs);
3547 gimple_call_set_lhs (call, half_res);
3548 gimple_call_set_nothrow (call, true);
3549 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3550 if ((i & 1) == 0)
3552 prev_res = half_res;
3553 continue;
3555 new_temp = make_ssa_name (vec_dest);
3556 new_stmt = gimple_build_assign (new_temp, convert_code,
3557 prev_res, half_res);
3558 vect_finish_stmt_generation (vinfo, stmt_info,
3559 new_stmt, gsi);
3561 else
3563 if (mask_opno >= 0 && masked_loop_p)
3565 unsigned int vec_num = vec_oprnds0.length ();
3566 /* Always true for SLP. */
3567 gcc_assert (ncopies == 1);
3568 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3569 vectype_out, i);
3570 vargs[mask_opno] = prepare_load_store_mask
3571 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3574 gcall *call;
3575 if (ifn != IFN_LAST)
3576 call = gimple_build_call_internal_vec (ifn, vargs);
3577 else
3578 call = gimple_build_call_vec (fndecl, vargs);
3579 new_temp = make_ssa_name (vec_dest, call);
3580 gimple_call_set_lhs (call, new_temp);
3581 gimple_call_set_nothrow (call, true);
3582 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3583 new_stmt = call;
3585 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3587 continue;
3590 int varg = 0;
3591 if (masked_loop_p && reduc_idx >= 0)
3592 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3593 vectype_out, j);
3594 for (i = 0; i < nargs; i++)
3596 op = gimple_call_arg (stmt, i);
3597 if (j == 0)
3599 vec_defs.quick_push (vNULL);
3600 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3601 op, &vec_defs[i],
3602 vectypes[i]);
3604 vargs[varg++] = vec_defs[i][j];
3606 if (masked_loop_p && reduc_idx >= 0)
3607 vargs[varg++] = vargs[reduc_idx + 1];
3609 if (mask_opno >= 0 && masked_loop_p)
3611 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3612 vectype_out, j);
3613 vargs[mask_opno]
3614 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3615 vargs[mask_opno], gsi);
3618 gimple *new_stmt;
3619 if (cfn == CFN_GOMP_SIMD_LANE)
3621 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3622 tree new_var
3623 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3624 gimple *init_stmt = gimple_build_assign (new_var, cst);
3625 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3626 new_temp = make_ssa_name (vec_dest);
3627 new_stmt = gimple_build_assign (new_temp, new_var);
3628 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3630 else if (modifier == NARROW)
3632 /* We don't define any narrowing conditional functions at
3633 present. */
3634 gcc_assert (mask_opno < 0);
3635 tree half_res = make_ssa_name (vectype_in);
3636 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3637 gimple_call_set_lhs (call, half_res);
3638 gimple_call_set_nothrow (call, true);
3639 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3640 if ((j & 1) == 0)
3642 prev_res = half_res;
3643 continue;
3645 new_temp = make_ssa_name (vec_dest);
3646 new_stmt = gimple_build_assign (new_temp, convert_code,
3647 prev_res, half_res);
3648 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3650 else
3652 gcall *call;
3653 if (ifn != IFN_LAST)
3654 call = gimple_build_call_internal_vec (ifn, vargs);
3655 else
3656 call = gimple_build_call_vec (fndecl, vargs);
3657 new_temp = make_ssa_name (vec_dest, call);
3658 gimple_call_set_lhs (call, new_temp);
3659 gimple_call_set_nothrow (call, true);
3660 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3661 new_stmt = call;
3664 if (j == (modifier == NARROW ? 1 : 0))
3665 *vec_stmt = new_stmt;
3666 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3668 for (i = 0; i < nargs; i++)
3670 vec<tree> vec_oprndsi = vec_defs[i];
3671 vec_oprndsi.release ();
3674 else if (modifier == NARROW)
3676 auto_vec<vec<tree> > vec_defs (nargs);
3677 /* We don't define any narrowing conditional functions at present. */
3678 gcc_assert (mask_opno < 0);
3679 for (j = 0; j < ncopies; ++j)
3681 /* Build argument list for the vectorized call. */
3682 if (j == 0)
3683 vargs.create (nargs * 2);
3684 else
3685 vargs.truncate (0);
3687 if (slp_node)
3689 vec<tree> vec_oprnds0;
3691 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3692 vec_oprnds0 = vec_defs[0];
3694 /* Arguments are ready. Create the new vector stmt. */
3695 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3697 size_t k;
3698 vargs.truncate (0);
3699 for (k = 0; k < nargs; k++)
3701 vec<tree> vec_oprndsk = vec_defs[k];
3702 vargs.quick_push (vec_oprndsk[i]);
3703 vargs.quick_push (vec_oprndsk[i + 1]);
3705 gcall *call;
3706 if (ifn != IFN_LAST)
3707 call = gimple_build_call_internal_vec (ifn, vargs);
3708 else
3709 call = gimple_build_call_vec (fndecl, vargs);
3710 new_temp = make_ssa_name (vec_dest, call);
3711 gimple_call_set_lhs (call, new_temp);
3712 gimple_call_set_nothrow (call, true);
3713 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3714 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3716 continue;
3719 for (i = 0; i < nargs; i++)
3721 op = gimple_call_arg (stmt, i);
3722 if (j == 0)
3724 vec_defs.quick_push (vNULL);
3725 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3726 op, &vec_defs[i], vectypes[i]);
3728 vec_oprnd0 = vec_defs[i][2*j];
3729 vec_oprnd1 = vec_defs[i][2*j+1];
3731 vargs.quick_push (vec_oprnd0);
3732 vargs.quick_push (vec_oprnd1);
3735 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3736 new_temp = make_ssa_name (vec_dest, new_stmt);
3737 gimple_call_set_lhs (new_stmt, new_temp);
3738 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3740 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3743 if (!slp_node)
3744 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3746 for (i = 0; i < nargs; i++)
3748 vec<tree> vec_oprndsi = vec_defs[i];
3749 vec_oprndsi.release ();
3752 else
3753 /* No current target implements this case. */
3754 return false;
3756 vargs.release ();
3758 /* The call in STMT might prevent it from being removed in dce.
3759 However, we cannot remove it here, due to the way the ssa name
3760 it defines is mapped to the new definition. So just replace the
3761 rhs of the statement with something harmless. */
3763 if (slp_node)
3764 return true;
3766 stmt_info = vect_orig_stmt (stmt_info);
3767 lhs = gimple_get_lhs (stmt_info->stmt);
3769 gassign *new_stmt
3770 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3771 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3773 return true;
3777 struct simd_call_arg_info
3779 tree vectype;
3780 tree op;
3781 HOST_WIDE_INT linear_step;
3782 enum vect_def_type dt;
3783 unsigned int align;
3784 bool simd_lane_linear;
3787 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3788 is linear within simd lane (but not within whole loop), note it in
3789 *ARGINFO. */
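/* A minimal sketch of the pattern recognized here, where simduid.0 is
   the loop's simduid (extra arguments and conversions omitted):

     _1 = .GOMP_SIMD_LANE (simduid.0_5);
     _2 = _1 * 4;
     op_3 = &array + _2;

   For op_3 this records ARGINFO->op = &array, ARGINFO->linear_step = 4
   and ARGINFO->simd_lane_linear = true.  */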
3791 static void
3792 vect_simd_lane_linear (tree op, class loop *loop,
3793 struct simd_call_arg_info *arginfo)
3795 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3797 if (!is_gimple_assign (def_stmt)
3798 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3799 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3800 return;
3802 tree base = gimple_assign_rhs1 (def_stmt);
3803 HOST_WIDE_INT linear_step = 0;
3804 tree v = gimple_assign_rhs2 (def_stmt);
3805 while (TREE_CODE (v) == SSA_NAME)
3807 tree t;
3808 def_stmt = SSA_NAME_DEF_STMT (v);
3809 if (is_gimple_assign (def_stmt))
3810 switch (gimple_assign_rhs_code (def_stmt))
3812 case PLUS_EXPR:
3813 t = gimple_assign_rhs2 (def_stmt);
3814 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3815 return;
3816 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3817 v = gimple_assign_rhs1 (def_stmt);
3818 continue;
3819 case MULT_EXPR:
3820 t = gimple_assign_rhs2 (def_stmt);
3821 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3822 return;
3823 linear_step = tree_to_shwi (t);
3824 v = gimple_assign_rhs1 (def_stmt);
3825 continue;
3826 CASE_CONVERT:
3827 t = gimple_assign_rhs1 (def_stmt);
3828 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3829 || (TYPE_PRECISION (TREE_TYPE (v))
3830 < TYPE_PRECISION (TREE_TYPE (t))))
3831 return;
3832 if (!linear_step)
3833 linear_step = 1;
3834 v = t;
3835 continue;
3836 default:
3837 return;
3839 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3840 && loop->simduid
3841 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3842 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3843 == loop->simduid))
3845 if (!linear_step)
3846 linear_step = 1;
3847 arginfo->linear_step = linear_step;
3848 arginfo->op = base;
3849 arginfo->simd_lane_linear = true;
3850 return;
3855 /* Return the number of elements in vector type VECTYPE, which is associated
3856 with a SIMD clone. At present these vectors always have a constant
3857 length. */
3859 static unsigned HOST_WIDE_INT
3860 simd_clone_subparts (tree vectype)
3862 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3865 /* Function vectorizable_simd_clone_call.
3867 Check if STMT_INFO performs a function call that can be vectorized
3868 by calling a simd clone of the function.
3869 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3870 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3871 Return true if STMT_INFO is vectorizable in this way. */
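/* As a rough example, given

     #pragma omp declare simd simdlen(4) notinbranch
     int foo (int x);

   a use a[i] = foo (b[i]) in a loop vectorized with VF 4 can be replaced
   by a single call per vector iteration to the simdlen-4 clone of foo
   (mangled e.g. _ZGVbN4v_foo on x86), which takes and returns V4SI
   vectors.  The code below picks the most suitable clone and builds the
   vector arguments it expects.  */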
3873 static bool
3874 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3875 gimple_stmt_iterator *gsi,
3876 gimple **vec_stmt, slp_tree slp_node,
3877 stmt_vector_for_cost *)
3879 tree vec_dest;
3880 tree scalar_dest;
3881 tree op, type;
3882 tree vec_oprnd0 = NULL_TREE;
3883 tree vectype;
3884 poly_uint64 nunits;
3885 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3886 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3887 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3888 tree fndecl, new_temp;
3889 int ncopies, j;
3890 auto_vec<simd_call_arg_info> arginfo;
3891 vec<tree> vargs = vNULL;
3892 size_t i, nargs;
3893 tree lhs, rtype, ratype;
3894 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3896 /* Is STMT a vectorizable call? */
3897 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3898 if (!stmt)
3899 return false;
3901 fndecl = gimple_call_fndecl (stmt);
3902 if (fndecl == NULL_TREE)
3903 return false;
3905 struct cgraph_node *node = cgraph_node::get (fndecl);
3906 if (node == NULL || node->simd_clones == NULL)
3907 return false;
3909 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3910 return false;
3912 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3913 && ! vec_stmt)
3914 return false;
3916 if (gimple_call_lhs (stmt)
3917 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3918 return false;
3920 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3922 vectype = STMT_VINFO_VECTYPE (stmt_info);
3924 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3925 return false;
3927 /* FORNOW */
3928 if (slp_node)
3929 return false;
3931 /* Process function arguments. */
3932 nargs = gimple_call_num_args (stmt);
3934 /* Bail out if the function has zero arguments. */
3935 if (nargs == 0)
3936 return false;
3938 arginfo.reserve (nargs, true);
3940 for (i = 0; i < nargs; i++)
3942 simd_call_arg_info thisarginfo;
3943 affine_iv iv;
3945 thisarginfo.linear_step = 0;
3946 thisarginfo.align = 0;
3947 thisarginfo.op = NULL_TREE;
3948 thisarginfo.simd_lane_linear = false;
3950 op = gimple_call_arg (stmt, i);
3951 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3952 &thisarginfo.vectype)
3953 || thisarginfo.dt == vect_uninitialized_def)
3955 if (dump_enabled_p ())
3956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3957 "use not simple.\n");
3958 return false;
3961 if (thisarginfo.dt == vect_constant_def
3962 || thisarginfo.dt == vect_external_def)
3963 gcc_assert (thisarginfo.vectype == NULL_TREE);
3964 else
3966 gcc_assert (thisarginfo.vectype != NULL_TREE);
3967 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3969 if (dump_enabled_p ())
3970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3971 "vector mask arguments are not supported\n");
3972 return false;
3976 /* For linear arguments, the analyze phase should have saved
3977 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3978 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3979 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3981 gcc_assert (vec_stmt);
3982 thisarginfo.linear_step
3983 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3984 thisarginfo.op
3985 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3986 thisarginfo.simd_lane_linear
3987 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3988 == boolean_true_node);
3989 /* If loop has been peeled for alignment, we need to adjust it. */
3990 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3991 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3992 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3994 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3995 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3996 tree opt = TREE_TYPE (thisarginfo.op);
3997 bias = fold_convert (TREE_TYPE (step), bias);
3998 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3999 thisarginfo.op
4000 = fold_build2 (POINTER_TYPE_P (opt)
4001 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4002 thisarginfo.op, bias);
4005 else if (!vec_stmt
4006 && thisarginfo.dt != vect_constant_def
4007 && thisarginfo.dt != vect_external_def
4008 && loop_vinfo
4009 && TREE_CODE (op) == SSA_NAME
4010 && simple_iv (loop, loop_containing_stmt (stmt), op,
4011 &iv, false)
4012 && tree_fits_shwi_p (iv.step))
4014 thisarginfo.linear_step = tree_to_shwi (iv.step);
4015 thisarginfo.op = iv.base;
4017 else if ((thisarginfo.dt == vect_constant_def
4018 || thisarginfo.dt == vect_external_def)
4019 && POINTER_TYPE_P (TREE_TYPE (op)))
4020 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4021 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4022 linear too. */
4023 if (POINTER_TYPE_P (TREE_TYPE (op))
4024 && !thisarginfo.linear_step
4025 && !vec_stmt
4026 && thisarginfo.dt != vect_constant_def
4027 && thisarginfo.dt != vect_external_def
4028 && loop_vinfo
4029 && !slp_node
4030 && TREE_CODE (op) == SSA_NAME)
4031 vect_simd_lane_linear (op, loop, &thisarginfo);
4033 arginfo.quick_push (thisarginfo);
4036 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4037 if (!vf.is_constant ())
4039 if (dump_enabled_p ())
4040 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4041 "not considering SIMD clones; not yet supported"
4042 " for variable-width vectors.\n");
4043 return false;
4046 unsigned int badness = 0;
4047 struct cgraph_node *bestn = NULL;
4048 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4049 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4050 else
4051 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4052 n = n->simdclone->next_clone)
4054 unsigned int this_badness = 0;
4055 unsigned int num_calls;
4056 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4057 || n->simdclone->nargs != nargs)
4058 continue;
4059 if (num_calls != 1)
4060 this_badness += exact_log2 (num_calls) * 4096;
4061 if (n->simdclone->inbranch)
4062 this_badness += 8192;
4063 int target_badness = targetm.simd_clone.usable (n);
4064 if (target_badness < 0)
4065 continue;
4066 this_badness += target_badness * 512;
4067 /* FORNOW: code to pass the mask argument still has to be added, so skip in-branch clones. */
4068 if (n->simdclone->inbranch)
4069 continue;
4070 for (i = 0; i < nargs; i++)
4072 switch (n->simdclone->args[i].arg_type)
4074 case SIMD_CLONE_ARG_TYPE_VECTOR:
4075 if (!useless_type_conversion_p
4076 (n->simdclone->args[i].orig_type,
4077 TREE_TYPE (gimple_call_arg (stmt, i))))
4078 i = -1;
4079 else if (arginfo[i].dt == vect_constant_def
4080 || arginfo[i].dt == vect_external_def
4081 || arginfo[i].linear_step)
4082 this_badness += 64;
4083 break;
4084 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4085 if (arginfo[i].dt != vect_constant_def
4086 && arginfo[i].dt != vect_external_def)
4087 i = -1;
4088 break;
4089 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4090 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4091 if (arginfo[i].dt == vect_constant_def
4092 || arginfo[i].dt == vect_external_def
4093 || (arginfo[i].linear_step
4094 != n->simdclone->args[i].linear_step))
4095 i = -1;
4096 break;
4097 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4098 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4099 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4100 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4101 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4102 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4103 /* FORNOW */
4104 i = -1;
4105 break;
4106 case SIMD_CLONE_ARG_TYPE_MASK:
4107 gcc_unreachable ();
4109 if (i == (size_t) -1)
4110 break;
4111 if (n->simdclone->args[i].alignment > arginfo[i].align)
4113 i = -1;
4114 break;
4116 if (arginfo[i].align)
4117 this_badness += (exact_log2 (arginfo[i].align)
4118 - exact_log2 (n->simdclone->args[i].alignment));
4120 if (i == (size_t) -1)
4121 continue;
4122 if (bestn == NULL || this_badness < badness)
4124 bestn = n;
4125 badness = this_badness;
4129 if (bestn == NULL)
4130 return false;
4132 for (i = 0; i < nargs; i++)
4133 if ((arginfo[i].dt == vect_constant_def
4134 || arginfo[i].dt == vect_external_def)
4135 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4137 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4138 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4139 slp_node);
4140 if (arginfo[i].vectype == NULL
4141 || !constant_multiple_p (bestn->simdclone->simdlen,
4142 simd_clone_subparts (arginfo[i].vectype)))
4143 return false;
4146 fndecl = bestn->decl;
4147 nunits = bestn->simdclone->simdlen;
4148 ncopies = vector_unroll_factor (vf, nunits);
4150 /* If the function isn't const, only allow it in simd loops where the
4151 user has asserted that at least nunits consecutive iterations can be
4152 performed using SIMD instructions. */
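/* E.g. a simdlen-8 clone of a non-const function is only usable inside
   a loop annotated with "#pragma omp simd safelen(8)" (or larger), since
   that annotation is what asserts that eight consecutive iterations may
   be executed concurrently.  */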
4153 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4154 && gimple_vuse (stmt))
4155 return false;
4157 /* Sanity check: make sure that at least one copy of the vectorized stmt
4158 needs to be generated. */
4159 gcc_assert (ncopies >= 1);
4161 if (!vec_stmt) /* transformation not required. */
4163 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4164 for (i = 0; i < nargs; i++)
4165 if ((bestn->simdclone->args[i].arg_type
4166 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4167 || (bestn->simdclone->args[i].arg_type
4168 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4170 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4171 + 1,
4172 true);
4173 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4174 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4175 ? size_type_node : TREE_TYPE (arginfo[i].op);
4176 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4177 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4178 tree sll = arginfo[i].simd_lane_linear
4179 ? boolean_true_node : boolean_false_node;
4180 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4182 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4183 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4184 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4185 dt, slp_node, cost_vec); */
4186 return true;
4189 /* Transform. */
4191 if (dump_enabled_p ())
4192 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4194 /* Handle def. */
4195 scalar_dest = gimple_call_lhs (stmt);
4196 vec_dest = NULL_TREE;
4197 rtype = NULL_TREE;
4198 ratype = NULL_TREE;
4199 if (scalar_dest)
4201 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4202 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4203 if (TREE_CODE (rtype) == ARRAY_TYPE)
4205 ratype = rtype;
4206 rtype = TREE_TYPE (ratype);
4210 auto_vec<vec<tree> > vec_oprnds;
4211 auto_vec<unsigned> vec_oprnds_i;
4212 vec_oprnds.safe_grow_cleared (nargs, true);
4213 vec_oprnds_i.safe_grow_cleared (nargs, true);
4214 for (j = 0; j < ncopies; ++j)
4216 /* Build argument list for the vectorized call. */
4217 if (j == 0)
4218 vargs.create (nargs);
4219 else
4220 vargs.truncate (0);
4222 for (i = 0; i < nargs; i++)
4224 unsigned int k, l, m, o;
4225 tree atype;
4226 op = gimple_call_arg (stmt, i);
4227 switch (bestn->simdclone->args[i].arg_type)
4229 case SIMD_CLONE_ARG_TYPE_VECTOR:
4230 atype = bestn->simdclone->args[i].vector_type;
4231 o = vector_unroll_factor (nunits,
4232 simd_clone_subparts (atype));
4233 for (m = j * o; m < (j + 1) * o; m++)
4235 if (simd_clone_subparts (atype)
4236 < simd_clone_subparts (arginfo[i].vectype))
4238 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4239 k = (simd_clone_subparts (arginfo[i].vectype)
4240 / simd_clone_subparts (atype));
4241 gcc_assert ((k & (k - 1)) == 0);
4242 if (m == 0)
4244 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4245 ncopies * o / k, op,
4246 &vec_oprnds[i]);
4247 vec_oprnds_i[i] = 0;
4248 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4250 else
4252 vec_oprnd0 = arginfo[i].op;
4253 if ((m & (k - 1)) == 0)
4254 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4256 arginfo[i].op = vec_oprnd0;
4257 vec_oprnd0
4258 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4259 bitsize_int (prec),
4260 bitsize_int ((m & (k - 1)) * prec));
4261 gassign *new_stmt
4262 = gimple_build_assign (make_ssa_name (atype),
4263 vec_oprnd0);
4264 vect_finish_stmt_generation (vinfo, stmt_info,
4265 new_stmt, gsi);
4266 vargs.safe_push (gimple_assign_lhs (new_stmt));
4268 else
4270 k = (simd_clone_subparts (atype)
4271 / simd_clone_subparts (arginfo[i].vectype));
4272 gcc_assert ((k & (k - 1)) == 0);
4273 vec<constructor_elt, va_gc> *ctor_elts;
4274 if (k != 1)
4275 vec_alloc (ctor_elts, k);
4276 else
4277 ctor_elts = NULL;
4278 for (l = 0; l < k; l++)
4280 if (m == 0 && l == 0)
4282 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4283 k * o * ncopies,
4285 &vec_oprnds[i]);
4286 vec_oprnds_i[i] = 0;
4287 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4289 else
4290 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4291 arginfo[i].op = vec_oprnd0;
4292 if (k == 1)
4293 break;
4294 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4295 vec_oprnd0);
4297 if (k == 1)
4298 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4299 atype))
4301 vec_oprnd0
4302 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4303 gassign *new_stmt
4304 = gimple_build_assign (make_ssa_name (atype),
4305 vec_oprnd0);
4306 vect_finish_stmt_generation (vinfo, stmt_info,
4307 new_stmt, gsi);
4308 vargs.safe_push (gimple_assign_lhs (new_stmt));
4310 else
4311 vargs.safe_push (vec_oprnd0);
4312 else
4314 vec_oprnd0 = build_constructor (atype, ctor_elts);
4315 gassign *new_stmt
4316 = gimple_build_assign (make_ssa_name (atype),
4317 vec_oprnd0);
4318 vect_finish_stmt_generation (vinfo, stmt_info,
4319 new_stmt, gsi);
4320 vargs.safe_push (gimple_assign_lhs (new_stmt));
4324 break;
4325 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4326 vargs.safe_push (op);
4327 break;
4328 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4329 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4330 if (j == 0)
4332 gimple_seq stmts;
4333 arginfo[i].op
4334 = force_gimple_operand (unshare_expr (arginfo[i].op),
4335 &stmts, true, NULL_TREE);
4336 if (stmts != NULL)
4338 basic_block new_bb;
4339 edge pe = loop_preheader_edge (loop);
4340 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4341 gcc_assert (!new_bb);
4343 if (arginfo[i].simd_lane_linear)
4345 vargs.safe_push (arginfo[i].op);
4346 break;
4348 tree phi_res = copy_ssa_name (op);
4349 gphi *new_phi = create_phi_node (phi_res, loop->header);
4350 add_phi_arg (new_phi, arginfo[i].op,
4351 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4352 enum tree_code code
4353 = POINTER_TYPE_P (TREE_TYPE (op))
4354 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4355 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4356 ? sizetype : TREE_TYPE (op);
4357 poly_widest_int cst
4358 = wi::mul (bestn->simdclone->args[i].linear_step,
4359 ncopies * nunits);
4360 tree tcst = wide_int_to_tree (type, cst);
4361 tree phi_arg = copy_ssa_name (op);
4362 gassign *new_stmt
4363 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4364 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4365 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4366 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4367 UNKNOWN_LOCATION);
4368 arginfo[i].op = phi_res;
4369 vargs.safe_push (phi_res);
4371 else
4373 enum tree_code code
4374 = POINTER_TYPE_P (TREE_TYPE (op))
4375 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4376 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4377 ? sizetype : TREE_TYPE (op);
4378 poly_widest_int cst
4379 = wi::mul (bestn->simdclone->args[i].linear_step,
4380 j * nunits);
4381 tree tcst = wide_int_to_tree (type, cst);
4382 new_temp = make_ssa_name (TREE_TYPE (op));
4383 gassign *new_stmt
4384 = gimple_build_assign (new_temp, code,
4385 arginfo[i].op, tcst);
4386 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4387 vargs.safe_push (new_temp);
4389 break;
4390 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4391 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4392 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4393 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4394 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4395 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4396 default:
4397 gcc_unreachable ();
4401 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4402 if (vec_dest)
4404 gcc_assert (ratype
4405 || known_eq (simd_clone_subparts (rtype), nunits));
4406 if (ratype)
4407 new_temp = create_tmp_var (ratype);
4408 else if (useless_type_conversion_p (vectype, rtype))
4409 new_temp = make_ssa_name (vec_dest, new_call);
4410 else
4411 new_temp = make_ssa_name (rtype, new_call);
4412 gimple_call_set_lhs (new_call, new_temp);
4414 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4415 gimple *new_stmt = new_call;
4417 if (vec_dest)
4419 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4421 unsigned int k, l;
4422 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4423 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4424 k = vector_unroll_factor (nunits,
4425 simd_clone_subparts (vectype));
4426 gcc_assert ((k & (k - 1)) == 0);
4427 for (l = 0; l < k; l++)
4429 tree t;
4430 if (ratype)
4432 t = build_fold_addr_expr (new_temp);
4433 t = build2 (MEM_REF, vectype, t,
4434 build_int_cst (TREE_TYPE (t), l * bytes));
4436 else
4437 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4438 bitsize_int (prec), bitsize_int (l * prec));
4439 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4440 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4442 if (j == 0 && l == 0)
4443 *vec_stmt = new_stmt;
4444 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4447 if (ratype)
4448 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4449 continue;
4451 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4453 unsigned int k = (simd_clone_subparts (vectype)
4454 / simd_clone_subparts (rtype));
4455 gcc_assert ((k & (k - 1)) == 0);
4456 if ((j & (k - 1)) == 0)
4457 vec_alloc (ret_ctor_elts, k);
4458 if (ratype)
4460 unsigned int m, o;
4461 o = vector_unroll_factor (nunits,
4462 simd_clone_subparts (rtype));
4463 for (m = 0; m < o; m++)
4465 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4466 size_int (m), NULL_TREE, NULL_TREE);
4467 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4468 tem);
4469 vect_finish_stmt_generation (vinfo, stmt_info,
4470 new_stmt, gsi);
4471 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4472 gimple_assign_lhs (new_stmt));
4474 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4476 else
4477 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4478 if ((j & (k - 1)) != k - 1)
4479 continue;
4480 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4481 new_stmt
4482 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4483 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4485 if ((unsigned) j == k - 1)
4486 *vec_stmt = new_stmt;
4487 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4488 continue;
4490 else if (ratype)
4492 tree t = build_fold_addr_expr (new_temp);
4493 t = build2 (MEM_REF, vectype, t,
4494 build_int_cst (TREE_TYPE (t), 0));
4495 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4496 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4497 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4499 else if (!useless_type_conversion_p (vectype, rtype))
4501 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4502 new_stmt
4503 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4504 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4508 if (j == 0)
4509 *vec_stmt = new_stmt;
4510 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4513 for (i = 0; i < nargs; ++i)
4515 vec<tree> oprndsi = vec_oprnds[i];
4516 oprndsi.release ();
4518 vargs.release ();
4520 /* The call in STMT might prevent it from being removed in dce.
4521 However, we cannot remove it here, due to the way the ssa name
4522 it defines is mapped to the new definition. So just replace the
4523 rhs of the statement with something harmless. */
4525 if (slp_node)
4526 return true;
4528 gimple *new_stmt;
4529 if (scalar_dest)
4531 type = TREE_TYPE (scalar_dest);
4532 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4533 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4535 else
4536 new_stmt = gimple_build_nop ();
4537 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4538 unlink_stmt_vdef (stmt);
4540 return true;
4544 /* Function vect_gen_widened_results_half
4546 Create a vector stmt whose code, number of operands, and result
4547 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4548 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4549 OP_TYPE distinguishes unary from binary operations; for a unary
4550 operation VEC_OPRND1 is ignored.
4551 STMT_INFO is the original scalar stmt that we are vectorizing. */
4553 static gimple *
4554 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4555 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4556 tree vec_dest, gimple_stmt_iterator *gsi,
4557 stmt_vec_info stmt_info)
4559 gimple *new_stmt;
4560 tree new_temp;
4562 /* Generate half of the widened result: */
4563 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4564 if (op_type != binary_op)
4565 vec_oprnd1 = NULL;
4566 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4567 new_temp = make_ssa_name (vec_dest, new_stmt);
4568 gimple_assign_set_lhs (new_stmt, new_temp);
4569 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4571 return new_stmt;
4575 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4576 For multi-step conversions store the resulting vectors and call the function
4577 recursively. */
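/* Schematically, for a single demotion step with 128-bit vectors each
   pair of V4SI operands in VEC_OPRNDS is combined into one V8HI result:

     vect_out = VEC_PACK_TRUNC_EXPR <vect_in0, vect_in1>;

   For multi-step conversions the results are stored back into VEC_OPRNDS
   and packed again by the recursive call.  */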
4579 static void
4580 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4581 int multi_step_cvt,
4582 stmt_vec_info stmt_info,
4583 vec<tree> &vec_dsts,
4584 gimple_stmt_iterator *gsi,
4585 slp_tree slp_node, enum tree_code code)
4587 unsigned int i;
4588 tree vop0, vop1, new_tmp, vec_dest;
4590 vec_dest = vec_dsts.pop ();
4592 for (i = 0; i < vec_oprnds->length (); i += 2)
4594 /* Create demotion operation. */
4595 vop0 = (*vec_oprnds)[i];
4596 vop1 = (*vec_oprnds)[i + 1];
4597 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4598 new_tmp = make_ssa_name (vec_dest, new_stmt);
4599 gimple_assign_set_lhs (new_stmt, new_tmp);
4600 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4602 if (multi_step_cvt)
4603 /* Store the resulting vector for next recursive call. */
4604 (*vec_oprnds)[i/2] = new_tmp;
4605 else
4607 /* This is the last step of the conversion sequence. Store the
4608 vectors in SLP_NODE or in vector info of the scalar statement
4609 (or in STMT_VINFO_RELATED_STMT chain). */
4610 if (slp_node)
4611 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4612 else
4613 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4617 /* For multi-step demotion operations we first generate demotion operations
4618 from the source type to the intermediate types, and then combine the
4619 results (stored in VEC_OPRNDS) with a demotion operation to the destination
4620 type. */
4621 if (multi_step_cvt)
4623 /* At each level of recursion we have half of the operands we had at the
4624 previous level. */
4625 vec_oprnds->truncate ((i+1)/2);
4626 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4627 multi_step_cvt - 1,
4628 stmt_info, vec_dsts, gsi,
4629 slp_node, VEC_PACK_TRUNC_EXPR);
4632 vec_dsts.quick_push (vec_dest);
4636 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4637 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4638 STMT_INFO. For multi-step conversions store the resulting vectors and
4639 call the function recursively. */
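/* Schematically, each input vector (pair) produces two output vectors;
   e.g. for a short * short -> int widening multiply with 128-bit vectors
   the two halves are roughly

     vect_lo = VEC_WIDEN_MULT_LO_EXPR <vect_a, vect_b>;   V8HI -> V4SI
     vect_hi = VEC_WIDEN_MULT_HI_EXPR <vect_a, vect_b>;

   where CODE1/CODE2 are the lo/hi codes chosen by
   supportable_widening_operation.  */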
4641 static void
4642 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4643 vec<tree> *vec_oprnds0,
4644 vec<tree> *vec_oprnds1,
4645 stmt_vec_info stmt_info, tree vec_dest,
4646 gimple_stmt_iterator *gsi,
4647 enum tree_code code1,
4648 enum tree_code code2, int op_type)
4650 int i;
4651 tree vop0, vop1, new_tmp1, new_tmp2;
4652 gimple *new_stmt1, *new_stmt2;
4653 vec<tree> vec_tmp = vNULL;
4655 vec_tmp.create (vec_oprnds0->length () * 2);
4656 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4658 if (op_type == binary_op)
4659 vop1 = (*vec_oprnds1)[i];
4660 else
4661 vop1 = NULL_TREE;
4663 /* Generate the two halves of promotion operation. */
4664 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4665 op_type, vec_dest, gsi,
4666 stmt_info);
4667 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4668 op_type, vec_dest, gsi,
4669 stmt_info);
4670 if (is_gimple_call (new_stmt1))
4672 new_tmp1 = gimple_call_lhs (new_stmt1);
4673 new_tmp2 = gimple_call_lhs (new_stmt2);
4675 else
4677 new_tmp1 = gimple_assign_lhs (new_stmt1);
4678 new_tmp2 = gimple_assign_lhs (new_stmt2);
4681 /* Store the results for the next step. */
4682 vec_tmp.quick_push (new_tmp1);
4683 vec_tmp.quick_push (new_tmp2);
4686 vec_oprnds0->release ();
4687 *vec_oprnds0 = vec_tmp;
4690 /* Create vectorized promotion stmts for widening stmts using only half the
4691 potential vector size for input. */
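/* E.g. for a WIDEN_PLUS_EXPR whose input and output vectors have the
   same number of lanes (say V4HI + V4HI -> V4SI), the inputs are first
   widened to V4SI with a NOP_EXPR conversion and the addition is then
   performed on the widened vectors instead of unpacking lo/hi halves.  */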
4692 static void
4693 vect_create_half_widening_stmts (vec_info *vinfo,
4694 vec<tree> *vec_oprnds0,
4695 vec<tree> *vec_oprnds1,
4696 stmt_vec_info stmt_info, tree vec_dest,
4697 gimple_stmt_iterator *gsi,
4698 enum tree_code code1,
4699 int op_type)
4701 int i;
4702 tree vop0, vop1;
4703 gimple *new_stmt1;
4704 gimple *new_stmt2;
4705 gimple *new_stmt3;
4706 vec<tree> vec_tmp = vNULL;
4708 vec_tmp.create (vec_oprnds0->length ());
4709 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4711 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4713 gcc_assert (op_type == binary_op);
4714 vop1 = (*vec_oprnds1)[i];
4716 /* Widen the first vector input. */
4717 out_type = TREE_TYPE (vec_dest);
4718 new_tmp1 = make_ssa_name (out_type);
4719 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4720 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4721 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4723 /* Widen the second vector input. */
4724 new_tmp2 = make_ssa_name (out_type);
4725 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4726 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4727 /* Perform the operation. With both vector inputs widened. */
4728 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4730 else
4732 /* Perform the operation. With the single vector input widened. */
4733 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4736 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4737 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4738 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4740 /* Store the results for the next step. */
4741 vec_tmp.quick_push (new_tmp3);
4744 vec_oprnds0->release ();
4745 *vec_oprnds0 = vec_tmp;
4749 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4750 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4751 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4752 Return true if STMT_INFO is vectorizable in this way. */
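/* For instance, with 128-bit vectors:
     short -> int   (V8HI -> two V4SI)   is a WIDEN conversion,
     int -> short   (two V4SI -> V8HI)   is a NARROW conversion,
     float -> int   (V4SF -> V4SI)       needs no modifier (NONE),
   and multi-step cases such as char -> int go through intermediate types
   recorded in INTERM_TYPES.  This only illustrates the classification;
   the target hooks queried below decide what is actually supported.  */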
4754 static bool
4755 vectorizable_conversion (vec_info *vinfo,
4756 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4757 gimple **vec_stmt, slp_tree slp_node,
4758 stmt_vector_for_cost *cost_vec)
4760 tree vec_dest;
4761 tree scalar_dest;
4762 tree op0, op1 = NULL_TREE;
4763 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4764 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4765 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4766 tree new_temp;
4767 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4768 int ndts = 2;
4769 poly_uint64 nunits_in;
4770 poly_uint64 nunits_out;
4771 tree vectype_out, vectype_in;
4772 int ncopies, i;
4773 tree lhs_type, rhs_type;
4774 enum { NARROW, NONE, WIDEN } modifier;
4775 vec<tree> vec_oprnds0 = vNULL;
4776 vec<tree> vec_oprnds1 = vNULL;
4777 tree vop0;
4778 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4779 int multi_step_cvt = 0;
4780 vec<tree> interm_types = vNULL;
4781 tree intermediate_type, cvt_type = NULL_TREE;
4782 int op_type;
4783 unsigned short fltsz;
4785 /* Is STMT a vectorizable conversion? */
4787 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4788 return false;
4790 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4791 && ! vec_stmt)
4792 return false;
4794 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4795 if (!stmt)
4796 return false;
4798 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4799 return false;
4801 code = gimple_assign_rhs_code (stmt);
4802 if (!CONVERT_EXPR_CODE_P (code)
4803 && code != FIX_TRUNC_EXPR
4804 && code != FLOAT_EXPR
4805 && code != WIDEN_PLUS_EXPR
4806 && code != WIDEN_MINUS_EXPR
4807 && code != WIDEN_MULT_EXPR
4808 && code != WIDEN_LSHIFT_EXPR)
4809 return false;
4811 bool widen_arith = (code == WIDEN_PLUS_EXPR
4812 || code == WIDEN_MINUS_EXPR
4813 || code == WIDEN_MULT_EXPR
4814 || code == WIDEN_LSHIFT_EXPR);
4815 op_type = TREE_CODE_LENGTH (code);
4817 /* Check types of lhs and rhs. */
4818 scalar_dest = gimple_assign_lhs (stmt);
4819 lhs_type = TREE_TYPE (scalar_dest);
4820 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4822 /* Check the operands of the operation. */
4823 slp_tree slp_op0, slp_op1 = NULL;
4824 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4825 0, &op0, &slp_op0, &dt[0], &vectype_in))
4827 if (dump_enabled_p ())
4828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4829 "use not simple.\n");
4830 return false;
4833 rhs_type = TREE_TYPE (op0);
4834 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4835 && !((INTEGRAL_TYPE_P (lhs_type)
4836 && INTEGRAL_TYPE_P (rhs_type))
4837 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4838 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4839 return false;
4841 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4842 && ((INTEGRAL_TYPE_P (lhs_type)
4843 && !type_has_mode_precision_p (lhs_type))
4844 || (INTEGRAL_TYPE_P (rhs_type)
4845 && !type_has_mode_precision_p (rhs_type))))
4847 if (dump_enabled_p ())
4848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4849 "type conversion to/from bit-precision unsupported."
4850 "\n");
4851 return false;
4854 if (op_type == binary_op)
4856 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4857 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4859 op1 = gimple_assign_rhs2 (stmt);
4860 tree vectype1_in;
4861 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4862 &op1, &slp_op1, &dt[1], &vectype1_in))
4864 if (dump_enabled_p ())
4865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4866 "use not simple.\n");
4867 return false;
4869 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4870 OP1. */
4871 if (!vectype_in)
4872 vectype_in = vectype1_in;
4875 /* If op0 is an external or constant def, infer the vector type
4876 from the scalar type. */
4877 if (!vectype_in)
4878 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4879 if (vec_stmt)
4880 gcc_assert (vectype_in);
4881 if (!vectype_in)
4883 if (dump_enabled_p ())
4884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4885 "no vectype for scalar type %T\n", rhs_type);
4887 return false;
4890 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4891 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4893 if (dump_enabled_p ())
4894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4895 "can't convert between boolean and non "
4896 "boolean vectors %T\n", rhs_type);
4898 return false;
4901 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4902 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4903 if (known_eq (nunits_out, nunits_in))
4904 if (widen_arith)
4905 modifier = WIDEN;
4906 else
4907 modifier = NONE;
4908 else if (multiple_p (nunits_out, nunits_in))
4909 modifier = NARROW;
4910 else
4912 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4913 modifier = WIDEN;
4916 /* Multiple types in SLP are handled by creating the appropriate number of
4917 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4918 case of SLP. */
4919 if (slp_node)
4920 ncopies = 1;
4921 else if (modifier == NARROW)
4922 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4923 else
4924 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4926 /* Sanity check: make sure that at least one copy of the vectorized stmt
4927 needs to be generated. */
4928 gcc_assert (ncopies >= 1);
4930 bool found_mode = false;
4931 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4932 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4933 opt_scalar_mode rhs_mode_iter;
4935 /* Supportable by target? */
4936 switch (modifier)
4938 case NONE:
4939 if (code != FIX_TRUNC_EXPR
4940 && code != FLOAT_EXPR
4941 && !CONVERT_EXPR_CODE_P (code))
4942 return false;
4943 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4944 break;
4945 /* FALLTHRU */
4946 unsupported:
4947 if (dump_enabled_p ())
4948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4949 "conversion not supported by target.\n");
4950 return false;
4952 case WIDEN:
4953 if (known_eq (nunits_in, nunits_out))
4955 if (!supportable_half_widening_operation (code, vectype_out,
4956 vectype_in, &code1))
4957 goto unsupported;
4958 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4959 break;
4961 if (supportable_widening_operation (vinfo, code, stmt_info,
4962 vectype_out, vectype_in, &code1,
4963 &code2, &multi_step_cvt,
4964 &interm_types))
4966 /* Binary widening operation can only be supported directly by the
4967 architecture. */
4968 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4969 break;
4972 if (code != FLOAT_EXPR
4973 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4974 goto unsupported;
4976 fltsz = GET_MODE_SIZE (lhs_mode);
4977 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4979 rhs_mode = rhs_mode_iter.require ();
4980 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4981 break;
4983 cvt_type
4984 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4985 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4986 if (cvt_type == NULL_TREE)
4987 goto unsupported;
4989 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4991 if (!supportable_convert_operation (code, vectype_out,
4992 cvt_type, &codecvt1))
4993 goto unsupported;
4995 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4996 vectype_out, cvt_type,
4997 &codecvt1, &codecvt2,
4998 &multi_step_cvt,
4999 &interm_types))
5000 continue;
5001 else
5002 gcc_assert (multi_step_cvt == 0);
5004 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5005 cvt_type,
5006 vectype_in, &code1, &code2,
5007 &multi_step_cvt, &interm_types))
5009 found_mode = true;
5010 break;
5014 if (!found_mode)
5015 goto unsupported;
5017 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5018 codecvt2 = ERROR_MARK;
5019 else
5021 multi_step_cvt++;
5022 interm_types.safe_push (cvt_type);
5023 cvt_type = NULL_TREE;
5025 break;
5027 case NARROW:
5028 gcc_assert (op_type == unary_op);
5029 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5030 &code1, &multi_step_cvt,
5031 &interm_types))
5032 break;
5034 if (code != FIX_TRUNC_EXPR
5035 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5036 goto unsupported;
5038 cvt_type
5039 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5040 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5041 if (cvt_type == NULL_TREE)
5042 goto unsupported;
5043 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5044 &codecvt1))
5045 goto unsupported;
5046 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5047 &code1, &multi_step_cvt,
5048 &interm_types))
5049 break;
5050 goto unsupported;
5052 default:
5053 gcc_unreachable ();
5056 if (!vec_stmt) /* transformation not required. */
5058 if (slp_node
5059 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5060 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5062 if (dump_enabled_p ())
5063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5064 "incompatible vector types for invariants\n");
5065 return false;
5067 DUMP_VECT_SCOPE ("vectorizable_conversion");
5068 if (modifier == NONE)
5070 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5071 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5072 cost_vec);
5074 else if (modifier == NARROW)
5076 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5077 /* The final packing step produces one vector result per copy. */
5078 unsigned int nvectors
5079 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5080 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5081 multi_step_cvt, cost_vec,
5082 widen_arith);
5084 else
5086 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5087 /* The initial unpacking step produces two vector results
5088 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5089 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5090 unsigned int nvectors
5091 = (slp_node
5092 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5093 : ncopies * 2);
5094 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5095 multi_step_cvt, cost_vec,
5096 widen_arith);
5098 interm_types.release ();
5099 return true;
5102 /* Transform. */
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_NOTE, vect_location,
5105 "transform conversion. ncopies = %d.\n", ncopies);
5107 if (op_type == binary_op)
5109 if (CONSTANT_CLASS_P (op0))
5110 op0 = fold_convert (TREE_TYPE (op1), op0);
5111 else if (CONSTANT_CLASS_P (op1))
5112 op1 = fold_convert (TREE_TYPE (op0), op1);
5115 /* In case of multi-step conversion, we first generate conversion operations
5116 to the intermediate types, and then from those types to the final one.
5117 We create vector destinations for the intermediate type (TYPES) received
5118 from supportable_*_operation, and store them in the correct order
5119 for future use in vect_create_vectorized_*_stmts (). */
5120 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5121 vec_dest = vect_create_destination_var (scalar_dest,
5122 (cvt_type && modifier == WIDEN)
5123 ? cvt_type : vectype_out);
5124 vec_dsts.quick_push (vec_dest);
5126 if (multi_step_cvt)
5128 for (i = interm_types.length () - 1;
5129 interm_types.iterate (i, &intermediate_type); i--)
5131 vec_dest = vect_create_destination_var (scalar_dest,
5132 intermediate_type);
5133 vec_dsts.quick_push (vec_dest);
5137 if (cvt_type)
5138 vec_dest = vect_create_destination_var (scalar_dest,
5139 modifier == WIDEN
5140 ? vectype_out : cvt_type);
5142 int ninputs = 1;
5143 if (!slp_node)
5145 if (modifier == WIDEN)
5147 else if (modifier == NARROW)
5149 if (multi_step_cvt)
5150 ninputs = vect_pow2 (multi_step_cvt);
5151 ninputs *= 2;
5155 switch (modifier)
5157 case NONE:
5158 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5159 op0, &vec_oprnds0);
5160 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5162 /* Arguments are ready, create the new vector stmt. */
5163 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5164 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5165 new_temp = make_ssa_name (vec_dest, new_stmt);
5166 gimple_assign_set_lhs (new_stmt, new_temp);
5167 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5169 if (slp_node)
5170 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5171 else
5172 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5174 break;
5176 case WIDEN:
5177 /* In case the vectorization factor (VF) is bigger than the number
5178 of elements that we can fit in a vectype (nunits), we have to
5179 generate more than one vector stmt, i.e. we need to "unroll"
5180 the vector stmt by a factor VF/nunits. */
5181 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5182 op0, &vec_oprnds0,
5183 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5184 &vec_oprnds1);
5185 if (code == WIDEN_LSHIFT_EXPR)
5187 int oprnds_size = vec_oprnds0.length ();
5188 vec_oprnds1.create (oprnds_size);
5189 for (i = 0; i < oprnds_size; ++i)
5190 vec_oprnds1.quick_push (op1);
5192 /* Arguments are ready. Create the new vector stmts. */
5193 for (i = multi_step_cvt; i >= 0; i--)
5195 tree this_dest = vec_dsts[i];
5196 enum tree_code c1 = code1, c2 = code2;
5197 if (i == 0 && codecvt2 != ERROR_MARK)
5199 c1 = codecvt1;
5200 c2 = codecvt2;
5202 if (known_eq (nunits_out, nunits_in))
5203 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5204 &vec_oprnds1, stmt_info,
5205 this_dest, gsi,
5206 c1, op_type);
5207 else
5208 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5209 &vec_oprnds1, stmt_info,
5210 this_dest, gsi,
5211 c1, c2, op_type);
5214 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5216 gimple *new_stmt;
5217 if (cvt_type)
5219 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5220 new_temp = make_ssa_name (vec_dest);
5221 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5222 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5224 else
5225 new_stmt = SSA_NAME_DEF_STMT (vop0);
5227 if (slp_node)
5228 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5229 else
5230 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5232 break;
5234 case NARROW:
5235 /* In case the vectorization factor (VF) is bigger than the number
5236 of elements that we can fit in a vectype (nunits), we have to
5237 generate more than one vector stmt, i.e. we need to "unroll"
5238 the vector stmt by a factor VF/nunits. */
5239 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5240 op0, &vec_oprnds0);
5241 /* Arguments are ready. Create the new vector stmts. */
5242 if (cvt_type)
5243 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5245 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5246 new_temp = make_ssa_name (vec_dest);
5247 gassign *new_stmt
5248 = gimple_build_assign (new_temp, codecvt1, vop0);
5249 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5250 vec_oprnds0[i] = new_temp;
5253 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5254 multi_step_cvt,
5255 stmt_info, vec_dsts, gsi,
5256 slp_node, code1);
5257 break;
5259 if (!slp_node)
5260 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5262 vec_oprnds0.release ();
5263 vec_oprnds1.release ();
5264 interm_types.release ();
5266 return true;
5269 /* Return true if we can assume from the scalar form of STMT_INFO that
5270 neither the scalar nor the vector forms will generate code. STMT_INFO
5271 is known not to involve a data reference. */
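/* E.g. a plain copy, a VIEW_CONVERT_EXPR, or a conversion between int
   and unsigned int is a nop in this sense, whereas int -> short is not,
   since the truncation requires real code.  */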
5273 bool
5274 vect_nop_conversion_p (stmt_vec_info stmt_info)
5276 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5277 if (!stmt)
5278 return false;
5280 tree lhs = gimple_assign_lhs (stmt);
5281 tree_code code = gimple_assign_rhs_code (stmt);
5282 tree rhs = gimple_assign_rhs1 (stmt);
5284 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5285 return true;
5287 if (CONVERT_EXPR_CODE_P (code))
5288 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5290 return false;
5293 /* Function vectorizable_assignment.
5295 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5296 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5297 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5298 Return true if STMT_INFO is vectorizable in this way. */
5300 static bool
5301 vectorizable_assignment (vec_info *vinfo,
5302 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5303 gimple **vec_stmt, slp_tree slp_node,
5304 stmt_vector_for_cost *cost_vec)
5306 tree vec_dest;
5307 tree scalar_dest;
5308 tree op;
5309 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5310 tree new_temp;
5311 enum vect_def_type dt[1] = {vect_unknown_def_type};
5312 int ndts = 1;
5313 int ncopies;
5314 int i;
5315 vec<tree> vec_oprnds = vNULL;
5316 tree vop;
5317 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5318 enum tree_code code;
5319 tree vectype_in;
5321 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5322 return false;
5324 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5325 && ! vec_stmt)
5326 return false;
5328 /* Is vectorizable assignment? */
5329 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5330 if (!stmt)
5331 return false;
5333 scalar_dest = gimple_assign_lhs (stmt);
5334 if (TREE_CODE (scalar_dest) != SSA_NAME)
5335 return false;
5337 if (STMT_VINFO_DATA_REF (stmt_info))
5338 return false;
5340 code = gimple_assign_rhs_code (stmt);
5341 if (!(gimple_assign_single_p (stmt)
5342 || code == PAREN_EXPR
5343 || CONVERT_EXPR_CODE_P (code)))
5344 return false;
5346 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5347 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5349 /* Multiple types in SLP are handled by creating the appropriate number of
5350 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5351 case of SLP. */
5352 if (slp_node)
5353 ncopies = 1;
5354 else
5355 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5357 gcc_assert (ncopies >= 1);
5359 slp_tree slp_op;
5360 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5361 &dt[0], &vectype_in))
5363 if (dump_enabled_p ())
5364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5365 "use not simple.\n");
5366 return false;
5368 if (!vectype_in)
5369 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5371 /* We can handle NOP_EXPR conversions that do not change the number
5372 of elements or the vector size. */
5373 if ((CONVERT_EXPR_CODE_P (code)
5374 || code == VIEW_CONVERT_EXPR)
5375 && (!vectype_in
5376 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5377 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5378 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5379 return false;
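/* E.g. with the default vector types an int <-> unsigned int conversion
or a V4SI <-> V4SF VIEW_CONVERT_EXPR passes this check, whereas
int <-> short is rejected because the number of lanes would change. */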
5381 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5382 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5384 if (dump_enabled_p ())
5385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5386 "can't convert between boolean and non "
5387 "boolean vectors %T\n", TREE_TYPE (op));
5389 return false;
5392 /* We do not handle bit-precision changes. */
5393 if ((CONVERT_EXPR_CODE_P (code)
5394 || code == VIEW_CONVERT_EXPR)
5395 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5396 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5397 || !type_has_mode_precision_p (TREE_TYPE (op)))
5398 /* But a conversion that does not change the bit-pattern is ok. */
5399 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5400 > TYPE_PRECISION (TREE_TYPE (op)))
5401 && TYPE_UNSIGNED (TREE_TYPE (op))))
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5405 "type conversion to/from bit-precision "
5406 "unsupported.\n");
5407 return false;
5410 if (!vec_stmt) /* transformation not required. */
5412 if (slp_node
5413 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5417 "incompatible vector types for invariants\n");
5418 return false;
5420 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5421 DUMP_VECT_SCOPE ("vectorizable_assignment");
5422 if (!vect_nop_conversion_p (stmt_info))
5423 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5424 cost_vec);
5425 return true;
5428 /* Transform. */
5429 if (dump_enabled_p ())
5430 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5432 /* Handle def. */
5433 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5435 /* Handle use. */
5436 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5438 /* Arguments are ready. Create the new vector stmt. */
5439 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5441 if (CONVERT_EXPR_CODE_P (code)
5442 || code == VIEW_CONVERT_EXPR)
5443 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5444 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5445 new_temp = make_ssa_name (vec_dest, new_stmt);
5446 gimple_assign_set_lhs (new_stmt, new_temp);
5447 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5448 if (slp_node)
5449 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5450 else
5451 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5453 if (!slp_node)
5454 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5456 vec_oprnds.release ();
5457 return true;
5461 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5462 either as shift by a scalar or by a vector. */
5464 bool
5465 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5468 machine_mode vec_mode;
5469 optab optab;
5470 int icode;
5471 tree vectype;
5473 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5474 if (!vectype)
5475 return false;
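/* A shift-by-scalar insn is tried first; a target that only provides
shift-by-vector still succeeds through the optab_vector fallback
below, mirroring the probing order in vectorizable_shift. */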
5477 optab = optab_for_tree_code (code, vectype, optab_scalar);
5478 if (!optab
5479 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5481 optab = optab_for_tree_code (code, vectype, optab_vector);
5482 if (!optab
5483 || (optab_handler (optab, TYPE_MODE (vectype))
5484 == CODE_FOR_nothing))
5485 return false;
5488 vec_mode = TYPE_MODE (vectype);
5489 icode = (int) optab_handler (optab, vec_mode);
5490 if (icode == CODE_FOR_nothing)
5491 return false;
5493 return true;
5497 /* Function vectorizable_shift.
5499 Check if STMT_INFO performs a shift operation that can be vectorized.
5500 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5501 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5502 Return true if STMT_INFO is vectorizable in this way. */
5504 static bool
5505 vectorizable_shift (vec_info *vinfo,
5506 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5507 gimple **vec_stmt, slp_tree slp_node,
5508 stmt_vector_for_cost *cost_vec)
5510 tree vec_dest;
5511 tree scalar_dest;
5512 tree op0, op1 = NULL;
5513 tree vec_oprnd1 = NULL_TREE;
5514 tree vectype;
5515 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5516 enum tree_code code;
5517 machine_mode vec_mode;
5518 tree new_temp;
5519 optab optab;
5520 int icode;
5521 machine_mode optab_op2_mode;
5522 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5523 int ndts = 2;
5524 poly_uint64 nunits_in;
5525 poly_uint64 nunits_out;
5526 tree vectype_out;
5527 tree op1_vectype;
5528 int ncopies;
5529 int i;
5530 vec<tree> vec_oprnds0 = vNULL;
5531 vec<tree> vec_oprnds1 = vNULL;
5532 tree vop0, vop1;
5533 unsigned int k;
5534 bool scalar_shift_arg = true;
5535 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5536 bool incompatible_op1_vectype_p = false;
5538 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5539 return false;
5541 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5542 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5543 && ! vec_stmt)
5544 return false;
5546 /* Is STMT a vectorizable binary/unary operation? */
5547 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5548 if (!stmt)
5549 return false;
5551 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5552 return false;
5554 code = gimple_assign_rhs_code (stmt);
5556 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5557 || code == RROTATE_EXPR))
5558 return false;
5560 scalar_dest = gimple_assign_lhs (stmt);
5561 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5562 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5564 if (dump_enabled_p ())
5565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5566 "bit-precision shifts not supported.\n");
5567 return false;
5570 slp_tree slp_op0;
5571 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5572 0, &op0, &slp_op0, &dt[0], &vectype))
5574 if (dump_enabled_p ())
5575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5576 "use not simple.\n");
5577 return false;
5579 /* If op0 is an external or constant def, infer the vector type
5580 from the scalar type. */
5581 if (!vectype)
5582 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5583 if (vec_stmt)
5584 gcc_assert (vectype);
5585 if (!vectype)
5587 if (dump_enabled_p ())
5588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5589 "no vectype for scalar type\n");
5590 return false;
5593 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5594 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5595 if (maybe_ne (nunits_out, nunits_in))
5596 return false;
5598 stmt_vec_info op1_def_stmt_info;
5599 slp_tree slp_op1;
5600 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5601 &dt[1], &op1_vectype, &op1_def_stmt_info))
5603 if (dump_enabled_p ())
5604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5605 "use not simple.\n");
5606 return false;
5609 /* Multiple types in SLP are handled by creating the appropriate number of
5610 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5611 case of SLP. */
5612 if (slp_node)
5613 ncopies = 1;
5614 else
5615 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5617 gcc_assert (ncopies >= 1);
5619 /* Determine whether the shift amount is a vector, or scalar. If the
5620 shift/rotate amount is a vector, use the vector/vector shift optabs. */
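/* E.g. in "a[i] = b[i] << 3" the shift amount is a loop invariant and a
vector/scalar shift can be used, whereas "a[i] = b[i] << c[i]" needs a
vector/vector shift because the amount is a vect_internal_def. */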
5622 if ((dt[1] == vect_internal_def
5623 || dt[1] == vect_induction_def
5624 || dt[1] == vect_nested_cycle)
5625 && !slp_node)
5626 scalar_shift_arg = false;
5627 else if (dt[1] == vect_constant_def
5628 || dt[1] == vect_external_def
5629 || dt[1] == vect_internal_def)
5631 /* In SLP, we need to check whether the shift count is the same in all
5632 statements; in loops, a constant or invariant shift count is always
5633 a scalar shift. */
5634 if (slp_node)
5636 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5637 stmt_vec_info slpstmt_info;
5639 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5641 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5642 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5643 scalar_shift_arg = false;
5646 /* For internal SLP defs we have to make sure we see scalar stmts
5647 for all vector elements.
5648 ??? For different vectors we could resort to a different
5649 scalar shift operand but code-generation below simply always
5650 takes the first. */
5651 if (dt[1] == vect_internal_def
5652 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5653 stmts.length ()))
5654 scalar_shift_arg = false;
5657 /* If the shift amount is computed by a pattern stmt we cannot
5658 use the scalar amount directly thus give up and use a vector
5659 shift. */
5660 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5661 scalar_shift_arg = false;
5663 else
5665 if (dump_enabled_p ())
5666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5667 "operand mode requires invariant argument.\n");
5668 return false;
5671 /* Vector shifted by vector. */
5672 bool was_scalar_shift_arg = scalar_shift_arg;
5673 if (!scalar_shift_arg)
5675 optab = optab_for_tree_code (code, vectype, optab_vector);
5676 if (dump_enabled_p ())
5677 dump_printf_loc (MSG_NOTE, vect_location,
5678 "vector/vector shift/rotate found.\n");
5680 if (!op1_vectype)
5681 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5682 slp_op1);
5683 incompatible_op1_vectype_p
5684 = (op1_vectype == NULL_TREE
5685 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5686 TYPE_VECTOR_SUBPARTS (vectype))
5687 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5688 if (incompatible_op1_vectype_p
5689 && (!slp_node
5690 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5691 || slp_op1->refcnt != 1))
5693 if (dump_enabled_p ())
5694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5695 "unusable type for last operand in"
5696 " vector/vector shift/rotate.\n");
5697 return false;
5700 /* See if the machine has a vector shifted by scalar insn and if not
5701 then see if it has a vector shifted by vector insn. */
5702 else
5704 optab = optab_for_tree_code (code, vectype, optab_scalar);
5705 if (optab
5706 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5708 if (dump_enabled_p ())
5709 dump_printf_loc (MSG_NOTE, vect_location,
5710 "vector/scalar shift/rotate found.\n");
5712 else
5714 optab = optab_for_tree_code (code, vectype, optab_vector);
5715 if (optab
5716 && (optab_handler (optab, TYPE_MODE (vectype))
5717 != CODE_FOR_nothing))
5719 scalar_shift_arg = false;
5721 if (dump_enabled_p ())
5722 dump_printf_loc (MSG_NOTE, vect_location,
5723 "vector/vector shift/rotate found.\n");
5725 if (!op1_vectype)
5726 op1_vectype = get_vectype_for_scalar_type (vinfo,
5727 TREE_TYPE (op1),
5728 slp_op1);
5730 /* Unlike the other binary operators, the rhs of a shift/rotate
5731 has int type rather than the same type as the lhs, so make
5732 sure the scalar amount has the right type if we are
5733 dealing with vectors of long long/long/short/char. */
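/* E.g. a vector of long long shifted by an int-typed invariant amount:
the amount is folded to long long during the transform phase below.
If the amount is a vect_internal_def instead, we have to give up. */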
5734 incompatible_op1_vectype_p
5735 = (!op1_vectype
5736 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5737 TREE_TYPE (op1)));
5738 if (incompatible_op1_vectype_p
5739 && dt[1] == vect_internal_def)
5741 if (dump_enabled_p ())
5742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5743 "unusable type for last operand in"
5744 " vector/vector shift/rotate.\n");
5745 return false;
5751 /* Supportable by target? */
5752 if (!optab)
5754 if (dump_enabled_p ())
5755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5756 "no optab.\n");
5757 return false;
5759 vec_mode = TYPE_MODE (vectype);
5760 icode = (int) optab_handler (optab, vec_mode);
5761 if (icode == CODE_FOR_nothing)
5763 if (dump_enabled_p ())
5764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5765 "op not supported by target.\n");
5766 return false;
5768 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5769 if (vect_emulated_vector_p (vectype))
5770 return false;
5772 if (!vec_stmt) /* transformation not required. */
5774 if (slp_node
5775 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5776 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5777 && (!incompatible_op1_vectype_p
5778 || dt[1] == vect_constant_def)
5779 && !vect_maybe_update_slp_op_vectype
5780 (slp_op1,
5781 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5783 if (dump_enabled_p ())
5784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5785 "incompatible vector types for invariants\n");
5786 return false;
5788 /* Now adjust the constant shift amount in place. */
5789 if (slp_node
5790 && incompatible_op1_vectype_p
5791 && dt[1] == vect_constant_def)
5793 for (unsigned i = 0;
5794 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5796 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5797 = fold_convert (TREE_TYPE (vectype),
5798 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5799 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5800 == INTEGER_CST));
5803 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5804 DUMP_VECT_SCOPE ("vectorizable_shift");
5805 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5806 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5807 return true;
5810 /* Transform. */
5812 if (dump_enabled_p ())
5813 dump_printf_loc (MSG_NOTE, vect_location,
5814 "transform binary/unary operation.\n");
5816 if (incompatible_op1_vectype_p && !slp_node)
5818 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5819 op1 = fold_convert (TREE_TYPE (vectype), op1);
5820 if (dt[1] != vect_constant_def)
5821 op1 = vect_init_vector (vinfo, stmt_info, op1,
5822 TREE_TYPE (vectype), NULL);
5825 /* Handle def. */
5826 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5828 if (scalar_shift_arg && dt[1] != vect_internal_def)
5830 /* Vector shl and shr insn patterns can be defined with scalar
5831 operand 2 (shift operand). In this case, use constant or loop
5832 invariant op1 directly, without extending it to vector mode
5833 first. */
5834 optab_op2_mode = insn_data[icode].operand[2].mode;
5835 if (!VECTOR_MODE_P (optab_op2_mode))
5837 if (dump_enabled_p ())
5838 dump_printf_loc (MSG_NOTE, vect_location,
5839 "operand 1 using scalar mode.\n");
5840 vec_oprnd1 = op1;
5841 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5842 vec_oprnds1.quick_push (vec_oprnd1);
5843 /* Store vec_oprnd1 for every vector stmt to be created.
5844 We check during the analysis that all the shift arguments
5845 are the same.
5846 TODO: Allow different constants for different vector
5847 stmts generated for an SLP instance. */
5848 for (k = 0;
5849 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5850 vec_oprnds1.quick_push (vec_oprnd1);
5853 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5855 if (was_scalar_shift_arg)
5857 /* If the argument was the same in all lanes create
5858 the correctly typed vector shift amount directly. */
5859 op1 = fold_convert (TREE_TYPE (vectype), op1);
5860 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5861 !loop_vinfo ? gsi : NULL);
5862 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5863 !loop_vinfo ? gsi : NULL);
5864 vec_oprnds1.create (slp_node->vec_stmts_size);
5865 for (k = 0; k < slp_node->vec_stmts_size; k++)
5866 vec_oprnds1.quick_push (vec_oprnd1);
5868 else if (dt[1] == vect_constant_def)
5869 /* The constant shift amount has been adjusted in place. */
5871 else
5872 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5875 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5876 (a special case for certain kinds of vector shifts); otherwise,
5877 operand 1 should be of a vector type (the usual case). */
5878 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5879 op0, &vec_oprnds0,
5880 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5882 /* Arguments are ready. Create the new vector stmt. */
5883 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5885 /* For internal defs where we need to use a scalar shift arg
5886 extract the first lane. */
5887 if (scalar_shift_arg && dt[1] == vect_internal_def)
5889 vop1 = vec_oprnds1[0];
5890 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5891 gassign *new_stmt
5892 = gimple_build_assign (new_temp,
5893 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5894 vop1,
5895 TYPE_SIZE (TREE_TYPE (new_temp)),
5896 bitsize_zero_node));
5897 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5898 vop1 = new_temp;
5900 else
5901 vop1 = vec_oprnds1[i];
5902 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5903 new_temp = make_ssa_name (vec_dest, new_stmt);
5904 gimple_assign_set_lhs (new_stmt, new_temp);
5905 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5906 if (slp_node)
5907 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5908 else
5909 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5912 if (!slp_node)
5913 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5915 vec_oprnds0.release ();
5916 vec_oprnds1.release ();
5918 return true;
5922 /* Function vectorizable_operation.
5924 Check if STMT_INFO performs a binary, unary or ternary operation that can
5925 be vectorized.
5926 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5927 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5928 Return true if STMT_INFO is vectorizable in this way. */
5930 static bool
5931 vectorizable_operation (vec_info *vinfo,
5932 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5933 gimple **vec_stmt, slp_tree slp_node,
5934 stmt_vector_for_cost *cost_vec)
5936 tree vec_dest;
5937 tree scalar_dest;
5938 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5939 tree vectype;
5940 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5941 enum tree_code code, orig_code;
5942 machine_mode vec_mode;
5943 tree new_temp;
5944 int op_type;
5945 optab optab;
5946 bool target_support_p;
5947 enum vect_def_type dt[3]
5948 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5949 int ndts = 3;
5950 poly_uint64 nunits_in;
5951 poly_uint64 nunits_out;
5952 tree vectype_out;
5953 int ncopies, vec_num;
5954 int i;
5955 vec<tree> vec_oprnds0 = vNULL;
5956 vec<tree> vec_oprnds1 = vNULL;
5957 vec<tree> vec_oprnds2 = vNULL;
5958 tree vop0, vop1, vop2;
5959 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5961 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5962 return false;
5964 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5965 && ! vec_stmt)
5966 return false;
5968 /* Is STMT a vectorizable binary/unary operation? */
5969 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5970 if (!stmt)
5971 return false;
5973 /* Loads and stores are handled in vectorizable_{load,store}. */
5974 if (STMT_VINFO_DATA_REF (stmt_info))
5975 return false;
5977 orig_code = code = gimple_assign_rhs_code (stmt);
5979 /* Shifts are handled in vectorizable_shift. */
5980 if (code == LSHIFT_EXPR
5981 || code == RSHIFT_EXPR
5982 || code == LROTATE_EXPR
5983 || code == RROTATE_EXPR)
5984 return false;
5986 /* Comparisons are handled in vectorizable_comparison. */
5987 if (TREE_CODE_CLASS (code) == tcc_comparison)
5988 return false;
5990 /* Conditions are handled in vectorizable_condition. */
5991 if (code == COND_EXPR)
5992 return false;
5994 /* For pointer addition and subtraction, we should use the normal
5995 plus and minus for the vector operation. */
5996 if (code == POINTER_PLUS_EXPR)
5997 code = PLUS_EXPR;
5998 if (code == POINTER_DIFF_EXPR)
5999 code = MINUS_EXPR;
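/* E.g. "p + n" (POINTER_PLUS_EXPR) becomes a vector PLUS_EXPR on the
unsigned pointer-sized elements, and "p - q" (POINTER_DIFF_EXPR)
becomes a MINUS_EXPR whose result is later VIEW_CONVERTed to the
signed VECTYPE_OUT (see the vec_cvt_dest handling below). */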
6001 /* Support only unary or binary operations. */
6002 op_type = TREE_CODE_LENGTH (code);
6003 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6005 if (dump_enabled_p ())
6006 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6007 "num. args = %d (not unary/binary/ternary op).\n",
6008 op_type);
6009 return false;
6012 scalar_dest = gimple_assign_lhs (stmt);
6013 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6015 /* Most operations cannot handle bit-precision types without extra
6016 truncations. */
6017 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6018 if (!mask_op_p
6019 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6020 /* Exception are bitwise binary operations. */
6021 && code != BIT_IOR_EXPR
6022 && code != BIT_XOR_EXPR
6023 && code != BIT_AND_EXPR)
6025 if (dump_enabled_p ())
6026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6027 "bit-precision arithmetic not supported.\n");
6028 return false;
6031 slp_tree slp_op0;
6032 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6033 0, &op0, &slp_op0, &dt[0], &vectype))
6035 if (dump_enabled_p ())
6036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6037 "use not simple.\n");
6038 return false;
6040 /* If op0 is an external or constant def, infer the vector type
6041 from the scalar type. */
6042 if (!vectype)
6044 /* For boolean type we cannot determine vectype by
6045 invariant value (don't know whether it is a vector
6046 of booleans or vector of integers). We use output
6047 vectype because operations on boolean don't change
6048 type. */
6049 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6051 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6053 if (dump_enabled_p ())
6054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6055 "not supported operation on bool value.\n");
6056 return false;
6058 vectype = vectype_out;
6060 else
6061 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6062 slp_node);
6064 if (vec_stmt)
6065 gcc_assert (vectype);
6066 if (!vectype)
6068 if (dump_enabled_p ())
6069 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6070 "no vectype for scalar type %T\n",
6071 TREE_TYPE (op0));
6073 return false;
6076 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6077 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6078 if (maybe_ne (nunits_out, nunits_in))
6079 return false;
6081 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6082 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6083 if (op_type == binary_op || op_type == ternary_op)
6085 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6086 1, &op1, &slp_op1, &dt[1], &vectype2))
6088 if (dump_enabled_p ())
6089 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6090 "use not simple.\n");
6091 return false;
6094 if (op_type == ternary_op)
6096 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6097 2, &op2, &slp_op2, &dt[2], &vectype3))
6099 if (dump_enabled_p ())
6100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6101 "use not simple.\n");
6102 return false;
6106 /* Multiple types in SLP are handled by creating the appropriate number of
6107 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6108 case of SLP. */
6109 if (slp_node)
6111 ncopies = 1;
6112 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6114 else
6116 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6117 vec_num = 1;
6120 gcc_assert (ncopies >= 1);
6122 /* Reject attempts to combine mask types with nonmask types, e.g. if
6123 we have an AND between a (nonmask) boolean loaded from memory and
6124 a (mask) boolean result of a comparison.
6126 TODO: We could easily fix these cases up using pattern statements. */
6127 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6128 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6129 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6131 if (dump_enabled_p ())
6132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6133 "mixed mask and nonmask vector types\n");
6134 return false;
6137 /* Supportable by target? */
6139 vec_mode = TYPE_MODE (vectype);
6140 if (code == MULT_HIGHPART_EXPR)
6141 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6142 else
6144 optab = optab_for_tree_code (code, vectype, optab_default);
6145 if (!optab)
6147 if (dump_enabled_p ())
6148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6149 "no optab.\n");
6150 return false;
6152 target_support_p = (optab_handler (optab, vec_mode)
6153 != CODE_FOR_nothing);
6156 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6157 if (!target_support_p)
6159 if (dump_enabled_p ())
6160 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6161 "op not supported by target.\n");
6162 /* Check only during analysis. */
6163 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6164 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6165 return false;
6166 if (dump_enabled_p ())
6167 dump_printf_loc (MSG_NOTE, vect_location,
6168 "proceeding using word mode.\n");
6169 using_emulated_vectors_p = true;
6172 if (using_emulated_vectors_p
6173 && !vect_can_vectorize_without_simd_p (code))
6175 if (dump_enabled_p ())
6176 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6177 return false;
6180 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6181 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6182 internal_fn cond_fn = get_conditional_internal_fn (code);
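/* E.g. get_conditional_internal_fn (PLUS_EXPR) is IFN_COND_ADD, which
the masked-loop code below uses so that inactive lanes of a reduction
simply pass the reduction input through unchanged. */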
6184 if (!vec_stmt) /* transformation not required. */
6186 /* If this operation is part of a reduction, a fully-masked loop
6187 should only change the active lanes of the reduction chain,
6188 keeping the inactive lanes as-is. */
6189 if (loop_vinfo
6190 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6191 && reduc_idx >= 0)
6193 if (cond_fn == IFN_LAST
6194 || !direct_internal_fn_supported_p (cond_fn, vectype,
6195 OPTIMIZE_FOR_SPEED))
6197 if (dump_enabled_p ())
6198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6199 "can't use a fully-masked loop because no"
6200 " conditional operation is available.\n");
6201 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6203 else
6204 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6205 vectype, NULL);
6208 /* Put types on constant and invariant SLP children. */
6209 if (slp_node
6210 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6211 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6212 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6214 if (dump_enabled_p ())
6215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6216 "incompatible vector types for invariants\n");
6217 return false;
6220 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6221 DUMP_VECT_SCOPE ("vectorizable_operation");
6222 vect_model_simple_cost (vinfo, stmt_info,
6223 ncopies, dt, ndts, slp_node, cost_vec);
6224 if (using_emulated_vectors_p)
6226 /* The above vect_model_simple_cost call handles constants
6227 in the prologue and (mis-)costs one of the stmts as
6228 vector stmt. See tree-vect-generic.c:do_plus_minus/do_negate
6229 for the actual lowering that will be applied. */
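/* The factors 5, 6 and 4 below approximate the number of word-mode
statements emitted for an emulated vector add, subtract and negate
respectively; one statement was already costed by
vect_model_simple_cost above. */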
6230 unsigned n
6231 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6232 switch (code)
6234 case PLUS_EXPR:
6235 n *= 5;
6236 break;
6237 case MINUS_EXPR:
6238 n *= 6;
6239 break;
6240 case NEGATE_EXPR:
6241 n *= 4;
6242 break;
6243 default:;
6245 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6247 return true;
6250 /* Transform. */
6252 if (dump_enabled_p ())
6253 dump_printf_loc (MSG_NOTE, vect_location,
6254 "transform binary/unary operation.\n");
6256 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6258 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6259 vectors with unsigned elements, but the result is signed. So, we
6260 need to compute the MINUS_EXPR into vectype temporary and
6261 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6262 tree vec_cvt_dest = NULL_TREE;
6263 if (orig_code == POINTER_DIFF_EXPR)
6265 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6266 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6268 /* Handle def. */
6269 else
6270 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6272 /* In case the vectorization factor (VF) is bigger than the number
6273 of elements that we can fit in a vectype (nunits), we have to generate
6274 more than one vector stmt, i.e., we need to "unroll" the
6275 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6276 from one copy of the vector stmt to the next, in the field
6277 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6278 stages to find the correct vector defs to be used when vectorizing
6279 stmts that use the defs of the current stmt. The example below
6280 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6281 we need to create 4 vectorized stmts):
6283 before vectorization:
6284 RELATED_STMT VEC_STMT
6285 S1: x = memref - -
6286 S2: z = x + 1 - -
6288 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6289 there):
6290 RELATED_STMT VEC_STMT
6291 VS1_0: vx0 = memref0 VS1_1 -
6292 VS1_1: vx1 = memref1 VS1_2 -
6293 VS1_2: vx2 = memref2 VS1_3 -
6294 VS1_3: vx3 = memref3 - -
6295 S1: x = load - VS1_0
6296 S2: z = x + 1 - -
6298 step2: vectorize stmt S2 (done here):
6299 To vectorize stmt S2 we first need to find the relevant vector
6300 def for the first operand 'x'. This is, as usual, obtained from
6301 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6302 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6303 relevant vector def 'vx0'. Having found 'vx0' we can generate
6304 the vector stmt VS2_0, and as usual, record it in the
6305 STMT_VINFO_VEC_STMT of stmt S2.
6306 When creating the second copy (VS2_1), we obtain the relevant vector
6307 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6308 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6309 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6310 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6311 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6312 chain of stmts and pointers:
6313 RELATED_STMT VEC_STMT
6314 VS1_0: vx0 = memref0 VS1_1 -
6315 VS1_1: vx1 = memref1 VS1_2 -
6316 VS1_2: vx2 = memref2 VS1_3 -
6317 VS1_3: vx3 = memref3 - -
6318 S1: x = load - VS1_0
6319 VS2_0: vz0 = vx0 + v1 VS2_1 -
6320 VS2_1: vz1 = vx1 + v1 VS2_2 -
6321 VS2_2: vz2 = vx2 + v1 VS2_3 -
6322 VS2_3: vz3 = vx3 + v1 - -
6323 S2: z = x + 1 - VS2_0 */
6325 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6326 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6327 /* Arguments are ready. Create the new vector stmt. */
6328 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6330 gimple *new_stmt = NULL;
6331 vop1 = ((op_type == binary_op || op_type == ternary_op)
6332 ? vec_oprnds1[i] : NULL_TREE);
6333 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6334 if (masked_loop_p && reduc_idx >= 0)
6336 /* Perform the operation on active elements only and take
6337 inactive elements from the reduction chain input. */
6338 gcc_assert (!vop2);
6339 vop2 = reduc_idx == 1 ? vop1 : vop0;
6340 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6341 vectype, i);
6342 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6343 vop0, vop1, vop2);
6344 new_temp = make_ssa_name (vec_dest, call);
6345 gimple_call_set_lhs (call, new_temp);
6346 gimple_call_set_nothrow (call, true);
6347 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6348 new_stmt = call;
6350 else
6352 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6353 new_temp = make_ssa_name (vec_dest, new_stmt);
6354 gimple_assign_set_lhs (new_stmt, new_temp);
6355 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6356 if (vec_cvt_dest)
6358 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6359 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6360 new_temp);
6361 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6362 gimple_assign_set_lhs (new_stmt, new_temp);
6363 vect_finish_stmt_generation (vinfo, stmt_info,
6364 new_stmt, gsi);
6367 if (slp_node)
6368 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6369 else
6370 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6373 if (!slp_node)
6374 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6376 vec_oprnds0.release ();
6377 vec_oprnds1.release ();
6378 vec_oprnds2.release ();
6380 return true;
6383 /* A helper function to ensure data reference DR_INFO's base alignment. */
6385 static void
6386 ensure_base_align (dr_vec_info *dr_info)
6388 /* Alignment is only analyzed for the first element of a DR group,
6389 use that to look at base alignment we need to enforce. */
6390 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6391 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6393 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6395 if (dr_info->base_misaligned)
6397 tree base_decl = dr_info->base_decl;
6399 // We should only be able to increase the alignment of a base object if
6400 // we know what its new alignment should be at compile time.
6401 unsigned HOST_WIDE_INT align_base_to =
6402 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
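/* E.g. a 16-byte DR_TARGET_ALIGNMENT becomes a 128-bit request, enforced
either through the symbol table (for variables with symtab nodes) or
via SET_DECL_ALIGN (for automatic variables) below. */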
6404 if (decl_in_symtab_p (base_decl))
6405 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6406 else if (DECL_ALIGN (base_decl) < align_base_to)
6408 SET_DECL_ALIGN (base_decl, align_base_to);
6409 DECL_USER_ALIGN (base_decl) = 1;
6411 dr_info->base_misaligned = false;
6416 /* Function get_group_alias_ptr_type.
6418 Return the alias type for the group starting at FIRST_STMT_INFO. */
6420 static tree
6421 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6423 struct data_reference *first_dr, *next_dr;
6425 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6426 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6427 while (next_stmt_info)
6429 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6430 if (get_alias_set (DR_REF (first_dr))
6431 != get_alias_set (DR_REF (next_dr)))
6433 if (dump_enabled_p ())
6434 dump_printf_loc (MSG_NOTE, vect_location,
6435 "conflicting alias set types.\n");
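/* Falling back to ptr_type_node means alias set zero: always safe, but
it gives up TBAA-based disambiguation for the whole group. */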
6436 return ptr_type_node;
6438 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6440 return reference_alias_ptr_type (DR_REF (first_dr));
6444 /* Function scan_operand_equal_p.
6446 Helper function for check_scan_store. Compare two references
6447 with .GOMP_SIMD_LANE bases. */
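/* E.g. the D.2043[_25] store and the D.2043[_25] load in the scan
sequences matched by check_scan_store below compare equal even after
earlier passes have rewritten the index into a cast or a MULT_EXPR,
both of which are stripped before comparing. */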
6449 static bool
6450 scan_operand_equal_p (tree ref1, tree ref2)
6452 tree ref[2] = { ref1, ref2 };
6453 poly_int64 bitsize[2], bitpos[2];
6454 tree offset[2], base[2];
6455 for (int i = 0; i < 2; ++i)
6457 machine_mode mode;
6458 int unsignedp, reversep, volatilep = 0;
6459 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6460 &offset[i], &mode, &unsignedp,
6461 &reversep, &volatilep);
6462 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6463 return false;
6464 if (TREE_CODE (base[i]) == MEM_REF
6465 && offset[i] == NULL_TREE
6466 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6468 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6469 if (is_gimple_assign (def_stmt)
6470 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6471 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6472 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6474 if (maybe_ne (mem_ref_offset (base[i]), 0))
6475 return false;
6476 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6477 offset[i] = gimple_assign_rhs2 (def_stmt);
6482 if (!operand_equal_p (base[0], base[1], 0))
6483 return false;
6484 if (maybe_ne (bitsize[0], bitsize[1]))
6485 return false;
6486 if (offset[0] != offset[1])
6488 if (!offset[0] || !offset[1])
6489 return false;
6490 if (!operand_equal_p (offset[0], offset[1], 0))
6492 tree step[2];
6493 for (int i = 0; i < 2; ++i)
6495 step[i] = integer_one_node;
6496 if (TREE_CODE (offset[i]) == SSA_NAME)
6498 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6499 if (is_gimple_assign (def_stmt)
6500 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6501 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6502 == INTEGER_CST))
6504 step[i] = gimple_assign_rhs2 (def_stmt);
6505 offset[i] = gimple_assign_rhs1 (def_stmt);
6508 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6510 step[i] = TREE_OPERAND (offset[i], 1);
6511 offset[i] = TREE_OPERAND (offset[i], 0);
6513 tree rhs1 = NULL_TREE;
6514 if (TREE_CODE (offset[i]) == SSA_NAME)
6516 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6517 if (gimple_assign_cast_p (def_stmt))
6518 rhs1 = gimple_assign_rhs1 (def_stmt);
6520 else if (CONVERT_EXPR_P (offset[i]))
6521 rhs1 = TREE_OPERAND (offset[i], 0);
6522 if (rhs1
6523 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6524 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6525 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6526 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6527 offset[i] = rhs1;
6529 if (!operand_equal_p (offset[0], offset[1], 0)
6530 || !operand_equal_p (step[0], step[1], 0))
6531 return false;
6534 return true;
6538 enum scan_store_kind {
6539 /* Normal permutation. */
6540 scan_store_kind_perm,
6542 /* Whole vector left shift permutation with zero init. */
6543 scan_store_kind_lshift_zero,
6545 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6546 scan_store_kind_lshift_cond
6549 /* Function scan_store_can_perm_p.
6551 Verify if we can perform the needed permutations or whole vector shifts.
6552 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6553 If nonnull, USE_WHOLE_VECTOR is filled with the scan_store_kind of the
6554 operation to perform at each step. */
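/* E.g. for an 8-lane vector the permutations checked are
{ 0, 8, 9, 10, 11, 12, 13, 14 }, { 0, 1, 8, 9, 10, 11, 12, 13 },
{ 0, 1, 2, 3, 8, 9, 10, 11 } and finally the broadcast
{ 7, 7, 7, 7, 7, 7, 7, 7 }, matching the expansion shown in the
comment inside check_scan_store below. */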
6556 static int
6557 scan_store_can_perm_p (tree vectype, tree init,
6558 vec<enum scan_store_kind> *use_whole_vector = NULL)
6560 enum machine_mode vec_mode = TYPE_MODE (vectype);
6561 unsigned HOST_WIDE_INT nunits;
6562 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6563 return -1;
6564 int units_log2 = exact_log2 (nunits);
6565 if (units_log2 <= 0)
6566 return -1;
6568 int i;
6569 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6570 for (i = 0; i <= units_log2; ++i)
6572 unsigned HOST_WIDE_INT j, k;
6573 enum scan_store_kind kind = scan_store_kind_perm;
6574 vec_perm_builder sel (nunits, nunits, 1);
6575 sel.quick_grow (nunits);
6576 if (i == units_log2)
6578 for (j = 0; j < nunits; ++j)
6579 sel[j] = nunits - 1;
6581 else
6583 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6584 sel[j] = j;
6585 for (k = 0; j < nunits; ++j, ++k)
6586 sel[j] = nunits + k;
6588 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6589 if (!can_vec_perm_const_p (vec_mode, indices))
6591 if (i == units_log2)
6592 return -1;
6594 if (whole_vector_shift_kind == scan_store_kind_perm)
6596 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6597 return -1;
6598 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6599 /* Whole vector shifts shift in zeros, so if INIT is an all-zero
6600 constant, there is no need to do anything further. */
6601 if ((TREE_CODE (init) != INTEGER_CST
6602 && TREE_CODE (init) != REAL_CST)
6603 || !initializer_zerop (init))
6605 tree masktype = truth_type_for (vectype);
6606 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6607 return -1;
6608 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6611 kind = whole_vector_shift_kind;
6613 if (use_whole_vector)
6615 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6616 use_whole_vector->safe_grow_cleared (i, true);
6617 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6618 use_whole_vector->safe_push (kind);
6622 return units_log2;
6626 /* Function check_scan_store.
6628 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6630 static bool
6631 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6632 enum vect_def_type rhs_dt, bool slp, tree mask,
6633 vect_memory_access_type memory_access_type)
6635 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6636 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6637 tree ref_type;
6639 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6640 if (slp
6641 || mask
6642 || memory_access_type != VMAT_CONTIGUOUS
6643 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6644 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6645 || loop_vinfo == NULL
6646 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6647 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6648 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6649 || !integer_zerop (DR_INIT (dr_info->dr))
6650 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6651 || !alias_sets_conflict_p (get_alias_set (vectype),
6652 get_alias_set (TREE_TYPE (ref_type))))
6654 if (dump_enabled_p ())
6655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6656 "unsupported OpenMP scan store.\n");
6657 return false;
6660 /* We need to pattern match code built by OpenMP lowering and simplified
6661 by subsequent optimizations into something we can handle.
6662 #pragma omp simd reduction(inscan,+:r)
6663 for (...)
6665 r += something ();
6666 #pragma omp scan inclusive (r)
6667 use (r);
6669 shall have body with:
6670 // Initialization for input phase, store the reduction initializer:
6671 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6672 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6673 D.2042[_21] = 0;
6674 // Actual input phase:
6676 r.0_5 = D.2042[_20];
6677 _6 = _4 + r.0_5;
6678 D.2042[_20] = _6;
6679 // Initialization for scan phase:
6680 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6681 _26 = D.2043[_25];
6682 _27 = D.2042[_25];
6683 _28 = _26 + _27;
6684 D.2043[_25] = _28;
6685 D.2042[_25] = _28;
6686 // Actual scan phase:
6688 r.1_8 = D.2042[_20];
6690 The "omp simd array" variable D.2042 holds the privatized copy used
6691 inside of the loop and D.2043 is another one that holds copies of
6692 the current original list item. The separate GOMP_SIMD_LANE ifn
6693 kinds are there in order to allow optimizing the initializer store
6694 and combiner sequence, e.g. if it is originally some C++ish user
6695 defined reduction, but allow the vectorizer to pattern recognize it
6696 and turn into the appropriate vectorized scan.
6698 For exclusive scan, this is slightly different:
6699 #pragma omp simd reduction(inscan,+:r)
6700 for (...)
6702 use (r);
6703 #pragma omp scan exclusive (r)
6704 r += something ();
6706 shall have body with:
6707 // Initialization for input phase, store the reduction initializer:
6708 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6709 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6710 D.2042[_21] = 0;
6711 // Actual input phase:
6713 r.0_5 = D.2042[_20];
6714 _6 = _4 + r.0_5;
6715 D.2042[_20] = _6;
6716 // Initialization for scan phase:
6717 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6718 _26 = D.2043[_25];
6719 D.2044[_25] = _26;
6720 _27 = D.2042[_25];
6721 _28 = _26 + _27;
6722 D.2043[_25] = _28;
6723 // Actual scan phase:
6725 r.1_8 = D.2044[_20];
6726 ... */
6728 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6730 /* Match the D.2042[_21] = 0; store above. Just require that
6731 it is a constant or external definition store. */
6732 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6734 fail_init:
6735 if (dump_enabled_p ())
6736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6737 "unsupported OpenMP scan initializer store.\n");
6738 return false;
6741 if (! loop_vinfo->scan_map)
6742 loop_vinfo->scan_map = new hash_map<tree, tree>;
6743 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6744 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6745 if (cached)
6746 goto fail_init;
6747 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6749 /* These stores can be vectorized normally. */
6750 return true;
6753 if (rhs_dt != vect_internal_def)
6755 fail:
6756 if (dump_enabled_p ())
6757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6758 "unsupported OpenMP scan combiner pattern.\n");
6759 return false;
6762 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6763 tree rhs = gimple_assign_rhs1 (stmt);
6764 if (TREE_CODE (rhs) != SSA_NAME)
6765 goto fail;
6767 gimple *other_store_stmt = NULL;
6768 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6769 bool inscan_var_store
6770 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6772 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6774 if (!inscan_var_store)
6776 use_operand_p use_p;
6777 imm_use_iterator iter;
6778 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6780 gimple *use_stmt = USE_STMT (use_p);
6781 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6782 continue;
6783 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6784 || !is_gimple_assign (use_stmt)
6785 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6786 || other_store_stmt
6787 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6788 goto fail;
6789 other_store_stmt = use_stmt;
6791 if (other_store_stmt == NULL)
6792 goto fail;
6793 rhs = gimple_assign_lhs (other_store_stmt);
6794 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6795 goto fail;
6798 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6800 use_operand_p use_p;
6801 imm_use_iterator iter;
6802 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6804 gimple *use_stmt = USE_STMT (use_p);
6805 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6806 continue;
6807 if (other_store_stmt)
6808 goto fail;
6809 other_store_stmt = use_stmt;
6812 else
6813 goto fail;
6815 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6816 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6817 || !is_gimple_assign (def_stmt)
6818 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6819 goto fail;
6821 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6822 /* For pointer addition, we should use the normal plus for the vector
6823 operation. */
6824 switch (code)
6826 case POINTER_PLUS_EXPR:
6827 code = PLUS_EXPR;
6828 break;
6829 case MULT_HIGHPART_EXPR:
6830 goto fail;
6831 default:
6832 break;
6834 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6835 goto fail;
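/* Only commutative binary combiners are handled, e.g. +, *, min, max
and the bitwise operations; a MINUS_EXPR combiner fails here. */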
6837 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6838 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6839 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6840 goto fail;
6842 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6843 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6844 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6845 || !gimple_assign_load_p (load1_stmt)
6846 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6847 || !gimple_assign_load_p (load2_stmt))
6848 goto fail;
6850 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6851 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6852 if (load1_stmt_info == NULL
6853 || load2_stmt_info == NULL
6854 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6855 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6856 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6857 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6858 goto fail;
6860 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6862 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6863 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6864 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6865 goto fail;
6866 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6867 tree lrhs;
6868 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6869 lrhs = rhs1;
6870 else
6871 lrhs = rhs2;
6872 use_operand_p use_p;
6873 imm_use_iterator iter;
6874 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6876 gimple *use_stmt = USE_STMT (use_p);
6877 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6878 continue;
6879 if (other_store_stmt)
6880 goto fail;
6881 other_store_stmt = use_stmt;
6885 if (other_store_stmt == NULL)
6886 goto fail;
6887 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6888 || !gimple_store_p (other_store_stmt))
6889 goto fail;
6891 stmt_vec_info other_store_stmt_info
6892 = loop_vinfo->lookup_stmt (other_store_stmt);
6893 if (other_store_stmt_info == NULL
6894 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6895 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6896 goto fail;
6898 gimple *stmt1 = stmt;
6899 gimple *stmt2 = other_store_stmt;
6900 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6901 std::swap (stmt1, stmt2);
6902 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6903 gimple_assign_rhs1 (load2_stmt)))
6905 std::swap (rhs1, rhs2);
6906 std::swap (load1_stmt, load2_stmt);
6907 std::swap (load1_stmt_info, load2_stmt_info);
6909 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6910 gimple_assign_rhs1 (load1_stmt)))
6911 goto fail;
6913 tree var3 = NULL_TREE;
6914 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6915 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6916 gimple_assign_rhs1 (load2_stmt)))
6917 goto fail;
6918 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6920 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6921 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6922 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6923 goto fail;
6924 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6925 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6926 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6927 || lookup_attribute ("omp simd inscan exclusive",
6928 DECL_ATTRIBUTES (var3)))
6929 goto fail;
6932 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6933 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6934 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6935 goto fail;
6937 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6938 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6939 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6940 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6941 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6942 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6943 goto fail;
6945 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6946 std::swap (var1, var2);
6948 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6950 if (!lookup_attribute ("omp simd inscan exclusive",
6951 DECL_ATTRIBUTES (var1)))
6952 goto fail;
6953 var1 = var3;
6956 if (loop_vinfo->scan_map == NULL)
6957 goto fail;
6958 tree *init = loop_vinfo->scan_map->get (var1);
6959 if (init == NULL)
6960 goto fail;
6962 /* The IL is as expected, now check if we can actually vectorize it.
6963 Inclusive scan:
6964 _26 = D.2043[_25];
6965 _27 = D.2042[_25];
6966 _28 = _26 + _27;
6967 D.2043[_25] = _28;
6968 D.2042[_25] = _28;
6969 should be vectorized as (where _40 is the vectorized rhs
6970 from the D.2042[_21] = 0; store):
6971 _30 = MEM <vector(8) int> [(int *)&D.2043];
6972 _31 = MEM <vector(8) int> [(int *)&D.2042];
6973 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6974 _33 = _31 + _32;
6975 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6976 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6977 _35 = _33 + _34;
6978 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6979 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6980 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6981 _37 = _35 + _36;
6982 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6983 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6984 _38 = _30 + _37;
6985 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6986 MEM <vector(8) int> [(int *)&D.2043] = _39;
6987 MEM <vector(8) int> [(int *)&D.2042] = _38;
6988 Exclusive scan:
6989 _26 = D.2043[_25];
6990 D.2044[_25] = _26;
6991 _27 = D.2042[_25];
6992 _28 = _26 + _27;
6993 D.2043[_25] = _28;
6994 should be vectorized as (where _40 is the vectorized rhs
6995 from the D.2042[_21] = 0; store):
6996 _30 = MEM <vector(8) int> [(int *)&D.2043];
6997 _31 = MEM <vector(8) int> [(int *)&D.2042];
6998 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6999 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7000 _34 = _32 + _33;
7001 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7002 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7003 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7004 _36 = _34 + _35;
7005 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7006 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7007 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7008 _38 = _36 + _37;
7009 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7010 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7011 _39 = _30 + _38;
7012 _50 = _31 + _39;
7013 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7014 MEM <vector(8) int> [(int *)&D.2044] = _39;
7015 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7016 enum machine_mode vec_mode = TYPE_MODE (vectype);
7017 optab optab = optab_for_tree_code (code, vectype, optab_default);
7018 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7019 goto fail;
7021 int units_log2 = scan_store_can_perm_p (vectype, *init);
7022 if (units_log2 == -1)
7023 goto fail;
7025 return true;
7029 /* Function vectorizable_scan_store.
7031 Helper of vectorizable_store; takes the same arguments as vectorizable_store.
7032 Handle only the transformation, checking is done in check_scan_store. */
7034 static bool
7035 vectorizable_scan_store (vec_info *vinfo,
7036 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7037 gimple **vec_stmt, int ncopies)
7039 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7040 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7041 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7042 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7044 if (dump_enabled_p ())
7045 dump_printf_loc (MSG_NOTE, vect_location,
7046 "transform scan store. ncopies = %d\n", ncopies);
7048 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7049 tree rhs = gimple_assign_rhs1 (stmt);
7050 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7052 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7053 bool inscan_var_store
7054 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7056 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7058 use_operand_p use_p;
7059 imm_use_iterator iter;
7060 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7062 gimple *use_stmt = USE_STMT (use_p);
7063 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7064 continue;
7065 rhs = gimple_assign_lhs (use_stmt);
7066 break;
7070 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7071 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7072 if (code == POINTER_PLUS_EXPR)
7073 code = PLUS_EXPR;
7074 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7075 && commutative_tree_code (code));
7076 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7077 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7078 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7079 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7080 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7081 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7082 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7083 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7084 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7085 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7086 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7088 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7090 std::swap (rhs1, rhs2);
7091 std::swap (var1, var2);
7092 std::swap (load1_dr_info, load2_dr_info);
7095 tree *init = loop_vinfo->scan_map->get (var1);
7096 gcc_assert (init);
7098 unsigned HOST_WIDE_INT nunits;
7099 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7100 gcc_unreachable ();
7101 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7102 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7103 gcc_assert (units_log2 > 0);
7104 auto_vec<tree, 16> perms;
7105 perms.quick_grow (units_log2 + 1);
7106 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7107 for (int i = 0; i <= units_log2; ++i)
7109 unsigned HOST_WIDE_INT j, k;
7110 vec_perm_builder sel (nunits, nunits, 1);
7111 sel.quick_grow (nunits);
7112 if (i == units_log2)
7113 for (j = 0; j < nunits; ++j)
7114 sel[j] = nunits - 1;
7115 else
7117 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7118 sel[j] = j;
7119 for (k = 0; j < nunits; ++j, ++k)
7120 sel[j] = nunits + k;
7122 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7123 if (!use_whole_vector.is_empty ()
7124 && use_whole_vector[i] != scan_store_kind_perm)
7126 if (zero_vec == NULL_TREE)
7127 zero_vec = build_zero_cst (vectype);
7128 if (masktype == NULL_TREE
7129 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7130 masktype = truth_type_for (vectype);
7131 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7133 else
7134 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7137 tree vec_oprnd1 = NULL_TREE;
7138 tree vec_oprnd2 = NULL_TREE;
7139 tree vec_oprnd3 = NULL_TREE;
7140 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7141 tree dataref_offset = build_int_cst (ref_type, 0);
7142 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7143 vectype, VMAT_CONTIGUOUS);
7144 tree ldataref_ptr = NULL_TREE;
7145 tree orig = NULL_TREE;
7146 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7147 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7148 auto_vec<tree> vec_oprnds1;
7149 auto_vec<tree> vec_oprnds2;
7150 auto_vec<tree> vec_oprnds3;
7151 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7152 *init, &vec_oprnds1,
7153 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7154 rhs2, &vec_oprnds3);
7155 for (int j = 0; j < ncopies; j++)
7157 vec_oprnd1 = vec_oprnds1[j];
7158 if (ldataref_ptr == NULL)
7159 vec_oprnd2 = vec_oprnds2[j];
7160 vec_oprnd3 = vec_oprnds3[j];
7161 if (j == 0)
7162 orig = vec_oprnd3;
7163 else if (!inscan_var_store)
7164 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7166 if (ldataref_ptr)
7168 vec_oprnd2 = make_ssa_name (vectype);
7169 tree data_ref = fold_build2 (MEM_REF, vectype,
7170 unshare_expr (ldataref_ptr),
7171 dataref_offset);
7172 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7173 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7174 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7175 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7176 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7179 tree v = vec_oprnd2;
7180 for (int i = 0; i < units_log2; ++i)
7182 tree new_temp = make_ssa_name (vectype);
7183 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7184 (zero_vec
7185 && (use_whole_vector[i]
7186 != scan_store_kind_perm))
7187 ? zero_vec : vec_oprnd1, v,
7188 perms[i]);
7189 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7190 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7191 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7193 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7195 /* The whole-vector shift above shifted in zero bits, but if *init
7196 is not initializer_zerop, we need to replace those elements
7197 with elements from vec_oprnd1. */
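	      /* For instance, with nunits == 8 and i == 1 the mask built
		 below is { 0, 0, 1, 1, 1, 1, 1, 1 }, so the VEC_COND_EXPR
		 keeps lanes 2..7 of NEW_TEMP (the shifted vector) and takes
		 lanes 0 and 1 from VEC_OPRND1 instead.  */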
7198 tree_vector_builder vb (masktype, nunits, 1);
7199 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7200 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7201 ? boolean_false_node : boolean_true_node);
7203 tree new_temp2 = make_ssa_name (vectype);
7204 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7205 new_temp, vec_oprnd1);
7206 vect_finish_stmt_generation (vinfo, stmt_info,
7207 g, gsi);
7208 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7209 new_temp = new_temp2;
7212 /* For exclusive scan, perform the perms[i] permutation once
7213 more. */
7214 if (i == 0
7215 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7216 && v == vec_oprnd2)
7218 v = new_temp;
7219 --i;
7220 continue;
7223 tree new_temp2 = make_ssa_name (vectype);
7224 g = gimple_build_assign (new_temp2, code, v, new_temp);
7225 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7226 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7228 v = new_temp2;
7231 tree new_temp = make_ssa_name (vectype);
7232 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7233 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7234 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7236 tree last_perm_arg = new_temp;
7237 /* For exclusive scan, new_temp computed above is the exclusive scan
7238 prefix sum. Turn it into inclusive prefix sum for the broadcast
7239 of the last element into orig. */
7240 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7242 last_perm_arg = make_ssa_name (vectype);
7243 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7244 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7245 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7248 orig = make_ssa_name (vectype);
7249 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7250 last_perm_arg, perms[units_log2]);
7251 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7252 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7254 if (!inscan_var_store)
7256 tree data_ref = fold_build2 (MEM_REF, vectype,
7257 unshare_expr (dataref_ptr),
7258 dataref_offset);
7259 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7260 g = gimple_build_assign (data_ref, new_temp);
7261 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7262 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7266 if (inscan_var_store)
7267 for (int j = 0; j < ncopies; j++)
7269 if (j != 0)
7270 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7272 tree data_ref = fold_build2 (MEM_REF, vectype,
7273 unshare_expr (dataref_ptr),
7274 dataref_offset);
7275 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7276 gimple *g = gimple_build_assign (data_ref, orig);
7277 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7278 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7280 return true;
7284 /* Function vectorizable_store.
7286 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7287 that can be vectorized.
7288 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7289 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7290 Return true if STMT_INFO is vectorizable in this way. */
7292 static bool
7293 vectorizable_store (vec_info *vinfo,
7294 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7295 gimple **vec_stmt, slp_tree slp_node,
7296 stmt_vector_for_cost *cost_vec)
7298 tree data_ref;
7299 tree op;
7300 tree vec_oprnd = NULL_TREE;
7301 tree elem_type;
7302 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7303 class loop *loop = NULL;
7304 machine_mode vec_mode;
7305 tree dummy;
7306 enum vect_def_type rhs_dt = vect_unknown_def_type;
7307 enum vect_def_type mask_dt = vect_unknown_def_type;
7308 tree dataref_ptr = NULL_TREE;
7309 tree dataref_offset = NULL_TREE;
7310 gimple *ptr_incr = NULL;
7311 int ncopies;
7312 int j;
7313 stmt_vec_info first_stmt_info;
7314 bool grouped_store;
7315 unsigned int group_size, i;
7316 vec<tree> oprnds = vNULL;
7317 vec<tree> result_chain = vNULL;
7318 vec<tree> vec_oprnds = vNULL;
7319 bool slp = (slp_node != NULL);
7320 unsigned int vec_num;
7321 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7322 tree aggr_type;
7323 gather_scatter_info gs_info;
7324 poly_uint64 vf;
7325 vec_load_store_type vls_type;
7326 tree ref_type;
7328 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7329 return false;
7331 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7332 && ! vec_stmt)
7333 return false;
7335 /* Is vectorizable store? */
7337 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7338 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7340 tree scalar_dest = gimple_assign_lhs (assign);
7341 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7342 && is_pattern_stmt_p (stmt_info))
7343 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7344 if (TREE_CODE (scalar_dest) != ARRAY_REF
7345 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7346 && TREE_CODE (scalar_dest) != INDIRECT_REF
7347 && TREE_CODE (scalar_dest) != COMPONENT_REF
7348 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7349 && TREE_CODE (scalar_dest) != REALPART_EXPR
7350 && TREE_CODE (scalar_dest) != MEM_REF)
7351 return false;
7353 else
7355 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7356 if (!call || !gimple_call_internal_p (call))
7357 return false;
7359 internal_fn ifn = gimple_call_internal_fn (call);
7360 if (!internal_store_fn_p (ifn))
7361 return false;
7363 if (slp_node != NULL)
7365 if (dump_enabled_p ())
7366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7367 "SLP of masked stores not supported.\n");
7368 return false;
7371 int mask_index = internal_fn_mask_index (ifn);
7372 if (mask_index >= 0
7373 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7374 &mask, NULL, &mask_dt, &mask_vectype))
7375 return false;
7378 op = vect_get_store_rhs (stmt_info);
7380 /* Cannot have hybrid store SLP -- that would mean storing to the
7381 same location twice. */
7382 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7384 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7385 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7387 if (loop_vinfo)
7389 loop = LOOP_VINFO_LOOP (loop_vinfo);
7390 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7392 else
7393 vf = 1;
7395 /* Multiple types in SLP are handled by creating the appropriate number of
7396 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7397 case of SLP. */
7398 if (slp)
7399 ncopies = 1;
7400 else
7401 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7403 gcc_assert (ncopies >= 1);
7405 /* FORNOW. This restriction should be relaxed. */
7406 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7408 if (dump_enabled_p ())
7409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7410 "multiple types in nested loop.\n");
7411 return false;
7414 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7415 op, &rhs_dt, &rhs_vectype, &vls_type))
7416 return false;
7418 elem_type = TREE_TYPE (vectype);
7419 vec_mode = TYPE_MODE (vectype);
7421 if (!STMT_VINFO_DATA_REF (stmt_info))
7422 return false;
7424 vect_memory_access_type memory_access_type;
7425 enum dr_alignment_support alignment_support_scheme;
7426 int misalignment;
7427 poly_int64 poffset;
7428 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7429 ncopies, &memory_access_type, &poffset,
7430 &alignment_support_scheme, &misalignment, &gs_info))
7431 return false;
7433 if (mask)
7435 if (memory_access_type == VMAT_CONTIGUOUS)
7437 if (!VECTOR_MODE_P (vec_mode)
7438 || !can_vec_mask_load_store_p (vec_mode,
7439 TYPE_MODE (mask_vectype), false))
7440 return false;
7442 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7443 && (memory_access_type != VMAT_GATHER_SCATTER
7444 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7446 if (dump_enabled_p ())
7447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7448 "unsupported access type for masked store.\n");
7449 return false;
7452 else
7454 /* FORNOW. In some cases can vectorize even if data-type not supported
7455 (e.g. - array initialization with 0). */
7456 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7457 return false;
7460 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7461 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7462 && memory_access_type != VMAT_GATHER_SCATTER
7463 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7464 if (grouped_store)
7466 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7467 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7468 group_size = DR_GROUP_SIZE (first_stmt_info);
7470 else
7472 first_stmt_info = stmt_info;
7473 first_dr_info = dr_info;
7474 group_size = vec_num = 1;
7477 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7479 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7480 memory_access_type))
7481 return false;
7484 if (!vec_stmt) /* transformation not required. */
7486 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7488 if (loop_vinfo
7489 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7490 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7491 group_size, memory_access_type,
7492 ncopies, &gs_info, mask);
7494 if (slp_node
7495 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7496 vectype))
7498 if (dump_enabled_p ())
7499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7500 "incompatible vector types for invariants\n");
7501 return false;
7504 if (dump_enabled_p ()
7505 && memory_access_type != VMAT_ELEMENTWISE
7506 && memory_access_type != VMAT_GATHER_SCATTER
7507 && alignment_support_scheme != dr_aligned)
7508 dump_printf_loc (MSG_NOTE, vect_location,
7509 "Vectorizing an unaligned access.\n");
7511 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7512 vect_model_store_cost (vinfo, stmt_info, ncopies,
7513 memory_access_type, alignment_support_scheme,
7514 misalignment, vls_type, slp_node, cost_vec);
7515 return true;
7517 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7519 /* Transform. */
7521 ensure_base_align (dr_info);
7523 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7525 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7526 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7527 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7528 tree ptr, var, scale, vec_mask;
7529 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7530 tree mask_halfvectype = mask_vectype;
7531 edge pe = loop_preheader_edge (loop);
7532 gimple_seq seq;
7533 basic_block new_bb;
7534 enum { NARROW, NONE, WIDEN } modifier;
7535 poly_uint64 scatter_off_nunits
7536 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7538 if (known_eq (nunits, scatter_off_nunits))
7539 modifier = NONE;
7540 else if (known_eq (nunits * 2, scatter_off_nunits))
7542 modifier = WIDEN;
7544 /* Currently gathers and scatters are only supported for
7545 fixed-length vectors. */
7546 unsigned int count = scatter_off_nunits.to_constant ();
7547 vec_perm_builder sel (count, count, 1);
7548 for (i = 0; i < (unsigned int) count; ++i)
7549 sel.quick_push (i | (count / 2));
7551 vec_perm_indices indices (sel, 1, count);
7552 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7553 indices);
7554 gcc_assert (perm_mask != NULL_TREE);
7556 else if (known_eq (nunits, scatter_off_nunits * 2))
7558 modifier = NARROW;
7560 /* Currently gathers and scatters are only supported for
7561 fixed-length vectors. */
7562 unsigned int count = nunits.to_constant ();
7563 vec_perm_builder sel (count, count, 1);
7564 for (i = 0; i < (unsigned int) count; ++i)
7565 sel.quick_push (i | (count / 2));
7567 vec_perm_indices indices (sel, 2, count);
7568 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7569 gcc_assert (perm_mask != NULL_TREE);
7570 ncopies *= 2;
7572 if (mask)
7573 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7575 else
7576 gcc_unreachable ();
7578 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7579 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7580 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7581 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7582 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7583 scaletype = TREE_VALUE (arglist);
7585 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7586 && TREE_CODE (rettype) == VOID_TYPE);
7588 ptr = fold_convert (ptrtype, gs_info.base);
7589 if (!is_gimple_min_invariant (ptr))
7591 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7592 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7593 gcc_assert (!new_bb);
7596 if (mask == NULL_TREE)
7598 mask_arg = build_int_cst (masktype, -1);
7599 mask_arg = vect_init_vector (vinfo, stmt_info,
7600 mask_arg, masktype, NULL);
7603 scale = build_int_cst (scaletype, gs_info.scale);
7605 auto_vec<tree> vec_oprnds0;
7606 auto_vec<tree> vec_oprnds1;
7607 auto_vec<tree> vec_masks;
7608 if (mask)
7610 tree mask_vectype = truth_type_for (vectype);
7611 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7612 modifier == NARROW
7613 ? ncopies / 2 : ncopies,
7614 mask, &vec_masks, mask_vectype);
7616 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7617 modifier == WIDEN
7618 ? ncopies / 2 : ncopies,
7619 gs_info.offset, &vec_oprnds0);
7620 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7621 modifier == NARROW
7622 ? ncopies / 2 : ncopies,
7623 op, &vec_oprnds1);
7624 for (j = 0; j < ncopies; ++j)
7626 if (modifier == WIDEN)
7628 if (j & 1)
7629 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7630 perm_mask, stmt_info, gsi);
7631 else
7632 op = vec_oprnd0 = vec_oprnds0[j / 2];
7633 src = vec_oprnd1 = vec_oprnds1[j];
7634 if (mask)
7635 mask_op = vec_mask = vec_masks[j];
7637 else if (modifier == NARROW)
7639 if (j & 1)
7640 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7641 perm_mask, stmt_info, gsi);
7642 else
7643 src = vec_oprnd1 = vec_oprnds1[j / 2];
7644 op = vec_oprnd0 = vec_oprnds0[j];
7645 if (mask)
7646 mask_op = vec_mask = vec_masks[j / 2];
7648 else
7650 op = vec_oprnd0 = vec_oprnds0[j];
7651 src = vec_oprnd1 = vec_oprnds1[j];
7652 if (mask)
7653 mask_op = vec_mask = vec_masks[j];
7656 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7658 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7659 TYPE_VECTOR_SUBPARTS (srctype)));
7660 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7661 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7662 gassign *new_stmt
7663 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7664 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7665 src = var;
7668 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7670 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7671 TYPE_VECTOR_SUBPARTS (idxtype)));
7672 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7673 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7674 gassign *new_stmt
7675 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7676 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7677 op = var;
7680 if (mask)
7682 tree utype;
7683 mask_arg = mask_op;
7684 if (modifier == NARROW)
7686 var = vect_get_new_ssa_name (mask_halfvectype,
7687 vect_simple_var);
7688 gassign *new_stmt
7689 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7690 : VEC_UNPACK_LO_EXPR,
7691 mask_op);
7692 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7693 mask_arg = var;
7695 tree optype = TREE_TYPE (mask_arg);
7696 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7697 utype = masktype;
7698 else
7699 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7700 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7701 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7702 gassign *new_stmt
7703 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7704 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7705 mask_arg = var;
7706 if (!useless_type_conversion_p (masktype, utype))
7708 gcc_assert (TYPE_PRECISION (utype)
7709 <= TYPE_PRECISION (masktype));
7710 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7711 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7712 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7713 mask_arg = var;
7717 gcall *new_stmt
7718 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7719 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7721 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7723 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7724 return true;
7726 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7727 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7729 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7730 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7732 if (grouped_store)
7734 /* FORNOW */
7735 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7737 /* We vectorize all the stmts of the interleaving group when we
7738 reach the last stmt in the group. */
7739 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7740 < DR_GROUP_SIZE (first_stmt_info)
7741 && !slp)
7743 *vec_stmt = NULL;
7744 return true;
7747 if (slp)
7749 grouped_store = false;
7750 /* VEC_NUM is the number of vect stmts to be created for this
7751 group. */
7752 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7753 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7754 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7755 == first_stmt_info);
7756 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7757 op = vect_get_store_rhs (first_stmt_info);
7759 else
7760 /* VEC_NUM is the number of vect stmts to be created for this
7761 group. */
7762 vec_num = group_size;
7764 ref_type = get_group_alias_ptr_type (first_stmt_info);
7766 else
7767 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7769 if (dump_enabled_p ())
7770 dump_printf_loc (MSG_NOTE, vect_location,
7771 "transform store. ncopies = %d\n", ncopies);
7773 if (memory_access_type == VMAT_ELEMENTWISE
7774 || memory_access_type == VMAT_STRIDED_SLP)
7776 gimple_stmt_iterator incr_gsi;
7777 bool insert_after;
7778 gimple *incr;
7779 tree offvar;
7780 tree ivstep;
7781 tree running_off;
7782 tree stride_base, stride_step, alias_off;
7783 tree vec_oprnd;
7784 tree dr_offset;
7785 unsigned int g;
7786 /* Checked by get_load_store_type. */
7787 unsigned int const_nunits = nunits.to_constant ();
7789 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7790 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7792 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7793 stride_base
7794 = fold_build_pointer_plus
7795 (DR_BASE_ADDRESS (first_dr_info->dr),
7796 size_binop (PLUS_EXPR,
7797 convert_to_ptrofftype (dr_offset),
7798 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7799 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7801 /* For a store with loop-invariant (but other than power-of-2)
7802 stride (i.e. not a grouped access) like so:
7804 for (i = 0; i < n; i += stride)
7805 array[i] = ...;
7807 we generate a new induction variable and new stores from
7808 the components of the (vectorized) rhs:
7810 for (j = 0; ; j += VF*stride)
7811 vectemp = ...;
7812 tmp1 = vectemp[0];
7813 array[j] = tmp1;
7814 tmp2 = vectemp[1];
7815 array[j + stride] = tmp2;
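	     ...

	 For example (illustrative numbers only): with a 4-element vector
	 type and stride == 3, four scalar element stores are emitted per
	 vector copy, to array[j], array[j + 3], array[j + 6] and
	 array[j + 9], and the new induction variable is bumped by
	 VF * stride == 12 elements per iteration.  */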
7819 unsigned nstores = const_nunits;
7820 unsigned lnel = 1;
7821 tree ltype = elem_type;
7822 tree lvectype = vectype;
7823 if (slp)
7825 if (group_size < const_nunits
7826 && const_nunits % group_size == 0)
7828 nstores = const_nunits / group_size;
7829 lnel = group_size;
7830 ltype = build_vector_type (elem_type, group_size);
7831 lvectype = vectype;
7833 /* First check if vec_extract optab doesn't support extraction
7834 of vector elts directly. */
7835 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7836 machine_mode vmode;
7837 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7838 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7839 group_size).exists (&vmode)
7840 || (convert_optab_handler (vec_extract_optab,
7841 TYPE_MODE (vectype), vmode)
7842 == CODE_FOR_nothing))
7844 /* Try to avoid emitting an extract of vector elements
7845 by performing the extracts using an integer type of the
7846 same size, extracting from a vector of those and then
7847 re-interpreting it as the original vector type if
7848 supported. */
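		  /* For instance (illustrative only): when storing groups of
		     two SImode elements out of a V8SI vector, view the vector
		     as V4DI and extract DImode elements, so a single extract
		     covers a whole group.  */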
7849 unsigned lsize
7850 = group_size * GET_MODE_BITSIZE (elmode);
7851 unsigned int lnunits = const_nunits / group_size;
7852 /* If we can't construct such a vector fall back to
7853 element extracts from the original vector type and
7854 element size stores. */
7855 if (int_mode_for_size (lsize, 0).exists (&elmode)
7856 && VECTOR_MODE_P (TYPE_MODE (vectype))
7857 && related_vector_mode (TYPE_MODE (vectype), elmode,
7858 lnunits).exists (&vmode)
7859 && (convert_optab_handler (vec_extract_optab,
7860 vmode, elmode)
7861 != CODE_FOR_nothing))
7863 nstores = lnunits;
7864 lnel = group_size;
7865 ltype = build_nonstandard_integer_type (lsize, 1);
7866 lvectype = build_vector_type (ltype, nstores);
7868 /* Else fall back to vector extraction anyway.
7869 Fewer stores are more important than avoiding spilling
7870 of the vector we extract from. Compared to the
7871 construction case in vectorizable_load no store-forwarding
7872 issue exists here for reasonable archs. */
7875 else if (group_size >= const_nunits
7876 && group_size % const_nunits == 0)
7878 nstores = 1;
7879 lnel = const_nunits;
7880 ltype = vectype;
7881 lvectype = vectype;
7883 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7884 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7887 ivstep = stride_step;
7888 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7889 build_int_cst (TREE_TYPE (ivstep), vf));
7891 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7893 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7894 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7895 create_iv (stride_base, ivstep, NULL,
7896 loop, &incr_gsi, insert_after,
7897 &offvar, NULL);
7898 incr = gsi_stmt (incr_gsi);
7900 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7902 alias_off = build_int_cst (ref_type, 0);
7903 stmt_vec_info next_stmt_info = first_stmt_info;
7904 for (g = 0; g < group_size; g++)
7906 running_off = offvar;
7907 if (g)
7909 tree size = TYPE_SIZE_UNIT (ltype);
7910 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7911 size);
7912 tree newoff = copy_ssa_name (running_off, NULL);
7913 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7914 running_off, pos);
7915 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7916 running_off = newoff;
7918 if (!slp)
7919 op = vect_get_store_rhs (next_stmt_info);
7920 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7921 op, &vec_oprnds);
7922 unsigned int group_el = 0;
7923 unsigned HOST_WIDE_INT
7924 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7925 for (j = 0; j < ncopies; j++)
7927 vec_oprnd = vec_oprnds[j];
7928 /* Pun the vector to extract from if necessary. */
7929 if (lvectype != vectype)
7931 tree tem = make_ssa_name (lvectype);
7932 gimple *pun
7933 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7934 lvectype, vec_oprnd));
7935 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7936 vec_oprnd = tem;
7938 for (i = 0; i < nstores; i++)
7940 tree newref, newoff;
7941 gimple *incr, *assign;
7942 tree size = TYPE_SIZE (ltype);
7943 /* Extract the i'th component. */
7944 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7945 bitsize_int (i), size);
7946 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7947 size, pos);
7949 elem = force_gimple_operand_gsi (gsi, elem, true,
7950 NULL_TREE, true,
7951 GSI_SAME_STMT);
7953 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7954 group_el * elsz);
7955 newref = build2 (MEM_REF, ltype,
7956 running_off, this_off);
7957 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7959 /* And store it to *running_off. */
7960 assign = gimple_build_assign (newref, elem);
7961 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7963 group_el += lnel;
7964 if (! slp
7965 || group_el == group_size)
7967 newoff = copy_ssa_name (running_off, NULL);
7968 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7969 running_off, stride_step);
7970 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7972 running_off = newoff;
7973 group_el = 0;
7975 if (g == group_size - 1
7976 && !slp)
7978 if (j == 0 && i == 0)
7979 *vec_stmt = assign;
7980 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7984 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7985 vec_oprnds.release ();
7986 if (slp)
7987 break;
7990 return true;
7993 auto_vec<tree> dr_chain (group_size);
7994 oprnds.create (group_size);
7996 gcc_assert (alignment_support_scheme);
7997 vec_loop_masks *loop_masks
7998 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7999 ? &LOOP_VINFO_MASKS (loop_vinfo)
8000 : NULL);
8001 vec_loop_lens *loop_lens
8002 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8003 ? &LOOP_VINFO_LENS (loop_vinfo)
8004 : NULL);
8006 /* Shouldn't go with length-based approach if fully masked. */
8007 gcc_assert (!loop_lens || !loop_masks);
8009 /* Targets with store-lane instructions must not require explicit
8010 realignment. vect_supportable_dr_alignment always returns either
8011 dr_aligned or dr_unaligned_supported for masked operations. */
8012 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8013 && !mask
8014 && !loop_masks)
8015 || alignment_support_scheme == dr_aligned
8016 || alignment_support_scheme == dr_unaligned_supported);
8018 tree offset = NULL_TREE;
8019 if (!known_eq (poffset, 0))
8020 offset = size_int (poffset);
8022 tree bump;
8023 tree vec_offset = NULL_TREE;
8024 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8026 aggr_type = NULL_TREE;
8027 bump = NULL_TREE;
8029 else if (memory_access_type == VMAT_GATHER_SCATTER)
8031 aggr_type = elem_type;
8032 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8033 &bump, &vec_offset);
8035 else
8037 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8038 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8039 else
8040 aggr_type = vectype;
8041 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8042 memory_access_type);
8045 if (mask)
8046 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8048 /* In case the vectorization factor (VF) is bigger than the number
8049 of elements that we can fit in a vectype (nunits), we have to generate
8050 more than one vector stmt, i.e. we need to "unroll" the
8051 vector stmt by a factor VF/nunits. */
8053 /* In case of interleaving (non-unit grouped access):
8055 S1: &base + 2 = x2
8056 S2: &base = x0
8057 S3: &base + 1 = x1
8058 S4: &base + 3 = x3
8060 We create vectorized stores starting from the base address (the access of
8061 the first stmt in the chain, S2 in the above example) when the last store
8062 stmt of the chain (S4) is reached:
8064 VS1: &base = vx2
8065 VS2: &base + vec_size*1 = vx0
8066 VS3: &base + vec_size*2 = vx1
8067 VS4: &base + vec_size*3 = vx3
8069 Then permutation statements are generated:
8071 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8072 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8075 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8076 (the order of the data-refs in the output of vect_permute_store_chain
8077 corresponds to the order of scalar stmts in the interleaving chain - see
8078 the documentation of vect_permute_store_chain()).
8080 In case of both multiple types and interleaving, the above vector stores and
8081 permutation stmts are created for every copy. The result vector stmts are
8082 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8083 STMT_VINFO_RELATED_STMT for the next copies.
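
     As a purely illustrative example, a grouped store like the above can
     arise from scalar code of the form

       for (i = 0; i < n; i++)
	 {
	   p[4 * i + 2] = x2;
	   p[4 * i] = x0;
	   p[4 * i + 1] = x1;
	   p[4 * i + 3] = x3;
	 }

     where the four stores form a single interleaving chain of size 4.  */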
8086 auto_vec<tree> vec_masks;
8087 tree vec_mask = NULL;
8088 auto_vec<tree> vec_offsets;
8089 auto_vec<vec<tree> > gvec_oprnds;
8090 gvec_oprnds.safe_grow_cleared (group_size, true);
8091 for (j = 0; j < ncopies; j++)
8093 gimple *new_stmt;
8094 if (j == 0)
8096 if (slp)
8098 /* Get vectorized arguments for SLP_NODE. */
8099 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8100 op, &vec_oprnds);
8101 vec_oprnd = vec_oprnds[0];
8103 else
8105 /* For interleaved stores we collect vectorized defs for all the
8106 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8107 used as an input to vect_permute_store_chain().
8109 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8110 and OPRNDS are of size 1. */
8111 stmt_vec_info next_stmt_info = first_stmt_info;
8112 for (i = 0; i < group_size; i++)
8114 /* Since gaps are not supported for interleaved stores,
8115 DR_GROUP_SIZE is the exact number of stmts in the chain.
8116 Therefore, NEXT_STMT_INFO can't be NULL. In case
8117 that there is no interleaving, DR_GROUP_SIZE is 1,
8118 and only one iteration of the loop will be executed. */
8119 op = vect_get_store_rhs (next_stmt_info);
8120 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8121 ncopies, op, &gvec_oprnds[i]);
8122 vec_oprnd = gvec_oprnds[i][0];
8123 dr_chain.quick_push (gvec_oprnds[i][0]);
8124 oprnds.quick_push (gvec_oprnds[i][0]);
8125 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8127 if (mask)
8129 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8130 mask, &vec_masks, mask_vectype);
8131 vec_mask = vec_masks[0];
8135 /* We should have caught mismatched types earlier. */
8136 gcc_assert (useless_type_conversion_p (vectype,
8137 TREE_TYPE (vec_oprnd)));
8138 bool simd_lane_access_p
8139 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8140 if (simd_lane_access_p
8141 && !loop_masks
8142 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8143 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8144 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8145 && integer_zerop (DR_INIT (first_dr_info->dr))
8146 && alias_sets_conflict_p (get_alias_set (aggr_type),
8147 get_alias_set (TREE_TYPE (ref_type))))
8149 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8150 dataref_offset = build_int_cst (ref_type, 0);
8152 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8154 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8155 slp_node, &gs_info, &dataref_ptr,
8156 &vec_offsets);
8157 vec_offset = vec_offsets[0];
8159 else
8160 dataref_ptr
8161 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8162 simd_lane_access_p ? loop : NULL,
8163 offset, &dummy, gsi, &ptr_incr,
8164 simd_lane_access_p, bump);
8166 else
8168 /* For interleaved stores we created vectorized defs for all the
8169 defs stored in OPRNDS in the previous iteration (previous copy).
8170 DR_CHAIN is then used as an input to vect_permute_store_chain().
8171 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8172 OPRNDS are of size 1. */
8173 for (i = 0; i < group_size; i++)
8175 vec_oprnd = gvec_oprnds[i][j];
8176 dr_chain[i] = gvec_oprnds[i][j];
8177 oprnds[i] = gvec_oprnds[i][j];
8179 if (mask)
8180 vec_mask = vec_masks[j];
8181 if (dataref_offset)
8182 dataref_offset
8183 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8184 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8185 vec_offset = vec_offsets[j];
8186 else
8187 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8188 stmt_info, bump);
8191 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8193 tree vec_array;
8195 /* Get an array into which we can store the individual vectors. */
8196 vec_array = create_vector_array (vectype, vec_num);
8198 /* Invalidate the current contents of VEC_ARRAY. This should
8199 become an RTL clobber too, which prevents the vector registers
8200 from being upward-exposed. */
8201 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8203 /* Store the individual vectors into the array. */
8204 for (i = 0; i < vec_num; i++)
8206 vec_oprnd = dr_chain[i];
8207 write_vector_array (vinfo, stmt_info,
8208 gsi, vec_oprnd, vec_array, i);
8211 tree final_mask = NULL;
8212 if (loop_masks)
8213 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8214 vectype, j);
8215 if (vec_mask)
8216 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8217 vec_mask, gsi);
8219 gcall *call;
8220 if (final_mask)
8222 /* Emit:
8223 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8224 VEC_ARRAY). */
8225 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8226 tree alias_ptr = build_int_cst (ref_type, align);
8227 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8228 dataref_ptr, alias_ptr,
8229 final_mask, vec_array);
8231 else
8233 /* Emit:
8234 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8235 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8236 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8237 vec_array);
8238 gimple_call_set_lhs (call, data_ref);
8240 gimple_call_set_nothrow (call, true);
8241 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8242 new_stmt = call;
8244 /* Record that VEC_ARRAY is now dead. */
8245 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8247 else
8249 new_stmt = NULL;
8250 if (grouped_store)
8252 if (j == 0)
8253 result_chain.create (group_size);
8254 /* Permute. */
8255 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8256 gsi, &result_chain);
8259 stmt_vec_info next_stmt_info = first_stmt_info;
8260 for (i = 0; i < vec_num; i++)
8262 unsigned misalign;
8263 unsigned HOST_WIDE_INT align;
8265 tree final_mask = NULL_TREE;
8266 if (loop_masks)
8267 final_mask = vect_get_loop_mask (gsi, loop_masks,
8268 vec_num * ncopies,
8269 vectype, vec_num * j + i);
8270 if (vec_mask)
8271 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8272 vec_mask, gsi);
8274 if (memory_access_type == VMAT_GATHER_SCATTER)
8276 tree scale = size_int (gs_info.scale);
8277 gcall *call;
8278 if (final_mask)
8279 call = gimple_build_call_internal
8280 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8281 scale, vec_oprnd, final_mask);
8282 else
8283 call = gimple_build_call_internal
8284 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8285 scale, vec_oprnd);
8286 gimple_call_set_nothrow (call, true);
8287 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8288 new_stmt = call;
8289 break;
8292 if (i > 0)
8293 /* Bump the vector pointer. */
8294 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8295 gsi, stmt_info, bump);
8297 if (slp)
8298 vec_oprnd = vec_oprnds[i];
8299 else if (grouped_store)
8300 /* For grouped stores vectorized defs are interleaved in
8301 vect_permute_store_chain(). */
8302 vec_oprnd = result_chain[i];
8304 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8305 if (alignment_support_scheme == dr_aligned)
8306 misalign = 0;
8307 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8309 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8310 misalign = 0;
8312 else
8313 misalign = misalignment;
8314 if (dataref_offset == NULL_TREE
8315 && TREE_CODE (dataref_ptr) == SSA_NAME)
8316 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8317 misalign);
8318 align = least_bit_hwi (misalign | align);
8320 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8322 tree perm_mask = perm_mask_for_reverse (vectype);
8323 tree perm_dest = vect_create_destination_var
8324 (vect_get_store_rhs (stmt_info), vectype);
8325 tree new_temp = make_ssa_name (perm_dest);
8327 /* Generate the permute statement. */
8328 gimple *perm_stmt
8329 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8330 vec_oprnd, perm_mask);
8331 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8333 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8334 vec_oprnd = new_temp;
8337 /* Arguments are ready. Create the new vector stmt. */
8338 if (final_mask)
8340 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8341 gcall *call
8342 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8343 dataref_ptr, ptr,
8344 final_mask, vec_oprnd);
8345 gimple_call_set_nothrow (call, true);
8346 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8347 new_stmt = call;
8349 else if (loop_lens)
8351 tree final_len
8352 = vect_get_loop_len (loop_vinfo, loop_lens,
8353 vec_num * ncopies, vec_num * j + i);
8354 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8355 machine_mode vmode = TYPE_MODE (vectype);
8356 opt_machine_mode new_ovmode
8357 = get_len_load_store_mode (vmode, false);
8358 machine_mode new_vmode = new_ovmode.require ();
8359 /* Need conversion if it's wrapped with VnQI. */
8360 if (vmode != new_vmode)
8362 tree new_vtype
8363 = build_vector_type_for_mode (unsigned_intQI_type_node,
8364 new_vmode);
8365 tree var
8366 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8367 vec_oprnd
8368 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8369 gassign *new_stmt
8370 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8371 vec_oprnd);
8372 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8373 gsi);
8374 vec_oprnd = var;
8376 gcall *call
8377 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8378 ptr, final_len, vec_oprnd);
8379 gimple_call_set_nothrow (call, true);
8380 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8381 new_stmt = call;
8383 else
8385 data_ref = fold_build2 (MEM_REF, vectype,
8386 dataref_ptr,
8387 dataref_offset
8388 ? dataref_offset
8389 : build_int_cst (ref_type, 0));
8390 if (alignment_support_scheme == dr_aligned)
8392 else
8393 TREE_TYPE (data_ref)
8394 = build_aligned_type (TREE_TYPE (data_ref),
8395 align * BITS_PER_UNIT);
8396 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8397 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8398 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8401 if (slp)
8402 continue;
8404 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8405 if (!next_stmt_info)
8406 break;
8409 if (!slp)
8411 if (j == 0)
8412 *vec_stmt = new_stmt;
8413 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8417 for (i = 0; i < group_size; ++i)
8419 vec<tree> oprndsi = gvec_oprnds[i];
8420 oprndsi.release ();
8422 oprnds.release ();
8423 result_chain.release ();
8424 vec_oprnds.release ();
8426 return true;
8429 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8430 VECTOR_CST mask. No checks are made that the target platform supports the
8431 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8432 vect_gen_perm_mask_checked. */
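/* For example (illustrative only): for a 4-element vector, a reversing
   permutation is described by SEL = { 3, 2, 1, 0 }, and vect_gen_perm_mask_any
   returns the VECTOR_CST { 3, 2, 1, 0 } of type vector(4) ssizetype.  */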
8434 tree
8435 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8437 tree mask_type;
8439 poly_uint64 nunits = sel.length ();
8440 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8442 mask_type = build_vector_type (ssizetype, nunits);
8443 return vec_perm_indices_to_tree (mask_type, sel);
8446 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8447 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8449 tree
8450 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8452 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8453 return vect_gen_perm_mask_any (vectype, sel);
8456 /* Given vector variables X and Y that were generated for the scalar
8457 STMT_INFO, generate instructions to permute the vector elements of X and Y
8458 using permutation mask MASK_VEC, insert them at *GSI and return the
8459 permuted vector variable. */
8461 static tree
8462 permute_vec_elements (vec_info *vinfo,
8463 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8464 gimple_stmt_iterator *gsi)
8466 tree vectype = TREE_TYPE (x);
8467 tree perm_dest, data_ref;
8468 gimple *perm_stmt;
8470 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8471 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8472 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8473 else
8474 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8475 data_ref = make_ssa_name (perm_dest);
8477 /* Generate the permute statement. */
8478 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8479 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8481 return data_ref;
8484 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8485 inserting them on the loop's preheader edge. Returns true if we
8486 were successful in doing so (and thus STMT_INFO can then be moved),
8487 otherwise returns false. */
8489 static bool
8490 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8492 ssa_op_iter i;
8493 tree op;
8494 bool any = false;
8496 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8498 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8499 if (!gimple_nop_p (def_stmt)
8500 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8502 /* Make sure we don't need to recurse. While we could do
8503 so in simple cases, when there are more complex use webs
8504 we don't have an easy way to preserve stmt order to fulfil
8505 dependencies within them. */
8506 tree op2;
8507 ssa_op_iter i2;
8508 if (gimple_code (def_stmt) == GIMPLE_PHI)
8509 return false;
8510 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8512 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8513 if (!gimple_nop_p (def_stmt2)
8514 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8515 return false;
8517 any = true;
8521 if (!any)
8522 return true;
8524 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8526 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8527 if (!gimple_nop_p (def_stmt)
8528 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8530 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8531 gsi_remove (&gsi, false);
8532 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8536 return true;
8539 /* vectorizable_load.
8541 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8542 that can be vectorized.
8543 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8544 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8545 Return true if STMT_INFO is vectorizable in this way. */
8547 static bool
8548 vectorizable_load (vec_info *vinfo,
8549 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8550 gimple **vec_stmt, slp_tree slp_node,
8551 stmt_vector_for_cost *cost_vec)
8553 tree scalar_dest;
8554 tree vec_dest = NULL;
8555 tree data_ref = NULL;
8556 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8557 class loop *loop = NULL;
8558 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8559 bool nested_in_vect_loop = false;
8560 tree elem_type;
8561 tree new_temp;
8562 machine_mode mode;
8563 tree dummy;
8564 tree dataref_ptr = NULL_TREE;
8565 tree dataref_offset = NULL_TREE;
8566 gimple *ptr_incr = NULL;
8567 int ncopies;
8568 int i, j;
8569 unsigned int group_size;
8570 poly_uint64 group_gap_adj;
8571 tree msq = NULL_TREE, lsq;
8572 tree realignment_token = NULL_TREE;
8573 gphi *phi = NULL;
8574 vec<tree> dr_chain = vNULL;
8575 bool grouped_load = false;
8576 stmt_vec_info first_stmt_info;
8577 stmt_vec_info first_stmt_info_for_drptr = NULL;
8578 bool compute_in_loop = false;
8579 class loop *at_loop;
8580 int vec_num;
8581 bool slp = (slp_node != NULL);
8582 bool slp_perm = false;
8583 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8584 poly_uint64 vf;
8585 tree aggr_type;
8586 gather_scatter_info gs_info;
8587 tree ref_type;
8588 enum vect_def_type mask_dt = vect_unknown_def_type;
8590 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8591 return false;
8593 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8594 && ! vec_stmt)
8595 return false;
8597 if (!STMT_VINFO_DATA_REF (stmt_info))
8598 return false;
8600 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8601 int mask_index = -1;
8602 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8604 scalar_dest = gimple_assign_lhs (assign);
8605 if (TREE_CODE (scalar_dest) != SSA_NAME)
8606 return false;
8608 tree_code code = gimple_assign_rhs_code (assign);
8609 if (code != ARRAY_REF
8610 && code != BIT_FIELD_REF
8611 && code != INDIRECT_REF
8612 && code != COMPONENT_REF
8613 && code != IMAGPART_EXPR
8614 && code != REALPART_EXPR
8615 && code != MEM_REF
8616 && TREE_CODE_CLASS (code) != tcc_declaration)
8617 return false;
8619 else
8621 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8622 if (!call || !gimple_call_internal_p (call))
8623 return false;
8625 internal_fn ifn = gimple_call_internal_fn (call);
8626 if (!internal_load_fn_p (ifn))
8627 return false;
8629 scalar_dest = gimple_call_lhs (call);
8630 if (!scalar_dest)
8631 return false;
8633 mask_index = internal_fn_mask_index (ifn);
8634 /* ??? For SLP the mask operand is always last. */
8635 if (mask_index >= 0 && slp_node)
8636 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8637 if (mask_index >= 0
8638 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8639 &mask, NULL, &mask_dt, &mask_vectype))
8640 return false;
8643 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8644 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8646 if (loop_vinfo)
8648 loop = LOOP_VINFO_LOOP (loop_vinfo);
8649 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8650 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8652 else
8653 vf = 1;
8655 /* Multiple types in SLP are handled by creating the appropriate number of
8656 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8657 case of SLP. */
8658 if (slp)
8659 ncopies = 1;
8660 else
8661 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8663 gcc_assert (ncopies >= 1);
8665 /* FORNOW. This restriction should be relaxed. */
8666 if (nested_in_vect_loop && ncopies > 1)
8668 if (dump_enabled_p ())
8669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8670 "multiple types in nested loop.\n");
8671 return false;
8674 /* Invalidate assumptions made by dependence analysis when vectorization
8675 on the unrolled body effectively re-orders stmts. */
8676 if (ncopies > 1
8677 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8678 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8679 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8681 if (dump_enabled_p ())
8682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8683 "cannot perform implicit CSE when unrolling "
8684 "with negative dependence distance\n");
8685 return false;
8688 elem_type = TREE_TYPE (vectype);
8689 mode = TYPE_MODE (vectype);
8691 /* FORNOW. In some cases can vectorize even if data-type not supported
8692 (e.g. - data copies). */
8693 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8695 if (dump_enabled_p ())
8696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8697 "Aligned load, but unsupported type.\n");
8698 return false;
8701 /* Check if the load is a part of an interleaving chain. */
8702 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8704 grouped_load = true;
8705 /* FORNOW */
8706 gcc_assert (!nested_in_vect_loop);
8707 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8709 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8710 group_size = DR_GROUP_SIZE (first_stmt_info);
8712 /* Refuse non-SLP vectorization of SLP-only groups. */
8713 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8715 if (dump_enabled_p ())
8716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8717 "cannot vectorize load in non-SLP mode.\n");
8718 return false;
8721 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8723 slp_perm = true;
8725 if (!loop_vinfo)
8727 /* In BB vectorization we may not actually use a loaded vector
8728 accessing elements in excess of DR_GROUP_SIZE. */
8729 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8730 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8731 unsigned HOST_WIDE_INT nunits;
8732 unsigned j, k, maxk = 0;
8733 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8734 if (k > maxk)
8735 maxk = k;
8736 tree vectype = SLP_TREE_VECTYPE (slp_node);
8737 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8738 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8740 if (dump_enabled_p ())
8741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8742 "BB vectorization with gaps at the end of "
8743 "a load is not supported\n");
8744 return false;
8748 auto_vec<tree> tem;
8749 unsigned n_perms;
8750 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8751 true, &n_perms))
8753 if (dump_enabled_p ())
8754 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8755 vect_location,
8756 "unsupported load permutation\n");
8757 return false;
8761 /* Invalidate assumptions made by dependence analysis when vectorization
8762 on the unrolled body effectively re-orders stmts. */
8763 if (!PURE_SLP_STMT (stmt_info)
8764 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8765 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8766 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8768 if (dump_enabled_p ())
8769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8770 "cannot perform implicit CSE when performing "
8771 "group loads with negative dependence distance\n");
8772 return false;
8775 else
8776 group_size = 1;
8778 vect_memory_access_type memory_access_type;
8779 enum dr_alignment_support alignment_support_scheme;
8780 int misalignment;
8781 poly_int64 poffset;
8782 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8783 ncopies, &memory_access_type, &poffset,
8784 &alignment_support_scheme, &misalignment, &gs_info))
8785 return false;
8787 if (mask)
8789 if (memory_access_type == VMAT_CONTIGUOUS)
8791 machine_mode vec_mode = TYPE_MODE (vectype);
8792 if (!VECTOR_MODE_P (vec_mode)
8793 || !can_vec_mask_load_store_p (vec_mode,
8794 TYPE_MODE (mask_vectype), true))
8795 return false;
8797 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8798 && memory_access_type != VMAT_GATHER_SCATTER)
8800 if (dump_enabled_p ())
8801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8802 "unsupported access type for masked load.\n");
8803 return false;
8805 else if (memory_access_type == VMAT_GATHER_SCATTER
8806 && gs_info.ifn == IFN_LAST
8807 && !gs_info.decl)
8809 if (dump_enabled_p ())
8810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8811 "unsupported masked emulated gather.\n");
8812 return false;
8816 if (!vec_stmt) /* transformation not required. */
8818 if (slp_node
8819 && mask
8820 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8821 mask_vectype))
8823 if (dump_enabled_p ())
8824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8825 "incompatible vector types for invariants\n");
8826 return false;
8829 if (!slp)
8830 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8832 if (loop_vinfo
8833 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8834 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8835 group_size, memory_access_type,
8836 ncopies, &gs_info, mask);
8838 if (dump_enabled_p ()
8839 && memory_access_type != VMAT_ELEMENTWISE
8840 && memory_access_type != VMAT_GATHER_SCATTER
8841 && alignment_support_scheme != dr_aligned)
8842 dump_printf_loc (MSG_NOTE, vect_location,
8843 "Vectorizing an unaligned access.\n");
8845 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8846 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8847 alignment_support_scheme, misalignment,
8848 &gs_info, slp_node, cost_vec);
8849 return true;
8852 if (!slp)
8853 gcc_assert (memory_access_type
8854 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8856 if (dump_enabled_p ())
8857 dump_printf_loc (MSG_NOTE, vect_location,
8858 "transform load. ncopies = %d\n", ncopies);
8860 /* Transform. */
8862 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8863 ensure_base_align (dr_info);
8865 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8867 vect_build_gather_load_calls (vinfo,
8868 stmt_info, gsi, vec_stmt, &gs_info, mask);
8869 return true;
8872 if (memory_access_type == VMAT_INVARIANT)
8874 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8875 /* If we have versioned for aliasing or the loop doesn't
8876 have any data dependencies that would preclude this,
8877 then we are sure this is a loop invariant load and
8878 thus we can insert it on the preheader edge. */
8879 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8880 && !nested_in_vect_loop
8881 && hoist_defs_of_uses (stmt_info, loop));
8882 if (hoist_p)
8884 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8885 if (dump_enabled_p ())
8886 dump_printf_loc (MSG_NOTE, vect_location,
8887 "hoisting out of the vectorized loop: %G", stmt);
8888 scalar_dest = copy_ssa_name (scalar_dest);
8889 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8890 gsi_insert_on_edge_immediate
8891 (loop_preheader_edge (loop),
8892 gimple_build_assign (scalar_dest, rhs));
8894 /* These copies are all equivalent, but currently the representation
8895 requires a separate STMT_VINFO_VEC_STMT for each one. */
8896 gimple_stmt_iterator gsi2 = *gsi;
8897 gsi_next (&gsi2);
8898 for (j = 0; j < ncopies; j++)
8900 if (hoist_p)
8901 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8902 vectype, NULL);
8903 else
8904 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8905 vectype, &gsi2);
8906 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8907 if (slp)
8908 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8909 else
8911 if (j == 0)
8912 *vec_stmt = new_stmt;
8913 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8916 return true;
8919 if (memory_access_type == VMAT_ELEMENTWISE
8920 || memory_access_type == VMAT_STRIDED_SLP)
8922 gimple_stmt_iterator incr_gsi;
8923 bool insert_after;
8924 tree offvar;
8925 tree ivstep;
8926 tree running_off;
8927 vec<constructor_elt, va_gc> *v = NULL;
8928 tree stride_base, stride_step, alias_off;
8929 /* Checked by get_load_store_type. */
8930 unsigned int const_nunits = nunits.to_constant ();
8931 unsigned HOST_WIDE_INT cst_offset = 0;
8932 tree dr_offset;
8934 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8935 gcc_assert (!nested_in_vect_loop);
8937 if (grouped_load)
8939 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8940 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8942 else
8944 first_stmt_info = stmt_info;
8945 first_dr_info = dr_info;
8947 if (slp && grouped_load)
8949 group_size = DR_GROUP_SIZE (first_stmt_info);
8950 ref_type = get_group_alias_ptr_type (first_stmt_info);
8952 else
8954 if (grouped_load)
8955 cst_offset
8956 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8957 * vect_get_place_in_interleaving_chain (stmt_info,
8958 first_stmt_info));
8959 group_size = 1;
8960 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8963 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8964 stride_base
8965 = fold_build_pointer_plus
8966 (DR_BASE_ADDRESS (first_dr_info->dr),
8967 size_binop (PLUS_EXPR,
8968 convert_to_ptrofftype (dr_offset),
8969 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8970 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8972 /* For a load with loop-invariant (but other than power-of-2)
8973 stride (i.e. not a grouped access) like so:
8975 for (i = 0; i < n; i += stride)
8976 ... = array[i];
8978 we generate a new induction variable and new accesses to
8979 form a new vector (or vectors, depending on ncopies):
8981 for (j = 0; ; j += VF*stride)
8982 tmp1 = array[j];
8983 tmp2 = array[j + stride];
8985 vectemp = {tmp1, tmp2, ...}
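/* Concretely (a sketch): ivstep below is VF * stride, and each of the
   ncopies transform iterations emits nloads scalar loads from the running
   offset, collecting the results in a CONSTRUCTOR of lvectype.  */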
8988 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8989 build_int_cst (TREE_TYPE (stride_step), vf));
8991 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8993 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8994 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8995 create_iv (stride_base, ivstep, NULL,
8996 loop, &incr_gsi, insert_after,
8997 &offvar, NULL);
8999 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9001 running_off = offvar;
9002 alias_off = build_int_cst (ref_type, 0);
9003 int nloads = const_nunits;
9004 int lnel = 1;
9005 tree ltype = TREE_TYPE (vectype);
9006 tree lvectype = vectype;
9007 auto_vec<tree> dr_chain;
9008 if (memory_access_type == VMAT_STRIDED_SLP)
9010 if (group_size < const_nunits)
9012 /* First check if vec_init optab supports construction from vector
9013 elts directly. Otherwise avoid emitting a constructor of
9014 vector elements by performing the loads using an integer type
9015 of the same size, constructing a vector of those and then
9016 re-interpreting it as the original vector type. This avoids a
9017 huge runtime penalty due to the general inability to perform
9018 store forwarding from smaller stores to a larger load. */
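/* E.g. (a sketch) for a V4SI vectype with group_size 2, each group is
   loaded as a single V2SI or DImode piece; two such pieces are combined
   into one vector and view-converted back to V4SI when the piece type
   differs from the original element type.  */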
9019 tree ptype;
9020 tree vtype
9021 = vector_vector_composition_type (vectype,
9022 const_nunits / group_size,
9023 &ptype);
9024 if (vtype != NULL_TREE)
9026 nloads = const_nunits / group_size;
9027 lnel = group_size;
9028 lvectype = vtype;
9029 ltype = ptype;
9032 else
9034 nloads = 1;
9035 lnel = const_nunits;
9036 ltype = vectype;
9038 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9040 /* Load vector(1) scalar_type if the vectype has just one element. */
9041 else if (nloads == 1)
9042 ltype = vectype;
9044 if (slp)
9046 /* For SLP permutation support we need to load the whole group,
9047 not only the number of vector stmts the permutation result
9048 fits in. */
9049 if (slp_perm)
9051 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9052 variable VF. */
9053 unsigned int const_vf = vf.to_constant ();
9054 ncopies = CEIL (group_size * const_vf, const_nunits);
9055 dr_chain.create (ncopies);
9057 else
9058 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9060 unsigned int group_el = 0;
9061 unsigned HOST_WIDE_INT
9062 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9063 for (j = 0; j < ncopies; j++)
9065 if (nloads > 1)
9066 vec_alloc (v, nloads);
9067 gimple *new_stmt = NULL;
9068 for (i = 0; i < nloads; i++)
9070 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9071 group_el * elsz + cst_offset);
9072 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9073 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9074 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9075 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9076 if (nloads > 1)
9077 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9078 gimple_assign_lhs (new_stmt));
9080 group_el += lnel;
9081 if (! slp
9082 || group_el == group_size)
9084 tree newoff = copy_ssa_name (running_off);
9085 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9086 running_off, stride_step);
9087 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9089 running_off = newoff;
9090 group_el = 0;
9093 if (nloads > 1)
9095 tree vec_inv = build_constructor (lvectype, v);
9096 new_temp = vect_init_vector (vinfo, stmt_info,
9097 vec_inv, lvectype, gsi);
9098 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9099 if (lvectype != vectype)
9101 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9102 VIEW_CONVERT_EXPR,
9103 build1 (VIEW_CONVERT_EXPR,
9104 vectype, new_temp));
9105 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9109 if (slp)
9111 if (slp_perm)
9112 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9113 else
9114 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9116 else
9118 if (j == 0)
9119 *vec_stmt = new_stmt;
9120 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9123 if (slp_perm)
9125 unsigned n_perms;
9126 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9127 false, &n_perms);
9129 return true;
9132 if (memory_access_type == VMAT_GATHER_SCATTER
9133 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9134 grouped_load = false;
9136 if (grouped_load)
9138 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9139 group_size = DR_GROUP_SIZE (first_stmt_info);
9140 /* For SLP vectorization we directly vectorize a subchain
9141 without permutation. */
9142 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9143 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9144 /* For BB vectorization always use the first stmt to base
9145 the data ref pointer on. */
9146 if (bb_vinfo)
9147 first_stmt_info_for_drptr
9148 = vect_find_first_scalar_stmt_in_slp (slp_node);
9150 /* Check if the chain of loads is already vectorized. */
9151 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9152 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9153 ??? But we can only do so if there is exactly one
9154 as we have no way to get at the rest. Leave the CSE
9155 opportunity alone.
9156 ??? With the group load eventually participating
9157 in multiple different permutations (having multiple
9158 slp nodes which refer to the same group) the CSE
9159 would even produce wrong code. See PR56270. */
9160 && !slp)
9162 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9163 return true;
9165 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9166 group_gap_adj = 0;
9168 /* VEC_NUM is the number of vect stmts to be created for this group. */
9169 if (slp)
9171 grouped_load = false;
9172 /* If an SLP permutation is from N elements to N elements,
9173 and if one vector holds a whole number of N, we can load
9174 the inputs to the permutation in the same way as an
9175 unpermuted sequence. In other cases we need to load the
9176 whole group, not only the number of vector stmts the
9177 permutation result fits in. */
9178 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9179 if (slp_perm
9180 && (group_size != scalar_lanes
9181 || !multiple_p (nunits, group_size)))
9183 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9184 variable VF; see vect_transform_slp_perm_load. */
9185 unsigned int const_vf = vf.to_constant ();
9186 unsigned int const_nunits = nunits.to_constant ();
9187 vec_num = CEIL (group_size * const_vf, const_nunits);
9188 group_gap_adj = vf * group_size - nunits * vec_num;
9190 else
9192 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9193 group_gap_adj
9194 = group_size - scalar_lanes;
9197 else
9198 vec_num = group_size;
9200 ref_type = get_group_alias_ptr_type (first_stmt_info);
9202 else
9204 first_stmt_info = stmt_info;
9205 first_dr_info = dr_info;
9206 group_size = vec_num = 1;
9207 group_gap_adj = 0;
9208 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9211 gcc_assert (alignment_support_scheme);
9212 vec_loop_masks *loop_masks
9213 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9214 ? &LOOP_VINFO_MASKS (loop_vinfo)
9215 : NULL);
9216 vec_loop_lens *loop_lens
9217 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9218 ? &LOOP_VINFO_LENS (loop_vinfo)
9219 : NULL);
9221 /* We shouldn't use the length-based approach if the loop is fully masked. */
9222 gcc_assert (!loop_lens || !loop_masks);
9224 /* Targets with store-lane instructions must not require explicit
9225 realignment. vect_supportable_dr_alignment always returns either
9226 dr_aligned or dr_unaligned_supported for masked operations. */
9227 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9228 && !mask
9229 && !loop_masks)
9230 || alignment_support_scheme == dr_aligned
9231 || alignment_support_scheme == dr_unaligned_supported);
9233 /* In case the vectorization factor (VF) is bigger than the number
9234 of elements that we can fit in a vectype (nunits), we have to generate
9235 more than one vector stmt - i.e - we need to "unroll" the
9236 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9237 from one copy of the vector stmt to the next, in the field
9238 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9239 stages to find the correct vector defs to be used when vectorizing
9240 stmts that use the defs of the current stmt. The example below
9241 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9242 need to create 4 vectorized stmts):
9244 before vectorization:
9245 RELATED_STMT VEC_STMT
9246 S1: x = memref - -
9247 S2: z = x + 1 - -
9249 step 1: vectorize stmt S1:
9250 We first create the vector stmt VS1_0, and, as usual, record a
9251 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9252 Next, we create the vector stmt VS1_1, and record a pointer to
9253 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9254 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9255 stmts and pointers:
9256 RELATED_STMT VEC_STMT
9257 VS1_0: vx0 = memref0 VS1_1 -
9258 VS1_1: vx1 = memref1 VS1_2 -
9259 VS1_2: vx2 = memref2 VS1_3 -
9260 VS1_3: vx3 = memref3 - -
9261 S1: x = load - VS1_0
9262 S2: z = x + 1 - -
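/* step 2 (not shown above) vectorizes S2 in the same way, creating
   VS2_0..VS2_3 whose operands are found through the VS1_* chain that
   was just recorded.  */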
9265 /* In case of interleaving (non-unit grouped access):
9267 S1: x2 = &base + 2
9268 S2: x0 = &base
9269 S3: x1 = &base + 1
9270 S4: x3 = &base + 3
9272 Vectorized loads are created in the order of memory accesses
9273 starting from the access of the first stmt of the chain:
9275 VS1: vx0 = &base
9276 VS2: vx1 = &base + vec_size*1
9277 VS3: vx3 = &base + vec_size*2
9278 VS4: vx4 = &base + vec_size*3
9280 Then permutation statements are generated:
9282 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9283 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9286 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9287 (the order of the data-refs in the output of vect_permute_load_chain
9288 corresponds to the order of scalar stmts in the interleaving chain - see
9289 the documentation of vect_permute_load_chain()).
9290 The generation of permutation stmts and recording them in
9291 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9293 In case of both multiple types and interleaving, the vector loads and
9294 permutation stmts above are created for every copy. The result vector
9295 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9296 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9298 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9299 on a target that supports unaligned accesses (dr_unaligned_supported)
9300 we generate the following code:
9301 p = initial_addr;
9302 indx = 0;
9303 loop {
9304 p = p + indx * vectype_size;
9305 vec_dest = *(p);
9306 indx = indx + 1;
9309 Otherwise, the data reference is potentially unaligned on a target that
9310 does not support unaligned accesses (dr_explicit_realign_optimized) -
9311 then generate the following code, in which the data in each iteration is
9312 obtained by two vector loads, one from the previous iteration, and one
9313 from the current iteration:
9314 p1 = initial_addr;
9315 msq_init = *(floor(p1))
9316 p2 = initial_addr + VS - 1;
9317 realignment_token = call target_builtin;
9318 indx = 0;
9319 loop {
9320 p2 = p2 + indx * vectype_size
9321 lsq = *(floor(p2))
9322 vec_dest = realign_load (msq, lsq, realignment_token)
9323 indx = indx + 1;
9324 msq = lsq;
9325 } */
9327 /* If the misalignment remains the same throughout the execution of the
9328 loop, we can create the init_addr and permutation mask at the loop
9329 preheader. Otherwise, it needs to be created inside the loop.
9330 This can only occur when vectorizing memory accesses in the inner-loop
9331 nested within an outer-loop that is being vectorized. */
9333 if (nested_in_vect_loop
9334 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9335 GET_MODE_SIZE (TYPE_MODE (vectype))))
9337 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9338 compute_in_loop = true;
9341 bool diff_first_stmt_info
9342 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9344 tree offset = NULL_TREE;
9345 if ((alignment_support_scheme == dr_explicit_realign_optimized
9346 || alignment_support_scheme == dr_explicit_realign)
9347 && !compute_in_loop)
9349 /* If we have a different first_stmt_info, we can't set up the realignment
9350 here, since we can't guarantee that the first_stmt_info DR has been
9351 initialized yet; instead use the first_stmt_info_for_drptr DR, bumping it
9352 by the distance from the first_stmt_info DR as done below. */
9353 if (!diff_first_stmt_info)
9354 msq = vect_setup_realignment (vinfo,
9355 first_stmt_info, gsi, &realignment_token,
9356 alignment_support_scheme, NULL_TREE,
9357 &at_loop);
9358 if (alignment_support_scheme == dr_explicit_realign_optimized)
9360 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9361 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9362 size_one_node);
9363 gcc_assert (!first_stmt_info_for_drptr);
9366 else
9367 at_loop = loop;
9369 if (!known_eq (poffset, 0))
9370 offset = (offset
9371 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9372 : size_int (poffset));
9374 tree bump;
9375 tree vec_offset = NULL_TREE;
9376 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9378 aggr_type = NULL_TREE;
9379 bump = NULL_TREE;
9381 else if (memory_access_type == VMAT_GATHER_SCATTER)
9383 aggr_type = elem_type;
9384 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9385 &bump, &vec_offset);
9387 else
9389 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9390 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9391 else
9392 aggr_type = vectype;
9393 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9394 memory_access_type);
9397 vec<tree> vec_offsets = vNULL;
9398 auto_vec<tree> vec_masks;
9399 if (mask)
9401 if (slp_node)
9402 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9403 &vec_masks);
9404 else
9405 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9406 &vec_masks, mask_vectype);
9408 tree vec_mask = NULL_TREE;
9409 poly_uint64 group_elt = 0;
9410 for (j = 0; j < ncopies; j++)
9412 /* 1. Create the vector or array pointer update chain. */
9413 if (j == 0)
9415 bool simd_lane_access_p
9416 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9417 if (simd_lane_access_p
9418 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9419 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9420 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9421 && integer_zerop (DR_INIT (first_dr_info->dr))
9422 && alias_sets_conflict_p (get_alias_set (aggr_type),
9423 get_alias_set (TREE_TYPE (ref_type)))
9424 && (alignment_support_scheme == dr_aligned
9425 || alignment_support_scheme == dr_unaligned_supported))
9427 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9428 dataref_offset = build_int_cst (ref_type, 0);
9430 else if (diff_first_stmt_info)
9432 dataref_ptr
9433 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9434 aggr_type, at_loop, offset, &dummy,
9435 gsi, &ptr_incr, simd_lane_access_p,
9436 bump);
9437 /* Adjust the pointer by the difference to first_stmt. */
9438 data_reference_p ptrdr
9439 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9440 tree diff
9441 = fold_convert (sizetype,
9442 size_binop (MINUS_EXPR,
9443 DR_INIT (first_dr_info->dr),
9444 DR_INIT (ptrdr)));
9445 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9446 stmt_info, diff);
9447 if (alignment_support_scheme == dr_explicit_realign)
9449 msq = vect_setup_realignment (vinfo,
9450 first_stmt_info_for_drptr, gsi,
9451 &realignment_token,
9452 alignment_support_scheme,
9453 dataref_ptr, &at_loop);
9454 gcc_assert (!compute_in_loop);
9457 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9459 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9460 slp_node, &gs_info, &dataref_ptr,
9461 &vec_offsets);
9463 else
9464 dataref_ptr
9465 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9466 at_loop,
9467 offset, &dummy, gsi, &ptr_incr,
9468 simd_lane_access_p, bump);
9469 if (mask)
9470 vec_mask = vec_masks[0];
9472 else
9474 if (dataref_offset)
9475 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9476 bump);
9477 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9478 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9479 stmt_info, bump);
9480 if (mask)
9481 vec_mask = vec_masks[j];
9484 if (grouped_load || slp_perm)
9485 dr_chain.create (vec_num);
9487 gimple *new_stmt = NULL;
9488 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9490 tree vec_array;
9492 vec_array = create_vector_array (vectype, vec_num);
9494 tree final_mask = NULL_TREE;
9495 if (loop_masks)
9496 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9497 vectype, j);
9498 if (vec_mask)
9499 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9500 vec_mask, gsi);
9502 gcall *call;
9503 if (final_mask)
9505 /* Emit:
9506 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9507 VEC_MASK). */
9508 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9509 tree alias_ptr = build_int_cst (ref_type, align);
9510 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9511 dataref_ptr, alias_ptr,
9512 final_mask);
9514 else
9516 /* Emit:
9517 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9518 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9519 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9521 gimple_call_set_lhs (call, vec_array);
9522 gimple_call_set_nothrow (call, true);
9523 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9524 new_stmt = call;
9526 /* Extract each vector into an SSA_NAME. */
9527 for (i = 0; i < vec_num; i++)
9529 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9530 vec_array, i);
9531 dr_chain.quick_push (new_temp);
9534 /* Record the mapping between SSA_NAMEs and statements. */
9535 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9537 /* Record that VEC_ARRAY is now dead. */
9538 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9540 else
9542 for (i = 0; i < vec_num; i++)
9544 tree final_mask = NULL_TREE;
9545 if (loop_masks
9546 && memory_access_type != VMAT_INVARIANT)
9547 final_mask = vect_get_loop_mask (gsi, loop_masks,
9548 vec_num * ncopies,
9549 vectype, vec_num * j + i);
9550 if (vec_mask)
9551 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9552 vec_mask, gsi);
9554 if (i > 0)
9555 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9556 gsi, stmt_info, bump);
9558 /* 2. Create the vector-load in the loop. */
9559 switch (alignment_support_scheme)
9561 case dr_aligned:
9562 case dr_unaligned_supported:
9564 unsigned int misalign;
9565 unsigned HOST_WIDE_INT align;
9567 if (memory_access_type == VMAT_GATHER_SCATTER
9568 && gs_info.ifn != IFN_LAST)
9570 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9571 vec_offset = vec_offsets[j];
9572 tree zero = build_zero_cst (vectype);
9573 tree scale = size_int (gs_info.scale);
9574 gcall *call;
9575 if (final_mask)
9576 call = gimple_build_call_internal
9577 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9578 vec_offset, scale, zero, final_mask);
9579 else
9580 call = gimple_build_call_internal
9581 (IFN_GATHER_LOAD, 4, dataref_ptr,
9582 vec_offset, scale, zero);
9583 gimple_call_set_nothrow (call, true);
9584 new_stmt = call;
9585 data_ref = NULL_TREE;
9586 break;
9588 else if (memory_access_type == VMAT_GATHER_SCATTER)
9590 /* Emulated gather-scatter. */
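/* No native gather instruction is available here, so build the result
   lane by lane: extract offset k with BIT_FIELD_REF, scale it, add it to
   the scalar base pointer and emit an ordinary scalar load, then collect
   the lanes in a CONSTRUCTOR of vectype
   (roughly elt[k] = *(base + (sizetype) off[k] * scale)).  */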
9591 gcc_assert (!final_mask);
9592 unsigned HOST_WIDE_INT const_nunits
9593 = nunits.to_constant ();
9594 unsigned HOST_WIDE_INT const_offset_nunits
9595 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9596 .to_constant ();
9597 vec<constructor_elt, va_gc> *ctor_elts;
9598 vec_alloc (ctor_elts, const_nunits);
9599 gimple_seq stmts = NULL;
9600 /* We support offset vectors with more elements
9601 than the data vector for now. */
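/* E.g. with an offset vector twice as wide as the data vector, factor is 2
   and data copies 2*k and 2*k + 1 read the low and high halves of
   vec_offsets[k] respectively.  */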
9602 unsigned HOST_WIDE_INT factor
9603 = const_offset_nunits / const_nunits;
9604 vec_offset = vec_offsets[j / factor];
9605 unsigned elt_offset = (j % factor) * const_nunits;
9606 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9607 tree scale = size_int (gs_info.scale);
9608 align
9609 = get_object_alignment (DR_REF (first_dr_info->dr));
9610 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9611 align);
9612 for (unsigned k = 0; k < const_nunits; ++k)
9614 tree boff = size_binop (MULT_EXPR,
9615 TYPE_SIZE (idx_type),
9616 bitsize_int
9617 (k + elt_offset));
9618 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9619 idx_type, vec_offset,
9620 TYPE_SIZE (idx_type),
9621 boff);
9622 idx = gimple_convert (&stmts, sizetype, idx);
9623 idx = gimple_build (&stmts, MULT_EXPR,
9624 sizetype, idx, scale);
9625 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9626 TREE_TYPE (dataref_ptr),
9627 dataref_ptr, idx);
9628 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9629 tree elt = make_ssa_name (TREE_TYPE (vectype));
9630 tree ref = build2 (MEM_REF, ltype, ptr,
9631 build_int_cst (ref_type, 0));
9632 new_stmt = gimple_build_assign (elt, ref);
9633 gimple_seq_add_stmt (&stmts, new_stmt);
9634 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9636 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9637 new_stmt = gimple_build_assign (NULL_TREE,
9638 build_constructor
9639 (vectype, ctor_elts));
9640 data_ref = NULL_TREE;
9641 break;
9644 align =
9645 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9646 if (alignment_support_scheme == dr_aligned)
9647 misalign = 0;
9648 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9650 align = dr_alignment
9651 (vect_dr_behavior (vinfo, first_dr_info));
9652 misalign = 0;
9654 else
9655 misalign = misalignment;
9656 if (dataref_offset == NULL_TREE
9657 && TREE_CODE (dataref_ptr) == SSA_NAME)
9658 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9659 align, misalign);
9660 align = least_bit_hwi (misalign | align);
9662 if (final_mask)
9664 tree ptr = build_int_cst (ref_type,
9665 align * BITS_PER_UNIT);
9666 gcall *call
9667 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9668 dataref_ptr, ptr,
9669 final_mask);
9670 gimple_call_set_nothrow (call, true);
9671 new_stmt = call;
9672 data_ref = NULL_TREE;
9674 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9676 tree final_len
9677 = vect_get_loop_len (loop_vinfo, loop_lens,
9678 vec_num * ncopies,
9679 vec_num * j + i);
9680 tree ptr = build_int_cst (ref_type,
9681 align * BITS_PER_UNIT);
9682 gcall *call
9683 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9684 dataref_ptr, ptr,
9685 final_len);
9686 gimple_call_set_nothrow (call, true);
9687 new_stmt = call;
9688 data_ref = NULL_TREE;
9690 /* Need conversion if it's wrapped with VnQI. */
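/* That is, if the target provides LEN_LOAD only in a byte-vector (VnQI)
   mode, give the call a vector(n) unsigned char lhs and VIEW_CONVERT the
   result back to vectype afterwards.  */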
9691 machine_mode vmode = TYPE_MODE (vectype);
9692 opt_machine_mode new_ovmode
9693 = get_len_load_store_mode (vmode, true);
9694 machine_mode new_vmode = new_ovmode.require ();
9695 if (vmode != new_vmode)
9697 tree qi_type = unsigned_intQI_type_node;
9698 tree new_vtype
9699 = build_vector_type_for_mode (qi_type, new_vmode);
9700 tree var = vect_get_new_ssa_name (new_vtype,
9701 vect_simple_var);
9702 gimple_set_lhs (call, var);
9703 vect_finish_stmt_generation (vinfo, stmt_info, call,
9704 gsi);
9705 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9706 new_stmt
9707 = gimple_build_assign (vec_dest,
9708 VIEW_CONVERT_EXPR, op);
9711 else
9713 tree ltype = vectype;
9714 tree new_vtype = NULL_TREE;
9715 unsigned HOST_WIDE_INT gap
9716 = DR_GROUP_GAP (first_stmt_info);
9717 unsigned int vect_align
9718 = vect_known_alignment_in_bytes (first_dr_info,
9719 vectype);
9720 unsigned int scalar_dr_size
9721 = vect_get_scalar_dr_size (first_dr_info);
9722 /* If there's no peeling for gaps but we have a gap
9723 with slp loads then load the lower half of the
9724 vector only. See get_group_load_store_type for
9725 when we apply this optimization. */
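/* E.g. (a sketch) a group of four with a gap of two loaded with a V4SI
   vectype: only the low half is loaded via half_vtype and the missing
   lanes are filled with zeros by the CONSTRUCTOR built further down.  */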
9726 if (slp
9727 && loop_vinfo
9728 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9729 && gap != 0
9730 && known_eq (nunits, (group_size - gap) * 2)
9731 && known_eq (nunits, group_size)
9732 && gap >= (vect_align / scalar_dr_size))
9734 tree half_vtype;
9735 new_vtype
9736 = vector_vector_composition_type (vectype, 2,
9737 &half_vtype);
9738 if (new_vtype != NULL_TREE)
9739 ltype = half_vtype;
9741 tree offset
9742 = (dataref_offset ? dataref_offset
9743 : build_int_cst (ref_type, 0));
9744 if (ltype != vectype
9745 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9747 unsigned HOST_WIDE_INT gap_offset
9748 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9749 tree gapcst = build_int_cst (ref_type, gap_offset);
9750 offset = size_binop (PLUS_EXPR, offset, gapcst);
9752 data_ref
9753 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9754 if (alignment_support_scheme == dr_aligned)
9756 else
9757 TREE_TYPE (data_ref)
9758 = build_aligned_type (TREE_TYPE (data_ref),
9759 align * BITS_PER_UNIT);
9760 if (ltype != vectype)
9762 vect_copy_ref_info (data_ref,
9763 DR_REF (first_dr_info->dr));
9764 tree tem = make_ssa_name (ltype);
9765 new_stmt = gimple_build_assign (tem, data_ref);
9766 vect_finish_stmt_generation (vinfo, stmt_info,
9767 new_stmt, gsi);
9768 data_ref = NULL;
9769 vec<constructor_elt, va_gc> *v;
9770 vec_alloc (v, 2);
9771 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9773 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9774 build_zero_cst (ltype));
9775 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9777 else
9779 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9780 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9781 build_zero_cst (ltype));
9783 gcc_assert (new_vtype != NULL_TREE);
9784 if (new_vtype == vectype)
9785 new_stmt = gimple_build_assign (
9786 vec_dest, build_constructor (vectype, v));
9787 else
9789 tree new_vname = make_ssa_name (new_vtype);
9790 new_stmt = gimple_build_assign (
9791 new_vname, build_constructor (new_vtype, v));
9792 vect_finish_stmt_generation (vinfo, stmt_info,
9793 new_stmt, gsi);
9794 new_stmt = gimple_build_assign (
9795 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9796 new_vname));
9800 break;
9802 case dr_explicit_realign:
9804 tree ptr, bump;
9806 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9808 if (compute_in_loop)
9809 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9810 &realignment_token,
9811 dr_explicit_realign,
9812 dataref_ptr, NULL);
9814 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9815 ptr = copy_ssa_name (dataref_ptr);
9816 else
9817 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9818 // For explicit realign the target alignment should be
9819 // known at compile time.
9820 unsigned HOST_WIDE_INT align =
9821 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9822 new_stmt = gimple_build_assign
9823 (ptr, BIT_AND_EXPR, dataref_ptr,
9824 build_int_cst
9825 (TREE_TYPE (dataref_ptr),
9826 -(HOST_WIDE_INT) align));
9827 vect_finish_stmt_generation (vinfo, stmt_info,
9828 new_stmt, gsi);
9829 data_ref
9830 = build2 (MEM_REF, vectype, ptr,
9831 build_int_cst (ref_type, 0));
9832 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9833 vec_dest = vect_create_destination_var (scalar_dest,
9834 vectype);
9835 new_stmt = gimple_build_assign (vec_dest, data_ref);
9836 new_temp = make_ssa_name (vec_dest, new_stmt);
9837 gimple_assign_set_lhs (new_stmt, new_temp);
9838 gimple_move_vops (new_stmt, stmt_info->stmt);
9839 vect_finish_stmt_generation (vinfo, stmt_info,
9840 new_stmt, gsi);
9841 msq = new_temp;
9843 bump = size_binop (MULT_EXPR, vs,
9844 TYPE_SIZE_UNIT (elem_type));
9845 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9846 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9847 stmt_info, bump);
9848 new_stmt = gimple_build_assign
9849 (NULL_TREE, BIT_AND_EXPR, ptr,
9850 build_int_cst
9851 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9852 ptr = copy_ssa_name (ptr, new_stmt);
9853 gimple_assign_set_lhs (new_stmt, ptr);
9854 vect_finish_stmt_generation (vinfo, stmt_info,
9855 new_stmt, gsi);
9856 data_ref
9857 = build2 (MEM_REF, vectype, ptr,
9858 build_int_cst (ref_type, 0));
9859 break;
9861 case dr_explicit_realign_optimized:
9863 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9864 new_temp = copy_ssa_name (dataref_ptr);
9865 else
9866 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9867 // We should only be doing this if we know the target
9868 // alignment at compile time.
9869 unsigned HOST_WIDE_INT align =
9870 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9871 new_stmt = gimple_build_assign
9872 (new_temp, BIT_AND_EXPR, dataref_ptr,
9873 build_int_cst (TREE_TYPE (dataref_ptr),
9874 -(HOST_WIDE_INT) align));
9875 vect_finish_stmt_generation (vinfo, stmt_info,
9876 new_stmt, gsi);
9877 data_ref
9878 = build2 (MEM_REF, vectype, new_temp,
9879 build_int_cst (ref_type, 0));
9880 break;
9882 default:
9883 gcc_unreachable ();
9885 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9886 /* DATA_REF is null if we've already built the statement. */
9887 if (data_ref)
9889 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9890 new_stmt = gimple_build_assign (vec_dest, data_ref);
9892 new_temp = make_ssa_name (vec_dest, new_stmt);
9893 gimple_set_lhs (new_stmt, new_temp);
9894 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9896 /* 3. Handle explicit realignment if necessary/supported.
9897 Create in loop:
9898 vec_dest = realign_load (msq, lsq, realignment_token) */
9899 if (alignment_support_scheme == dr_explicit_realign_optimized
9900 || alignment_support_scheme == dr_explicit_realign)
9902 lsq = gimple_assign_lhs (new_stmt);
9903 if (!realignment_token)
9904 realignment_token = dataref_ptr;
9905 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9906 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9907 msq, lsq, realignment_token);
9908 new_temp = make_ssa_name (vec_dest, new_stmt);
9909 gimple_assign_set_lhs (new_stmt, new_temp);
9910 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9912 if (alignment_support_scheme == dr_explicit_realign_optimized)
9914 gcc_assert (phi);
9915 if (i == vec_num - 1 && j == ncopies - 1)
9916 add_phi_arg (phi, lsq,
9917 loop_latch_edge (containing_loop),
9918 UNKNOWN_LOCATION);
9919 msq = lsq;
9923 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9925 tree perm_mask = perm_mask_for_reverse (vectype);
9926 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9927 perm_mask, stmt_info, gsi);
9928 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9931 /* Collect vector loads and later create their permutation in
9932 vect_transform_grouped_load (). */
9933 if (grouped_load || slp_perm)
9934 dr_chain.quick_push (new_temp);
9936 /* Store vector loads in the corresponding SLP_NODE. */
9937 if (slp && !slp_perm)
9938 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9940 /* With an SLP permutation we load the gaps as well; without one
9941 we need to skip the gaps once we have fully loaded
9942 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9943 group_elt += nunits;
9944 if (maybe_ne (group_gap_adj, 0U)
9945 && !slp_perm
9946 && known_eq (group_elt, group_size - group_gap_adj))
9948 poly_wide_int bump_val
9949 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9950 * group_gap_adj);
9951 if (tree_int_cst_sgn
9952 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9953 bump_val = -bump_val;
9954 tree bump = wide_int_to_tree (sizetype, bump_val);
9955 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9956 gsi, stmt_info, bump);
9957 group_elt = 0;
9960 /* Bump the vector pointer to account for a gap or for excess
9961 elements loaded for a permuted SLP load. */
9962 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9964 poly_wide_int bump_val
9965 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9966 * group_gap_adj);
9967 if (tree_int_cst_sgn
9968 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9969 bump_val = -bump_val;
9970 tree bump = wide_int_to_tree (sizetype, bump_val);
9971 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9972 stmt_info, bump);
9976 if (slp && !slp_perm)
9977 continue;
9979 if (slp_perm)
9981 unsigned n_perms;
9982 /* For SLP we know we've seen all possible uses of dr_chain so
9983 direct vect_transform_slp_perm_load to DCE the unused parts.
9984 ??? This is a hack to prevent compile-time issues as seen
9985 in PR101120 and friends. */
9986 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9987 gsi, vf, false, &n_perms,
9988 nullptr, true);
9989 gcc_assert (ok);
9991 else
9993 if (grouped_load)
9995 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9996 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9997 group_size, gsi);
9998 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10000 else
10002 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10005 dr_chain.release ();
10007 if (!slp)
10008 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10010 return true;
10013 /* Function vect_is_simple_cond.
10015 Input:
10016 LOOP - the loop that is being vectorized.
10017 COND - Condition that is checked for simple use.
10019 Output:
10020 *COMP_VECTYPE - the vector type for the comparison.
10021 *DTS - The def types for the arguments of the comparison
10023 Returns whether a COND can be vectorized. Checks whether
10024 condition operands are supportable using vect_is_simple_use. */
10026 static bool
10027 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10028 slp_tree slp_node, tree *comp_vectype,
10029 enum vect_def_type *dts, tree vectype)
10031 tree lhs, rhs;
10032 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10033 slp_tree slp_op;
10035 /* Mask case. */
10036 if (TREE_CODE (cond) == SSA_NAME
10037 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10039 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10040 &slp_op, &dts[0], comp_vectype)
10041 || !*comp_vectype
10042 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10043 return false;
10044 return true;
10047 if (!COMPARISON_CLASS_P (cond))
10048 return false;
10050 lhs = TREE_OPERAND (cond, 0);
10051 rhs = TREE_OPERAND (cond, 1);
10053 if (TREE_CODE (lhs) == SSA_NAME)
10055 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10056 &lhs, &slp_op, &dts[0], &vectype1))
10057 return false;
10059 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10060 || TREE_CODE (lhs) == FIXED_CST)
10061 dts[0] = vect_constant_def;
10062 else
10063 return false;
10065 if (TREE_CODE (rhs) == SSA_NAME)
10067 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10068 &rhs, &slp_op, &dts[1], &vectype2))
10069 return false;
10071 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10072 || TREE_CODE (rhs) == FIXED_CST)
10073 dts[1] = vect_constant_def;
10074 else
10075 return false;
10077 if (vectype1 && vectype2
10078 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10079 TYPE_VECTOR_SUBPARTS (vectype2)))
10080 return false;
10082 *comp_vectype = vectype1 ? vectype1 : vectype2;
10083 /* Invariant comparison. */
10084 if (! *comp_vectype)
10086 tree scalar_type = TREE_TYPE (lhs);
10087 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10088 *comp_vectype = truth_type_for (vectype);
10089 else
10091 /* If we can widen the comparison to match vectype do so. */
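/* E.g. a comparison of two chars feeding an int-element COND_EXPR is
   widened to a 32-bit comparison so that comp_vectype ends up with the
   same number of lanes as vectype (assuming a V4SI vectype here).  */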
10092 if (INTEGRAL_TYPE_P (scalar_type)
10093 && !slp_node
10094 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10095 TYPE_SIZE (TREE_TYPE (vectype))))
10096 scalar_type = build_nonstandard_integer_type
10097 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10098 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10099 slp_node);
10103 return true;
10106 /* vectorizable_condition.
10108 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10109 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10110 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10111 at GSI.
10113 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10115 Return true if STMT_INFO is vectorizable in this way. */
10117 static bool
10118 vectorizable_condition (vec_info *vinfo,
10119 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10120 gimple **vec_stmt,
10121 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10123 tree scalar_dest = NULL_TREE;
10124 tree vec_dest = NULL_TREE;
10125 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10126 tree then_clause, else_clause;
10127 tree comp_vectype = NULL_TREE;
10128 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10129 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10130 tree vec_compare;
10131 tree new_temp;
10132 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10133 enum vect_def_type dts[4]
10134 = {vect_unknown_def_type, vect_unknown_def_type,
10135 vect_unknown_def_type, vect_unknown_def_type};
10136 int ndts = 4;
10137 int ncopies;
10138 int vec_num;
10139 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10140 int i;
10141 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10142 vec<tree> vec_oprnds0 = vNULL;
10143 vec<tree> vec_oprnds1 = vNULL;
10144 vec<tree> vec_oprnds2 = vNULL;
10145 vec<tree> vec_oprnds3 = vNULL;
10146 tree vec_cmp_type;
10147 bool masked = false;
10149 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10150 return false;
10152 /* Is vectorizable conditional operation? */
10153 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10154 if (!stmt)
10155 return false;
10157 code = gimple_assign_rhs_code (stmt);
10158 if (code != COND_EXPR)
10159 return false;
10161 stmt_vec_info reduc_info = NULL;
10162 int reduc_index = -1;
10163 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10164 bool for_reduction
10165 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10166 if (for_reduction)
10168 if (STMT_SLP_TYPE (stmt_info))
10169 return false;
10170 reduc_info = info_for_reduction (vinfo, stmt_info);
10171 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10172 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10173 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10174 || reduc_index != -1);
10176 else
10178 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10179 return false;
10182 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10183 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10185 if (slp_node)
10187 ncopies = 1;
10188 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10190 else
10192 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10193 vec_num = 1;
10196 gcc_assert (ncopies >= 1);
10197 if (for_reduction && ncopies > 1)
10198 return false; /* FORNOW */
10200 cond_expr = gimple_assign_rhs1 (stmt);
10202 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10203 &comp_vectype, &dts[0], vectype)
10204 || !comp_vectype)
10205 return false;
10207 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10208 slp_tree then_slp_node, else_slp_node;
10209 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10210 &then_clause, &then_slp_node, &dts[2], &vectype1))
10211 return false;
10212 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10213 &else_clause, &else_slp_node, &dts[3], &vectype2))
10214 return false;
10216 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10217 return false;
10219 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10220 return false;
10222 masked = !COMPARISON_CLASS_P (cond_expr);
10223 vec_cmp_type = truth_type_for (comp_vectype);
10225 if (vec_cmp_type == NULL_TREE)
10226 return false;
10228 cond_code = TREE_CODE (cond_expr);
10229 if (!masked)
10231 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10232 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10235 /* For conditional reductions, the "then" value needs to be the candidate
10236 value calculated by this iteration while the "else" value needs to be
10237 the result carried over from previous iterations. If the COND_EXPR
10238 is the other way around, we need to swap it. */
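/* E.g. r = c ? r : x is handled as r = !c ? x : r (or, failing that, the
   comparison result is inverted), so that the candidate value x ends up
   in the "then" slot consumed by IFN_FOLD_EXTRACT_LAST.  */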
10239 bool must_invert_cmp_result = false;
10240 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10242 if (masked)
10243 must_invert_cmp_result = true;
10244 else
10246 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10247 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10248 if (new_code == ERROR_MARK)
10249 must_invert_cmp_result = true;
10250 else
10252 cond_code = new_code;
10253 /* Make sure we don't accidentally use the old condition. */
10254 cond_expr = NULL_TREE;
10257 std::swap (then_clause, else_clause);
10260 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10262 /* Boolean values may have another representation in vectors
10263 and therefore we prefer bit operations over comparison for
10264 them (which also works for scalar masks). We store opcodes
10265 to use in bitop1 and bitop2. Statement is vectorized as
10266 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10267 depending on bitop1 and bitop2 arity. */
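/* E.g. for mask operands a > b is emitted as a & ~b, and a <= b as
   b | ~a after the operand swap below.  */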
10268 switch (cond_code)
10270 case GT_EXPR:
10271 bitop1 = BIT_NOT_EXPR;
10272 bitop2 = BIT_AND_EXPR;
10273 break;
10274 case GE_EXPR:
10275 bitop1 = BIT_NOT_EXPR;
10276 bitop2 = BIT_IOR_EXPR;
10277 break;
10278 case LT_EXPR:
10279 bitop1 = BIT_NOT_EXPR;
10280 bitop2 = BIT_AND_EXPR;
10281 std::swap (cond_expr0, cond_expr1);
10282 break;
10283 case LE_EXPR:
10284 bitop1 = BIT_NOT_EXPR;
10285 bitop2 = BIT_IOR_EXPR;
10286 std::swap (cond_expr0, cond_expr1);
10287 break;
10288 case NE_EXPR:
10289 bitop1 = BIT_XOR_EXPR;
10290 break;
10291 case EQ_EXPR:
10292 bitop1 = BIT_XOR_EXPR;
10293 bitop2 = BIT_NOT_EXPR;
10294 break;
10295 default:
10296 return false;
10298 cond_code = SSA_NAME;
10301 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10302 && reduction_type == EXTRACT_LAST_REDUCTION
10303 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10305 if (dump_enabled_p ())
10306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10307 "reduction comparison operation not supported.\n");
10308 return false;
10311 if (!vec_stmt)
10313 if (bitop1 != NOP_EXPR)
10315 machine_mode mode = TYPE_MODE (comp_vectype);
10316 optab optab;
10318 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10319 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10320 return false;
10322 if (bitop2 != NOP_EXPR)
10324 optab = optab_for_tree_code (bitop2, comp_vectype,
10325 optab_default);
10326 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10327 return false;
10331 vect_cost_for_stmt kind = vector_stmt;
10332 if (reduction_type == EXTRACT_LAST_REDUCTION)
10333 /* Count one reduction-like operation per vector. */
10334 kind = vec_to_scalar;
10335 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10336 return false;
10338 if (slp_node
10339 && (!vect_maybe_update_slp_op_vectype
10340 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10341 || (op_adjust == 1
10342 && !vect_maybe_update_slp_op_vectype
10343 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10344 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10345 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10347 if (dump_enabled_p ())
10348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10349 "incompatible vector types for invariants\n");
10350 return false;
10353 if (loop_vinfo && for_reduction
10354 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10356 if (reduction_type == EXTRACT_LAST_REDUCTION)
10357 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10358 ncopies * vec_num, vectype, NULL);
10359 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10360 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10362 if (dump_enabled_p ())
10363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10364 "conditional reduction prevents the use"
10365 " of partial vectors.\n");
10366 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10370 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10371 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10372 cost_vec, kind);
10373 return true;
10376 /* Transform. */
10378 /* Handle def. */
10379 scalar_dest = gimple_assign_lhs (stmt);
10380 if (reduction_type != EXTRACT_LAST_REDUCTION)
10381 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10383 bool swap_cond_operands = false;
10385 /* See whether another part of the vectorized code applies a loop
10386 mask to the condition, or to its inverse. */
10388 vec_loop_masks *masks = NULL;
10389 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10391 if (reduction_type == EXTRACT_LAST_REDUCTION)
10392 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10393 else
10395 scalar_cond_masked_key cond (cond_expr, ncopies);
10396 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10397 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10398 else
10400 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10401 tree_code orig_code = cond.code;
10402 cond.code = invert_tree_comparison (cond.code, honor_nans);
10403 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10405 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10406 cond_code = cond.code;
10407 swap_cond_operands = true;
10409 else
10411 /* Try the inverse of the current mask. We check if the
10412 inverse mask is live and if so we generate a negate of
10413 the current mask such that we still honor NaNs. */
10414 cond.inverted_p = true;
10415 cond.code = orig_code;
10416 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10418 bitop1 = orig_code;
10419 bitop2 = BIT_NOT_EXPR;
10420 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10421 cond_code = cond.code;
10422 swap_cond_operands = true;
10429 /* Handle cond expr. */
10430 if (masked)
10431 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10432 cond_expr, &vec_oprnds0, comp_vectype,
10433 then_clause, &vec_oprnds2, vectype,
10434 reduction_type != EXTRACT_LAST_REDUCTION
10435 ? else_clause : NULL, &vec_oprnds3, vectype);
10436 else
10437 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10438 cond_expr0, &vec_oprnds0, comp_vectype,
10439 cond_expr1, &vec_oprnds1, comp_vectype,
10440 then_clause, &vec_oprnds2, vectype,
10441 reduction_type != EXTRACT_LAST_REDUCTION
10442 ? else_clause : NULL, &vec_oprnds3, vectype);
10444 /* Arguments are ready. Create the new vector stmt. */
10445 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10447 vec_then_clause = vec_oprnds2[i];
10448 if (reduction_type != EXTRACT_LAST_REDUCTION)
10449 vec_else_clause = vec_oprnds3[i];
10451 if (swap_cond_operands)
10452 std::swap (vec_then_clause, vec_else_clause);
10454 if (masked)
10455 vec_compare = vec_cond_lhs;
10456 else
10458 vec_cond_rhs = vec_oprnds1[i];
10459 if (bitop1 == NOP_EXPR)
10461 gimple_seq stmts = NULL;
10462 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10463 vec_cond_lhs, vec_cond_rhs);
10464 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10466 else
10468 new_temp = make_ssa_name (vec_cmp_type);
10469 gassign *new_stmt;
10470 if (bitop1 == BIT_NOT_EXPR)
10471 new_stmt = gimple_build_assign (new_temp, bitop1,
10472 vec_cond_rhs);
10473 else
10474 new_stmt
10475 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10476 vec_cond_rhs);
10477 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10478 if (bitop2 == NOP_EXPR)
10479 vec_compare = new_temp;
10480 else if (bitop2 == BIT_NOT_EXPR)
10482 /* Instead of doing ~x ? y : z do x ? z : y. */
10483 vec_compare = new_temp;
10484 std::swap (vec_then_clause, vec_else_clause);
10486 else
10488 vec_compare = make_ssa_name (vec_cmp_type);
10489 new_stmt
10490 = gimple_build_assign (vec_compare, bitop2,
10491 vec_cond_lhs, new_temp);
10492 vect_finish_stmt_generation (vinfo, stmt_info,
10493 new_stmt, gsi);
10498 /* If we decided to apply a loop mask to the result of the vector
10499 comparison, AND the comparison with the mask now. Later passes
10500 should then be able to reuse the AND results between multiple
10501 vector statements.
10503 For example:
10504 for (int i = 0; i < 100; ++i)
10505 x[i] = y[i] ? z[i] : 10;
10507 results in following optimized GIMPLE:
10509 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10510 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10511 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10512 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10513 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10514 vect_iftmp.11_47, { 10, ... }>;
10516 instead of using masked and unmasked forms of
10517 vec != { 0, ... } (masked in the MASK_LOAD,
10518 unmasked in the VEC_COND_EXPR). */
10520 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10521 in cases where that's necessary. */
10523 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10525 if (!is_gimple_val (vec_compare))
10527 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10528 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10529 vec_compare);
10530 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10531 vec_compare = vec_compare_name;
10534 if (must_invert_cmp_result)
10536 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10537 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10538 BIT_NOT_EXPR,
10539 vec_compare);
10540 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10541 vec_compare = vec_compare_name;
10544 if (masks)
10546 tree loop_mask
10547 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10548 vectype, i);
10549 tree tmp2 = make_ssa_name (vec_cmp_type);
10550 gassign *g
10551 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10552 loop_mask);
10553 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10554 vec_compare = tmp2;
10558 gimple *new_stmt;
10559 if (reduction_type == EXTRACT_LAST_REDUCTION)
10561 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10562 tree lhs = gimple_get_lhs (old_stmt);
10563 new_stmt = gimple_build_call_internal
10564 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10565 vec_then_clause);
10566 gimple_call_set_lhs (new_stmt, lhs);
10567 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10568 if (old_stmt == gsi_stmt (*gsi))
10569 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10570 else
10572 /* In this case we're moving the definition to later in the
10573 block. That doesn't matter because the only uses of the
10574 lhs are in phi statements. */
10575 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10576 gsi_remove (&old_gsi, true);
10577 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10580 else
10582 new_temp = make_ssa_name (vec_dest);
10583 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10584 vec_then_clause, vec_else_clause);
10585 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10587 if (slp_node)
10588 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10589 else
10590 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10593 if (!slp_node)
10594 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10596 vec_oprnds0.release ();
10597 vec_oprnds1.release ();
10598 vec_oprnds2.release ();
10599 vec_oprnds3.release ();
10601 return true;
10604 /* vectorizable_comparison.
10606 Check if STMT_INFO is a comparison expression that can be vectorized.
10607 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10608 comparison, put it in VEC_STMT, and insert it at GSI.
10610 Return true if STMT_INFO is vectorizable in this way. */
10612 static bool
10613 vectorizable_comparison (vec_info *vinfo,
10614 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10615 gimple **vec_stmt,
10616 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10618 tree lhs, rhs1, rhs2;
10619 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10620 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10621 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10622 tree new_temp;
10623 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10624 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10625 int ndts = 2;
10626 poly_uint64 nunits;
10627 int ncopies;
10628 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10629 int i;
10630 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10631 vec<tree> vec_oprnds0 = vNULL;
10632 vec<tree> vec_oprnds1 = vNULL;
10633 tree mask_type;
10634 tree mask;
10636 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10637 return false;
10639 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10640 return false;
10642 mask_type = vectype;
10643 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10645 if (slp_node)
10646 ncopies = 1;
10647 else
10648 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10650 gcc_assert (ncopies >= 1);
10651 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10652 return false;
10654 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10655 if (!stmt)
10656 return false;
10658 code = gimple_assign_rhs_code (stmt);
10660 if (TREE_CODE_CLASS (code) != tcc_comparison)
10661 return false;
10663 slp_tree slp_rhs1, slp_rhs2;
10664 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10665 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10666 return false;
10668 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10669 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10670 return false;
10672 if (vectype1 && vectype2
10673 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10674 TYPE_VECTOR_SUBPARTS (vectype2)))
10675 return false;
10677 vectype = vectype1 ? vectype1 : vectype2;
10679 /* Invariant comparison. */
10680 if (!vectype)
10682 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10683 vectype = mask_type;
10684 else
10685 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10686 slp_node);
10687 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10688 return false;
10690 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10691 return false;
10693 /* Can't compare mask and non-mask types. */
10694 if (vectype1 && vectype2
10695 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10696 return false;
10698 /* Boolean values may have another representation in vectors
10699 and therefore we prefer bit operations over comparisons for
10700 them (which also work for scalar masks). We store the opcodes
10701 to use in bitop1 and bitop2. The statement is vectorized as
10702 BITOP2 (rhs1 BITOP1 rhs2) or
10703 rhs1 BITOP2 (BITOP1 rhs2)
10704 depending on the arity of bitop1 and bitop2. */
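/* For illustration, this corresponds to the following identities on
   boolean operands A and B (matching the cases set up below):
     A >  B  ->  A & ~B
     A >= B  ->  A | ~B
     A <  B  ->  B & ~A   (operands swapped, then handled as >)
     A <= B  ->  B | ~A   (operands swapped, then handled as >=)
     A == B  ->  ~(A ^ B)
     A != B  ->  A ^ B  */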
10705 bool swap_p = false;
10706 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10708 if (code == GT_EXPR)
10710 bitop1 = BIT_NOT_EXPR;
10711 bitop2 = BIT_AND_EXPR;
10713 else if (code == GE_EXPR)
10715 bitop1 = BIT_NOT_EXPR;
10716 bitop2 = BIT_IOR_EXPR;
10718 else if (code == LT_EXPR)
10720 bitop1 = BIT_NOT_EXPR;
10721 bitop2 = BIT_AND_EXPR;
10722 swap_p = true;
10724 else if (code == LE_EXPR)
10726 bitop1 = BIT_NOT_EXPR;
10727 bitop2 = BIT_IOR_EXPR;
10728 swap_p = true;
10730 else
10732 bitop1 = BIT_XOR_EXPR;
10733 if (code == EQ_EXPR)
10734 bitop2 = BIT_NOT_EXPR;
10738 if (!vec_stmt)
10740 if (bitop1 == NOP_EXPR)
10742 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10743 return false;
10745 else
10747 machine_mode mode = TYPE_MODE (vectype);
10748 optab optab;
10750 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10751 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10752 return false;
10754 if (bitop2 != NOP_EXPR)
10756 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10757 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10758 return false;
10762 /* Put types on constant and invariant SLP children. */
10763 if (slp_node
10764 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10765 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10767 if (dump_enabled_p ())
10768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10769 "incompatible vector types for invariants\n");
10770 return false;
10773 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10774 vect_model_simple_cost (vinfo, stmt_info,
10775 ncopies * (1 + (bitop2 != NOP_EXPR)),
10776 dts, ndts, slp_node, cost_vec);
10777 return true;
10780 /* Transform. */
10782 /* Handle def. */
10783 lhs = gimple_assign_lhs (stmt);
10784 mask = vect_create_destination_var (lhs, mask_type);
10786 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10787 rhs1, &vec_oprnds0, vectype,
10788 rhs2, &vec_oprnds1, vectype);
10789 if (swap_p)
10790 std::swap (vec_oprnds0, vec_oprnds1);
10792 /* Arguments are ready. Create the new vector stmt. */
10793 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10795 gimple *new_stmt;
10796 vec_rhs2 = vec_oprnds1[i];
10798 new_temp = make_ssa_name (mask);
10799 if (bitop1 == NOP_EXPR)
10801 new_stmt = gimple_build_assign (new_temp, code,
10802 vec_rhs1, vec_rhs2);
10803 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10805 else
10807 if (bitop1 == BIT_NOT_EXPR)
10808 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10809 else
10810 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10811 vec_rhs2);
10812 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10813 if (bitop2 != NOP_EXPR)
10815 tree res = make_ssa_name (mask);
10816 if (bitop2 == BIT_NOT_EXPR)
10817 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10818 else
10819 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10820 new_temp);
10821 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10824 if (slp_node)
10825 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10826 else
10827 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10830 if (!slp_node)
10831 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10833 vec_oprnds0.release ();
10834 vec_oprnds1.release ();
10836 return true;
10839 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10840 can handle all live statements in the node. Otherwise return true
10841 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10842 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10844 static bool
10845 can_vectorize_live_stmts (vec_info *vinfo,
10846 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10847 slp_tree slp_node, slp_instance slp_node_instance,
10848 bool vec_stmt_p,
10849 stmt_vector_for_cost *cost_vec)
10851 if (slp_node)
10853 stmt_vec_info slp_stmt_info;
10854 unsigned int i;
10855 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10857 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10858 && !vectorizable_live_operation (vinfo,
10859 slp_stmt_info, gsi, slp_node,
10860 slp_node_instance, i,
10861 vec_stmt_p, cost_vec))
10862 return false;
10865 else if (STMT_VINFO_LIVE_P (stmt_info)
10866 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10867 slp_node, slp_node_instance, -1,
10868 vec_stmt_p, cost_vec))
10869 return false;
10871 return true;
10874 /* Make sure the statement is vectorizable. */
10876 opt_result
10877 vect_analyze_stmt (vec_info *vinfo,
10878 stmt_vec_info stmt_info, bool *need_to_vectorize,
10879 slp_tree node, slp_instance node_instance,
10880 stmt_vector_for_cost *cost_vec)
10882 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10883 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10884 bool ok;
10885 gimple_seq pattern_def_seq;
10887 if (dump_enabled_p ())
10888 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10889 stmt_info->stmt);
10891 if (gimple_has_volatile_ops (stmt_info->stmt))
10892 return opt_result::failure_at (stmt_info->stmt,
10893 "not vectorized:"
10894 " stmt has volatile operands: %G\n",
10895 stmt_info->stmt);
10897 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10898 && node == NULL
10899 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10901 gimple_stmt_iterator si;
10903 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10905 stmt_vec_info pattern_def_stmt_info
10906 = vinfo->lookup_stmt (gsi_stmt (si));
10907 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10908 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10910 /* Analyze def stmt of STMT if it's a pattern stmt. */
10911 if (dump_enabled_p ())
10912 dump_printf_loc (MSG_NOTE, vect_location,
10913 "==> examining pattern def statement: %G",
10914 pattern_def_stmt_info->stmt);
10916 opt_result res
10917 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10918 need_to_vectorize, node, node_instance,
10919 cost_vec);
10920 if (!res)
10921 return res;
10926 /* Skip stmts that do not need to be vectorized. In loops this is expected
10927 to include:
10928 - the COND_EXPR which is the loop exit condition
10929 - any LABEL_EXPRs in the loop
10930 - computations that are used only for array indexing or loop control.
10931 In basic blocks we only analyze statements that are a part of some SLP
10932 instance, therefore, all the statements are relevant.
10934 A pattern statement needs to be analyzed instead of the original statement
10935 if the original statement is not relevant. Otherwise, we analyze both
10936 statements. In basic blocks we are called from some SLP instance
10937 traversal, so we do not analyze pattern stmts here; the pattern stmts
10938 are already part of an SLP instance. */
10940 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10941 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10942 && !STMT_VINFO_LIVE_P (stmt_info))
10944 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10945 && pattern_stmt_info
10946 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10947 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10949 /* Analyze PATTERN_STMT instead of the original stmt. */
10950 stmt_info = pattern_stmt_info;
10951 if (dump_enabled_p ())
10952 dump_printf_loc (MSG_NOTE, vect_location,
10953 "==> examining pattern statement: %G",
10954 stmt_info->stmt);
10956 else
10958 if (dump_enabled_p ())
10959 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10961 return opt_result::success ();
10964 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10965 && node == NULL
10966 && pattern_stmt_info
10967 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10968 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10970 /* Analyze PATTERN_STMT too. */
10971 if (dump_enabled_p ())
10972 dump_printf_loc (MSG_NOTE, vect_location,
10973 "==> examining pattern statement: %G",
10974 pattern_stmt_info->stmt);
10976 opt_result res
10977 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10978 node_instance, cost_vec);
10979 if (!res)
10980 return res;
10983 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10985 case vect_internal_def:
10986 break;
10988 case vect_reduction_def:
10989 case vect_nested_cycle:
10990 gcc_assert (!bb_vinfo
10991 && (relevance == vect_used_in_outer
10992 || relevance == vect_used_in_outer_by_reduction
10993 || relevance == vect_used_by_reduction
10994 || relevance == vect_unused_in_scope
10995 || relevance == vect_used_only_live));
10996 break;
10998 case vect_induction_def:
10999 gcc_assert (!bb_vinfo);
11000 break;
11002 case vect_constant_def:
11003 case vect_external_def:
11004 case vect_unknown_def_type:
11005 default:
11006 gcc_unreachable ();
11009 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11010 if (node)
11011 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11013 if (STMT_VINFO_RELEVANT_P (stmt_info))
11015 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11016 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11017 || (call && gimple_call_lhs (call) == NULL_TREE));
11018 *need_to_vectorize = true;
11021 if (PURE_SLP_STMT (stmt_info) && !node)
11023 if (dump_enabled_p ())
11024 dump_printf_loc (MSG_NOTE, vect_location,
11025 "handled only by SLP analysis\n");
11026 return opt_result::success ();
11029 ok = true;
11030 if (!bb_vinfo
11031 && (STMT_VINFO_RELEVANT_P (stmt_info)
11032 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11033 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11034 -mveclibabi= takes preference over library functions with
11035 the simd attribute. */
11036 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11037 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11038 cost_vec)
11039 || vectorizable_conversion (vinfo, stmt_info,
11040 NULL, NULL, node, cost_vec)
11041 || vectorizable_operation (vinfo, stmt_info,
11042 NULL, NULL, node, cost_vec)
11043 || vectorizable_assignment (vinfo, stmt_info,
11044 NULL, NULL, node, cost_vec)
11045 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11046 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11047 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11048 node, node_instance, cost_vec)
11049 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11050 NULL, node, cost_vec)
11051 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11052 || vectorizable_condition (vinfo, stmt_info,
11053 NULL, NULL, node, cost_vec)
11054 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11055 cost_vec)
11056 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11057 stmt_info, NULL, node));
11058 else
11060 if (bb_vinfo)
11061 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11062 || vectorizable_simd_clone_call (vinfo, stmt_info,
11063 NULL, NULL, node, cost_vec)
11064 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11065 cost_vec)
11066 || vectorizable_shift (vinfo, stmt_info,
11067 NULL, NULL, node, cost_vec)
11068 || vectorizable_operation (vinfo, stmt_info,
11069 NULL, NULL, node, cost_vec)
11070 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11071 cost_vec)
11072 || vectorizable_load (vinfo, stmt_info,
11073 NULL, NULL, node, cost_vec)
11074 || vectorizable_store (vinfo, stmt_info,
11075 NULL, NULL, node, cost_vec)
11076 || vectorizable_condition (vinfo, stmt_info,
11077 NULL, NULL, node, cost_vec)
11078 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11079 cost_vec)
11080 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11083 if (node)
11084 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11086 if (!ok)
11087 return opt_result::failure_at (stmt_info->stmt,
11088 "not vectorized:"
11089 " relevant stmt not supported: %G",
11090 stmt_info->stmt);
11092 /* Stmts that are (also) "live" (i.e. used outside of the loop)
11093 need extra handling, except for vectorizable reductions. */
11094 if (!bb_vinfo
11095 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11096 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11097 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11098 stmt_info, NULL, node, node_instance,
11099 false, cost_vec))
11100 return opt_result::failure_at (stmt_info->stmt,
11101 "not vectorized:"
11102 " live stmt not supported: %G",
11103 stmt_info->stmt);
11105 return opt_result::success ();
11109 /* Function vect_transform_stmt.
11111 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11113 bool
11114 vect_transform_stmt (vec_info *vinfo,
11115 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11116 slp_tree slp_node, slp_instance slp_node_instance)
11118 bool is_store = false;
11119 gimple *vec_stmt = NULL;
11120 bool done;
11122 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11124 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11125 if (slp_node)
11126 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11128 switch (STMT_VINFO_TYPE (stmt_info))
11130 case type_demotion_vec_info_type:
11131 case type_promotion_vec_info_type:
11132 case type_conversion_vec_info_type:
11133 done = vectorizable_conversion (vinfo, stmt_info,
11134 gsi, &vec_stmt, slp_node, NULL);
11135 gcc_assert (done);
11136 break;
11138 case induc_vec_info_type:
11139 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11140 stmt_info, &vec_stmt, slp_node,
11141 NULL);
11142 gcc_assert (done);
11143 break;
11145 case shift_vec_info_type:
11146 done = vectorizable_shift (vinfo, stmt_info,
11147 gsi, &vec_stmt, slp_node, NULL);
11148 gcc_assert (done);
11149 break;
11151 case op_vec_info_type:
11152 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11153 NULL);
11154 gcc_assert (done);
11155 break;
11157 case assignment_vec_info_type:
11158 done = vectorizable_assignment (vinfo, stmt_info,
11159 gsi, &vec_stmt, slp_node, NULL);
11160 gcc_assert (done);
11161 break;
11163 case load_vec_info_type:
11164 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11165 NULL);
11166 gcc_assert (done);
11167 break;
11169 case store_vec_info_type:
11170 done = vectorizable_store (vinfo, stmt_info,
11171 gsi, &vec_stmt, slp_node, NULL);
11172 gcc_assert (done);
11173 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11175 /* In case of interleaving, the whole chain is vectorized when the
11176 last store in the chain is reached. Store stmts before the last
10777 one are skipped, and their vec_stmt_info shouldn't be freed
11178 meanwhile. */
11179 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11180 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11181 is_store = true;
11183 else
11184 is_store = true;
11185 break;
11187 case condition_vec_info_type:
11188 done = vectorizable_condition (vinfo, stmt_info,
11189 gsi, &vec_stmt, slp_node, NULL);
11190 gcc_assert (done);
11191 break;
11193 case comparison_vec_info_type:
11194 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11195 slp_node, NULL);
11196 gcc_assert (done);
11197 break;
11199 case call_vec_info_type:
11200 done = vectorizable_call (vinfo, stmt_info,
11201 gsi, &vec_stmt, slp_node, NULL);
11202 break;
11204 case call_simd_clone_vec_info_type:
11205 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11206 slp_node, NULL);
11207 break;
11209 case reduc_vec_info_type:
11210 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11211 gsi, &vec_stmt, slp_node);
11212 gcc_assert (done);
11213 break;
11215 case cycle_phi_info_type:
11216 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11217 &vec_stmt, slp_node, slp_node_instance);
11218 gcc_assert (done);
11219 break;
11221 case lc_phi_info_type:
11222 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11223 stmt_info, &vec_stmt, slp_node);
11224 gcc_assert (done);
11225 break;
11227 case phi_info_type:
11228 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11229 gcc_assert (done);
11230 break;
11232 default:
11233 if (!STMT_VINFO_LIVE_P (stmt_info))
11235 if (dump_enabled_p ())
11236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11237 "stmt not supported.\n");
11238 gcc_unreachable ();
11240 done = true;
11243 if (!slp_node && vec_stmt)
11244 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11246 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11248 /* Handle stmts whose DEF is used outside the loop-nest that is
11249 being vectorized. */
11250 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11251 slp_node_instance, true, NULL);
11252 gcc_assert (done);
11255 if (slp_node)
11256 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11258 return is_store;
11262 /* Remove a group of stores (for SLP or interleaving), free their
11263 stmt_vec_info. */
11265 void
11266 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11268 stmt_vec_info next_stmt_info = first_stmt_info;
11270 while (next_stmt_info)
11272 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11273 next_stmt_info = vect_orig_stmt (next_stmt_info);
11274 /* Free the attached stmt_vec_info and remove the stmt. */
11275 vinfo->remove_stmt (next_stmt_info);
11276 next_stmt_info = tmp;
11280 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11281 elements of type SCALAR_TYPE, or null if the target doesn't support
11282 such a type.
11284 If NUNITS is zero, return a vector type that contains elements of
11285 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11287 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11288 for this vectorization region and want to "autodetect" the best choice.
11289 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11290 and we want the new type to be interoperable with it. PREVAILING_MODE
11291 in this case can be a scalar integer mode or a vector mode; when it
11292 is a vector mode, the function acts like a tree-level version of
11293 related_vector_mode. */
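/* For illustration only (assuming a target whose vectors are 128 bits,
   e.g. PREVAILING_MODE V16QImode): with SCALAR_TYPE int and NUNITS 4
   the result would be the vector type with mode V4SImode.  */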
11295 tree
11296 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11297 tree scalar_type, poly_uint64 nunits)
11299 tree orig_scalar_type = scalar_type;
11300 scalar_mode inner_mode;
11301 machine_mode simd_mode;
11302 tree vectype;
11304 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11305 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11306 return NULL_TREE;
11308 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11310 /* For vector types of elements whose mode precision doesn't
11311 match their type's precision we use an element type of mode
11312 precision. The vectorization routines will have to make sure
11313 they support the proper result truncation/extension.
11314 We also make sure to build vector types with INTEGER_TYPE
11315 component type only. */
11316 if (INTEGRAL_TYPE_P (scalar_type)
11317 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11318 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11319 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11320 TYPE_UNSIGNED (scalar_type));
11322 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11323 When the component mode passes the above test simply use a type
11324 corresponding to that mode. The theory is that any use that
11325 would cause problems with this will disable vectorization anyway. */
11326 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11327 && !INTEGRAL_TYPE_P (scalar_type))
11328 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11330 /* We can't build a vector type of elements with alignment bigger than
11331 their size. */
11332 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11333 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11334 TYPE_UNSIGNED (scalar_type));
11336 /* If we fell back to using the mode, fail if there was
11337 no scalar type for it. */
11338 if (scalar_type == NULL_TREE)
11339 return NULL_TREE;
11341 /* If no prevailing mode was supplied, use the mode the target prefers.
11342 Otherwise lookup a vector mode based on the prevailing mode. */
11343 if (prevailing_mode == VOIDmode)
11345 gcc_assert (known_eq (nunits, 0U));
11346 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11347 if (SCALAR_INT_MODE_P (simd_mode))
11349 /* Traditional behavior is not to take the integer mode
11350 literally, but simply to use it as a way of determining
11351 the vector size. It is up to mode_for_vector to decide
11352 what the TYPE_MODE should be.
11354 Note that nunits == 1 is allowed in order to support single
11355 element vector types. */
11356 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11357 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11358 return NULL_TREE;
11361 else if (SCALAR_INT_MODE_P (prevailing_mode)
11362 || !related_vector_mode (prevailing_mode,
11363 inner_mode, nunits).exists (&simd_mode))
11365 /* Fall back to using mode_for_vector, mostly in the hope of being
11366 able to use an integer mode. */
11367 if (known_eq (nunits, 0U)
11368 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11369 return NULL_TREE;
11371 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11372 return NULL_TREE;
11375 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11377 /* In cases where the mode was chosen by mode_for_vector, check that
11378 the target actually supports the chosen mode, or that it at least
11379 allows the vector mode to be replaced by a like-sized integer. */
11380 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11381 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11382 return NULL_TREE;
11384 /* Re-attach the address-space qualifier if we canonicalized the scalar
11385 type. */
11386 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11387 return build_qualified_type
11388 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11390 return vectype;
11393 /* Function get_vectype_for_scalar_type.
11395 Returns the vector type corresponding to SCALAR_TYPE as supported
11396 by the target. If GROUP_SIZE is nonzero and we're performing BB
11397 vectorization, make sure that the number of elements in the vector
11398 is no bigger than GROUP_SIZE. */
11400 tree
11401 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11402 unsigned int group_size)
11404 /* For BB vectorization, we should always have a group size once we've
11405 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11406 are tentative requests during things like early data reference
11407 analysis and pattern recognition. */
11408 if (is_a <bb_vec_info> (vinfo))
11409 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11410 else
11411 group_size = 0;
11413 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11414 scalar_type);
11415 if (vectype && vinfo->vector_mode == VOIDmode)
11416 vinfo->vector_mode = TYPE_MODE (vectype);
11418 /* Register the natural choice of vector type, before the group size
11419 has been applied. */
11420 if (vectype)
11421 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11423 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11424 try again with an explicit number of elements. */
11425 if (vectype
11426 && group_size
11427 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11429 /* Start with the biggest number of units that fits within
11430 GROUP_SIZE and halve it until we find a valid vector type.
11431 Usually either the first attempt will succeed or all will
11432 fail (in the latter case because GROUP_SIZE is too small
11433 for the target), but it's possible that a target could have
11434 a hole between supported vector types.
11436 If GROUP_SIZE is not a power of 2, this has the effect of
11437 trying the largest power of 2 that fits within the group,
11438 even though the group is not a multiple of that vector size.
11439 The BB vectorizer will then try to carve up the group into
11440 smaller pieces. */
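/* E.g. with GROUP_SIZE == 6 the loop below first tries
   1 << floor_log2 (6) == 4 units and, if no vector type is found,
   retries with 2 units before giving up.  */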
11441 unsigned int nunits = 1 << floor_log2 (group_size);
11444 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11445 scalar_type, nunits);
11446 nunits /= 2;
11448 while (nunits > 1 && !vectype);
11451 return vectype;
11454 /* Return the vector type corresponding to SCALAR_TYPE as supported
11455 by the target. NODE, if nonnull, is the SLP tree node that will
11456 use the returned vector type. */
11458 tree
11459 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11461 unsigned int group_size = 0;
11462 if (node)
11463 group_size = SLP_TREE_LANES (node);
11464 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11467 /* Function get_mask_type_for_scalar_type.
11469 Returns the mask type corresponding to a result of comparison
11470 of vectors of specified SCALAR_TYPE as supported by target.
11471 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11472 make sure that the number of elements in the vector is no bigger
11473 than GROUP_SIZE. */
11475 tree
11476 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11477 unsigned int group_size)
11479 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11481 if (!vectype)
11482 return NULL;
11484 return truth_type_for (vectype);
11487 /* Function get_same_sized_vectype
11489 Returns a vector type corresponding to SCALAR_TYPE with the same
11490 size as VECTOR_TYPE, if supported by the target. */
11492 tree
11493 get_same_sized_vectype (tree scalar_type, tree vector_type)
11495 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11496 return truth_type_for (vector_type);
11498 poly_uint64 nunits;
11499 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11500 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11501 return NULL_TREE;
11503 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11504 scalar_type, nunits);
11507 /* Return true if replacing VINFO->vector_mode with VECTOR_MODE
11508 would not change the chosen vector modes. */
11510 bool
11511 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11513 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11514 i != vinfo->used_vector_modes.end (); ++i)
11515 if (!VECTOR_MODE_P (*i)
11516 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11517 return false;
11518 return true;
11521 /* Function vect_is_simple_use.
11523 Input:
11524 VINFO - the vect info of the loop or basic block that is being vectorized.
11525 OPERAND - operand in the loop or bb.
11526 Output:
11527 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11528 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11529 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11530 the definition could be anywhere in the function
11531 DT - the type of definition
11533 Returns whether a stmt with OPERAND can be vectorized.
11534 For loops, supportable operands are constants, loop invariants, and operands
11535 that are defined by the current iteration of the loop. Unsupportable
11536 operands are those that are defined by a previous iteration of the loop (as
11537 is the case in reduction/induction computations).
11538 For basic blocks, supportable operands are constants and bb invariants.
11539 For now, operands defined outside the basic block are not supported. */
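/* For example (names purely illustrative), in a loop computing
     a[i] = b[i] + x;
   the SSA name holding the loaded value of b[i] is a vect_internal_def,
   the loop-invariant X is a vect_external_def, and a literal constant
   operand is a vect_constant_def.  */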
11541 bool
11542 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11543 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11545 if (def_stmt_info_out)
11546 *def_stmt_info_out = NULL;
11547 if (def_stmt_out)
11548 *def_stmt_out = NULL;
11549 *dt = vect_unknown_def_type;
11551 if (dump_enabled_p ())
11553 dump_printf_loc (MSG_NOTE, vect_location,
11554 "vect_is_simple_use: operand ");
11555 if (TREE_CODE (operand) == SSA_NAME
11556 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11557 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11558 else
11559 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11562 if (CONSTANT_CLASS_P (operand))
11563 *dt = vect_constant_def;
11564 else if (is_gimple_min_invariant (operand))
11565 *dt = vect_external_def;
11566 else if (TREE_CODE (operand) != SSA_NAME)
11567 *dt = vect_unknown_def_type;
11568 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11569 *dt = vect_external_def;
11570 else
11572 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11573 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11574 if (!stmt_vinfo)
11575 *dt = vect_external_def;
11576 else
11578 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11579 def_stmt = stmt_vinfo->stmt;
11580 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11581 if (def_stmt_info_out)
11582 *def_stmt_info_out = stmt_vinfo;
11584 if (def_stmt_out)
11585 *def_stmt_out = def_stmt;
11588 if (dump_enabled_p ())
11590 dump_printf (MSG_NOTE, ", type of def: ");
11591 switch (*dt)
11593 case vect_uninitialized_def:
11594 dump_printf (MSG_NOTE, "uninitialized\n");
11595 break;
11596 case vect_constant_def:
11597 dump_printf (MSG_NOTE, "constant\n");
11598 break;
11599 case vect_external_def:
11600 dump_printf (MSG_NOTE, "external\n");
11601 break;
11602 case vect_internal_def:
11603 dump_printf (MSG_NOTE, "internal\n");
11604 break;
11605 case vect_induction_def:
11606 dump_printf (MSG_NOTE, "induction\n");
11607 break;
11608 case vect_reduction_def:
11609 dump_printf (MSG_NOTE, "reduction\n");
11610 break;
11611 case vect_double_reduction_def:
11612 dump_printf (MSG_NOTE, "double reduction\n");
11613 break;
11614 case vect_nested_cycle:
11615 dump_printf (MSG_NOTE, "nested cycle\n");
11616 break;
11617 case vect_unknown_def_type:
11618 dump_printf (MSG_NOTE, "unknown\n");
11619 break;
11623 if (*dt == vect_unknown_def_type)
11625 if (dump_enabled_p ())
11626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11627 "Unsupported pattern.\n");
11628 return false;
11631 return true;
11634 /* Function vect_is_simple_use.
11636 Same as vect_is_simple_use but also determines the vector operand
11637 type of OPERAND and stores it to *VECTYPE. If the definition of
11638 OPERAND is vect_uninitialized_def, vect_constant_def or
11639 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
11640 is responsible for computing the best-suited vector type for the
11641 scalar operand. */
11643 bool
11644 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11645 tree *vectype, stmt_vec_info *def_stmt_info_out,
11646 gimple **def_stmt_out)
11648 stmt_vec_info def_stmt_info;
11649 gimple *def_stmt;
11650 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11651 return false;
11653 if (def_stmt_out)
11654 *def_stmt_out = def_stmt;
11655 if (def_stmt_info_out)
11656 *def_stmt_info_out = def_stmt_info;
11658 /* Now get a vector type if the def is internal, otherwise supply
11659 NULL_TREE and leave it up to the caller to figure out a proper
11660 type for the use stmt. */
11661 if (*dt == vect_internal_def
11662 || *dt == vect_induction_def
11663 || *dt == vect_reduction_def
11664 || *dt == vect_double_reduction_def
11665 || *dt == vect_nested_cycle)
11667 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11668 gcc_assert (*vectype != NULL_TREE);
11669 if (dump_enabled_p ())
11670 dump_printf_loc (MSG_NOTE, vect_location,
11671 "vect_is_simple_use: vectype %T\n", *vectype);
11673 else if (*dt == vect_uninitialized_def
11674 || *dt == vect_constant_def
11675 || *dt == vect_external_def)
11676 *vectype = NULL_TREE;
11677 else
11678 gcc_unreachable ();
11680 return true;
11683 /* Function vect_is_simple_use.
11685 Same as vect_is_simple_use but determines the operand by operand
11686 position OPERAND from either STMT or SLP_NODE, filling in *OP
11687 and *SLP_DEF (when SLP_NODE is not NULL). */
11689 bool
11690 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11691 unsigned operand, tree *op, slp_tree *slp_def,
11692 enum vect_def_type *dt,
11693 tree *vectype, stmt_vec_info *def_stmt_info_out)
11695 if (slp_node)
11697 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11698 *slp_def = child;
11699 *vectype = SLP_TREE_VECTYPE (child);
11700 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11702 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11703 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11705 else
11707 if (def_stmt_info_out)
11708 *def_stmt_info_out = NULL;
11709 *op = SLP_TREE_SCALAR_OPS (child)[0];
11710 *dt = SLP_TREE_DEF_TYPE (child);
11711 return true;
11714 else
11716 *slp_def = NULL;
11717 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11719 if (gimple_assign_rhs_code (ass) == COND_EXPR
11720 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11722 if (operand < 2)
11723 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11724 else
11725 *op = gimple_op (ass, operand);
11727 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11728 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11729 else
11730 *op = gimple_op (ass, operand + 1);
11732 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11733 *op = gimple_call_arg (call, operand);
11734 else
11735 gcc_unreachable ();
11736 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11740 /* If OP is not NULL and is external or constant, update its vector
11741 type with VECTYPE. Returns true if successful or false if not,
11742 for example when conflicting vector types are present. */
11744 bool
11745 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11747 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11748 return true;
11749 if (SLP_TREE_VECTYPE (op))
11750 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11751 SLP_TREE_VECTYPE (op) = vectype;
11752 return true;
11755 /* Function supportable_widening_operation
11757 Check whether an operation represented by the code CODE is a
11758 widening operation that is supported by the target platform in
11759 vector form (i.e., when operating on arguments of type VECTYPE_IN
11760 producing a result of type VECTYPE_OUT).
11762 Widening operations we currently support are NOP (CONVERT), FLOAT, FIX_TRUNC,
11763 WIDEN_MULT, WIDEN_LSHIFT, WIDEN_PLUS and WIDEN_MINUS. This function checks if these operations
11764 are supported by the target platform either directly (via vector
11765 tree-codes), or via target builtins.
11767 Output:
11768 - CODE1 and CODE2 are codes of vector operations to be used when
11769 vectorizing the operation, if available.
11770 - MULTI_STEP_CVT determines the number of required intermediate steps in
11771 case of multi-step conversion (like char->short->int - in that case
11772 MULTI_STEP_CVT will be 1).
11773 - INTERM_TYPES contains the intermediate type required to perform the
11774 widening operation (short in the above example). */
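/* For illustration (array names are hypothetical), a loop such as

     short a[N], b[N];
     int res[N];
     for (i = 0; i < N; i++)
       res[i] = (int) a[i] * (int) b[i];

   contains a widening multiplication: each vector of short operands
   yields two vectors of int results, produced via the LO/HI or EVEN/ODD
   pairs checked below.  */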
11776 bool
11777 supportable_widening_operation (vec_info *vinfo,
11778 enum tree_code code, stmt_vec_info stmt_info,
11779 tree vectype_out, tree vectype_in,
11780 enum tree_code *code1, enum tree_code *code2,
11781 int *multi_step_cvt,
11782 vec<tree> *interm_types)
11784 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11785 class loop *vect_loop = NULL;
11786 machine_mode vec_mode;
11787 enum insn_code icode1, icode2;
11788 optab optab1, optab2;
11789 tree vectype = vectype_in;
11790 tree wide_vectype = vectype_out;
11791 enum tree_code c1, c2;
11792 int i;
11793 tree prev_type, intermediate_type;
11794 machine_mode intermediate_mode, prev_mode;
11795 optab optab3, optab4;
11797 *multi_step_cvt = 0;
11798 if (loop_info)
11799 vect_loop = LOOP_VINFO_LOOP (loop_info);
11801 switch (code)
11803 case WIDEN_MULT_EXPR:
11804 /* The result of a vectorized widening operation usually requires
11805 two vectors (because the widened results do not fit into one vector).
11806 The generated vector results would normally be expected to be
11807 generated in the same order as in the original scalar computation,
11808 i.e. if 8 results are generated in each vector iteration, they are
11809 to be organized as follows:
11810 vect1: [res1,res2,res3,res4],
11811 vect2: [res5,res6,res7,res8].
11813 However, in the special case that the result of the widening
11814 operation is used in a reduction computation only, the order doesn't
11815 matter (because when vectorizing a reduction we change the order of
11816 the computation). Some targets can take advantage of this and
11817 generate more efficient code. For example, targets like Altivec,
11818 that support widen_mult using a sequence of {mult_even,mult_odd}
11819 generate the following vectors:
11820 vect1: [res1,res3,res5,res7],
11821 vect2: [res2,res4,res6,res8].
11823 When vectorizing outer-loops, we execute the inner-loop sequentially
11824 (each vectorized inner-loop iteration contributes to VF outer-loop
11825 iterations in parallel). We therefore don't allow changing the
11826 order of the computation in the inner-loop during outer-loop
11827 vectorization. */
11828 /* TODO: Another case in which order doesn't *really* matter is when we
11829 widen and then contract again, e.g. (short)((int)x * y >> 8).
11830 Normally, pack_trunc performs an even/odd permute, whereas the
11831 repack from an even/odd expansion would be an interleave, which
11832 would be significantly simpler for e.g. AVX2. */
11833 /* In any case, in order to avoid duplicating the code below, recurse
11834 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11835 are properly set up for the caller. If we fail, we'll continue with
11836 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11837 if (vect_loop
11838 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11839 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11840 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11841 stmt_info, vectype_out,
11842 vectype_in, code1, code2,
11843 multi_step_cvt, interm_types))
11845 /* Elements in a vector with vect_used_by_reduction property cannot
11846 be reordered if the use chain with this property does not have the
11847 same operation. One such example is s += a * b, where elements
11848 in a and b cannot be reordered. Here we check if the vector defined
11849 by STMT is only directly used in the reduction statement. */
11850 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11851 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11852 if (use_stmt_info
11853 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11854 return true;
11856 c1 = VEC_WIDEN_MULT_LO_EXPR;
11857 c2 = VEC_WIDEN_MULT_HI_EXPR;
11858 break;
11860 case DOT_PROD_EXPR:
11861 c1 = DOT_PROD_EXPR;
11862 c2 = DOT_PROD_EXPR;
11863 break;
11865 case SAD_EXPR:
11866 c1 = SAD_EXPR;
11867 c2 = SAD_EXPR;
11868 break;
11870 case VEC_WIDEN_MULT_EVEN_EXPR:
11871 /* Support the recursion induced just above. */
11872 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11873 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11874 break;
11876 case WIDEN_LSHIFT_EXPR:
11877 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11878 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11879 break;
11881 case WIDEN_PLUS_EXPR:
11882 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11883 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11884 break;
11886 case WIDEN_MINUS_EXPR:
11887 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11888 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11889 break;
11891 CASE_CONVERT:
11892 c1 = VEC_UNPACK_LO_EXPR;
11893 c2 = VEC_UNPACK_HI_EXPR;
11894 break;
11896 case FLOAT_EXPR:
11897 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11898 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11899 break;
11901 case FIX_TRUNC_EXPR:
11902 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11903 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11904 break;
11906 default:
11907 gcc_unreachable ();
11910 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11911 std::swap (c1, c2);
11913 if (code == FIX_TRUNC_EXPR)
11915 /* The signedness is determined from output operand. */
11916 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11917 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11919 else if (CONVERT_EXPR_CODE_P (code)
11920 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11921 && VECTOR_BOOLEAN_TYPE_P (vectype)
11922 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11923 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11925 /* If the input and result modes are the same, a different optab
11926 is needed where we pass in the number of units in vectype. */
11927 optab1 = vec_unpacks_sbool_lo_optab;
11928 optab2 = vec_unpacks_sbool_hi_optab;
11930 else
11932 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11933 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11936 if (!optab1 || !optab2)
11937 return false;
11939 vec_mode = TYPE_MODE (vectype);
11940 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11941 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11942 return false;
11944 *code1 = c1;
11945 *code2 = c2;
11947 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11948 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11950 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11951 return true;
11952 /* For scalar masks we may have different boolean
11953 vector types having the same QImode. Thus we
11954 add an additional check on the number of elements. */
11955 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11956 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11957 return true;
11960 /* Check if it's a multi-step conversion that can be done using intermediate
11961 types. */
11963 prev_type = vectype;
11964 prev_mode = vec_mode;
11966 if (!CONVERT_EXPR_CODE_P (code))
11967 return false;
11969 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11970 intermediate steps in the promotion sequence. We try
11971 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11972 not. */
11973 interm_types->create (MAX_INTERM_CVT_STEPS);
11974 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11976 intermediate_mode = insn_data[icode1].operand[0].mode;
11977 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11978 intermediate_type
11979 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11980 else
11981 intermediate_type
11982 = lang_hooks.types.type_for_mode (intermediate_mode,
11983 TYPE_UNSIGNED (prev_type));
11985 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11986 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11987 && intermediate_mode == prev_mode
11988 && SCALAR_INT_MODE_P (prev_mode))
11990 /* If the input and result modes are the same, a different optab
11991 is needed where we pass in the number of units in vectype. */
11992 optab3 = vec_unpacks_sbool_lo_optab;
11993 optab4 = vec_unpacks_sbool_hi_optab;
11995 else
11997 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11998 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12001 if (!optab3 || !optab4
12002 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12003 || insn_data[icode1].operand[0].mode != intermediate_mode
12004 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12005 || insn_data[icode2].operand[0].mode != intermediate_mode
12006 || ((icode1 = optab_handler (optab3, intermediate_mode))
12007 == CODE_FOR_nothing)
12008 || ((icode2 = optab_handler (optab4, intermediate_mode))
12009 == CODE_FOR_nothing))
12010 break;
12012 interm_types->quick_push (intermediate_type);
12013 (*multi_step_cvt)++;
12015 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12016 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12018 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12019 return true;
12020 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12021 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12022 return true;
12025 prev_type = intermediate_type;
12026 prev_mode = intermediate_mode;
12029 interm_types->release ();
12030 return false;
12034 /* Function supportable_narrowing_operation
12036 Check whether an operation represented by the code CODE is a
12037 narrowing operation that is supported by the target platform in
12038 vector form (i.e., when operating on arguments of type VECTYPE_IN
12039 and producing a result of type VECTYPE_OUT).
12041 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12042 and FLOAT. This function checks if these operations are supported by
12043 the target platform directly via vector tree-codes.
12045 Output:
12046 - CODE1 is the code of a vector operation to be used when
12047 vectorizing the operation, if available.
12048 - MULTI_STEP_CVT determines the number of required intermediate steps in
12049 case of multi-step conversion (like int->short->char - in that case
12050 MULTI_STEP_CVT will be 1).
12051 - INTERM_TYPES contains the intermediate type required to perform the
12052 narrowing operation (short in the above example). */
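/* For illustration (names are hypothetical), a statement like
     r[i] = (char) a[i];
   with int-element A and char-element R is such a narrowing conversion;
   on a target that only packs between adjacent vector element widths it
   needs one intermediate step via short, i.e. MULTI_STEP_CVT == 1 and
   INTERM_TYPES holding the short vector type.  */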
12054 bool
12055 supportable_narrowing_operation (enum tree_code code,
12056 tree vectype_out, tree vectype_in,
12057 enum tree_code *code1, int *multi_step_cvt,
12058 vec<tree> *interm_types)
12060 machine_mode vec_mode;
12061 enum insn_code icode1;
12062 optab optab1, interm_optab;
12063 tree vectype = vectype_in;
12064 tree narrow_vectype = vectype_out;
12065 enum tree_code c1;
12066 tree intermediate_type, prev_type;
12067 machine_mode intermediate_mode, prev_mode;
12068 int i;
12069 bool uns;
12071 *multi_step_cvt = 0;
12072 switch (code)
12074 CASE_CONVERT:
12075 c1 = VEC_PACK_TRUNC_EXPR;
12076 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12077 && VECTOR_BOOLEAN_TYPE_P (vectype)
12078 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12079 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12080 optab1 = vec_pack_sbool_trunc_optab;
12081 else
12082 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12083 break;
12085 case FIX_TRUNC_EXPR:
12086 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12087 /* The signedness is determined from output operand. */
12088 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12089 break;
12091 case FLOAT_EXPR:
12092 c1 = VEC_PACK_FLOAT_EXPR;
12093 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12094 break;
12096 default:
12097 gcc_unreachable ();
12100 if (!optab1)
12101 return false;
12103 vec_mode = TYPE_MODE (vectype);
12104 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12105 return false;
12107 *code1 = c1;
12109 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12111 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12112 return true;
12113 /* For scalar masks we may have different boolean
12114 vector types having the same QImode. Thus we
12115 add an additional check on the number of elements. */
12116 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12117 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12118 return true;
12121 if (code == FLOAT_EXPR)
12122 return false;
12124 /* Check if it's a multi-step conversion that can be done using intermediate
12125 types. */
12126 prev_mode = vec_mode;
12127 prev_type = vectype;
12128 if (code == FIX_TRUNC_EXPR)
12129 uns = TYPE_UNSIGNED (vectype_out);
12130 else
12131 uns = TYPE_UNSIGNED (vectype);
12133 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12134 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12135 costly than signed. */
12136 if (code == FIX_TRUNC_EXPR && uns)
12138 enum insn_code icode2;
12140 intermediate_type
12141 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12142 interm_optab
12143 = optab_for_tree_code (c1, intermediate_type, optab_default);
12144 if (interm_optab != unknown_optab
12145 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12146 && insn_data[icode1].operand[0].mode
12147 == insn_data[icode2].operand[0].mode)
12149 uns = false;
12150 optab1 = interm_optab;
12151 icode1 = icode2;
12155 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12156 intermediate steps in the demotion sequence. We try
12157 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12158 interm_types->create (MAX_INTERM_CVT_STEPS);
12159 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12161 intermediate_mode = insn_data[icode1].operand[0].mode;
12162 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12163 intermediate_type
12164 = vect_double_mask_nunits (prev_type, intermediate_mode);
12165 else
12166 intermediate_type
12167 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12168 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12169 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12170 && intermediate_mode == prev_mode
12171 && SCALAR_INT_MODE_P (prev_mode))
12172 interm_optab = vec_pack_sbool_trunc_optab;
12173 else
12174 interm_optab
12175 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12176 optab_default);
12177 if (!interm_optab
12178 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12179 || insn_data[icode1].operand[0].mode != intermediate_mode
12180 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12181 == CODE_FOR_nothing))
12182 break;
12184 interm_types->quick_push (intermediate_type);
12185 (*multi_step_cvt)++;
12187 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12189 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12190 return true;
12191 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12192 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12193 return true;
12196 prev_mode = intermediate_mode;
12197 prev_type = intermediate_type;
12198 optab1 = interm_optab;
12201 interm_types->release ();
12202 return false;
12205 /* Generate and return a vector mask of MASK_TYPE such that
12206 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12207 Add the statements to SEQ. */
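/* For example, with START_INDEX == 3, END_INDEX == 7 and an 8-element
   MASK_TYPE the result is the mask { 1, 1, 1, 1, 0, 0, 0, 0 }:
   lane I is active iff 3 + I < 7.  */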
12209 tree
12210 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12211 tree end_index, const char *name)
12213 tree cmp_type = TREE_TYPE (start_index);
12214 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12215 cmp_type, mask_type,
12216 OPTIMIZE_FOR_SPEED));
12217 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12218 start_index, end_index,
12219 build_zero_cst (mask_type));
12220 tree tmp;
12221 if (name)
12222 tmp = make_temp_ssa_name (mask_type, NULL, name);
12223 else
12224 tmp = make_ssa_name (mask_type);
12225 gimple_call_set_lhs (call, tmp);
12226 gimple_seq_add_stmt (seq, call);
12227 return tmp;
12230 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12231 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12233 tree
12234 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12235 tree end_index)
12237 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12238 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12241 /* Try to compute the vector types required to vectorize STMT_INFO,
12242 returning true on success and false if vectorization isn't possible.
12243 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12244 make sure that the number of elements in the vectors is no bigger
12245 than GROUP_SIZE.
12247 On success:
12249 - Set *STMT_VECTYPE_OUT to:
12250 - NULL_TREE if the statement doesn't need to be vectorized;
12251 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12253 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12254 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12255 statement does not help to determine the overall number of units. */
12257 opt_result
12258 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12259 tree *stmt_vectype_out,
12260 tree *nunits_vectype_out,
12261 unsigned int group_size)
12263 gimple *stmt = stmt_info->stmt;
12265 /* For BB vectorization, we should always have a group size once we've
12266 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12267 are tentative requests during things like early data reference
12268 analysis and pattern recognition. */
12269 if (is_a <bb_vec_info> (vinfo))
12270 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12271 else
12272 group_size = 0;
12274 *stmt_vectype_out = NULL_TREE;
12275 *nunits_vectype_out = NULL_TREE;
12277 if (gimple_get_lhs (stmt) == NULL_TREE
12278 /* MASK_STORE has no lhs, but is ok. */
12279 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12281 if (is_a <gcall *> (stmt))
12283 /* Ignore calls with no lhs. These must be calls to
12284 #pragma omp simd functions, and what vectorization factor
12285 they really need can't be determined until
12286 vectorizable_simd_clone_call. */
12287 if (dump_enabled_p ())
12288 dump_printf_loc (MSG_NOTE, vect_location,
12289 "defer to SIMD clone analysis.\n");
12290 return opt_result::success ();
12293 return opt_result::failure_at (stmt,
12294 "not vectorized: irregular stmt.%G", stmt);
12297 tree vectype;
12298 tree scalar_type = NULL_TREE;
12299 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12301 vectype = STMT_VINFO_VECTYPE (stmt_info);
12302 if (dump_enabled_p ())
12303 dump_printf_loc (MSG_NOTE, vect_location,
12304 "precomputed vectype: %T\n", vectype);
12306 else if (vect_use_mask_type_p (stmt_info))
12308 unsigned int precision = stmt_info->mask_precision;
12309 scalar_type = build_nonstandard_integer_type (precision, 1);
12310 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12311 if (!vectype)
12312 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12313 " data-type %T\n", scalar_type);
12314 if (dump_enabled_p ())
12315 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12317 else
12319 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12320 scalar_type = TREE_TYPE (DR_REF (dr));
12321 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12322 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12323 else
12324 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12326 if (dump_enabled_p ())
12328 if (group_size)
12329 dump_printf_loc (MSG_NOTE, vect_location,
12330 "get vectype for scalar type (group size %d):"
12331 " %T\n", group_size, scalar_type);
12332 else
12333 dump_printf_loc (MSG_NOTE, vect_location,
12334 "get vectype for scalar type: %T\n", scalar_type);
12336 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12337 if (!vectype)
12338 return opt_result::failure_at (stmt,
12339 "not vectorized:"
12340 " unsupported data-type %T\n",
12341 scalar_type);
12343 if (dump_enabled_p ())
12344 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12347 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12348 return opt_result::failure_at (stmt,
12349 "not vectorized: vector stmt in loop:%G",
12350 stmt);
12352 *stmt_vectype_out = vectype;
12354 /* Don't try to compute scalar types if the stmt produces a boolean
12355 vector; use the existing vector type instead. */
12356 tree nunits_vectype = vectype;
12357 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12359 /* The number of units is set according to the smallest scalar
12360 type (or the largest vector size, but we only support one
12361 vector size per vectorization). */
12362 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12363 TREE_TYPE (vectype));
12364 if (scalar_type != TREE_TYPE (vectype))
12366 if (dump_enabled_p ())
12367 dump_printf_loc (MSG_NOTE, vect_location,
12368 "get vectype for smallest scalar type: %T\n",
12369 scalar_type);
12370 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12371 group_size);
12372 if (!nunits_vectype)
12373 return opt_result::failure_at
12374 (stmt, "not vectorized: unsupported data-type %T\n",
12375 scalar_type);
12376 if (dump_enabled_p ())
12377 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12378 nunits_vectype);
12382 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12383 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12384 return opt_result::failure_at (stmt,
12385 "Not vectorized: Incompatible number "
12386 "of vector subparts between %T and %T\n",
12387 nunits_vectype, *stmt_vectype_out);
12389 if (dump_enabled_p ())
12391 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12392 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12393 dump_printf (MSG_NOTE, "\n");
12396 *nunits_vectype_out = nunits_vectype;
12397 return opt_result::success ();
12400 /* Generate and return a statement sequence that sets the vector length LEN, that is:
12402 min_of_start_and_end = min (START_INDEX, END_INDEX);
12403 left_len = END_INDEX - min_of_start_and_end;
12404 rhs = min (left_len, LEN_LIMIT);
12405 LEN = rhs;
12407 Note: the cost of the code generated by this function is modeled
12408 by vect_estimate_min_profitable_iters, so changes here may need
12409 corresponding changes there. */
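/* For example, with START_INDEX == 12, END_INDEX == 16 and LEN_LIMIT == 8:
   min_of_start_and_end == 12, left_len == 4, so LEN is set to 4.
   With START_INDEX == 0 instead, left_len == 16 and LEN is capped at 8.  */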
12411 gimple_seq
12412 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12414 gimple_seq stmts = NULL;
12415 tree len_type = TREE_TYPE (len);
12416 gcc_assert (TREE_TYPE (start_index) == len_type);
12418 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12419 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12420 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12421 gimple* stmt = gimple_build_assign (len, rhs);
12422 gimple_seq_add_stmt (&stmts, stmt);
12424 return stmts;