tree-optimization/116974 - Handle single-lane SLP for OMP scan store
gcc/tree-vect-stmts.cc
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #define INCLUDE_MEMORY
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "gimple.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "cgraph.h"
36 #include "dumpfile.h"
37 #include "alias.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "tree-eh.h"
41 #include "gimplify.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop-manip.h"
46 #include "cfgloop.h"
47 #include "explow.h"
48 #include "tree-ssa-loop.h"
49 #include "tree-scalar-evolution.h"
50 #include "tree-vectorizer.h"
51 #include "builtins.h"
52 #include "internal-fn.h"
53 #include "tree-vector-builder.h"
54 #include "vec-perm-indices.h"
55 #include "gimple-range.h"
56 #include "tree-ssa-loop-niter.h"
57 #include "gimple-fold.h"
58 #include "regs.h"
59 #include "attribs.h"
60 #include "optabs-libfuncs.h"
62 /* For lang_hooks.types.type_for_mode. */
63 #include "langhooks.h"
65 /* Return the vectorized type for the given statement. */
67 tree
68 stmt_vectype (class _stmt_vec_info *stmt_info)
70 return STMT_VINFO_VECTYPE (stmt_info);
73 /* Return TRUE iff the given statement is in an inner loop relative to
74 the loop being vectorized. */
75 bool
76 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
78 gimple *stmt = STMT_VINFO_STMT (stmt_info);
79 basic_block bb = gimple_bb (stmt);
80 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
81 class loop* loop;
83 if (!loop_vinfo)
84 return false;
86 loop = LOOP_VINFO_LOOP (loop_vinfo);
88 return (bb->loop_father == loop->inner);
91 /* Record the cost of a statement, either by directly informing the
92 target model or by saving it in a vector for later processing.
93 Return a preliminary estimate of the statement's cost. */
95 static unsigned
96 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
97 enum vect_cost_for_stmt kind,
98 stmt_vec_info stmt_info, slp_tree node,
99 tree vectype, int misalign,
100 enum vect_cost_model_location where)
102 if ((kind == vector_load || kind == unaligned_load)
103 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104 kind = vector_gather_load;
105 if ((kind == vector_store || kind == unaligned_store)
106 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
107 kind = vector_scatter_store;
109 stmt_info_for_cost si
110 = { count, kind, where, stmt_info, node, vectype, misalign };
111 body_cost_vec->safe_push (si);
113 return (unsigned)
114 (builtin_vectorization_cost (kind, vectype, misalign) * count);
117 unsigned
118 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
119 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
120 tree vectype, int misalign,
121 enum vect_cost_model_location where)
123 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
124 vectype, misalign, where);
127 unsigned
128 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
129 enum vect_cost_for_stmt kind, slp_tree node,
130 tree vectype, int misalign,
131 enum vect_cost_model_location where)
133 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
134 vectype, misalign, where);
137 unsigned
138 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
139 enum vect_cost_for_stmt kind,
140 enum vect_cost_model_location where)
142 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
143 || kind == scalar_stmt);
144 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
145 NULL_TREE, 0, where);
148 /* Return a variable of type ELEM_TYPE[NELEMS]. */
150 static tree
151 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
153 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
154 "vect_array");
157 /* ARRAY is an array of vectors created by create_vector_array.
158 Return an SSA_NAME for the vector in index N. The reference
159 is part of the vectorization of STMT_INFO and the vector is associated
160 with scalar destination SCALAR_DEST. */
162 static tree
163 read_vector_array (vec_info *vinfo,
164 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
165 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
167 tree vect_type, vect, vect_name, array_ref;
168 gimple *new_stmt;
170 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
171 vect_type = TREE_TYPE (TREE_TYPE (array));
172 vect = vect_create_destination_var (scalar_dest, vect_type);
173 array_ref = build4 (ARRAY_REF, vect_type, array,
174 build_int_cst (size_type_node, n),
175 NULL_TREE, NULL_TREE);
177 new_stmt = gimple_build_assign (vect, array_ref);
178 vect_name = make_ssa_name (vect, new_stmt);
179 gimple_assign_set_lhs (new_stmt, vect_name);
180 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
182 return vect_name;
185 /* ARRAY is an array of vectors created by create_vector_array.
186 Emit code to store SSA_NAME VECT in index N of the array.
187 The store is part of the vectorization of STMT_INFO. */
189 static void
190 write_vector_array (vec_info *vinfo,
191 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
192 tree vect, tree array, unsigned HOST_WIDE_INT n)
194 tree array_ref;
195 gimple *new_stmt;
197 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
198 build_int_cst (size_type_node, n),
199 NULL_TREE, NULL_TREE);
201 new_stmt = gimple_build_assign (array_ref, vect);
202 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
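/* As an illustration (SSA names and types below are made up), for a
   two-vector .LOAD_LANES group these helpers cooperate to produce GIMPLE
   along the lines of

	vect_array = .LOAD_LANES (MEM <vector(4) int[2]> [(int *) ptr_1]);
	vect__2 = vect_array[0];
	vect__3 = vect_array[1];
	vect_array ={v} {CLOBBER};

   with write_vector_array used for the mirror-image .STORE_LANES case,
   where each vector is stored into the array before the array is passed
   to the call.  */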
205 /* PTR is a pointer to an array of type TYPE. Return a representation
206 of *PTR. The memory reference replaces those in FIRST_DR
207 (and its group). */
209 static tree
210 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
212 tree mem_ref;
214 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
215 /* Arrays have the same alignment as their type. */
216 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
217 return mem_ref;
220 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
221 Emit the clobber before *GSI. */
223 static void
224 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
225 gimple_stmt_iterator *gsi, tree var)
227 tree clobber = build_clobber (TREE_TYPE (var));
228 gimple *new_stmt = gimple_build_assign (var, clobber);
229 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
232 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
234 /* Function vect_mark_relevant.
236 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
238 static void
239 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
240 enum vect_relevant relevant, bool live_p)
242 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
243 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
245 if (dump_enabled_p ())
246 dump_printf_loc (MSG_NOTE, vect_location,
247 "mark relevant %d, live %d: %G", relevant, live_p,
248 stmt_info->stmt);
250 /* If this stmt is an original stmt in a pattern, we might need to mark its
251 related pattern stmt instead of the original stmt. However, such stmts
252 may have their own uses that are not in any pattern, in such cases the
253 stmt itself should be marked. */
254 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
256 /* This is the last stmt in a sequence that was detected as a
257 pattern that can potentially be vectorized. Don't mark the stmt
258 as relevant/live because it's not going to be vectorized.
259 Instead mark the pattern-stmt that replaces it. */
261 if (dump_enabled_p ())
262 dump_printf_loc (MSG_NOTE, vect_location,
263 "last stmt in pattern. don't mark"
264 " relevant/live.\n");
266 stmt_vec_info old_stmt_info = stmt_info;
267 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
268 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
269 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
270 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
272 if (live_p && relevant == vect_unused_in_scope)
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_NOTE, vect_location,
276 "vec_stmt_relevant_p: forcing live pattern stmt "
277 "relevant.\n");
278 relevant = vect_used_only_live;
281 if (dump_enabled_p ())
282 dump_printf_loc (MSG_NOTE, vect_location,
283 "mark relevant %d, live %d: %G", relevant, live_p,
284 stmt_info->stmt);
287 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
288 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
289 STMT_VINFO_RELEVANT (stmt_info) = relevant;
291 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
292 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
294 if (dump_enabled_p ())
295 dump_printf_loc (MSG_NOTE, vect_location,
296 "already marked relevant/live.\n");
297 return;
300 worklist->safe_push (stmt_info);
304 /* Function is_simple_and_all_uses_invariant
306 Return true if STMT_INFO is simple and all uses of it are invariant. */
308 bool
309 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
310 loop_vec_info loop_vinfo)
312 tree op;
313 ssa_op_iter iter;
315 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
316 if (!stmt)
317 return false;
319 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
321 enum vect_def_type dt = vect_uninitialized_def;
323 if (!vect_is_simple_use (op, loop_vinfo, &dt))
325 if (dump_enabled_p ())
326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
327 "use not simple.\n");
328 return false;
331 if (dt != vect_external_def && dt != vect_constant_def)
332 return false;
334 return true;
337 /* Function vect_stmt_relevant_p.
339 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
340 is "relevant for vectorization".
342 A stmt is considered "relevant for vectorization" if:
343 - it has uses outside the loop.
344 - it has vdefs (it alters memory).
345 - control stmts in the loop (except for the exit condition).
346 - it is an induction and we have multiple exits.
348 CHECKME: what other side effects would the vectorizer allow? */
350 static bool
351 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
352 enum vect_relevant *relevant, bool *live_p)
354 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
355 ssa_op_iter op_iter;
356 imm_use_iterator imm_iter;
357 use_operand_p use_p;
358 def_operand_p def_p;
360 *relevant = vect_unused_in_scope;
361 *live_p = false;
363 /* cond stmt other than loop exit cond. */
364 gimple *stmt = STMT_VINFO_STMT (stmt_info);
365 if (is_ctrl_stmt (stmt)
366 && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
367 && (!loop->inner || gimple_bb (stmt)->loop_father == loop))
368 *relevant = vect_used_in_scope;
370 /* changing memory. */
371 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
372 if (gimple_vdef (stmt_info->stmt)
373 && !gimple_clobber_p (stmt_info->stmt))
375 if (dump_enabled_p ())
376 dump_printf_loc (MSG_NOTE, vect_location,
377 "vec_stmt_relevant_p: stmt has vdefs.\n");
378 *relevant = vect_used_in_scope;
381 /* uses outside the loop. */
382 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
384 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
386 basic_block bb = gimple_bb (USE_STMT (use_p));
387 if (!flow_bb_inside_loop_p (loop, bb))
389 if (is_gimple_debug (USE_STMT (use_p)))
390 continue;
392 if (dump_enabled_p ())
393 dump_printf_loc (MSG_NOTE, vect_location,
394 "vec_stmt_relevant_p: used out of loop.\n");
396 /* We expect all such uses to be in the loop exit phis
397 (because of loop closed form) */
398 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
400 *live_p = true;
405 /* Check if it's an induction and multiple exits. In this case there will be
406 a usage later on after peeling which is needed for the alternate exit. */
407 if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
408 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
410 if (dump_enabled_p ())
411 dump_printf_loc (MSG_NOTE, vect_location,
412 "vec_stmt_relevant_p: induction forced for "
413 "early break.\n");
414 *live_p = true;
418 if (*live_p && *relevant == vect_unused_in_scope
419 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
421 if (dump_enabled_p ())
422 dump_printf_loc (MSG_NOTE, vect_location,
423 "vec_stmt_relevant_p: stmt live but not relevant.\n");
424 *relevant = vect_used_only_live;
427 return (*live_p || *relevant);
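/* As an illustration, in

	for (i = 0; i < n; i++)
	  {
	    a[i] = b[i] + 1;
	    last = b[i];
	  }

   the store to a[i] is relevant because it has a vdef (it alters memory),
   and the assignment to 'last' is live because its value is used after
   the loop via the loop-closed exit PHI.  */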
431 /* Function exist_non_indexing_operands_for_use_p
433 USE is one of the uses attached to STMT_INFO. Check if USE is
434 used in STMT_INFO for anything other than indexing an array. */
436 static bool
437 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
439 tree operand;
441 /* USE corresponds to some operand in STMT. If there is no data
442 reference in STMT, then any operand that corresponds to USE
443 is not indexing an array. */
444 if (!STMT_VINFO_DATA_REF (stmt_info))
445 return true;
447 /* STMT has a data_ref.  FORNOW this means that it's of one of
448 the following forms:
449 -1- ARRAY_REF = var
450 -2- var = ARRAY_REF
451 (This should have been verified in analyze_data_refs).
453 'var' in the second case corresponds to a def, not a use,
454 so USE cannot correspond to any operands that are not used
455 for array indexing.
457 Therefore, all we need to check is if STMT falls into the
458 first case, and whether var corresponds to USE. */
460 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
461 if (!assign || !gimple_assign_copy_p (assign))
463 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
464 if (call && gimple_call_internal_p (call))
466 internal_fn ifn = gimple_call_internal_fn (call);
467 int mask_index = internal_fn_mask_index (ifn);
468 if (mask_index >= 0
469 && use == gimple_call_arg (call, mask_index))
470 return true;
471 int stored_value_index = internal_fn_stored_value_index (ifn);
472 if (stored_value_index >= 0
473 && use == gimple_call_arg (call, stored_value_index))
474 return true;
475 if (internal_gather_scatter_fn_p (ifn)
476 && use == gimple_call_arg (call, 1))
477 return true;
479 return false;
482 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
483 return false;
484 operand = gimple_assign_rhs1 (assign);
485 if (TREE_CODE (operand) != SSA_NAME)
486 return false;
488 if (operand == use)
489 return true;
491 return false;
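/* As an illustration, for the store  a[i_7] = x_5  this returns true for
   the use 'x_5' (it is the stored value) but false for 'i_7', which only
   appears in the ARRAY_REF address computation and therefore does not
   itself need to be vectorized.  */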
495 /*
496 Function process_use.
498 Inputs:
499 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
500 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
501 that defined USE. This is done by calling mark_relevant and passing it
502 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
503 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
504 be performed.
506 Outputs:
507 Generally, LIVE_P and RELEVANT are used to define the liveness and
508 relevance info of the DEF_STMT of this USE:
509 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
510 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
511 Exceptions:
512 - case 1: If USE is used only for address computations (e.g. array indexing),
513 which does not need to be directly vectorized, then the liveness/relevance
514 of the respective DEF_STMT is left unchanged.
515 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
516 we skip DEF_STMT because it has already been processed.
517 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
518 "relevant" will be modified accordingly.
520 Return true if everything is as expected. Return false otherwise. */
522 static opt_result
523 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
524 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
525 bool force)
527 stmt_vec_info dstmt_vinfo;
528 enum vect_def_type dt;
530 /* case 1: we are only interested in uses that need to be vectorized. Uses
531 that are used for address computation are not considered relevant. */
532 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
533 return opt_result::success ();
535 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
536 return opt_result::failure_at (stmt_vinfo->stmt,
537 "not vectorized:"
538 " unsupported use in stmt.\n");
540 if (!dstmt_vinfo)
541 return opt_result::success ();
543 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
544 basic_block bb = gimple_bb (stmt_vinfo->stmt);
546 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
547 We have to force the stmt live since the epilogue loop needs it to
548 continue computing the reduction. */
549 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
550 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
552 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
553 && bb->loop_father == def_bb->loop_father)
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "reduc-stmt defining reduc-phi in the same nest.\n");
558 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
559 return opt_result::success ();
562 /* case 3a: outer-loop stmt defining an inner-loop stmt:
563 outer-loop-header-bb:
564 d = dstmt_vinfo
565 inner-loop:
566 stmt # use (d)
567 outer-loop-tail-bb:
568 ... */
569 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_NOTE, vect_location,
573 "outer-loop def-stmt defining inner-loop stmt.\n");
575 switch (relevant)
577 case vect_unused_in_scope:
578 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
579 vect_used_in_scope : vect_unused_in_scope;
580 break;
582 case vect_used_in_outer_by_reduction:
583 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
584 relevant = vect_used_by_reduction;
585 break;
587 case vect_used_in_outer:
588 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
589 relevant = vect_used_in_scope;
590 break;
592 case vect_used_in_scope:
593 break;
595 default:
596 gcc_unreachable ();
600 /* case 3b: inner-loop stmt defining an outer-loop stmt:
601 outer-loop-header-bb:
603 inner-loop:
604 d = dstmt_vinfo
605 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
606 stmt # use (d) */
607 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
609 if (dump_enabled_p ())
610 dump_printf_loc (MSG_NOTE, vect_location,
611 "inner-loop def-stmt defining outer-loop stmt.\n");
613 switch (relevant)
615 case vect_unused_in_scope:
616 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
617 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
618 vect_used_in_outer_by_reduction : vect_unused_in_scope;
619 break;
621 case vect_used_by_reduction:
622 case vect_used_only_live:
623 relevant = vect_used_in_outer_by_reduction;
624 break;
626 case vect_used_in_scope:
627 relevant = vect_used_in_outer;
628 break;
630 default:
631 gcc_unreachable ();
634 /* We are also not interested in uses on loop PHI backedges that are
635 inductions. Otherwise we'll needlessly vectorize the IV increment
636 and cause hybrid SLP for SLP inductions. Unless the PHI is live
637 of course. */
638 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
639 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
640 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
641 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
642 loop_latch_edge (bb->loop_father))
643 == use))
645 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE, vect_location,
647 "induction value on backedge.\n");
648 return opt_result::success ();
652 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
653 return opt_result::success ();
657 /* Function vect_mark_stmts_to_be_vectorized.
659 Not all stmts in the loop need to be vectorized. For example:
661 for i...
662 for j...
663 1. T0 = i + j
664 2. T1 = a[T0]
666 3. j = j + 1
668 Stmt 1 and 3 do not need to be vectorized, because loop control and
669 addressing of vectorized data-refs are handled differently.
671 This pass detects such stmts. */
673 opt_result
674 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
676 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
677 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
678 unsigned int nbbs = loop->num_nodes;
679 gimple_stmt_iterator si;
680 unsigned int i;
681 basic_block bb;
682 bool live_p;
683 enum vect_relevant relevant;
685 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
687 auto_vec<stmt_vec_info, 64> worklist;
689 /* 1. Init worklist. */
690 for (i = 0; i < nbbs; i++)
692 bb = bbs[i];
693 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
695 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
696 if (dump_enabled_p ())
697 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
698 phi_info->stmt);
700 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
701 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
703 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
705 if (is_gimple_debug (gsi_stmt (si)))
706 continue;
707 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "init: stmt relevant? %G", stmt_info->stmt);
712 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
713 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
717 /* 2. Process_worklist */
718 while (worklist.length () > 0)
720 use_operand_p use_p;
721 ssa_op_iter iter;
723 stmt_vec_info stmt_vinfo = worklist.pop ();
724 if (dump_enabled_p ())
725 dump_printf_loc (MSG_NOTE, vect_location,
726 "worklist: examine stmt: %G", stmt_vinfo->stmt);
728 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
729 (DEF_STMT) as relevant/irrelevant according to the relevance property
730 of STMT. */
731 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
733 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
734 propagated as is to the DEF_STMTs of its USEs.
736 One exception is when STMT has been identified as defining a reduction
737 variable; in this case we set the relevance to vect_used_by_reduction.
738 This is because we distinguish between two kinds of relevant stmts -
739 those that are used by a reduction computation, and those that are
740 (also) used by a regular computation. This allows us later on to
741 identify stmts that are used solely by a reduction, and therefore the
742 order of the results that they produce does not have to be kept. */
744 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
746 case vect_reduction_def:
747 gcc_assert (relevant != vect_unused_in_scope);
748 if (relevant != vect_unused_in_scope
749 && relevant != vect_used_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
754 break;
756 case vect_nested_cycle:
757 if (relevant != vect_unused_in_scope
758 && relevant != vect_used_in_outer_by_reduction
759 && relevant != vect_used_in_outer)
760 return opt_result::failure_at
761 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
762 break;
764 case vect_double_reduction_def:
765 if (relevant != vect_unused_in_scope
766 && relevant != vect_used_by_reduction
767 && relevant != vect_used_only_live)
768 return opt_result::failure_at
769 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
770 break;
772 default:
773 break;
776 if (is_pattern_stmt_p (stmt_vinfo))
778 /* Pattern statements are not inserted into the code, so
779 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
780 have to scan the RHS or function arguments instead. */
781 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
783 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
784 tree op = gimple_assign_rhs1 (assign);
786 i = 1;
787 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
789 opt_result res
790 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
791 loop_vinfo, relevant, &worklist, false);
792 if (!res)
793 return res;
794 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
795 loop_vinfo, relevant, &worklist, false);
796 if (!res)
797 return res;
798 i = 2;
800 for (; i < gimple_num_ops (assign); i++)
802 op = gimple_op (assign, i);
803 if (TREE_CODE (op) == SSA_NAME)
805 opt_result res
806 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
807 &worklist, false);
808 if (!res)
809 return res;
813 else if (gcond *cond = dyn_cast <gcond *> (stmt_vinfo->stmt))
815 tree_code rhs_code = gimple_cond_code (cond);
816 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
817 opt_result res
818 = process_use (stmt_vinfo, gimple_cond_lhs (cond),
819 loop_vinfo, relevant, &worklist, false);
820 if (!res)
821 return res;
822 res = process_use (stmt_vinfo, gimple_cond_rhs (cond),
823 loop_vinfo, relevant, &worklist, false);
824 if (!res)
825 return res;
827 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
829 for (i = 0; i < gimple_call_num_args (call); i++)
831 tree arg = gimple_call_arg (call, i);
832 opt_result res
833 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
834 &worklist, false);
835 if (!res)
836 return res;
839 else
840 gcc_unreachable ();
842 else
843 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
845 tree op = USE_FROM_PTR (use_p);
846 opt_result res
847 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
848 &worklist, false);
849 if (!res)
850 return res;
853 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
855 gather_scatter_info gs_info;
856 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
857 gcc_unreachable ();
858 opt_result res
859 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
860 &worklist, true);
861 if (!res)
863 if (fatal)
864 *fatal = false;
865 return res;
868 } /* while worklist */
870 return opt_result::success ();
873 /* Function vect_model_simple_cost.
875 Models cost for simple operations, i.e. those that only emit ncopies of a
876 single op. Right now, this does not account for multiple insns that could
877 be generated for the single vector op. We will handle that shortly. */
879 static void
880 vect_model_simple_cost (vec_info *,
881 stmt_vec_info stmt_info, int ncopies,
882 enum vect_def_type *dt,
883 int ndts,
884 slp_tree node,
885 stmt_vector_for_cost *cost_vec,
886 vect_cost_for_stmt kind = vector_stmt)
888 int inside_cost = 0, prologue_cost = 0;
890 gcc_assert (cost_vec != NULL);
892 /* ??? Somehow we need to fix this at the callers. */
893 if (node)
894 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
896 if (!node)
897 /* Cost the "broadcast" of a scalar operand in to a vector operand.
898 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
899 cost model. */
900 for (int i = 0; i < ndts; i++)
901 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
902 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
903 stmt_info, 0, vect_prologue);
905 /* Pass the inside-of-loop statements to the target-specific cost model. */
906 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
907 stmt_info, 0, vect_body);
909 if (dump_enabled_p ())
910 dump_printf_loc (MSG_NOTE, vect_location,
911 "vect_model_simple_cost: inside_cost = %d, "
912 "prologue_cost = %d .\n", inside_cost, prologue_cost);
916 /* Model cost for type demotion and promotion operations. PWR is
917 normally zero for single-step promotions and demotions. It will be
918 one if two-step promotion/demotion is required, and so on. NCOPIES
919 is the number of vector results (and thus number of instructions)
920 for the narrowest end of the operation chain. Each additional
921 step doubles the number of instructions required. If WIDEN_ARITH
922 is true the stmt is doing widening arithmetic. */
924 static void
925 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
926 enum vect_def_type *dt,
927 unsigned int ncopies, int pwr,
928 stmt_vector_for_cost *cost_vec,
929 bool widen_arith)
931 int i;
932 int inside_cost = 0, prologue_cost = 0;
934 for (i = 0; i < pwr + 1; i++)
936 inside_cost += record_stmt_cost (cost_vec, ncopies,
937 widen_arith
938 ? vector_stmt : vec_promote_demote,
939 stmt_info, 0, vect_body);
940 ncopies *= 2;
943 /* FORNOW: Assuming a maximum of 2 args per stmt.  */
944 for (i = 0; i < 2; i++)
945 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
946 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
947 stmt_info, 0, vect_prologue);
949 if (dump_enabled_p ())
950 dump_printf_loc (MSG_NOTE, vect_location,
951 "vect_model_promotion_demotion_cost: inside_cost = %d, "
952 "prologue_cost = %d .\n", inside_cost, prologue_cost);
955 /* Returns true if the current function returns DECL. */
957 static bool
958 cfun_returns (tree decl)
960 edge_iterator ei;
961 edge e;
962 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
964 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
965 if (!ret)
966 continue;
967 if (gimple_return_retval (ret) == decl)
968 return true;
969 /* We often end up with an aggregate copy to the result decl,
970 handle that case as well. First skip intermediate clobbers
971 though. */
972 gimple *def = ret;
975 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
977 while (gimple_clobber_p (def));
978 if (is_a <gassign *> (def)
979 && gimple_assign_lhs (def) == gimple_return_retval (ret)
980 && gimple_assign_rhs1 (def) == decl)
981 return true;
983 return false;
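/* As an illustration, cfun_returns (decl) is true both for a direct

	return decl;

   and for the common aggregate-copy form

	<retval> = decl;
	...
	return <retval>;

   where any intermediate clobbers on the virtual use chain are skipped.  */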
986 /* Calculate cost of DR's memory access. */
987 void
988 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
989 dr_alignment_support alignment_support_scheme,
990 int misalignment,
991 unsigned int *inside_cost,
992 stmt_vector_for_cost *body_cost_vec)
994 switch (alignment_support_scheme)
996 case dr_aligned:
998 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
999 vector_store, stmt_info, 0,
1000 vect_body);
1002 if (dump_enabled_p ())
1003 dump_printf_loc (MSG_NOTE, vect_location,
1004 "vect_model_store_cost: aligned.\n");
1005 break;
1008 case dr_unaligned_supported:
1010 /* Here, we assign an additional cost for the unaligned store. */
1011 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1012 unaligned_store, stmt_info,
1013 misalignment, vect_body);
1014 if (dump_enabled_p ())
1015 dump_printf_loc (MSG_NOTE, vect_location,
1016 "vect_model_store_cost: unaligned supported by "
1017 "hardware.\n");
1018 break;
1021 case dr_unaligned_unsupported:
1023 *inside_cost = VECT_MAX_COST;
1025 if (dump_enabled_p ())
1026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1027 "vect_model_store_cost: unsupported access.\n");
1028 break;
1031 default:
1032 gcc_unreachable ();
1036 /* Calculate cost of DR's memory access. */
1037 void
1038 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1039 dr_alignment_support alignment_support_scheme,
1040 int misalignment,
1041 bool add_realign_cost, unsigned int *inside_cost,
1042 unsigned int *prologue_cost,
1043 stmt_vector_for_cost *prologue_cost_vec,
1044 stmt_vector_for_cost *body_cost_vec,
1045 bool record_prologue_costs)
1047 switch (alignment_support_scheme)
1049 case dr_aligned:
1051 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1052 stmt_info, 0, vect_body);
1054 if (dump_enabled_p ())
1055 dump_printf_loc (MSG_NOTE, vect_location,
1056 "vect_model_load_cost: aligned.\n");
1058 break;
1060 case dr_unaligned_supported:
1062 /* Here, we assign an additional cost for the unaligned load. */
1063 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1064 unaligned_load, stmt_info,
1065 misalignment, vect_body);
1067 if (dump_enabled_p ())
1068 dump_printf_loc (MSG_NOTE, vect_location,
1069 "vect_model_load_cost: unaligned supported by "
1070 "hardware.\n");
1072 break;
1074 case dr_explicit_realign:
1076 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1077 vector_load, stmt_info, 0, vect_body);
1078 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1079 vec_perm, stmt_info, 0, vect_body);
1081 /* FIXME: If the misalignment remains fixed across the iterations of
1082 the containing loop, the following cost should be added to the
1083 prologue costs. */
1084 if (targetm.vectorize.builtin_mask_for_load)
1085 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1086 stmt_info, 0, vect_body);
1088 if (dump_enabled_p ())
1089 dump_printf_loc (MSG_NOTE, vect_location,
1090 "vect_model_load_cost: explicit realign\n");
1092 break;
1094 case dr_explicit_realign_optimized:
1096 if (dump_enabled_p ())
1097 dump_printf_loc (MSG_NOTE, vect_location,
1098 "vect_model_load_cost: unaligned software "
1099 "pipelined.\n");
1101 /* Unaligned software pipeline has a load of an address, an initial
1102 load, and possibly a mask operation to "prime" the loop. However,
1103 if this is an access in a group of loads, which provide grouped
1104 access, then the above cost should only be considered for one
1105 access in the group. Inside the loop, there is a load op
1106 and a realignment op. */
1108 if (add_realign_cost && record_prologue_costs)
1110 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1111 vector_stmt, stmt_info,
1112 0, vect_prologue);
1113 if (targetm.vectorize.builtin_mask_for_load)
1114 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1115 vector_stmt, stmt_info,
1116 0, vect_prologue);
1119 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1120 stmt_info, 0, vect_body);
1121 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1122 stmt_info, 0, vect_body);
1124 if (dump_enabled_p ())
1125 dump_printf_loc (MSG_NOTE, vect_location,
1126 "vect_model_load_cost: explicit realign optimized"
1127 "\n");
1129 break;
1132 case dr_unaligned_unsupported:
1134 *inside_cost = VECT_MAX_COST;
1136 if (dump_enabled_p ())
1137 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1138 "vect_model_load_cost: unsupported access.\n");
1139 break;
1142 default:
1143 gcc_unreachable ();
1147 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1148 the loop preheader for the vectorized stmt STMT_VINFO. */
1150 static void
1151 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1152 gimple_stmt_iterator *gsi)
1154 if (gsi)
1155 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1156 else
1157 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1159 if (dump_enabled_p ())
1160 dump_printf_loc (MSG_NOTE, vect_location,
1161 "created new init_stmt: %G", new_stmt);
1164 /* Function vect_init_vector.
1166 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1167 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1168 vector type a vector with all elements equal to VAL is created first.
1169 Place the initialization at GSI if it is not NULL. Otherwise, place the
1170 initialization at the loop preheader.
1171 Return the DEF of INIT_STMT.
1172 It will be used in the vectorization of STMT_INFO. */
1174 tree
1175 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1176 gimple_stmt_iterator *gsi)
1178 gimple *init_stmt;
1179 tree new_temp;
1181 /* We abuse this function to push something to an SSA name with initial 'val'.  */
1182 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1184 gcc_assert (VECTOR_TYPE_P (type));
1185 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1187 /* Scalar boolean value should be transformed into
1188 all zeros or all ones value before building a vector. */
1189 if (VECTOR_BOOLEAN_TYPE_P (type))
1191 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1192 tree false_val = build_zero_cst (TREE_TYPE (type));
1194 if (CONSTANT_CLASS_P (val))
1195 val = integer_zerop (val) ? false_val : true_val;
1196 else
1198 new_temp = make_ssa_name (TREE_TYPE (type));
1199 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1200 val, true_val, false_val);
1201 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1202 val = new_temp;
1205 else
1207 gimple_seq stmts = NULL;
1208 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1209 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1210 TREE_TYPE (type), val);
1211 else
1212 /* ??? Condition vectorization expects us to do
1213 promotion of invariant/external defs. */
1214 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1215 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1216 !gsi_end_p (gsi2); )
1218 init_stmt = gsi_stmt (gsi2);
1219 gsi_remove (&gsi2, false);
1220 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1224 val = build_vector_from_val (type, val);
1227 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1228 init_stmt = gimple_build_assign (new_temp, val);
1229 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1230 return new_temp;
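/* As an illustration (names made up), vect_init_vector of the scalar
   constant 3 with a four-element integer vector type emits, in the loop
   preheader when GSI is NULL,

	cst__1 = { 3, 3, 3, 3 };

   while a non-constant scalar boolean VAL with a vector-boolean TYPE is
   first canonicalized to all-zeros/all-ones, e.g.

	_2 = b_5 ? -1 : 0;
	cst__3 = { _2, _2, _2, _2 };  */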
1234 /* Function vect_get_vec_defs_for_operand.
1236 OP is an operand in STMT_VINFO. This function returns a vector of
1237 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1239 In the case that OP is an SSA_NAME which is defined in the loop, then
1240 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1242 In case OP is an invariant or constant, a new stmt that creates a vector def
1243 needs to be introduced. VECTYPE may be used to specify a required type for
1244 vector invariant. */
1246 void
1247 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1248 unsigned ncopies,
1249 tree op, vec<tree> *vec_oprnds, tree vectype)
1251 gimple *def_stmt;
1252 enum vect_def_type dt;
1253 bool is_simple_use;
1254 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1256 if (dump_enabled_p ())
1257 dump_printf_loc (MSG_NOTE, vect_location,
1258 "vect_get_vec_defs_for_operand: %T\n", op);
1260 stmt_vec_info def_stmt_info;
1261 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1262 &def_stmt_info, &def_stmt);
1263 gcc_assert (is_simple_use);
1264 if (def_stmt && dump_enabled_p ())
1265 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1267 vec_oprnds->create (ncopies);
1268 if (dt == vect_constant_def || dt == vect_external_def)
1270 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1271 tree vector_type;
1273 if (vectype)
1274 vector_type = vectype;
1275 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1276 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1277 vector_type = truth_type_for (stmt_vectype);
1278 else
1279 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1281 gcc_assert (vector_type);
1282 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1283 while (ncopies--)
1284 vec_oprnds->quick_push (vop);
1286 else
1288 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1289 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1290 for (unsigned i = 0; i < ncopies; ++i)
1291 vec_oprnds->quick_push (gimple_get_lhs
1292 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1297 /* Get vectorized definitions for OP0 and OP1. */
1299 void
1300 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1301 unsigned ncopies,
1302 tree op0, tree vectype0, vec<tree> *vec_oprnds0,
1303 tree op1, tree vectype1, vec<tree> *vec_oprnds1,
1304 tree op2, tree vectype2, vec<tree> *vec_oprnds2,
1305 tree op3, tree vectype3, vec<tree> *vec_oprnds3)
1307 if (slp_node)
1309 if (op0)
1310 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1311 if (op1)
1312 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1313 if (op2)
1314 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1315 if (op3)
1316 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1318 else
1320 if (op0)
1321 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1322 op0, vec_oprnds0, vectype0);
1323 if (op1)
1324 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1325 op1, vec_oprnds1, vectype1);
1326 if (op2)
1327 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1328 op2, vec_oprnds2, vectype2);
1329 if (op3)
1330 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1331 op3, vec_oprnds3, vectype3);
1335 void
1336 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1337 unsigned ncopies,
1338 tree op0, vec<tree> *vec_oprnds0,
1339 tree op1, vec<tree> *vec_oprnds1,
1340 tree op2, vec<tree> *vec_oprnds2,
1341 tree op3, vec<tree> *vec_oprnds3)
1343 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1344 op0, NULL_TREE, vec_oprnds0,
1345 op1, NULL_TREE, vec_oprnds1,
1346 op2, NULL_TREE, vec_oprnds2,
1347 op3, NULL_TREE, vec_oprnds3);
1350 /* Helper function called by vect_finish_replace_stmt and
1351 vect_finish_stmt_generation. Set the location of the new
1352 statement and create and return a stmt_vec_info for it. */
1354 static void
1355 vect_finish_stmt_generation_1 (vec_info *,
1356 stmt_vec_info stmt_info, gimple *vec_stmt)
1358 if (dump_enabled_p ())
1359 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1361 if (stmt_info)
1363 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1365 /* While EH edges will generally prevent vectorization, stmt might
1366 e.g. be in a must-not-throw region. Ensure newly created stmts
1367 that could throw are part of the same region. */
1368 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1369 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1370 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1372 else
1373 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1376 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1377 which sets the same scalar result as STMT_INFO did. Create and return a
1378 stmt_vec_info for VEC_STMT. */
1380 void
1381 vect_finish_replace_stmt (vec_info *vinfo,
1382 stmt_vec_info stmt_info, gimple *vec_stmt)
1384 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1385 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1387 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1388 gsi_replace (&gsi, vec_stmt, true);
1390 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1393 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1394 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1396 void
1397 vect_finish_stmt_generation (vec_info *vinfo,
1398 stmt_vec_info stmt_info, gimple *vec_stmt,
1399 gimple_stmt_iterator *gsi)
1401 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1403 if (!gsi_end_p (*gsi)
1404 && gimple_has_mem_ops (vec_stmt))
1406 gimple *at_stmt = gsi_stmt (*gsi);
1407 tree vuse = gimple_vuse (at_stmt);
1408 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1410 tree vdef = gimple_vdef (at_stmt);
1411 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1412 gimple_set_modified (vec_stmt, true);
1413 /* If we have an SSA vuse and insert a store, update virtual
1414 SSA form to avoid triggering the renamer. Do so only
1415 if we can easily see all uses - which is what almost always
1416 happens with the way vectorized stmts are inserted. */
1417 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1418 && ((is_gimple_assign (vec_stmt)
1419 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1420 || (is_gimple_call (vec_stmt)
1421 && (!(gimple_call_flags (vec_stmt)
1422 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1423 || (gimple_call_lhs (vec_stmt)
1424 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1426 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1427 gimple_set_vdef (vec_stmt, new_vdef);
1428 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1432 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1433 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
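/* As an illustration, when a vector store is inserted before a scalar
   stmt whose virtual use is .MEM_3, the code above gives the new store a
   fresh VDEF,

	# .MEM_17 = VDEF <.MEM_3>
	MEM <vector(4) int> [(int *) ptr_1] = vect__5;

   and rewires the following stmt to use .MEM_17, so virtual SSA form
   stays up to date without invoking the renamer.  */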
1436 /* We want to vectorize a call to combined function CFN with function
1437 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1438 as the types of all inputs. Check whether this is possible using
1439 an internal function, returning its code if so or IFN_LAST if not. */
1441 static internal_fn
1442 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1443 tree vectype_out, tree vectype_in)
1445 internal_fn ifn;
1446 if (internal_fn_p (cfn))
1447 ifn = as_internal_fn (cfn);
1448 else
1449 ifn = associated_internal_fn (fndecl);
1450 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1452 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1453 if (info.vectorizable)
1455 bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1456 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1457 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1459 /* The type sizes of vectype_in and vectype_out must be exactly the
1460 same when vectype_out does not participate in the optab query;
1461 there is no restriction on type size when vectype_out is part of
1462 the optab query. */
1463 if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1464 return IFN_LAST;
1466 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1467 OPTIMIZE_FOR_SPEED))
1468 return ifn;
1471 return IFN_LAST;
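/* As an illustration, a call to sqrt (CFN_SQRT) maps to the direct
   internal function IFN_SQRT and is returned here when the target
   provides the corresponding vector optab for the chosen vector type;
   otherwise IFN_LAST is returned and the caller must vectorize the call
   some other way.  */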
1475 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1476 gimple_stmt_iterator *);
1478 /* Check whether a load or store statement in the loop described by
1479 LOOP_VINFO is possible in a loop using partial vectors. This is
1480 testing whether the vectorizer pass has the appropriate support,
1481 as well as whether the target does.
1483 VLS_TYPE says whether the statement is a load or store and VECTYPE
1484 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1485 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1486 says how the load or store is going to be implemented and GROUP_SIZE
1487 is the number of load or store statements in the containing group.
1488 If the access is a gather load or scatter store, GS_INFO describes
1489 its arguments. If the load or store is conditional, SCALAR_MASK is the
1490 condition under which it occurs.
1492 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1493 vectors is not supported, otherwise record the required rgroup control
1494 types. */
1496 static void
1497 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1498 slp_tree slp_node,
1499 vec_load_store_type vls_type,
1500 int group_size,
1501 vect_memory_access_type
1502 memory_access_type,
1503 gather_scatter_info *gs_info,
1504 tree scalar_mask)
1506 /* Invariant loads need no special support. */
1507 if (memory_access_type == VMAT_INVARIANT)
1508 return;
1510 unsigned int nvectors = vect_get_num_copies (loop_vinfo, slp_node, vectype);
1511 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1512 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1513 machine_mode vecmode = TYPE_MODE (vectype);
1514 bool is_load = (vls_type == VLS_LOAD);
1515 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1517 if (slp_node)
1518 nvectors /= group_size;
1519 internal_fn ifn
1520 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1521 : vect_store_lanes_supported (vectype, group_size, true));
1522 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1523 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1524 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1525 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1526 scalar_mask);
1527 else
1529 if (dump_enabled_p ())
1530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1531 "can't operate on partial vectors because"
1532 " the target doesn't have an appropriate"
1533 " load/store-lanes instruction.\n");
1534 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1536 return;
1539 if (memory_access_type == VMAT_GATHER_SCATTER)
1541 internal_fn ifn = (is_load
1542 ? IFN_MASK_GATHER_LOAD
1543 : IFN_MASK_SCATTER_STORE);
1544 internal_fn len_ifn = (is_load
1545 ? IFN_MASK_LEN_GATHER_LOAD
1546 : IFN_MASK_LEN_SCATTER_STORE);
1547 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1548 gs_info->memory_type,
1549 gs_info->offset_vectype,
1550 gs_info->scale))
1551 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1552 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1553 gs_info->memory_type,
1554 gs_info->offset_vectype,
1555 gs_info->scale))
1556 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1557 scalar_mask);
1558 else
1560 if (dump_enabled_p ())
1561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1562 "can't operate on partial vectors because"
1563 " the target doesn't have an appropriate"
1564 " gather load or scatter store instruction.\n");
1565 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1567 return;
1570 if (memory_access_type != VMAT_CONTIGUOUS
1571 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1573 /* Element X of the data must come from iteration i * VF + X of the
1574 scalar loop. We need more work to support other mappings. */
1575 if (dump_enabled_p ())
1576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1577 "can't operate on partial vectors because an"
1578 " access isn't contiguous.\n");
1579 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1580 return;
1583 if (!VECTOR_MODE_P (vecmode))
1585 if (dump_enabled_p ())
1586 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1587 "can't operate on partial vectors when emulating"
1588 " vector operations.\n");
1589 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1590 return;
1593 /* We might load more scalars than we need for permuting SLP loads.
1594 We checked in get_group_load_store_type that the extra elements
1595 don't leak into a new vector. */
1596 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1598 unsigned int nvectors;
1599 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1600 return nvectors;
1601 gcc_unreachable ();
1604 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1605 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1606 machine_mode mask_mode;
1607 machine_mode vmode;
1608 bool using_partial_vectors_p = false;
1609 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1611 nvectors = group_memory_nvectors (group_size * vf, nunits);
1612 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1613 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1614 using_partial_vectors_p = true;
1616 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1617 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1619 nvectors = group_memory_nvectors (group_size * vf, nunits);
1620 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1621 using_partial_vectors_p = true;
1624 if (!using_partial_vectors_p)
1626 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1628 "can't operate on partial vectors because the"
1629 " target doesn't have the appropriate partial"
1630 " vectorization load or store.\n");
1631 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
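/* As a worked example, a contiguous group of GROUP_SIZE == 2 int accesses
   vectorized with V4SI and VF == 8 needs
   group_memory_nvectors (2 * 8, 4) == 4 rgroup controls, recorded either
   as lengths (for len_load/len_store targets) or as masks (for masked
   load/store targets); if the target supports neither, the loop cannot
   use partial vectors.  */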
1635 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1636 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1637 that needs to be applied to all loads and stores in a vectorized loop.
1638 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1639 otherwise return VEC_MASK & LOOP_MASK.
1641 MASK_TYPE is the type of both masks. If new statements are needed,
1642 insert them before GSI. */
1644 tree
1645 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1646 tree vec_mask, gimple_stmt_iterator *gsi)
1648 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1649 if (!loop_mask)
1650 return vec_mask;
1652 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1654 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1655 return vec_mask;
1657 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1658 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1659 vec_mask, loop_mask);
1661 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1662 return and_res;
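/* As an illustration (names made up), with a non-null LOOP_MASK and a
   { VEC_MASK, LOOP_MASK } pair that is not already known to be masked,
   this emits

	vec_mask_and_4 = vec_mask_2 & loop_mask_3;

   and returns vec_mask_and_4; otherwise VEC_MASK itself is returned.  */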
1665 /* Determine whether we can use a gather load or scatter store to vectorize
1666 strided load or store STMT_INFO by truncating the current offset to a
1667 smaller width. We need to be able to construct an offset vector:
1669 { 0, X, X*2, X*3, ... }
1671 without loss of precision, where X is STMT_INFO's DR_STEP.
1673 Return true if this is possible, describing the gather load or scatter
1674 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1676 static bool
1677 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1678 loop_vec_info loop_vinfo, bool masked_p,
1679 gather_scatter_info *gs_info)
1681 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1682 data_reference *dr = dr_info->dr;
1683 tree step = DR_STEP (dr);
1684 if (TREE_CODE (step) != INTEGER_CST)
1686 /* ??? Perhaps we could use range information here? */
1687 if (dump_enabled_p ())
1688 dump_printf_loc (MSG_NOTE, vect_location,
1689 "cannot truncate variable step.\n");
1690 return false;
1693 /* Get the number of bits in an element. */
1694 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1695 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1696 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1698 /* Set COUNT to the upper limit on the number of elements - 1.
1699 Start with the maximum vectorization factor. */
1700 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1702 /* Try lowering COUNT to the number of scalar latch iterations. */
1703 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1704 widest_int max_iters;
1705 if (max_loop_iterations (loop, &max_iters)
1706 && max_iters < count)
1707 count = max_iters.to_shwi ();
1709 /* Try scales of 1 and the element size. */
1710 unsigned int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1711 wi::overflow_type overflow = wi::OVF_NONE;
1712 for (int i = 0; i < 2; ++i)
1714 unsigned int scale = scales[i];
1715 widest_int factor;
1716 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1717 continue;
1719 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1720 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1721 if (overflow)
1722 continue;
1723 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1724 unsigned int min_offset_bits = wi::min_precision (range, sign);
1726 /* Find the narrowest viable offset type. */
1727 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1728 tree offset_type = build_nonstandard_integer_type (offset_bits,
1729 sign == UNSIGNED);
1731 /* See whether the target supports the operation with an offset
1732 no narrower than OFFSET_TYPE. */
1733 tree memory_type = TREE_TYPE (DR_REF (dr));
1734 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1735 vectype, memory_type, offset_type, scale,
1736 &gs_info->ifn, &gs_info->offset_vectype)
1737 || gs_info->ifn == IFN_LAST)
1738 continue;
1740 gs_info->decl = NULL_TREE;
1741 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1742 but we don't need to store that here. */
1743 gs_info->base = NULL_TREE;
1744 gs_info->element_type = TREE_TYPE (vectype);
1745 gs_info->offset = fold_convert (offset_type, step);
1746 gs_info->offset_dt = vect_constant_def;
1747 gs_info->scale = scale;
1748 gs_info->memory_type = memory_type;
1749 return true;
1752 if (overflow && dump_enabled_p ())
1753 dump_printf_loc (MSG_NOTE, vect_location,
1754 "truncating gather/scatter offset to %d bits"
1755 " might change its value.\n", element_bits);
1757 return false;
1760 /* Return true if we can use gather/scatter internal functions to
1761 vectorize STMT_INFO, which is a grouped or strided load or store.
1762 MASKED_P is true if load or store is conditional. When returning
1763 true, fill in GS_INFO with the information required to perform the
1764 operation. */
1766 static bool
1767 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1768 loop_vec_info loop_vinfo, bool masked_p,
1769 gather_scatter_info *gs_info)
1771 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1772 || gs_info->ifn == IFN_LAST)
1773 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1774 masked_p, gs_info);
1776 tree old_offset_type = TREE_TYPE (gs_info->offset);
1777 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1779 gcc_assert (TYPE_PRECISION (new_offset_type)
1780 >= TYPE_PRECISION (old_offset_type));
1781 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1783 if (dump_enabled_p ())
1784 dump_printf_loc (MSG_NOTE, vect_location,
1785 "using gather/scatter for strided/grouped access,"
1786 " scale = %d\n", gs_info->scale);
1788 return true;
1791 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1792 elements with a known constant step. Return -1 if that step
1793 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1796 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1798 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1799 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1800 size_zero_node);
1803 /* If the target supports a permute mask that reverses the elements in
1804 a vector of type VECTYPE, return that mask, otherwise return null. */
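/* For example (illustrative), for V4SI the encoded selector expands to
   { 3, 2, 1, 0 }, i.e. the permutation that reverses the four elements.
   For variable-length vectors the three encoded elements
   { nunits - 1, nunits - 2, nunits - 3 } describe the same stepped
   reversal pattern.  */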
1806 tree
1807 perm_mask_for_reverse (tree vectype)
1809 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1811 /* The encoding has a single stepped pattern. */
1812 vec_perm_builder sel (nunits, 1, 3);
1813 for (int i = 0; i < 3; ++i)
1814 sel.quick_push (nunits - 1 - i);
1816 vec_perm_indices indices (sel, 1, nunits);
1817 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1818 indices))
1819 return NULL_TREE;
1820 return vect_gen_perm_mask_checked (vectype, indices);
1823 /* A subroutine of get_load_store_type, with a subset of the same
1824 arguments. Handle the case where STMT_INFO is a load or store that
1825 accesses consecutive elements with a negative step. Sets *POFFSET
1826 to the offset to be applied to the DR for the first access. */
1828 static vect_memory_access_type
1829 get_negative_load_store_type (vec_info *vinfo,
1830 stmt_vec_info stmt_info, tree vectype,
1831 vec_load_store_type vls_type,
1832 unsigned int ncopies, poly_int64 *poffset)
1834 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1835 dr_alignment_support alignment_support_scheme;
1837 if (ncopies > 1)
1839 if (dump_enabled_p ())
1840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1841 "multiple types with negative step.\n");
1842 return VMAT_ELEMENTWISE;
1845 /* For backward running DRs the first access in vectype actually is
1846 N-1 elements before the address of the DR. */
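/* For example (illustrative sizes), with V4SI - four 4-byte elements -
   this yields (-4 + 1) * 4 = -12, i.e. the first vector access starts
   12 bytes before the scalar DR address.  */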
1847 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1848 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1850 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1851 alignment_support_scheme
1852 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1853 if (alignment_support_scheme != dr_aligned
1854 && alignment_support_scheme != dr_unaligned_supported)
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1858 "negative step but alignment required.\n");
1859 *poffset = 0;
1860 return VMAT_ELEMENTWISE;
1863 if (vls_type == VLS_STORE_INVARIANT)
1865 if (dump_enabled_p ())
1866 dump_printf_loc (MSG_NOTE, vect_location,
1867 "negative step with invariant source;"
1868 " no permute needed.\n");
1869 return VMAT_CONTIGUOUS_DOWN;
1872 if (!perm_mask_for_reverse (vectype))
1874 if (dump_enabled_p ())
1875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1876 "negative step and reversing not supported.\n");
1877 *poffset = 0;
1878 return VMAT_ELEMENTWISE;
1881 return VMAT_CONTIGUOUS_REVERSE;
1884 /* STMT_INFO is either a masked or unconditional store. Return the value
1885 being stored. */
1887 tree
1888 vect_get_store_rhs (stmt_vec_info stmt_info)
1890 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1892 gcc_assert (gimple_assign_single_p (assign));
1893 return gimple_assign_rhs1 (assign);
1895 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1897 internal_fn ifn = gimple_call_internal_fn (call);
1898 int index = internal_fn_stored_value_index (ifn);
1899 gcc_assert (index >= 0);
1900 return gimple_call_arg (call, index);
1902 gcc_unreachable ();
1905 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1907 This function returns a vector type which can be composed from NELTS pieces,
1908 whose type is recorded in PTYPE. VTYPE should be a vector type with the
1909 same vector size as the returned vector. It first checks whether the target
1910 supports a pieces-sized vector mode for the construction; if not, it then
1911 checks a pieces-sized scalar mode. It returns NULL_TREE if it fails to
1912 find a usable composition.
1914 For example, for (vtype=V16QI, nelts=4), we can probably get:
1915 - V16QI with PTYPE V4QI.
1916 - V4SI with PTYPE SI.
1917 - NULL_TREE. */
1919 static tree
1920 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1922 gcc_assert (VECTOR_TYPE_P (vtype));
1923 gcc_assert (known_gt (nelts, 0U));
1925 machine_mode vmode = TYPE_MODE (vtype);
1926 if (!VECTOR_MODE_P (vmode))
1927 return NULL_TREE;
1929 /* When we are asked to compose the vector from its components, let
1930 that happen directly. */
1931 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1933 *ptype = TREE_TYPE (vtype);
1934 return vtype;
1937 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1938 unsigned int pbsize;
1939 if (constant_multiple_p (vbsize, nelts, &pbsize))
1941 /* First check if vec_init optab supports construction from
1942 vector pieces directly. */
1943 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1944 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1945 machine_mode rmode;
1946 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1947 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1948 != CODE_FOR_nothing))
1950 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1951 return vtype;
1954 /* Otherwise check whether an integer type of the same piece size exists
1955 and whether the vec_init optab supports construction from it directly. */
1956 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1957 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1958 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1959 != CODE_FOR_nothing))
1961 *ptype = build_nonstandard_integer_type (pbsize, 1);
1962 return build_vector_type (*ptype, nelts);
1966 return NULL_TREE;
1969 /* A subroutine of get_load_store_type, with a subset of the same
1970 arguments. Handle the case where STMT_INFO is part of a grouped load
1971 or store.
1973 For stores, the statements in the group are all consecutive
1974 and there is no gap at the end. For loads, the statements in the
1975 group might not be consecutive; there can be gaps between statements
1976 as well as at the end. */
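/* For example (illustrative), loads of a[3*i] and a[3*i + 1] form a
   group of size 3 whose last member is unused, i.e. a gap of one
   element at the end of the group.  */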
1978 static bool
1979 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1980 tree vectype, slp_tree slp_node,
1981 bool masked_p, vec_load_store_type vls_type,
1982 vect_memory_access_type *memory_access_type,
1983 poly_int64 *poffset,
1984 dr_alignment_support *alignment_support_scheme,
1985 int *misalignment,
1986 gather_scatter_info *gs_info,
1987 internal_fn *lanes_ifn)
1989 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1990 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1991 stmt_vec_info first_stmt_info;
1992 unsigned int group_size;
1993 unsigned HOST_WIDE_INT gap;
1994 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1996 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1997 group_size = DR_GROUP_SIZE (first_stmt_info);
1998 gap = DR_GROUP_GAP (first_stmt_info);
2000 else
2002 first_stmt_info = stmt_info;
2003 group_size = 1;
2004 gap = 0;
2006 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2007 bool single_element_p = (stmt_info == first_stmt_info
2008 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2009 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2011 /* True if the vectorized statements would access beyond the last
2012 statement in the group. */
2013 bool overrun_p = false;
2015 /* True if we can cope with such overrun by peeling for gaps, so that
2016 there is at least one final scalar iteration after the vector loop. */
2017 bool can_overrun_p = (!masked_p
2018 && vls_type == VLS_LOAD
2019 && loop_vinfo
2020 && !loop->inner);
2022 /* There can only be a gap at the end of the group if the stride is
2023 known at compile time. */
2024 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2026 /* Stores can't yet have gaps. */
2027 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2029 if (slp_node)
2031 /* For SLP vectorization we directly vectorize a subchain
2032 without permutation. */
2033 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2034 first_dr_info
2035 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2036 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2037 /* Try to use consecutive accesses of as many elements as possible,
2038 separated by the stride, until we have a complete vector.
2039 Fall back to scalar accesses if that isn't possible. */
2040 *memory_access_type = VMAT_STRIDED_SLP;
2041 else
2043 int cmp = compare_step_with_zero (vinfo, stmt_info);
2044 if (cmp < 0)
2046 if (single_element_p)
2047 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2048 only correct for single element "interleaving" SLP. */
2049 *memory_access_type = get_negative_load_store_type
2050 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2051 else
2053 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2054 separated by the stride, until we have a complete vector.
2055 Fall back to scalar accesses if that isn't possible. */
2056 if (multiple_p (nunits, group_size))
2057 *memory_access_type = VMAT_STRIDED_SLP;
2058 else
2059 *memory_access_type = VMAT_ELEMENTWISE;
2062 else if (cmp == 0 && loop_vinfo)
2064 gcc_assert (vls_type == VLS_LOAD);
2065 *memory_access_type = VMAT_INVARIANT;
2066 /* Invariant accesses perform only component accesses, alignment
2067 is irrelevant for them. */
2068 *alignment_support_scheme = dr_unaligned_supported;
2070 /* Try using LOAD/STORE_LANES. */
2071 else if (slp_node->ldst_lanes
2072 && (*lanes_ifn
2073 = (vls_type == VLS_LOAD
2074 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2075 : vect_store_lanes_supported (vectype, group_size,
2076 masked_p))) != IFN_LAST)
2077 *memory_access_type = VMAT_LOAD_STORE_LANES;
2078 else
2079 *memory_access_type = VMAT_CONTIGUOUS;
2081 overrun_p = loop_vinfo && gap != 0;
2082 if (overrun_p && vls_type != VLS_LOAD)
2084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2085 "Grouped store with gaps requires"
2086 " non-consecutive accesses\n");
2087 return false;
2089 /* An overrun is fine if the trailing elements are smaller
2090 than the alignment boundary B. Every vector access will
2091 be a multiple of B and so we are guaranteed to access a
2092 non-gap element in the same B-sized block. */
2093 if (overrun_p
2094 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2095 vectype)
2096 / vect_get_scalar_dr_size (first_dr_info)))
2097 overrun_p = false;
2099 /* When we have a contiguous access across loop iterations
2100 but the access in the loop doesn't cover the full vector
2101 we can end up with no gap recorded but still excess
2102 elements accessed, see PR103116. Make sure we peel for
2103 gaps if necessary and sufficient and give up if not.
2105 If there is a combination of the access not covering the full
2106 vector and a gap recorded then we may need to peel twice. */
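/* Illustrative example (hypothetical): a three-element load group
   permuted into V4SI vectors at a vectorization factor of 2 covers
   3 * 2 = 6 scalar elements, but the vectorized loads cover 8, so the
   last vector reads two excess elements even though no gap is
   recorded.  */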
2107 if (loop_vinfo
2108 && (*memory_access_type == VMAT_CONTIGUOUS
2109 || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
2110 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2111 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2112 nunits))
2113 overrun_p = true;
2115 /* If the gap splits the vector in half and the target
2116 can do half-vector operations avoid the epilogue peeling
2117 by simply loading half of the vector only. Usually
2118 the construction with an upper zero half will be elided. */
2119 dr_alignment_support alss;
2120 int misalign = dr_misalignment (first_dr_info, vectype);
2121 tree half_vtype;
2122 poly_uint64 remain;
2123 unsigned HOST_WIDE_INT tem, num;
2124 if (overrun_p
2125 && !masked_p
2126 && *memory_access_type != VMAT_LOAD_STORE_LANES
2127 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2128 vectype, misalign)))
2129 == dr_aligned
2130 || alss == dr_unaligned_supported)
2131 && can_div_trunc_p (group_size
2132 * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
2133 nunits, &tem, &remain)
2134 && (known_eq (remain, 0u)
2135 || (known_ne (remain, 0u)
2136 && constant_multiple_p (nunits, remain, &num)
2137 && (vector_vector_composition_type (vectype, num,
2138 &half_vtype)
2139 != NULL_TREE))))
2140 overrun_p = false;
2142 if (overrun_p && !can_overrun_p)
2144 if (dump_enabled_p ())
2145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2146 "Peeling for outer loop is not supported\n");
2147 return false;
2149 /* Peeling for gaps assumes that a single scalar iteration
2150 is enough to make sure the last vector iteration doesn't
2151 access excess elements. */
2152 if (overrun_p
2153 && (!can_div_trunc_p (group_size
2154 * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
2155 nunits, &tem, &remain)
2156 || maybe_lt (remain + group_size, nunits)))
2158 /* But peeling a single scalar iteration is enough if
2159 we can use the next power-of-two sized partial
2160 access and that is sufficiently small to be covered
2161 by the single scalar iteration. */
2162 unsigned HOST_WIDE_INT cnunits, cvf, cremain, cpart_size;
2163 if (!nunits.is_constant (&cnunits)
2164 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2165 || (((cremain = group_size * cvf - gap % cnunits), true)
2166 && ((cpart_size = (1 << ceil_log2 (cremain))) != cnunits)
2167 && (cremain + group_size < cpart_size
2168 || vector_vector_composition_type
2169 (vectype, cnunits / cpart_size,
2170 &half_vtype) == NULL_TREE)))
2172 if (dump_enabled_p ())
2173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2174 "peeling for gaps insufficient for "
2175 "access\n");
2176 return false;
2180 /* If this is single-element interleaving with an element
2181 distance that leaves unused vector loads around, punt - we
2182 would at least create very sub-optimal code in that case (and
2183 blow up memory, see PR65518). */
2184 if (loop_vinfo
2185 && *memory_access_type == VMAT_CONTIGUOUS
2186 && single_element_p
2187 && maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
2189 if (SLP_TREE_LANES (slp_node) == 1)
2191 *memory_access_type = VMAT_ELEMENTWISE;
2192 if (dump_enabled_p ())
2193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2194 "single-element interleaving not supported "
2195 "for not adjacent vector loads, using "
2196 "elementwise access\n");
2198 else
2200 if (dump_enabled_p ())
2201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2202 "single-element interleaving not supported "
2203 "for not adjacent vector loads\n");
2204 return false;
2209 else
2211 /* We can always handle this case using elementwise accesses,
2212 but see if something more efficient is available. */
2213 *memory_access_type = VMAT_ELEMENTWISE;
2215 /* If there is a gap at the end of the group then these optimizations
2216 would access excess elements in the last iteration. */
2217 bool would_overrun_p = (gap != 0);
2218 /* An overrun is fine if the trailing elements are smaller than the
2219 alignment boundary B. Every vector access will be a multiple of B
2220 and so we are guaranteed to access a non-gap element in the
2221 same B-sized block. */
2222 if (would_overrun_p
2223 && !masked_p
2224 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2225 / vect_get_scalar_dr_size (first_dr_info)))
2226 would_overrun_p = false;
2228 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2229 && (can_overrun_p || !would_overrun_p)
2230 && compare_step_with_zero (vinfo, stmt_info) > 0)
2232 /* First cope with the degenerate case of a single-element
2233 vector. */
2234 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2237 else
2239 /* Otherwise try using LOAD/STORE_LANES. */
2240 *lanes_ifn
2241 = vls_type == VLS_LOAD
2242 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2243 : vect_store_lanes_supported (vectype, group_size,
2244 masked_p);
2245 if (*lanes_ifn != IFN_LAST)
2247 *memory_access_type = VMAT_LOAD_STORE_LANES;
2248 overrun_p = would_overrun_p;
2251 /* If that fails, try using permuting loads. */
2252 else if (vls_type == VLS_LOAD
2253 ? vect_grouped_load_supported (vectype,
2254 single_element_p,
2255 group_size)
2256 : vect_grouped_store_supported (vectype, group_size))
2258 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2259 overrun_p = would_overrun_p;
2265 /* As a last resort, try using a gather load or scatter store.
2267 ??? Although the code can handle all group sizes correctly,
2268 it probably isn't a win to use separate strided accesses based
2269 on nearby locations. Or, even if it's a win over scalar code,
2270 it might not be a win over vectorizing at a lower VF, if that
2271 allows us to use contiguous accesses. */
2272 if (*memory_access_type == VMAT_ELEMENTWISE
2273 && single_element_p
2274 && loop_vinfo
2275 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2276 masked_p, gs_info))
2277 *memory_access_type = VMAT_GATHER_SCATTER;
2279 if (*memory_access_type == VMAT_GATHER_SCATTER
2280 || *memory_access_type == VMAT_ELEMENTWISE)
2282 *alignment_support_scheme = dr_unaligned_supported;
2283 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2285 else
2287 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2288 *alignment_support_scheme
2289 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2290 *misalignment);
2293 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2295 /* STMT is the leader of the group. Check the operands of all the
2296 stmts of the group. */
2297 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2298 while (next_stmt_info)
2300 tree op = vect_get_store_rhs (next_stmt_info);
2301 enum vect_def_type dt;
2302 if (!vect_is_simple_use (op, vinfo, &dt))
2304 if (dump_enabled_p ())
2305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2306 "use not simple.\n");
2307 return false;
2309 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2313 if (overrun_p)
2315 gcc_assert (can_overrun_p);
2316 if (dump_enabled_p ())
2317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2318 "Data access with gaps requires scalar "
2319 "epilogue loop\n");
2320 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2323 return true;
2326 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2327 if there is a memory access type that the vectorized form can use,
2328 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2329 or scatters, fill in GS_INFO accordingly. In addition
2330 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2331 the target does not support the alignment scheme. *MISALIGNMENT
2332 is set according to the alignment of the access (including
2333 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2335 SLP says whether we're performing SLP rather than loop vectorization.
2336 MASKED_P is true if the statement is conditional on a vectorized mask.
2337 VECTYPE is the vector type that the vectorized statements will use.
2338 NCOPIES is the number of vector statements that will be needed. */
2340 static bool
2341 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2342 tree vectype, slp_tree slp_node,
2343 bool masked_p, vec_load_store_type vls_type,
2344 unsigned int ncopies,
2345 vect_memory_access_type *memory_access_type,
2346 poly_int64 *poffset,
2347 dr_alignment_support *alignment_support_scheme,
2348 int *misalignment,
2349 gather_scatter_info *gs_info,
2350 internal_fn *lanes_ifn)
2352 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2353 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2354 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2355 *poffset = 0;
2356 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2358 *memory_access_type = VMAT_GATHER_SCATTER;
2359 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2360 gcc_unreachable ();
2361 /* When using internal functions, we rely on pattern recognition
2362 to convert the type of the offset to the type that the target
2363 requires, with the result being a call to an internal function.
2364 If that failed for some reason (e.g. because another pattern
2365 took priority), just handle cases in which the offset already
2366 has the right type. */
2367 else if (gs_info->ifn != IFN_LAST
2368 && !is_gimple_call (stmt_info->stmt)
2369 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2370 TREE_TYPE (gs_info->offset_vectype)))
2372 if (dump_enabled_p ())
2373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2374 "%s offset requires a conversion\n",
2375 vls_type == VLS_LOAD ? "gather" : "scatter");
2376 return false;
2378 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2379 &gs_info->offset_dt,
2380 &gs_info->offset_vectype))
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2384 "%s index use not simple.\n",
2385 vls_type == VLS_LOAD ? "gather" : "scatter");
2386 return false;
2388 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2390 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2391 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2392 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2393 (gs_info->offset_vectype),
2394 TYPE_VECTOR_SUBPARTS (vectype)))
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2398 "unsupported vector types for emulated "
2399 "gather.\n");
2400 return false;
2403 /* Gather-scatter accesses perform only component accesses, alignment
2404 is irrelevant for them. */
2405 *alignment_support_scheme = dr_unaligned_supported;
2407 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2409 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2410 masked_p,
2411 vls_type, memory_access_type, poffset,
2412 alignment_support_scheme,
2413 misalignment, gs_info, lanes_ifn))
2414 return false;
2416 else if (STMT_VINFO_STRIDED_P (stmt_info))
2418 gcc_assert (!slp_node);
2419 if (loop_vinfo
2420 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2421 masked_p, gs_info))
2422 *memory_access_type = VMAT_GATHER_SCATTER;
2423 else
2424 *memory_access_type = VMAT_ELEMENTWISE;
2425 /* Alignment is irrelevant here. */
2426 *alignment_support_scheme = dr_unaligned_supported;
2428 else
2430 int cmp = compare_step_with_zero (vinfo, stmt_info);
2431 if (cmp == 0)
2433 gcc_assert (vls_type == VLS_LOAD);
2434 *memory_access_type = VMAT_INVARIANT;
2435 /* Invariant accesses perform only component accesses, alignment
2436 is irrelevant for them. */
2437 *alignment_support_scheme = dr_unaligned_supported;
2439 else
2441 if (cmp < 0)
2442 *memory_access_type = get_negative_load_store_type
2443 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2444 else
2445 *memory_access_type = VMAT_CONTIGUOUS;
2446 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2447 vectype, *poffset);
2448 *alignment_support_scheme
2449 = vect_supportable_dr_alignment (vinfo,
2450 STMT_VINFO_DR_INFO (stmt_info),
2451 vectype, *misalignment);
2455 if ((*memory_access_type == VMAT_ELEMENTWISE
2456 || *memory_access_type == VMAT_STRIDED_SLP)
2457 && !nunits.is_constant ())
2459 if (dump_enabled_p ())
2460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2461 "Not using elementwise accesses due to variable "
2462 "vectorization factor.\n");
2463 return false;
2466 if (*alignment_support_scheme == dr_unaligned_unsupported)
2468 if (dump_enabled_p ())
2469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2470 "unsupported unaligned access\n");
2471 return false;
2474 /* FIXME: At the moment the cost model seems to underestimate the
2475 cost of using elementwise accesses. This check preserves the
2476 traditional behavior until that can be fixed. */
2477 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2478 if (!first_stmt_info)
2479 first_stmt_info = stmt_info;
2480 if (*memory_access_type == VMAT_ELEMENTWISE
2481 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2482 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2483 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2484 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2486 if (dump_enabled_p ())
2487 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2488 "not falling back to elementwise accesses\n");
2489 return false;
2491 return true;
2494 /* Return true if the boolean argument at MASK_INDEX is suitable for vectorizing
2495 conditional operation STMT_INFO. When returning true, store the mask
2496 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2497 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2498 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2500 static bool
2501 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2502 slp_tree slp_node, unsigned mask_index,
2503 tree *mask, slp_tree *mask_node,
2504 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2506 enum vect_def_type mask_dt;
2507 tree mask_vectype;
2508 slp_tree mask_node_1;
2509 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2510 mask, &mask_node_1, &mask_dt, &mask_vectype))
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2514 "mask use not simple.\n");
2515 return false;
2518 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2520 if (dump_enabled_p ())
2521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2522 "mask argument is not a boolean.\n");
2523 return false;
2526 /* If the caller is not prepared for adjusting an external/constant
2527 SLP mask vector type fail. */
2528 if (slp_node
2529 && !mask_node
2530 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2532 if (dump_enabled_p ())
2533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2534 "SLP mask argument is not vectorized.\n");
2535 return false;
2538 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2539 if (!mask_vectype)
2540 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2541 mask_node_1);
2543 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2545 if (dump_enabled_p ())
2546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2547 "could not find an appropriate vector mask type.\n");
2548 return false;
2551 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2552 TYPE_VECTOR_SUBPARTS (vectype)))
2554 if (dump_enabled_p ())
2555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2556 "vector mask type %T"
2557 " does not match vector data type %T.\n",
2558 mask_vectype, vectype);
2560 return false;
2563 *mask_dt_out = mask_dt;
2564 *mask_vectype_out = mask_vectype;
2565 if (mask_node)
2566 *mask_node = mask_node_1;
2567 return true;
2570 /* Return true if the stored value is suitable for vectorizing store
2571 statement STMT_INFO. When returning true, store the scalar stored
2572 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2573 the type of the vectorized store value in
2574 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2576 static bool
2577 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2578 slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
2579 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2580 vec_load_store_type *vls_type_out)
2582 int op_no = 0;
2583 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2585 if (gimple_call_internal_p (call)
2586 && internal_store_fn_p (gimple_call_internal_fn (call)))
2587 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2589 if (slp_node)
2590 op_no = vect_slp_child_index_for_operand
2591 (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2593 enum vect_def_type rhs_dt;
2594 tree rhs_vectype;
2595 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2596 rhs, rhs_node, &rhs_dt, &rhs_vectype))
2598 if (dump_enabled_p ())
2599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2600 "use not simple.\n");
2601 return false;
2604 /* In case this is a store from a constant, make sure
2605 native_encode_expr can handle it. */
2606 if (rhs_dt == vect_constant_def
2607 && CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
2609 if (dump_enabled_p ())
2610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2611 "cannot encode constant as a byte sequence.\n");
2612 return false;
2615 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2616 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2618 if (dump_enabled_p ())
2619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2620 "incompatible vector types.\n");
2621 return false;
2624 *rhs_dt_out = rhs_dt;
2625 *rhs_vectype_out = rhs_vectype;
2626 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2627 *vls_type_out = VLS_STORE_INVARIANT;
2628 else
2629 *vls_type_out = VLS_STORE;
2630 return true;
2633 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2634 Note that we support masks with floating-point type, in which case the
2635 floats are interpreted as a bitmask. */
2637 static tree
2638 vect_build_all_ones_mask (vec_info *vinfo,
2639 stmt_vec_info stmt_info, tree masktype)
2641 if (TREE_CODE (masktype) == INTEGER_TYPE)
2642 return build_int_cst (masktype, -1);
2643 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2644 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2646 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2647 mask = build_vector_from_val (masktype, mask);
2648 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2650 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2652 REAL_VALUE_TYPE r;
2653 long tmp[6];
2654 for (int j = 0; j < 6; ++j)
2655 tmp[j] = -1;
2656 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2657 tree mask = build_real (TREE_TYPE (masktype), r);
2658 mask = build_vector_from_val (masktype, mask);
2659 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2661 gcc_unreachable ();
2664 /* Build an all-zero merge value of type VECTYPE while vectorizing
2665 STMT_INFO as a gather load. */
2667 static tree
2668 vect_build_zero_merge_argument (vec_info *vinfo,
2669 stmt_vec_info stmt_info, tree vectype)
2671 tree merge;
2672 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2673 merge = build_int_cst (TREE_TYPE (vectype), 0);
2674 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2676 REAL_VALUE_TYPE r;
2677 long tmp[6];
2678 for (int j = 0; j < 6; ++j)
2679 tmp[j] = 0;
2680 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2681 merge = build_real (TREE_TYPE (vectype), r);
2683 else
2684 gcc_unreachable ();
2685 merge = build_vector_from_val (vectype, merge);
2686 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2689 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2690 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2691 the gather load operation. If the load is conditional, MASK is the
2692 vectorized condition, otherwise MASK is null. PTR is the base
2693 pointer and OFFSET is the vectorized offset. */
2695 static gimple *
2696 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2697 gimple_stmt_iterator *gsi,
2698 gather_scatter_info *gs_info,
2699 tree ptr, tree offset, tree mask)
2701 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2702 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2703 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2704 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2705 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2706 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2707 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2708 tree scaletype = TREE_VALUE (arglist);
2709 tree var;
2710 gcc_checking_assert (types_compatible_p (srctype, rettype)
2711 && (!mask
2712 || TREE_CODE (masktype) == INTEGER_TYPE
2713 || types_compatible_p (srctype, masktype)));
2715 tree op = offset;
2716 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2718 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2719 TYPE_VECTOR_SUBPARTS (idxtype)));
2720 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2721 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2722 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2723 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2724 op = var;
2727 tree src_op = NULL_TREE;
2728 tree mask_op = NULL_TREE;
2729 if (mask)
2731 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2733 tree utype, optype = TREE_TYPE (mask);
2734 if (VECTOR_TYPE_P (masktype)
2735 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2736 utype = masktype;
2737 else
2738 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2739 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2740 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2741 gassign *new_stmt
2742 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2743 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2744 mask_arg = var;
2745 if (!useless_type_conversion_p (masktype, utype))
2747 gcc_assert (TYPE_PRECISION (utype)
2748 <= TYPE_PRECISION (masktype));
2749 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2750 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2751 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2752 mask_arg = var;
2754 src_op = build_zero_cst (srctype);
2755 mask_op = mask_arg;
2757 else
2759 src_op = mask;
2760 mask_op = mask;
2763 else
2765 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2766 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2769 tree scale = build_int_cst (scaletype, gs_info->scale);
2770 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2771 mask_op, scale);
2773 if (!useless_type_conversion_p (vectype, rettype))
2775 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2776 TYPE_VECTOR_SUBPARTS (rettype)));
2777 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2778 gimple_call_set_lhs (new_stmt, op);
2779 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2780 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2781 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2784 return new_stmt;
2787 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2788 instructions before GSI. GS_INFO describes the scatter store operation.
2789 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2790 vectorized data to store.
2791 If the store is conditional, MASK is the vectorized condition, otherwise
2792 MASK is null. */
2794 static gimple *
2795 vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
2796 gimple_stmt_iterator *gsi,
2797 gather_scatter_info *gs_info,
2798 tree ptr, tree offset, tree oprnd, tree mask)
2800 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2801 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2802 /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
2803 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2804 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2805 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2806 tree scaletype = TREE_VALUE (arglist);
2807 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2808 && TREE_CODE (rettype) == VOID_TYPE);
2810 tree mask_arg = NULL_TREE;
2811 if (mask)
2813 mask_arg = mask;
2814 tree optype = TREE_TYPE (mask_arg);
2815 tree utype;
2816 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2817 utype = masktype;
2818 else
2819 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2820 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2821 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2822 gassign *new_stmt
2823 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2824 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2825 mask_arg = var;
2826 if (!useless_type_conversion_p (masktype, utype))
2828 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2829 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2830 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2831 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2832 mask_arg = var;
2835 else
2837 mask_arg = build_int_cst (masktype, -1);
2838 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2841 tree src = oprnd;
2842 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2844 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2845 TYPE_VECTOR_SUBPARTS (srctype)));
2846 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2847 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2848 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2849 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2850 src = var;
2853 tree op = offset;
2854 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2856 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2857 TYPE_VECTOR_SUBPARTS (idxtype)));
2858 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2859 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2860 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2861 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2862 op = var;
2865 tree scale = build_int_cst (scaletype, gs_info->scale);
2866 gcall *new_stmt
2867 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2868 return new_stmt;
2871 /* Prepare the base and offset in GS_INFO for vectorization.
2872 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2873 to the vectorized offset argument for the first copy of STMT_INFO.
2874 STMT_INFO is the statement described by GS_INFO and LOOP is the
2875 containing loop. */
2877 static void
2878 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2879 class loop *loop, stmt_vec_info stmt_info,
2880 slp_tree slp_node, gather_scatter_info *gs_info,
2881 tree *dataref_ptr, vec<tree> *vec_offset)
2883 gimple_seq stmts = NULL;
2884 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2885 if (stmts != NULL)
2887 basic_block new_bb;
2888 edge pe = loop_preheader_edge (loop);
2889 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2890 gcc_assert (!new_bb);
2892 if (slp_node)
2893 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2894 else
2896 unsigned ncopies
2897 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2898 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2899 gs_info->offset, vec_offset,
2900 gs_info->offset_vectype);
2904 /* Prepare to implement a grouped or strided load or store using
2905 the gather load or scatter store operation described by GS_INFO.
2906 STMT_INFO is the load or store statement.
2908 Set *DATAREF_BUMP to the amount that should be added to the base
2909 address after each copy of the vectorized statement. Set *VEC_OFFSET
2910 to an invariant offset vector in which element I has the value
2911 I * DR_STEP / SCALE. */
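/* Illustrative example (hypothetical values): for 4-byte elements
   accessed with DR_STEP 32, SCALE 1 and four elements per vector,
   *VEC_OFFSET becomes { 0, 32, 64, 96 } and *DATAREF_BUMP is
   32 * 4 = 128 bytes per copy (or 32 * .SELECT_VL bytes when
   SELECT_VL is in use).  */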
2913 static void
2914 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2915 loop_vec_info loop_vinfo,
2916 gimple_stmt_iterator *gsi,
2917 gather_scatter_info *gs_info,
2918 tree *dataref_bump, tree *vec_offset,
2919 vec_loop_lens *loop_lens)
2921 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2922 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2924 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2926 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2927 ivtmp_8 = _31 * 16 (step in bytes);
2928 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2929 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2930 tree loop_len
2931 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2932 tree tmp
2933 = fold_build2 (MULT_EXPR, sizetype,
2934 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2935 loop_len);
2936 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2937 GSI_SAME_STMT);
2939 else
2941 tree bump
2942 = size_binop (MULT_EXPR,
2943 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2944 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2945 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2948 /* The offset given in GS_INFO can have pointer type, so use the element
2949 type of the vector instead. */
2950 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2952 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2953 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2954 ssize_int (gs_info->scale));
2955 step = fold_convert (offset_type, step);
2957 /* Create {0, X, X*2, X*3, ...}. */
2958 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2959 build_zero_cst (offset_type), step);
2960 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2963 /* Prepare the pointer IVs which need to be updated by a variable amount.
2964 That amount is the outcome of .SELECT_VL, which lets each iteration
2965 process a flexible number of elements, as long as that number is
2966 at most VF elements.
2968 Return the data-reference pointer increment determined by SELECT_VL.
2969 If new statements are needed, insert them before GSI. */
2971 static tree
2972 vect_get_loop_variant_data_ptr_increment (
2973 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
2974 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
2975 vect_memory_access_type memory_access_type)
2977 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2978 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2980 /* gather/scatter never reach here. */
2981 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
2983 /* When the SELECT_VL pattern is used, we dynamically adjust
2984 the memory address by the .SELECT_VL result.
2986 The result of .SELECT_VL is the number of elements to
2987 be processed in each iteration. So the memory address
2988 adjustment operation should be:
2990 addr = addr + .SELECT_VL (ARG..) * step;
2992 tree loop_len
2993 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
2994 tree len_type = TREE_TYPE (loop_len);
2995 /* Since the outcome of .SELECT_VL is an element count, we need to scale
2996 it to a byte size so that it can be used to adjust the variable-amount
2997 address pointer IVs. */
2998 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
2999 wide_int_to_tree (len_type, wi::to_widest (step)));
3000 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3001 gassign *assign = gimple_build_assign (bump, tmp);
3002 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3003 return bump;
3006 /* Return the amount that should be added to a vector pointer to move
3007 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3008 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3009 vectorization. */
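/* For example (illustrative), when AGGR_TYPE is V4SI the increment is
   16 bytes, or -16 bytes when the DR runs with a negative step.  */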
3011 static tree
3012 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3013 dr_vec_info *dr_info, tree aggr_type,
3014 vect_memory_access_type memory_access_type,
3015 vec_loop_lens *loop_lens = nullptr)
3017 if (memory_access_type == VMAT_INVARIANT)
3018 return size_zero_node;
3020 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3021 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3022 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3023 loop_lens, dr_info,
3024 memory_access_type);
3026 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3027 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3028 if (tree_int_cst_sgn (step) == -1)
3029 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3030 return iv_step;
3033 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3035 static bool
3036 vectorizable_bswap (vec_info *vinfo,
3037 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3038 gimple **vec_stmt, slp_tree slp_node,
3039 slp_tree *slp_op,
3040 tree vectype_in, stmt_vector_for_cost *cost_vec)
3042 tree op, vectype;
3043 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3044 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3045 unsigned ncopies;
3047 op = gimple_call_arg (stmt, 0);
3048 vectype = STMT_VINFO_VECTYPE (stmt_info);
3049 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3051 /* Multiple types in SLP are handled by creating the appropriate number of
3052 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3053 case of SLP. */
3054 if (slp_node)
3055 ncopies = 1;
3056 else
3057 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3059 gcc_assert (ncopies >= 1);
3061 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype))
3063 if (dump_enabled_p ())
3064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3065 "mismatched vector sizes %T and %T\n",
3066 vectype_in, vectype);
3067 return false;
3070 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3071 if (! char_vectype)
3072 return false;
3074 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3075 unsigned word_bytes;
3076 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3077 return false;
3079 /* The encoding uses one stepped pattern for each byte in the word. */
3080 vec_perm_builder elts (num_bytes, word_bytes, 3);
3081 for (unsigned i = 0; i < 3; ++i)
3082 for (unsigned j = 0; j < word_bytes; ++j)
3083 elts.quick_push ((i + 1) * word_bytes - j - 1);
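/* E.g. (illustrative) for __builtin_bswap32 on V16QI data, WORD_BYTES is 4
   and the selector expands to { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8,
   15, 14, 13, 12 }, reversing the bytes within each 32-bit word.  */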
3085 vec_perm_indices indices (elts, 1, num_bytes);
3086 machine_mode vmode = TYPE_MODE (char_vectype);
3087 if (!can_vec_perm_const_p (vmode, vmode, indices))
3088 return false;
3090 if (! vec_stmt)
3092 if (slp_node
3093 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3095 if (dump_enabled_p ())
3096 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3097 "incompatible vector types for invariants\n");
3098 return false;
3101 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3102 DUMP_VECT_SCOPE ("vectorizable_bswap");
3103 record_stmt_cost (cost_vec,
3104 1, vector_stmt, stmt_info, 0, vect_prologue);
3105 record_stmt_cost (cost_vec,
3106 slp_node
3107 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3108 vec_perm, stmt_info, 0, vect_body);
3109 return true;
3112 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3114 /* Transform. */
3115 vec<tree> vec_oprnds = vNULL;
3116 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3117 op, &vec_oprnds);
3118 /* Arguments are ready. Create the new vector stmt. */
3119 unsigned i;
3120 tree vop;
3121 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3123 gimple *new_stmt;
3124 tree tem = make_ssa_name (char_vectype);
3125 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3126 char_vectype, vop));
3127 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3128 tree tem2 = make_ssa_name (char_vectype);
3129 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3130 tem, tem, bswap_vconst);
3131 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3132 tem = make_ssa_name (vectype);
3133 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3134 vectype, tem2));
3135 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3136 if (slp_node)
3137 slp_node->push_vec_def (new_stmt);
3138 else
3139 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3142 if (!slp_node)
3143 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3145 vec_oprnds.release ();
3146 return true;
3149 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3150 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3151 in a single step. On success, store the binary pack code in
3152 *CONVERT_CODE. */
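/* For instance (illustrative, assuming target support), narrowing V2DI
   results to V4SI can be done with a single VEC_PACK_TRUNC_EXPR, so
   *CONVERT_CODE is set to that pack operation, whereas a two-step
   narrowing such as V2DI to V8HI is rejected here.  */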
3154 static bool
3155 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3156 code_helper *convert_code)
3158 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3159 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3160 return false;
3162 code_helper code;
3163 int multi_step_cvt = 0;
3164 auto_vec <tree, 8> interm_types;
3165 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3166 &code, &multi_step_cvt, &interm_types)
3167 || multi_step_cvt)
3168 return false;
3170 *convert_code = code;
3171 return true;
3174 /* Function vectorizable_call.
3176 Check if STMT_INFO performs a function call that can be vectorized.
3177 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3178 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3179 Return true if STMT_INFO is vectorizable in this way. */
3181 static bool
3182 vectorizable_call (vec_info *vinfo,
3183 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3184 gimple **vec_stmt, slp_tree slp_node,
3185 stmt_vector_for_cost *cost_vec)
3187 gcall *stmt;
3188 tree vec_dest;
3189 tree scalar_dest;
3190 tree op;
3191 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3192 tree vectype_out, vectype_in;
3193 poly_uint64 nunits_in;
3194 poly_uint64 nunits_out;
3195 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3196 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3197 tree fndecl, new_temp, rhs_type;
3198 enum vect_def_type dt[4]
3199 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3200 vect_unknown_def_type };
3201 tree vectypes[ARRAY_SIZE (dt)] = {};
3202 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3203 int ndts = ARRAY_SIZE (dt);
3204 int ncopies, j;
3205 auto_vec<tree, 8> vargs;
3206 enum { NARROW, NONE, WIDEN } modifier;
3207 size_t i, nargs;
3208 tree lhs;
3209 tree clz_ctz_arg1 = NULL_TREE;
3211 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3212 return false;
3214 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3215 && ! vec_stmt)
3216 return false;
3218 /* Is STMT_INFO a vectorizable call? */
3219 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3220 if (!stmt)
3221 return false;
3223 if (gimple_call_internal_p (stmt)
3224 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3225 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3226 /* Handled by vectorizable_load and vectorizable_store. */
3227 return false;
3229 if (gimple_call_lhs (stmt) == NULL_TREE
3230 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3231 return false;
3233 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3235 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3237 /* Process function arguments. */
3238 rhs_type = NULL_TREE;
3239 vectype_in = NULL_TREE;
3240 nargs = gimple_call_num_args (stmt);
3242 /* Bail out if the function has more than four arguments; we do not have
3243 interesting builtin functions to vectorize with more than two arguments
3244 except for fma, and calls with no arguments are not interesting either. */
3245 if (nargs == 0 || nargs > 4)
3246 return false;
3248 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3249 combined_fn cfn = gimple_call_combined_fn (stmt);
3250 if (cfn == CFN_GOMP_SIMD_LANE)
3252 nargs = 0;
3253 rhs_type = unsigned_type_node;
3255 /* Similarly, pretend IFN_CLZ and IFN_CTZ only have one argument; the second
3256 argument just says whether it is well-defined at zero or not and what
3257 value should be returned for it. */
3258 if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
3260 nargs = 1;
3261 clz_ctz_arg1 = gimple_call_arg (stmt, 1);
3264 int mask_opno = -1;
3265 if (internal_fn_p (cfn))
3266 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3268 for (i = 0; i < nargs; i++)
3270 if ((int) i == mask_opno)
3272 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3273 &op, &slp_op[i], &dt[i], &vectypes[i]))
3274 return false;
3275 continue;
3278 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3279 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3281 if (dump_enabled_p ())
3282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3283 "use not simple.\n");
3284 return false;
3287 /* We can only handle calls with arguments of the same type. */
3288 if (rhs_type
3289 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3291 if (dump_enabled_p ())
3292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3293 "argument types differ.\n");
3294 return false;
3296 if (!rhs_type)
3297 rhs_type = TREE_TYPE (op);
3299 if (!vectype_in)
3300 vectype_in = vectypes[i];
3301 else if (vectypes[i]
3302 && !types_compatible_p (vectypes[i], vectype_in))
3304 if (dump_enabled_p ())
3305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3306 "argument vector types differ.\n");
3307 return false;
3310 /* If all arguments are external or constant defs, infer the vector type
3311 from the scalar type. */
3312 if (!vectype_in)
3313 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3314 if (vec_stmt)
3315 gcc_assert (vectype_in);
3316 if (!vectype_in)
3318 if (dump_enabled_p ())
3319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3320 "no vectype for scalar type %T\n", rhs_type);
3322 return false;
3325 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3326 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3328 if (dump_enabled_p ())
3329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3330 "mixed mask and nonmask vector types\n");
3331 return false;
3334 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3336 if (dump_enabled_p ())
3337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3338 "use emulated vector type for call\n");
3339 return false;
3342 /* FORNOW */
3343 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3344 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3345 if (known_eq (nunits_in * 2, nunits_out))
3346 modifier = NARROW;
3347 else if (known_eq (nunits_out, nunits_in))
3348 modifier = NONE;
3349 else if (known_eq (nunits_out * 2, nunits_in))
3350 modifier = WIDEN;
3351 else
3352 return false;
3354 /* We only handle functions that do not read or clobber memory. */
3355 if (gimple_vuse (stmt))
3357 if (dump_enabled_p ())
3358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3359 "function reads from or writes to memory.\n");
3360 return false;
3363 /* For now, we only vectorize functions if a target specific builtin
3364 is available. TODO -- in some cases, it might be profitable to
3365 insert the calls for pieces of the vector, in order to be able
3366 to vectorize other operations in the loop. */
3367 fndecl = NULL_TREE;
3368 internal_fn ifn = IFN_LAST;
3369 tree callee = gimple_call_fndecl (stmt);
3371 /* First try using an internal function. */
3372 code_helper convert_code = MAX_TREE_CODES;
3373 if (cfn != CFN_LAST
3374 && (modifier == NONE
3375 || (modifier == NARROW
3376 && simple_integer_narrowing (vectype_out, vectype_in,
3377 &convert_code))))
3378 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3379 vectype_in);
3381 /* If that fails, try asking for a target-specific built-in function. */
3382 if (ifn == IFN_LAST)
3384 if (cfn != CFN_LAST)
3385 fndecl = targetm.vectorize.builtin_vectorized_function
3386 (cfn, vectype_out, vectype_in);
3387 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3388 fndecl = targetm.vectorize.builtin_md_vectorized_function
3389 (callee, vectype_out, vectype_in);
3392 if (ifn == IFN_LAST && !fndecl)
3394 if (cfn == CFN_GOMP_SIMD_LANE
3395 && (!slp_node || SLP_TREE_LANES (slp_node) == 1)
3396 && loop_vinfo
3397 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3398 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3399 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3400 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3402 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3403 { 0, 1, 2, ... vf - 1 } vector. */
3404 gcc_assert (nargs == 0);
3406 else if (modifier == NONE
3407 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3408 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3409 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3410 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3411 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3412 slp_op, vectype_in, cost_vec);
3413 else
3415 if (dump_enabled_p ())
3416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3417 "function is not vectorizable.\n");
3418 return false;
3422 if (slp_node)
3423 ncopies = 1;
3424 else if (modifier == NARROW && ifn == IFN_LAST)
3425 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3426 else
3427 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3429 /* Sanity check: make sure that at least one copy of the vectorized stmt
3430 needs to be generated. */
3431 gcc_assert (ncopies >= 1);
3433 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3434 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3435 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3436 int len_opno = internal_fn_len_index (cond_len_fn);
3437 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3438 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3439 if (!vec_stmt) /* transformation not required. */
3441 if (slp_node)
3442 for (i = 0; i < nargs; ++i)
3443 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3444 vectypes[i]
3445 ? vectypes[i] : vectype_in))
3447 if (dump_enabled_p ())
3448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3449 "incompatible vector types for invariants\n");
3450 return false;
3452 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3453 DUMP_VECT_SCOPE ("vectorizable_call");
3454 vect_model_simple_cost (vinfo, stmt_info,
3455 ncopies, dt, ndts, slp_node, cost_vec);
3456 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3457 record_stmt_cost (cost_vec, ncopies / 2,
3458 vec_promote_demote, stmt_info, 0, vect_body);
3460 if (loop_vinfo
3461 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3462 && (reduc_idx >= 0 || mask_opno >= 0))
3464 if (reduc_idx >= 0
3465 && (cond_fn == IFN_LAST
3466 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3467 OPTIMIZE_FOR_SPEED))
3468 && (cond_len_fn == IFN_LAST
3469 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3470 OPTIMIZE_FOR_SPEED)))
3472 if (dump_enabled_p ())
3473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3474 "can't use a fully-masked loop because no"
3475 " conditional operation is available.\n");
3476 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3478 else
3480 unsigned int nvectors
3481 = (slp_node
3482 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3483 : ncopies);
3484 tree scalar_mask = NULL_TREE;
3485 if (mask_opno >= 0)
3486 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3487 if (cond_len_fn != IFN_LAST
3488 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3489 OPTIMIZE_FOR_SPEED))
3490 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3492 else
3493 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3494 scalar_mask);
3497 return true;
3500 /* Transform. */
3502 if (dump_enabled_p ())
3503 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3505 /* Handle def. */
3506 scalar_dest = gimple_call_lhs (stmt);
3507 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3509 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3510 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3511 unsigned int vect_nargs = nargs;
3512 if (len_loop_p)
3514 if (len_opno >= 0)
3516 ifn = cond_len_fn;
3517 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
3518 vect_nargs += 2;
3520 else if (reduc_idx >= 0)
3521 gcc_unreachable ();
3523 else if (masked_loop_p && reduc_idx >= 0)
3525 ifn = cond_fn;
3526 vect_nargs += 2;
3528 if (clz_ctz_arg1)
3529 ++vect_nargs;
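/* At this point vect_nargs counts the operands of the vectorized call:
   the original arguments, plus LEN and BIAS for a COND_LEN_* call, plus
   the loop mask and else value for a masked conditional reduction, plus
   the value-at-zero argument for CLZ/CTZ when clz_ctz_arg1 is set.  */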
3531 if (modifier == NONE || ifn != IFN_LAST)
3533 tree prev_res = NULL_TREE;
3534 vargs.safe_grow (vect_nargs, true);
3535 auto_vec<vec<tree> > vec_defs (nargs);
3536 for (j = 0; j < ncopies; ++j)
3538 /* Build argument list for the vectorized call. */
3539 if (slp_node)
3541 if (cfn == CFN_GOMP_SIMD_LANE)
3543 for (i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++i)
3545 /* ??? For multi-lane SLP we'd need to build
3546 { 0, 0, .., 1, 1, ... }. */
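/* E.g. with vf == 8 and V4SI vectors this emits the two constants
   { 0, 1, 2, 3 } and { 4, 5, 6, 7 }, one per vector stmt.  */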
3547 tree cst = build_index_vector (vectype_out,
3548 i * nunits_out, 1);
3549 tree new_var
3550 = vect_get_new_ssa_name (vectype_out, vect_simple_var,
3551 "cst_");
3552 gimple *init_stmt = gimple_build_assign (new_var, cst);
3553 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3554 new_temp = make_ssa_name (vec_dest);
3555 gimple *new_stmt
3556 = gimple_build_assign (new_temp, new_var);
3557 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
3558 gsi);
3559 slp_node->push_vec_def (new_stmt);
3561 continue;
3564 vec<tree> vec_oprnds0;
3565 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3566 vec_oprnds0 = vec_defs[0];
3568 /* Arguments are ready. Create the new vector stmt. */
3569 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3571 int varg = 0;
3572 if (masked_loop_p && reduc_idx >= 0)
3574 unsigned int vec_num = vec_oprnds0.length ();
3575 /* Always true for SLP. */
3576 gcc_assert (ncopies == 1);
3577 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3578 gsi, masks, vec_num,
3579 vectype_out, i);
3581 size_t k;
3582 for (k = 0; k < nargs; k++)
3584 vec<tree> vec_oprndsk = vec_defs[k];
3585 vargs[varg++] = vec_oprndsk[i];
3587 if (masked_loop_p && reduc_idx >= 0)
3588 vargs[varg++] = vargs[reduc_idx + 1];
3589 if (clz_ctz_arg1)
3590 vargs[varg++] = clz_ctz_arg1;
3592 gimple *new_stmt;
3593 if (modifier == NARROW)
3595 /* We don't define any narrowing conditional functions
3596 at present. */
3597 gcc_assert (mask_opno < 0);
3598 tree half_res = make_ssa_name (vectype_in);
3599 gcall *call
3600 = gimple_build_call_internal_vec (ifn, vargs);
3601 gimple_call_set_lhs (call, half_res);
3602 gimple_call_set_nothrow (call, true);
3603 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3604 if ((i & 1) == 0)
3606 prev_res = half_res;
3607 continue;
3609 new_temp = make_ssa_name (vec_dest);
3610 new_stmt = vect_gimple_build (new_temp, convert_code,
3611 prev_res, half_res);
3612 vect_finish_stmt_generation (vinfo, stmt_info,
3613 new_stmt, gsi);
3615 else
3617 if (len_opno >= 0 && len_loop_p)
3619 unsigned int vec_num = vec_oprnds0.length ();
3620 /* Always true for SLP. */
3621 gcc_assert (ncopies == 1);
3622 tree len
3623 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3624 vectype_out, i, 1);
3625 signed char biasval
3626 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3627 tree bias = build_int_cst (intQI_type_node, biasval);
3628 vargs[len_opno] = len;
3629 vargs[len_opno + 1] = bias;
3631 else if (mask_opno >= 0 && masked_loop_p)
3633 unsigned int vec_num = vec_oprnds0.length ();
3634 /* Always true for SLP. */
3635 gcc_assert (ncopies == 1);
3636 tree mask = vect_get_loop_mask (loop_vinfo,
3637 gsi, masks, vec_num,
3638 vectype_out, i);
3639 vargs[mask_opno] = prepare_vec_mask
3640 (loop_vinfo, TREE_TYPE (mask), mask,
3641 vargs[mask_opno], gsi);
3644 gcall *call;
3645 if (ifn != IFN_LAST)
3646 call = gimple_build_call_internal_vec (ifn, vargs);
3647 else
3648 call = gimple_build_call_vec (fndecl, vargs);
3649 new_temp = make_ssa_name (vec_dest, call);
3650 gimple_call_set_lhs (call, new_temp);
3651 gimple_call_set_nothrow (call, true);
3652 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3653 new_stmt = call;
3655 slp_node->push_vec_def (new_stmt);
3657 continue;
3660 int varg = 0;
3661 if (masked_loop_p && reduc_idx >= 0)
3662 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3663 vectype_out, j);
3664 for (i = 0; i < nargs; i++)
3666 op = gimple_call_arg (stmt, i);
3667 if (j == 0)
3669 vec_defs.quick_push (vNULL);
3670 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3671 op, &vec_defs[i],
3672 vectypes[i]);
3674 vargs[varg++] = vec_defs[i][j];
3676 if (masked_loop_p && reduc_idx >= 0)
3677 vargs[varg++] = vargs[reduc_idx + 1];
3678 if (clz_ctz_arg1)
3679 vargs[varg++] = clz_ctz_arg1;
3681 if (len_opno >= 0 && len_loop_p)
3683 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3684 vectype_out, j, 1);
3685 signed char biasval
3686 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3687 tree bias = build_int_cst (intQI_type_node, biasval);
3688 vargs[len_opno] = len;
3689 vargs[len_opno + 1] = bias;
3691 else if (mask_opno >= 0 && masked_loop_p)
3693 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3694 vectype_out, j);
3695 vargs[mask_opno]
3696 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3697 vargs[mask_opno], gsi);
3700 gimple *new_stmt;
3701 if (cfn == CFN_GOMP_SIMD_LANE)
3703 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3704 tree new_var
3705 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3706 gimple *init_stmt = gimple_build_assign (new_var, cst);
3707 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3708 new_temp = make_ssa_name (vec_dest);
3709 new_stmt = gimple_build_assign (new_temp, new_var);
3710 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3712 else if (modifier == NARROW)
3714 /* We don't define any narrowing conditional functions at
3715 present. */
3716 gcc_assert (mask_opno < 0);
3717 tree half_res = make_ssa_name (vectype_in);
3718 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3719 gimple_call_set_lhs (call, half_res);
3720 gimple_call_set_nothrow (call, true);
3721 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3722 if ((j & 1) == 0)
3724 prev_res = half_res;
3725 continue;
3727 new_temp = make_ssa_name (vec_dest);
3728 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3729 half_res);
3730 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3732 else
3734 gcall *call;
3735 if (ifn != IFN_LAST)
3736 call = gimple_build_call_internal_vec (ifn, vargs);
3737 else
3738 call = gimple_build_call_vec (fndecl, vargs);
3739 new_temp = make_ssa_name (vec_dest, call);
3740 gimple_call_set_lhs (call, new_temp);
3741 gimple_call_set_nothrow (call, true);
3742 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3743 new_stmt = call;
3746 if (j == (modifier == NARROW ? 1 : 0))
3747 *vec_stmt = new_stmt;
3748 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3750 for (i = 0; i < nargs; i++)
3752 vec<tree> vec_oprndsi = vec_defs[i];
3753 vec_oprndsi.release ();
3756 else if (modifier == NARROW)
3758 auto_vec<vec<tree> > vec_defs (nargs);
3759 /* We don't define any narrowing conditional functions at present. */
3760 gcc_assert (mask_opno < 0);
3761 for (j = 0; j < ncopies; ++j)
3763 /* Build argument list for the vectorized call. */
3764 if (j == 0)
3765 vargs.create (nargs * 2);
3766 else
3767 vargs.truncate (0);
3769 if (slp_node)
3771 vec<tree> vec_oprnds0;
3773 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3774 vec_oprnds0 = vec_defs[0];
3776 /* Arguments are ready. Create the new vector stmt. */
3777 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3779 size_t k;
3780 vargs.truncate (0);
3781 for (k = 0; k < nargs; k++)
3783 vec<tree> vec_oprndsk = vec_defs[k];
3784 vargs.quick_push (vec_oprndsk[i]);
3785 vargs.quick_push (vec_oprndsk[i + 1]);
3787 gcall *call;
3788 if (ifn != IFN_LAST)
3789 call = gimple_build_call_internal_vec (ifn, vargs);
3790 else
3791 call = gimple_build_call_vec (fndecl, vargs);
3792 new_temp = make_ssa_name (vec_dest, call);
3793 gimple_call_set_lhs (call, new_temp);
3794 gimple_call_set_nothrow (call, true);
3795 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3796 slp_node->push_vec_def (call);
3798 continue;
3801 for (i = 0; i < nargs; i++)
3803 op = gimple_call_arg (stmt, i);
3804 if (j == 0)
3806 vec_defs.quick_push (vNULL);
3807 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3808 op, &vec_defs[i], vectypes[i]);
3810 vec_oprnd0 = vec_defs[i][2*j];
3811 vec_oprnd1 = vec_defs[i][2*j+1];
3813 vargs.quick_push (vec_oprnd0);
3814 vargs.quick_push (vec_oprnd1);
3817 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3818 new_temp = make_ssa_name (vec_dest, new_stmt);
3819 gimple_call_set_lhs (new_stmt, new_temp);
3820 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3822 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3825 if (!slp_node)
3826 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3828 for (i = 0; i < nargs; i++)
3830 vec<tree> vec_oprndsi = vec_defs[i];
3831 vec_oprndsi.release ();
3834 else
3835 /* No current target implements this case. */
3836 return false;
3838 vargs.release ();
3840 /* The call in STMT might prevent it from being removed in dce.
3841 However, we cannot remove it here, due to the way the ssa name
3842 it defines is mapped to the new definition. So just replace the
3843 rhs of the statement with something harmless. */
3845 if (slp_node)
3846 return true;
3848 stmt_info = vect_orig_stmt (stmt_info);
3849 lhs = gimple_get_lhs (stmt_info->stmt);
3851 gassign *new_stmt
3852 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3853 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3855 return true;
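/* Per-argument bookkeeping for vectorizable_simd_clone_call: the chosen
   vector type, the (possibly adjusted) operand or linear base, the step
   of a linear argument (zero otherwise), its definition kind, the known
   pointer alignment in bytes, and whether the argument is linear only
   within a simd lane.  */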
3859 struct simd_call_arg_info
3861 tree vectype;
3862 tree op;
3863 HOST_WIDE_INT linear_step;
3864 enum vect_def_type dt;
3865 unsigned int align;
3866 bool simd_lane_linear;
3869 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3870 is linear within simd lane (but not within whole loop), note it in
3871 *ARGINFO. */
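/* For example, the address &a[gomp_simd_lane ()] with 4-byte elements is
   computed as base + (sizetype) lane * 4; the walk below recognizes this
   shape and records the base and linear_step == 4 in *ARGINFO.  */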
3873 static void
3874 vect_simd_lane_linear (tree op, class loop *loop,
3875 struct simd_call_arg_info *arginfo)
3877 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3879 if (!is_gimple_assign (def_stmt)
3880 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3881 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3882 return;
3884 tree base = gimple_assign_rhs1 (def_stmt);
3885 HOST_WIDE_INT linear_step = 0;
3886 tree v = gimple_assign_rhs2 (def_stmt);
3887 while (TREE_CODE (v) == SSA_NAME)
3889 tree t;
3890 def_stmt = SSA_NAME_DEF_STMT (v);
3891 if (is_gimple_assign (def_stmt))
3892 switch (gimple_assign_rhs_code (def_stmt))
3894 case PLUS_EXPR:
3895 t = gimple_assign_rhs2 (def_stmt);
3896 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3897 return;
3898 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3899 v = gimple_assign_rhs1 (def_stmt);
3900 continue;
3901 case MULT_EXPR:
3902 t = gimple_assign_rhs2 (def_stmt);
3903 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3904 return;
3905 linear_step = tree_to_shwi (t);
3906 v = gimple_assign_rhs1 (def_stmt);
3907 continue;
3908 CASE_CONVERT:
3909 t = gimple_assign_rhs1 (def_stmt);
3910 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3911 || (TYPE_PRECISION (TREE_TYPE (v))
3912 < TYPE_PRECISION (TREE_TYPE (t))))
3913 return;
3914 if (!linear_step)
3915 linear_step = 1;
3916 v = t;
3917 continue;
3918 default:
3919 return;
3921 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3922 && loop->simduid
3923 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3924 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3925 == loop->simduid))
3927 if (!linear_step)
3928 linear_step = 1;
3929 arginfo->linear_step = linear_step;
3930 arginfo->op = base;
3931 arginfo->simd_lane_linear = true;
3932 return;
3937 /* Function vectorizable_simd_clone_call.
3939 Check if STMT_INFO performs a function call that can be vectorized
3940 by calling a simd clone of the function.
3941 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3942 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3943 Return true if STMT_INFO is vectorizable in this way. */
3945 static bool
3946 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3947 gimple_stmt_iterator *gsi,
3948 gimple **vec_stmt, slp_tree slp_node,
3949 stmt_vector_for_cost *)
3951 tree vec_dest;
3952 tree scalar_dest;
3953 tree op, type;
3954 tree vec_oprnd0 = NULL_TREE;
3955 tree vectype;
3956 poly_uint64 nunits;
3957 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3958 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3959 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3960 tree fndecl, new_temp;
3961 int ncopies, j;
3962 auto_vec<simd_call_arg_info> arginfo;
3963 vec<tree> vargs = vNULL;
3964 size_t i, nargs;
3965 tree lhs, rtype, ratype;
3966 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3967 int masked_call_offset = 0;
3969 /* Is STMT a vectorizable call? */
3970 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3971 if (!stmt)
3972 return false;
3974 fndecl = gimple_call_fndecl (stmt);
3975 if (fndecl == NULL_TREE
3976 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
3978 fndecl = gimple_call_arg (stmt, 0);
3979 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
3980 fndecl = TREE_OPERAND (fndecl, 0);
3981 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
3982 masked_call_offset = 1;
3984 if (fndecl == NULL_TREE)
3985 return false;
3987 struct cgraph_node *node = cgraph_node::get (fndecl);
3988 if (node == NULL || node->simd_clones == NULL)
3989 return false;
3991 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3992 return false;
3994 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3995 && ! vec_stmt)
3996 return false;
3998 if (gimple_call_lhs (stmt)
3999 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4000 return false;
4002 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4004 vectype = STMT_VINFO_VECTYPE (stmt_info);
4006 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4007 return false;
4009 /* Process function arguments. */
4010 nargs = gimple_call_num_args (stmt) - masked_call_offset;
4012 /* Bail out if the function has zero arguments. */
4013 if (nargs == 0)
4014 return false;
4016 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
4017 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
4018 if (!vec_stmt)
4019 simd_clone_info.truncate (0);
4020 arginfo.reserve (nargs, true);
4021 auto_vec<slp_tree> slp_op;
4022 slp_op.safe_grow_cleared (nargs);
4024 for (i = 0; i < nargs; i++)
4026 simd_call_arg_info thisarginfo;
4027 affine_iv iv;
4029 thisarginfo.linear_step = 0;
4030 thisarginfo.align = 0;
4031 thisarginfo.op = NULL_TREE;
4032 thisarginfo.simd_lane_linear = false;
4034 int op_no = i + masked_call_offset;
4035 if (slp_node)
4036 op_no = vect_slp_child_index_for_operand (stmt, op_no, false);
4037 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4038 op_no, &op, &slp_op[i],
4039 &thisarginfo.dt, &thisarginfo.vectype)
4040 || thisarginfo.dt == vect_uninitialized_def)
4042 if (dump_enabled_p ())
4043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4044 "use not simple.\n");
4045 return false;
4048 if (thisarginfo.dt == vect_constant_def
4049 || thisarginfo.dt == vect_external_def)
4051 /* With SLP we determine the vector type of constants/externals
4052 at analysis time, handling conflicts via
4053 vect_maybe_update_slp_op_vectype. At transform time
4054 we have a vector type recorded for SLP. */
4055 gcc_assert (!vec_stmt
4056 || !slp_node
4057 || thisarginfo.vectype != NULL_TREE);
4058 if (!vec_stmt)
4059 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
4060 TREE_TYPE (op),
4061 slp_node);
4063 else
4064 gcc_assert (thisarginfo.vectype != NULL_TREE);
4066 /* For linear arguments, the analyze phase should have saved
4067 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
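/* Its layout is: element 0 holds the selected clone's decl, and for a
   linear argument I elements I*3+1, I*3+2 and I*3+3 hold the base, the
   step and a flag for simd-lane-linearity.  */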
4068 if (vec_stmt
4069 && i * 3 + 4 <= simd_clone_info.length ()
4070 && simd_clone_info[i * 3 + 2])
4072 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
4073 thisarginfo.op = simd_clone_info[i * 3 + 1];
4074 thisarginfo.simd_lane_linear
4075 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
4076 /* If loop has been peeled for alignment, we need to adjust it. */
4077 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4078 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4079 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4081 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4082 tree step = simd_clone_info[i * 3 + 2];
4083 tree opt = TREE_TYPE (thisarginfo.op);
4084 bias = fold_convert (TREE_TYPE (step), bias);
4085 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4086 thisarginfo.op
4087 = fold_build2 (POINTER_TYPE_P (opt)
4088 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4089 thisarginfo.op, bias);
4092 else if (!vec_stmt
4093 && thisarginfo.dt != vect_constant_def
4094 && thisarginfo.dt != vect_external_def
4095 && loop_vinfo
4096 && TREE_CODE (op) == SSA_NAME
4097 && simple_iv (loop, loop_containing_stmt (stmt), op,
4098 &iv, false)
4099 && tree_fits_shwi_p (iv.step))
4101 thisarginfo.linear_step = tree_to_shwi (iv.step);
4102 thisarginfo.op = iv.base;
4104 else if ((thisarginfo.dt == vect_constant_def
4105 || thisarginfo.dt == vect_external_def)
4106 && POINTER_TYPE_P (TREE_TYPE (op)))
4107 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4108 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4109 linear too. */
4110 if (POINTER_TYPE_P (TREE_TYPE (op))
4111 && !thisarginfo.linear_step
4112 && !vec_stmt
4113 && thisarginfo.dt != vect_constant_def
4114 && thisarginfo.dt != vect_external_def
4115 && loop_vinfo
4116 && TREE_CODE (op) == SSA_NAME)
4117 vect_simd_lane_linear (op, loop, &thisarginfo);
4119 arginfo.quick_push (thisarginfo);
4122 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4123 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4124 unsigned int badness = 0;
4125 struct cgraph_node *bestn = NULL;
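/* Choose the clone with the lowest badness: needing multiple calls per
   vectorized iteration, being inbranch when no mask is needed, target
   reservations and argument mismatches all add weighted penalties in
   the loop below.  */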
4126 if (vec_stmt)
4127 bestn = cgraph_node::get (simd_clone_info[0]);
4128 else
4129 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4130 n = n->simdclone->next_clone)
4132 unsigned int this_badness = 0;
4133 unsigned int num_calls;
4134 /* The number of arguments in the call and the number of parameters in
4135 the simdclone should match. However, when the simdclone is
4136 'inbranch', it could have one more parameter than nargs when an
4137 inbranch simdclone is used for a non-inbranch call, either in a
4138 non-masked loop using an all-true constant mask, or inside a masked
4139 loop using its mask. */
4140 size_t simd_nargs = n->simdclone->nargs;
4141 if (!masked_call_offset && n->simdclone->inbranch)
4142 simd_nargs--;
4143 if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4144 &num_calls)
4145 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4146 || (nargs != simd_nargs))
4147 continue;
4148 if (num_calls != 1)
4149 this_badness += floor_log2 (num_calls) * 4096;
4150 if (n->simdclone->inbranch)
4151 this_badness += 8192;
4152 int target_badness = targetm.simd_clone.usable (n);
4153 if (target_badness < 0)
4154 continue;
4155 this_badness += target_badness * 512;
4156 for (i = 0; i < nargs; i++)
4158 switch (n->simdclone->args[i].arg_type)
4160 case SIMD_CLONE_ARG_TYPE_VECTOR:
4161 if (!useless_type_conversion_p
4162 (n->simdclone->args[i].orig_type,
4163 TREE_TYPE (gimple_call_arg (stmt,
4164 i + masked_call_offset))))
4165 i = -1;
4166 else if (arginfo[i].dt == vect_constant_def
4167 || arginfo[i].dt == vect_external_def
4168 || arginfo[i].linear_step)
4169 this_badness += 64;
4170 break;
4171 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4172 if (arginfo[i].dt != vect_constant_def
4173 && arginfo[i].dt != vect_external_def)
4174 i = -1;
4175 break;
4176 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4177 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4178 if (arginfo[i].dt == vect_constant_def
4179 || arginfo[i].dt == vect_external_def
4180 || (arginfo[i].linear_step
4181 != n->simdclone->args[i].linear_step))
4182 i = -1;
4183 break;
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4186 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4187 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4188 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4189 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4190 /* FORNOW */
4191 i = -1;
4192 break;
4193 case SIMD_CLONE_ARG_TYPE_MASK:
4194 /* While we can create a traditional data vector from
4195 an incoming integer mode mask, we have no good way to
4196 force the generation of an integer mode mask from a
4197 traditional boolean vector input. */
4198 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4199 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4200 i = -1;
4201 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4202 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4203 this_badness += 2048;
4204 break;
4206 if (i == (size_t) -1)
4207 break;
4208 if (n->simdclone->args[i].alignment > arginfo[i].align)
4210 i = -1;
4211 break;
4213 if (arginfo[i].align)
4214 this_badness += (exact_log2 (arginfo[i].align)
4215 - exact_log2 (n->simdclone->args[i].alignment));
4217 if (i == (size_t) -1)
4218 continue;
4219 if (masked_call_offset == 0
4220 && n->simdclone->inbranch
4221 && n->simdclone->nargs > nargs)
4223 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4224 SIMD_CLONE_ARG_TYPE_MASK);
4225 /* Penalize using a masked SIMD clone in a non-masked loop when the call
4226 is not in a branch, as we'd have to construct an all-true mask. */
4227 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4228 this_badness += 64;
4230 if (bestn == NULL || this_badness < badness)
4232 bestn = n;
4233 badness = this_badness;
4237 if (bestn == NULL)
4238 return false;
4240 unsigned int num_mask_args = 0;
4241 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4242 for (i = 0; i < nargs; i++)
4243 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4244 num_mask_args++;
4246 for (i = 0; i < nargs; i++)
4248 if ((arginfo[i].dt == vect_constant_def
4249 || arginfo[i].dt == vect_external_def)
4250 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4252 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4253 i + masked_call_offset));
4254 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4255 slp_node);
4256 if (arginfo[i].vectype == NULL
4257 || !constant_multiple_p (bestn->simdclone->simdlen,
4258 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4259 return false;
4262 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4263 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4265 if (dump_enabled_p ())
4266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4267 "vector mask arguments are not supported.\n");
4268 return false;
4271 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4273 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4274 if (bestn->simdclone->mask_mode == VOIDmode)
4276 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
4277 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4279 /* FORNOW we only have partial support for vector-type masks
4280 that can't hold all of simdlen. */
4281 if (dump_enabled_p ())
4282 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4283 vect_location,
4284 "in-branch vector clones are not yet"
4285 " supported for mismatched vector sizes.\n");
4286 return false;
4288 if (!expand_vec_cond_expr_p (clone_arg_vectype,
4289 arginfo[i].vectype, ERROR_MARK))
4291 if (dump_enabled_p ())
4292 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4293 vect_location,
4294 "cannot compute mask argument for"
4295 " in-branch vector clones.\n");
4296 return false;
4299 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4301 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4302 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4303 num_mask_args),
4304 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4306 /* FORNOW we only have partial support for integer-type masks
4307 that represent the same number of lanes as the
4308 vectorized mask inputs. */
4309 if (dump_enabled_p ())
4310 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4311 vect_location,
4312 "in-branch vector clones are not yet "
4313 "supported for mismatched vector sizes.\n");
4314 return false;
4317 else
4319 if (dump_enabled_p ())
4320 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4321 vect_location,
4322 "in-branch vector clones not supported"
4323 " on this target.\n");
4324 return false;
4329 fndecl = bestn->decl;
4330 nunits = bestn->simdclone->simdlen;
4331 if (slp_node)
4332 ncopies = vector_unroll_factor (vf * group_size, nunits);
4333 else
4334 ncopies = vector_unroll_factor (vf, nunits);
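/* E.g. with vf == 16 and a clone whose simdlen is 4 this results in
   ncopies == 4 calls to the clone per vectorized loop iteration.  */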
4336 /* If the function isn't const, only allow it in simd loops where the
4337 user has asserted that at least nunits consecutive iterations can be
4338 performed using SIMD instructions. */
4339 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4340 && gimple_vuse (stmt))
4341 return false;
4343 /* Sanity check: make sure that at least one copy of the vectorized stmt
4344 needs to be generated. */
4345 gcc_assert (ncopies >= 1);
4347 if (!vec_stmt) /* transformation not required. */
4349 if (slp_node)
4350 for (unsigned i = 0; i < nargs; ++i)
4351 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4353 if (dump_enabled_p ())
4354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4355 "incompatible vector types for invariants\n");
4356 return false;
4358 /* When the original call is pure or const but the SIMD ABI dictates
4359 an aggregate return we will have to use a virtual definition and
4360 in a loop eventually even need to add a virtual PHI. That's
4361 not straightforward, so allow fixing this up via renaming. */
4362 if (gimple_call_lhs (stmt)
4363 && !gimple_vdef (stmt)
4364 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4365 vinfo->any_known_not_updated_vssa = true;
4366 /* ??? For SLP code-gen we end up inserting after the last
4367 vector argument def rather than at the original call position
4368 so automagic virtual operand updating doesn't work. */
4369 if (gimple_vuse (stmt) && slp_node)
4370 vinfo->any_known_not_updated_vssa = true;
4371 simd_clone_info.safe_push (bestn->decl);
4372 for (i = 0; i < bestn->simdclone->nargs; i++)
4374 switch (bestn->simdclone->args[i].arg_type)
4376 default:
4377 continue;
4378 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4379 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4381 simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
4382 simd_clone_info.safe_push (arginfo[i].op);
4383 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4384 ? size_type_node : TREE_TYPE (arginfo[i].op);
4385 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4386 simd_clone_info.safe_push (ls);
4387 tree sll = arginfo[i].simd_lane_linear
4388 ? boolean_true_node : boolean_false_node;
4389 simd_clone_info.safe_push (sll);
4391 break;
4392 case SIMD_CLONE_ARG_TYPE_MASK:
4393 if (loop_vinfo
4394 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4396 unsigned nmasks
4397 = exact_div (ncopies * bestn->simdclone->simdlen,
4398 TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
4399 vect_record_loop_mask (loop_vinfo,
4400 &LOOP_VINFO_MASKS (loop_vinfo),
4401 nmasks, vectype, op);
4404 break;
4408 if (!bestn->simdclone->inbranch && loop_vinfo)
4410 if (dump_enabled_p ()
4411 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4412 dump_printf_loc (MSG_NOTE, vect_location,
4413 "can't use a fully-masked loop because a"
4414 " non-masked simd clone was selected.\n");
4415 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4418 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4419 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4420 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4421 dt, slp_node, cost_vec); */
4422 return true;
4425 /* Transform. */
4427 if (dump_enabled_p ())
4428 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4430 /* Handle def. */
4431 scalar_dest = gimple_call_lhs (stmt);
4432 vec_dest = NULL_TREE;
4433 rtype = NULL_TREE;
4434 ratype = NULL_TREE;
4435 if (scalar_dest)
4437 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4438 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4439 if (TREE_CODE (rtype) == ARRAY_TYPE)
4441 ratype = rtype;
4442 rtype = TREE_TYPE (ratype);
4446 auto_vec<vec<tree> > vec_oprnds;
4447 auto_vec<unsigned> vec_oprnds_i;
4448 vec_oprnds_i.safe_grow_cleared (nargs, true);
4449 if (slp_node)
4451 vec_oprnds.reserve_exact (nargs);
4452 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4454 else
4455 vec_oprnds.safe_grow_cleared (nargs, true);
4456 for (j = 0; j < ncopies; ++j)
4458 poly_uint64 callee_nelements;
4459 poly_uint64 caller_nelements;
4460 /* Build argument list for the vectorized call. */
4461 if (j == 0)
4462 vargs.create (nargs);
4463 else
4464 vargs.truncate (0);
4466 for (i = 0; i < nargs; i++)
4468 unsigned int k, l, m, o;
4469 tree atype;
4470 op = gimple_call_arg (stmt, i + masked_call_offset);
4471 switch (bestn->simdclone->args[i].arg_type)
4473 case SIMD_CLONE_ARG_TYPE_VECTOR:
4474 atype = bestn->simdclone->args[i].vector_type;
4475 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4476 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4477 o = vector_unroll_factor (nunits, callee_nelements);
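/* The clone's parameter vector may cover fewer or more lanes than the
   caller's vector: below, BIT_FIELD_REFs extract K pieces from a wider
   caller vector, or a CONSTRUCTOR glues K caller vectors together
   (K is a power of two in both cases).  */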
4478 for (m = j * o; m < (j + 1) * o; m++)
4480 if (known_lt (callee_nelements, caller_nelements))
4482 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4483 if (!constant_multiple_p (caller_nelements,
4484 callee_nelements, &k))
4485 gcc_unreachable ();
4487 gcc_assert ((k & (k - 1)) == 0);
4488 if (m == 0)
4490 if (!slp_node)
4491 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4492 ncopies * o / k, op,
4493 &vec_oprnds[i]);
4494 vec_oprnds_i[i] = 0;
4495 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4497 else
4499 vec_oprnd0 = arginfo[i].op;
4500 if ((m & (k - 1)) == 0)
4501 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4503 arginfo[i].op = vec_oprnd0;
4504 vec_oprnd0
4505 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4506 bitsize_int (prec),
4507 bitsize_int ((m & (k - 1)) * prec));
4508 gassign *new_stmt
4509 = gimple_build_assign (make_ssa_name (atype),
4510 vec_oprnd0);
4511 vect_finish_stmt_generation (vinfo, stmt_info,
4512 new_stmt, gsi);
4513 vargs.safe_push (gimple_assign_lhs (new_stmt));
4515 else
4517 if (!constant_multiple_p (callee_nelements,
4518 caller_nelements, &k))
4519 gcc_unreachable ();
4520 gcc_assert ((k & (k - 1)) == 0);
4521 vec<constructor_elt, va_gc> *ctor_elts;
4522 if (k != 1)
4523 vec_alloc (ctor_elts, k);
4524 else
4525 ctor_elts = NULL;
4526 for (l = 0; l < k; l++)
4528 if (m == 0 && l == 0)
4530 if (!slp_node)
4531 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4532 k * o * ncopies,
4534 &vec_oprnds[i]);
4535 vec_oprnds_i[i] = 0;
4536 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4538 else
4539 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4540 arginfo[i].op = vec_oprnd0;
4541 if (k == 1)
4542 break;
4543 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4544 vec_oprnd0);
4546 if (k == 1)
4547 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4548 atype))
4550 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4551 vec_oprnd0);
4552 gassign *new_stmt
4553 = gimple_build_assign (make_ssa_name (atype),
4554 vec_oprnd0);
4555 vect_finish_stmt_generation (vinfo, stmt_info,
4556 new_stmt, gsi);
4557 vargs.safe_push (gimple_get_lhs (new_stmt));
4559 else
4560 vargs.safe_push (vec_oprnd0);
4561 else
4563 vec_oprnd0 = build_constructor (atype, ctor_elts);
4564 gassign *new_stmt
4565 = gimple_build_assign (make_ssa_name (atype),
4566 vec_oprnd0);
4567 vect_finish_stmt_generation (vinfo, stmt_info,
4568 new_stmt, gsi);
4569 vargs.safe_push (gimple_assign_lhs (new_stmt));
4573 break;
4574 case SIMD_CLONE_ARG_TYPE_MASK:
4575 if (bestn->simdclone->mask_mode == VOIDmode)
4577 atype = bestn->simdclone->args[i].vector_type;
4578 tree elt_type = TREE_TYPE (atype);
4579 tree one = fold_convert (elt_type, integer_one_node);
4580 tree zero = fold_convert (elt_type, integer_zero_node);
4581 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4582 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4583 o = vector_unroll_factor (nunits, callee_nelements);
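/* A clone with a VOIDmode mask mode expects the mask as an ordinary data
   vector of ones and zeros, so the boolean mask (ANDed with the loop
   mask if the loop is fully masked) is expanded via VEC_COND_EXPR
   below.  */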
4584 for (m = j * o; m < (j + 1) * o; m++)
4586 if (maybe_lt (callee_nelements, caller_nelements))
4588 /* The mask type has fewer elements than simdlen. */
4590 /* FORNOW */
4591 gcc_unreachable ();
4593 else if (known_eq (callee_nelements, caller_nelements))
4595 /* The SIMD clone function has the same number of
4596 elements as the current function. */
4597 if (m == 0)
4599 if (!slp_node)
4600 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4601 o * ncopies,
4603 &vec_oprnds[i]);
4604 vec_oprnds_i[i] = 0;
4606 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4607 if (loop_vinfo
4608 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4610 vec_loop_masks *loop_masks
4611 = &LOOP_VINFO_MASKS (loop_vinfo);
4612 tree loop_mask
4613 = vect_get_loop_mask (loop_vinfo, gsi,
4614 loop_masks, ncopies,
4615 vectype, j);
4616 vec_oprnd0
4617 = prepare_vec_mask (loop_vinfo,
4618 TREE_TYPE (loop_mask),
4619 loop_mask, vec_oprnd0,
4620 gsi);
4621 loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4622 loop_mask });
4625 vec_oprnd0
4626 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4627 build_vector_from_val (atype, one),
4628 build_vector_from_val (atype, zero));
4629 gassign *new_stmt
4630 = gimple_build_assign (make_ssa_name (atype),
4631 vec_oprnd0);
4632 vect_finish_stmt_generation (vinfo, stmt_info,
4633 new_stmt, gsi);
4634 vargs.safe_push (gimple_assign_lhs (new_stmt));
4636 else
4638 /* The mask type has more elements than simdlen. */
4640 /* FORNOW */
4641 gcc_unreachable ();
4645 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4647 atype = bestn->simdclone->args[i].vector_type;
4648 /* Guess the number of lanes represented by atype. */
4649 poly_uint64 atype_subparts
4650 = exact_div (bestn->simdclone->simdlen,
4651 num_mask_args);
4652 o = vector_unroll_factor (nunits, atype_subparts);
4653 for (m = j * o; m < (j + 1) * o; m++)
4655 if (m == 0)
4657 if (!slp_node)
4658 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4659 o * ncopies,
4661 &vec_oprnds[i]);
4662 vec_oprnds_i[i] = 0;
4664 if (maybe_lt (atype_subparts,
4665 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4667 /* The mask argument has fewer elements than the
4668 input vector. */
4669 /* FORNOW */
4670 gcc_unreachable ();
4672 else if (known_eq (atype_subparts,
4673 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4675 /* The vector mask argument matches the input
4676 in the number of lanes, but not necessarily
4677 in the mode. */
4678 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4679 tree st = lang_hooks.types.type_for_mode
4680 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4681 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4682 vec_oprnd0);
4683 gassign *new_stmt
4684 = gimple_build_assign (make_ssa_name (st),
4685 vec_oprnd0);
4686 vect_finish_stmt_generation (vinfo, stmt_info,
4687 new_stmt, gsi);
4688 if (!types_compatible_p (atype, st))
4690 new_stmt
4691 = gimple_build_assign (make_ssa_name (atype),
4692 NOP_EXPR,
4693 gimple_assign_lhs
4694 (new_stmt));
4695 vect_finish_stmt_generation (vinfo, stmt_info,
4696 new_stmt, gsi);
4698 vargs.safe_push (gimple_assign_lhs (new_stmt));
4700 else
4702 /* The mask argument has more elements than the
4703 input vector. */
4704 /* FORNOW */
4705 gcc_unreachable ();
4709 else
4710 gcc_unreachable ();
4711 break;
4712 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4713 vargs.safe_push (op);
4714 break;
4715 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4716 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
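/* A linear argument is passed as a scalar that advances by
   linear_step * nunits per clone invocation: the first copy builds a
   PHI stepping by ncopies * nunits * step per loop iteration, later
   copies add j * nunits * step on top of it.  */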
4717 if (j == 0)
4719 gimple_seq stmts;
4720 arginfo[i].op
4721 = force_gimple_operand (unshare_expr (arginfo[i].op),
4722 &stmts, true, NULL_TREE);
4723 if (stmts != NULL)
4725 basic_block new_bb;
4726 edge pe = loop_preheader_edge (loop);
4727 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4728 gcc_assert (!new_bb);
4730 if (arginfo[i].simd_lane_linear)
4732 vargs.safe_push (arginfo[i].op);
4733 break;
4735 tree phi_res = copy_ssa_name (op);
4736 gphi *new_phi = create_phi_node (phi_res, loop->header);
4737 add_phi_arg (new_phi, arginfo[i].op,
4738 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4739 enum tree_code code
4740 = POINTER_TYPE_P (TREE_TYPE (op))
4741 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4742 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4743 ? sizetype : TREE_TYPE (op);
4744 poly_widest_int cst
4745 = wi::mul (bestn->simdclone->args[i].linear_step,
4746 ncopies * nunits);
4747 tree tcst = wide_int_to_tree (type, cst);
4748 tree phi_arg = copy_ssa_name (op);
4749 gassign *new_stmt
4750 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4751 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4752 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4753 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4754 UNKNOWN_LOCATION);
4755 arginfo[i].op = phi_res;
4756 vargs.safe_push (phi_res);
4758 else
4760 enum tree_code code
4761 = POINTER_TYPE_P (TREE_TYPE (op))
4762 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4763 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4764 ? sizetype : TREE_TYPE (op);
4765 poly_widest_int cst
4766 = wi::mul (bestn->simdclone->args[i].linear_step,
4767 j * nunits);
4768 tree tcst = wide_int_to_tree (type, cst);
4769 new_temp = make_ssa_name (TREE_TYPE (op));
4770 gassign *new_stmt
4771 = gimple_build_assign (new_temp, code,
4772 arginfo[i].op, tcst);
4773 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4774 vargs.safe_push (new_temp);
4776 break;
4777 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4778 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4779 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4780 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4781 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4782 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4783 default:
4784 gcc_unreachable ();
4788 if (masked_call_offset == 0
4789 && bestn->simdclone->inbranch
4790 && bestn->simdclone->nargs > nargs)
4792 unsigned long m, o;
4793 size_t mask_i = bestn->simdclone->nargs - 1;
4794 tree mask;
4795 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4796 SIMD_CLONE_ARG_TYPE_MASK);
4798 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4799 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4800 /* Guess the number of lanes represented by masktype. */
4801 callee_nelements = exact_div (bestn->simdclone->simdlen,
4802 bestn->simdclone->nargs - nargs);
4803 else
4804 callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4805 o = vector_unroll_factor (nunits, callee_nelements);
4806 for (m = j * o; m < (j + 1) * o; m++)
4808 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4810 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4811 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4812 ncopies, vectype, j);
4814 else
4815 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4817 gassign *new_stmt;
4818 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4820 /* This means we are dealing with integer mask modes.
4821 First convert to an integer type with the same size as
4822 the current vector type. */
4823 unsigned HOST_WIDE_INT intermediate_size
4824 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4825 tree mid_int_type =
4826 build_nonstandard_integer_type (intermediate_size, 1);
4827 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4828 new_stmt
4829 = gimple_build_assign (make_ssa_name (mid_int_type),
4830 mask);
4831 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4832 /* Then zero-extend to the mask mode. */
4833 mask = fold_build1 (NOP_EXPR, masktype,
4834 gimple_get_lhs (new_stmt));
4836 else if (bestn->simdclone->mask_mode == VOIDmode)
4838 tree one = fold_convert (TREE_TYPE (masktype),
4839 integer_one_node);
4840 tree zero = fold_convert (TREE_TYPE (masktype),
4841 integer_zero_node);
4842 mask = build3 (VEC_COND_EXPR, masktype, mask,
4843 build_vector_from_val (masktype, one),
4844 build_vector_from_val (masktype, zero));
4846 else
4847 gcc_unreachable ();
4849 new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
4850 vect_finish_stmt_generation (vinfo, stmt_info,
4851 new_stmt, gsi);
4852 mask = gimple_assign_lhs (new_stmt);
4853 vargs.safe_push (mask);
4857 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4858 if (vec_dest)
4860 gcc_assert (ratype
4861 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4862 if (ratype)
4863 new_temp = create_tmp_var (ratype);
4864 else if (useless_type_conversion_p (vectype, rtype))
4865 new_temp = make_ssa_name (vec_dest, new_call);
4866 else
4867 new_temp = make_ssa_name (rtype, new_call);
4868 gimple_call_set_lhs (new_call, new_temp);
4870 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4871 gimple *new_stmt = new_call;
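/* The clone's return value may span several vectors of VECTYPE (split it
   below), cover only part of one (the results of several consecutive
   calls are collected in a CONSTRUCTOR), or map to exactly one vector,
   possibly via an array temporary or a VIEW_CONVERT_EXPR.  */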
4873 if (vec_dest)
4875 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4877 unsigned int k, l;
4878 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4879 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4880 k = vector_unroll_factor (nunits,
4881 TYPE_VECTOR_SUBPARTS (vectype));
4882 gcc_assert ((k & (k - 1)) == 0);
4883 for (l = 0; l < k; l++)
4885 tree t;
4886 if (ratype)
4888 t = build_fold_addr_expr (new_temp);
4889 t = build2 (MEM_REF, vectype, t,
4890 build_int_cst (TREE_TYPE (t), l * bytes));
4892 else
4893 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4894 bitsize_int (prec), bitsize_int (l * prec));
4895 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4896 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4898 if (j == 0 && l == 0)
4899 *vec_stmt = new_stmt;
4900 if (slp_node)
4901 SLP_TREE_VEC_DEFS (slp_node)
4902 .quick_push (gimple_assign_lhs (new_stmt));
4903 else
4904 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4907 if (ratype)
4908 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4909 continue;
4911 else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
4913 unsigned int k;
4914 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
4915 TYPE_VECTOR_SUBPARTS (rtype), &k))
4916 gcc_unreachable ();
4917 gcc_assert ((k & (k - 1)) == 0);
4918 if ((j & (k - 1)) == 0)
4919 vec_alloc (ret_ctor_elts, k);
4920 if (ratype)
4922 unsigned int m, o;
4923 o = vector_unroll_factor (nunits,
4924 TYPE_VECTOR_SUBPARTS (rtype));
4925 for (m = 0; m < o; m++)
4927 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4928 size_int (m), NULL_TREE, NULL_TREE);
4929 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4930 tem);
4931 vect_finish_stmt_generation (vinfo, stmt_info,
4932 new_stmt, gsi);
4933 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4934 gimple_assign_lhs (new_stmt));
4936 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4938 else
4939 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4940 if ((j & (k - 1)) != k - 1)
4941 continue;
4942 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4943 new_stmt
4944 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4945 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4947 if ((unsigned) j == k - 1)
4948 *vec_stmt = new_stmt;
4949 if (slp_node)
4950 SLP_TREE_VEC_DEFS (slp_node)
4951 .quick_push (gimple_assign_lhs (new_stmt));
4952 else
4953 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4954 continue;
4956 else if (ratype)
4958 tree t = build_fold_addr_expr (new_temp);
4959 t = build2 (MEM_REF, vectype, t,
4960 build_int_cst (TREE_TYPE (t), 0));
4961 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4962 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4963 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4965 else if (!useless_type_conversion_p (vectype, rtype))
4967 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4968 new_stmt
4969 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4970 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4974 if (j == 0)
4975 *vec_stmt = new_stmt;
4976 if (slp_node)
4977 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
4978 else
4979 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4982 for (i = 0; i < nargs; ++i)
4984 vec<tree> oprndsi = vec_oprnds[i];
4985 oprndsi.release ();
4987 vargs.release ();
4989 /* Mark the clone as no longer being a candidate for GC. */
4990 bestn->gc_candidate = false;
4992 /* The call in STMT might prevent it from being removed in dce.
4993 However, we cannot remove it here, due to the way the ssa name
4994 it defines is mapped to the new definition. So just replace the
4995 rhs of the statement with something harmless. */
4997 if (slp_node)
4998 return true;
5000 gimple *new_stmt;
5001 if (scalar_dest)
5003 type = TREE_TYPE (scalar_dest);
5004 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
5005 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
5007 else
5008 new_stmt = gimple_build_nop ();
5009 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
5010 unlink_stmt_vdef (stmt);
5012 return true;
5016 /* Function vect_gen_widened_results_half
5018 Create a vector stmt whose code, number of arguments, and result
5019 variable are CH, OP_TYPE, and VEC_DEST, and its arguments are
5020 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
5021 If CH wraps a function rather than a tree code, a call to that
5022 function is created instead of an assignment.
5023 STMT_INFO is the original scalar stmt that we are vectorizing. */
5025 static gimple *
5026 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
5027 tree vec_oprnd0, tree vec_oprnd1, int op_type,
5028 tree vec_dest, gimple_stmt_iterator *gsi,
5029 stmt_vec_info stmt_info)
5031 gimple *new_stmt;
5032 tree new_temp;
5034 /* Generate half of the widened result: */
5035 if (op_type != binary_op)
5036 vec_oprnd1 = NULL;
5037 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
5038 new_temp = make_ssa_name (vec_dest, new_stmt);
5039 gimple_set_lhs (new_stmt, new_temp);
5040 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5042 return new_stmt;
5046 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5047 For multi-step conversions store the resulting vectors and call the function
5048 recursively. When NARROW_SRC_P is true, there's still a conversion after
5049 narrowing, so don't store the vectors in the SLP_NODE or in the vector
5050 info of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
5052 static void
5053 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
5054 int multi_step_cvt,
5055 stmt_vec_info stmt_info,
5056 vec<tree> &vec_dsts,
5057 gimple_stmt_iterator *gsi,
5058 slp_tree slp_node, code_helper code,
5059 bool narrow_src_p)
5061 unsigned int i;
5062 tree vop0, vop1, new_tmp, vec_dest;
5064 vec_dest = vec_dsts.pop ();
5066 for (i = 0; i < vec_oprnds->length (); i += 2)
5068 /* Create demotion operation. */
5069 vop0 = (*vec_oprnds)[i];
5070 vop1 = (*vec_oprnds)[i + 1];
5071 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
5072 new_tmp = make_ssa_name (vec_dest, new_stmt);
5073 gimple_set_lhs (new_stmt, new_tmp);
5074 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5075 if (multi_step_cvt || narrow_src_p)
5076 /* Store the resulting vector for the next recursive call,
5077 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
5078 (*vec_oprnds)[i/2] = new_tmp;
5079 else
5081 /* This is the last step of the conversion sequence. Store the
5082 vectors in SLP_NODE or in vector info of the scalar statement
5083 (or in STMT_VINFO_RELATED_STMT chain). */
5084 if (slp_node)
5085 slp_node->push_vec_def (new_stmt);
5086 else
5087 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5091 /* For multi-step demotion operations we first generate demotion operations
5092 from the source type to the intermediate types, and then combine the
5093 results (stored in VEC_OPRNDS) in a demotion operation to the destination
5094 type. */
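/* E.g. a DImode to QImode demotion goes through SImode and HImode
   intermediate vectors, halving the number of vectors with a pack
   operation at each recursion level.  */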
5095 if (multi_step_cvt)
5097 /* At each level of recursion we have half of the operands we had at the
5098 previous level. */
5099 vec_oprnds->truncate ((i+1)/2);
5100 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5101 multi_step_cvt - 1,
5102 stmt_info, vec_dsts, gsi,
5103 slp_node, VEC_PACK_TRUNC_EXPR,
5104 narrow_src_p);
5107 vec_dsts.quick_push (vec_dest);
5111 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5112 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5113 STMT_INFO. For multi-step conversions store the resulting vectors and
5114 call the function recursively. */
5116 static void
5117 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5118 vec<tree> *vec_oprnds0,
5119 vec<tree> *vec_oprnds1,
5120 stmt_vec_info stmt_info, tree vec_dest,
5121 gimple_stmt_iterator *gsi,
5122 code_helper ch1,
5123 code_helper ch2, int op_type)
5125 int i;
5126 tree vop0, vop1, new_tmp1, new_tmp2;
5127 gimple *new_stmt1, *new_stmt2;
5128 vec<tree> vec_tmp = vNULL;
5130 vec_tmp.create (vec_oprnds0->length () * 2);
5131 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5133 if (op_type == binary_op)
5134 vop1 = (*vec_oprnds1)[i];
5135 else
5136 vop1 = NULL_TREE;
5138 /* Generate the two halves of promotion operation. */
5139 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5140 op_type, vec_dest, gsi,
5141 stmt_info);
5142 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5143 op_type, vec_dest, gsi,
5144 stmt_info);
5145 if (is_gimple_call (new_stmt1))
5147 new_tmp1 = gimple_call_lhs (new_stmt1);
5148 new_tmp2 = gimple_call_lhs (new_stmt2);
5150 else
5152 new_tmp1 = gimple_assign_lhs (new_stmt1);
5153 new_tmp2 = gimple_assign_lhs (new_stmt2);
5156 /* Store the results for the next step. */
5157 vec_tmp.quick_push (new_tmp1);
5158 vec_tmp.quick_push (new_tmp2);
5161 vec_oprnds0->release ();
5162 *vec_oprnds0 = vec_tmp;
5165 /* Create vectorized promotion stmts for widening stmts using only half the
5166 potential vector size for input. */
5167 static void
5168 vect_create_half_widening_stmts (vec_info *vinfo,
5169 vec<tree> *vec_oprnds0,
5170 vec<tree> *vec_oprnds1,
5171 stmt_vec_info stmt_info, tree vec_dest,
5172 gimple_stmt_iterator *gsi,
5173 code_helper code1,
5174 int op_type)
5176 int i;
5177 tree vop0, vop1;
5178 gimple *new_stmt1;
5179 gimple *new_stmt2;
5180 gimple *new_stmt3;
5181 vec<tree> vec_tmp = vNULL;
5183 vec_tmp.create (vec_oprnds0->length ());
5184 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5186 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5188 gcc_assert (op_type == binary_op);
5189 vop1 = (*vec_oprnds1)[i];
5191 /* Widen the first vector input. */
5192 out_type = TREE_TYPE (vec_dest);
5193 new_tmp1 = make_ssa_name (out_type);
5194 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5195 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5196 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5198 /* Widen the second vector input. */
5199 new_tmp2 = make_ssa_name (out_type);
5200 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5202 /* Perform the operation with both vector inputs widened. */
5203 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5205 else
5207 /* Perform the operation with the single vector input widened. */
5208 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5211 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5212 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5213 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5215 /* Store the results for the next step. */
5216 vec_tmp.quick_push (new_tmp3);
5219 vec_oprnds0->release ();
5220 *vec_oprnds0 = vec_tmp;
5224 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5225 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5226 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5227 Return true if STMT_INFO is vectorizable in this way. */
5229 static bool
5230 vectorizable_conversion (vec_info *vinfo,
5231 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5232 gimple **vec_stmt, slp_tree slp_node,
5233 stmt_vector_for_cost *cost_vec)
5235 tree vec_dest, cvt_op = NULL_TREE;
5236 tree scalar_dest;
5237 tree op0, op1 = NULL_TREE;
5238 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5239 tree_code tc1;
5240 code_helper code, code1, code2;
5241 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5242 tree new_temp;
5243 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5244 int ndts = 2;
5245 poly_uint64 nunits_in;
5246 poly_uint64 nunits_out;
5247 tree vectype_out, vectype_in;
5248 int ncopies, i;
5249 tree lhs_type, rhs_type;
5250 /* For conversions between floating point and integer there are two NARROW
5251 cases. NARROW_SRC is for FLOAT_EXPR and means
5252 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5253 This is safe when the range of the source integer fits into the lower
5254 precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5255 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5256 For all other narrowing conversions NARROW_DST is used as the
5257 default. */
5258 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
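/* Hedged examples of the two NARROW flavours (sketches only; the vector
   modes depend on the target):
     NARROW_DST:  float -> short as
                  V4SF --FIX_TRUNC_EXPR--> V4SI, two V4SI packed to V8HI;
     NARROW_SRC:  long -> float, when range information proves the values
                  fit in int, as two V2DI packed to V4SI followed by
                  V4SI --FLOAT_EXPR--> V4SF.  */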
5259 vec<tree> vec_oprnds0 = vNULL;
5260 vec<tree> vec_oprnds1 = vNULL;
5261 tree vop0;
5262 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5263 int multi_step_cvt = 0;
5264 vec<tree> interm_types = vNULL;
5265 tree intermediate_type, cvt_type = NULL_TREE;
5266 int op_type;
5267 unsigned short fltsz;
5269 /* Is STMT a vectorizable conversion? */
5271 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5272 return false;
5274 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5275 && ! vec_stmt)
5276 return false;
5278 gimple* stmt = stmt_info->stmt;
5279 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5280 return false;
5282 if (gimple_get_lhs (stmt) == NULL_TREE
5283 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5284 return false;
5289 if (is_gimple_assign (stmt))
5291 code = gimple_assign_rhs_code (stmt);
5292 op_type = TREE_CODE_LENGTH ((tree_code) code);
5294 else if (gimple_call_internal_p (stmt))
5296 code = gimple_call_internal_fn (stmt);
5297 op_type = gimple_call_num_args (stmt);
5299 else
5300 return false;
5302 bool widen_arith = (code == WIDEN_MULT_EXPR
5303 || code == WIDEN_LSHIFT_EXPR
5304 || widening_fn_p (code));
5306 if (!widen_arith
5307 && !CONVERT_EXPR_CODE_P (code)
5308 && code != FIX_TRUNC_EXPR
5309 && code != FLOAT_EXPR)
5310 return false;
5312 /* Check types of lhs and rhs. */
5313 scalar_dest = gimple_get_lhs (stmt);
5314 lhs_type = TREE_TYPE (scalar_dest);
5315 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5317 /* Check the operands of the operation. */
5318 slp_tree slp_op0, slp_op1 = NULL;
5319 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5320 0, &op0, &slp_op0, &dt[0], &vectype_in))
5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5324 "use not simple.\n");
5325 return false;
5328 rhs_type = TREE_TYPE (op0);
5329 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5330 && !((INTEGRAL_TYPE_P (lhs_type)
5331 && INTEGRAL_TYPE_P (rhs_type))
5332 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5333 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5334 return false;
5336 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5337 && ((INTEGRAL_TYPE_P (lhs_type)
5338 && !type_has_mode_precision_p (lhs_type))
5339 || (INTEGRAL_TYPE_P (rhs_type)
5340 && !type_has_mode_precision_p (rhs_type))))
5342 if (dump_enabled_p ())
5343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5344 "type conversion to/from bit-precision unsupported."
5345 "\n");
5346 return false;
5349 if (op_type == binary_op)
5351 gcc_assert (code == WIDEN_MULT_EXPR
5352 || code == WIDEN_LSHIFT_EXPR
5353 || widening_fn_p (code));
5355 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5356 gimple_call_arg (stmt, 0);
5357 tree vectype1_in;
5358 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5359 &op1, &slp_op1, &dt[1], &vectype1_in))
5361 if (dump_enabled_p ())
5362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5363 "use not simple.\n");
5364 return false;
5366 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5367 OP1. */
5368 if (!vectype_in)
5369 vectype_in = vectype1_in;
5372 /* If op0 is an external or constant def, infer the vector type
5373 from the scalar type. */
5374 if (!vectype_in)
5375 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5376 if (vec_stmt)
5377 gcc_assert (vectype_in);
5378 if (!vectype_in)
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5382 "no vectype for scalar type %T\n", rhs_type);
5384 return false;
5387 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5388 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5390 if (dump_enabled_p ())
5391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5392 "can't convert between boolean and non "
5393 "boolean vectors %T\n", rhs_type);
5395 return false;
5398 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5399 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5400 if (known_eq (nunits_out, nunits_in))
5401 if (widen_arith)
5402 modifier = WIDEN;
5403 else
5404 modifier = NONE;
5405 else if (multiple_p (nunits_out, nunits_in))
5406 modifier = NARROW_DST;
5407 else
5409 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5410 modifier = WIDEN;
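/* For example (a sketch): V8HI -> V4SI leaves nunits_in == 8 greater than
   nunits_out == 4, so the conversion widens; V4SI -> V8HI narrows and
   starts out as NARROW_DST (the FLOAT_EXPR handling below may turn it
   into NARROW_SRC); equal subparts give NONE, or WIDEN for a widening
   arithmetic code such as WIDEN_MULT_EXPR (the half-widening case).  */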
5413 /* Multiple types in SLP are handled by creating the appropriate number of
5414 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5415 case of SLP. */
5416 if (slp_node)
5417 ncopies = 1;
5418 else if (modifier == NARROW_DST)
5419 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5420 else
5421 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5423 /* Sanity check: make sure that at least one copy of the vectorized stmt
5424 needs to be generated. */
5425 gcc_assert (ncopies >= 1);
5427 bool found_mode = false;
5428 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5429 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5430 opt_scalar_mode rhs_mode_iter;
5431 vec<std::pair<tree, tree_code> > converts = vNULL;
5433 /* Supportable by target? */
5434 switch (modifier)
5436 case NONE:
5437 if (code != FIX_TRUNC_EXPR
5438 && code != FLOAT_EXPR
5439 && !CONVERT_EXPR_CODE_P (code))
5440 return false;
5441 gcc_assert (code.is_tree_code ());
5442 if (supportable_indirect_convert_operation (code,
5443 vectype_out,
5444 vectype_in,
5445 &converts,
5446 op0))
5448 gcc_assert (converts.length () <= 2);
5449 if (converts.length () == 1)
5450 code1 = converts[0].second;
5451 else
5453 cvt_type = NULL_TREE;
5454 multi_step_cvt = converts.length () - 1;
5455 codecvt1 = converts[0].second;
5456 code1 = converts[1].second;
5457 interm_types.safe_push (converts[0].first);
5459 break;
5462 /* FALLTHRU */
5463 unsupported:
5464 if (dump_enabled_p ())
5465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5466 "conversion not supported by target.\n");
5467 return false;
5469 case WIDEN:
5470 if (known_eq (nunits_in, nunits_out))
5472 if (!(code.is_tree_code ()
5473 && supportable_half_widening_operation ((tree_code) code,
5474 vectype_out, vectype_in,
5475 &tc1)))
5476 goto unsupported;
5477 code1 = tc1;
5478 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5479 break;
5481 if (supportable_widening_operation (vinfo, code, stmt_info,
5482 vectype_out, vectype_in, &code1,
5483 &code2, &multi_step_cvt,
5484 &interm_types))
5486 /* Binary widening operation can only be supported directly by the
5487 architecture. */
5488 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5489 break;
5492 if (code != FLOAT_EXPR
5493 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5494 goto unsupported;
5496 fltsz = GET_MODE_SIZE (lhs_mode);
5497 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5499 rhs_mode = rhs_mode_iter.require ();
5500 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5501 break;
5503 cvt_type
5504 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5505 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5506 if (cvt_type == NULL_TREE)
5507 goto unsupported;
5509 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5511 tc1 = ERROR_MARK;
5512 gcc_assert (code.is_tree_code ());
5513 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5514 cvt_type, &tc1))
5515 goto unsupported;
5516 codecvt1 = tc1;
5518 else if (!supportable_widening_operation (vinfo, code,
5519 stmt_info, vectype_out,
5520 cvt_type, &codecvt1,
5521 &codecvt2, &multi_step_cvt,
5522 &interm_types))
5523 continue;
5524 else
5525 gcc_assert (multi_step_cvt == 0);
5527 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5528 cvt_type,
5529 vectype_in, &code1,
5530 &code2, &multi_step_cvt,
5531 &interm_types))
5533 found_mode = true;
5534 break;
5538 if (!found_mode)
5539 goto unsupported;
5541 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5542 codecvt2 = ERROR_MARK;
5543 else
5545 multi_step_cvt++;
5546 interm_types.safe_push (cvt_type);
5547 cvt_type = NULL_TREE;
5549 break;
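/* Hedged sketch of the FLOAT_EXPR fallback tried above: for an
   integer -> float conversion whose destination is wider than the source
   (e.g. short -> double), the loop searches for an intermediate integer
   type CVT_TYPE such that the input can first be widened to CVT_TYPE
   (e.g. V8HI unpacked to V4SI) and CVT_TYPE can then be converted,
   possibly again by a widening operation, to VECTYPE_OUT; the extra
   steps are recorded in MULTI_STEP_CVT and INTERM_TYPES.  */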
5551 case NARROW_DST:
5552 gcc_assert (op_type == unary_op);
5553 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5554 &code1, &multi_step_cvt,
5555 &interm_types))
5556 break;
5558 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5559 goto unsupported;
5561 if (code == FIX_TRUNC_EXPR)
5563 cvt_type
5564 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5565 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5566 if (cvt_type == NULL_TREE)
5567 goto unsupported;
5568 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5569 &tc1))
5570 codecvt1 = tc1;
5571 else
5572 goto unsupported;
5573 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5574 &code1, &multi_step_cvt,
5575 &interm_types))
5576 break;
5578 /* If op0 can be represented with a lower precision integer,
5579 truncate it to cvt_type and then do the FLOAT_EXPR. */
5580 else if (code == FLOAT_EXPR)
5582 wide_int op_min_value, op_max_value;
5583 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5584 goto unsupported;
5586 cvt_type
5587 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5588 if (cvt_type == NULL_TREE
5589 || (wi::min_precision (op_max_value, SIGNED)
5590 > TYPE_PRECISION (cvt_type))
5591 || (wi::min_precision (op_min_value, SIGNED)
5592 > TYPE_PRECISION (cvt_type)))
5593 goto unsupported;
5595 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5596 if (cvt_type == NULL_TREE)
5597 goto unsupported;
5598 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5599 &code1, &multi_step_cvt,
5600 &interm_types))
5601 goto unsupported;
5602 if (supportable_convert_operation ((tree_code) code, vectype_out,
5603 cvt_type, &tc1))
5605 codecvt1 = tc1;
5606 modifier = NARROW_SRC;
5607 break;
5611 goto unsupported;
5613 default:
5614 gcc_unreachable ();
5617 if (!vec_stmt) /* transformation not required. */
5619 if (slp_node
5620 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5621 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5623 if (dump_enabled_p ())
5624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5625 "incompatible vector types for invariants\n");
5626 return false;
5628 DUMP_VECT_SCOPE ("vectorizable_conversion");
5629 if (modifier == NONE)
5631 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5632 vect_model_simple_cost (vinfo, stmt_info,
5633 ncopies * (1 + multi_step_cvt),
5634 dt, ndts, slp_node, cost_vec);
5636 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5638 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5639 /* The final packing step produces one vector result per copy. */
5640 unsigned int nvectors
5641 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5642 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5643 multi_step_cvt, cost_vec,
5644 widen_arith);
5646 else
5648 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5649 /* The initial unpacking step produces two vector results
5650 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5651 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5652 unsigned int nvectors
5653 = (slp_node
5654 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5655 : ncopies * 2);
5656 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5657 multi_step_cvt, cost_vec,
5658 widen_arith);
5660 interm_types.release ();
5661 return true;
5664 /* Transform. */
5665 if (dump_enabled_p ())
5666 dump_printf_loc (MSG_NOTE, vect_location,
5667 "transform conversion. ncopies = %d.\n", ncopies);
5669 if (op_type == binary_op)
5671 if (CONSTANT_CLASS_P (op0))
5672 op0 = fold_convert (TREE_TYPE (op1), op0);
5673 else if (CONSTANT_CLASS_P (op1))
5674 op1 = fold_convert (TREE_TYPE (op0), op1);
5677 /* In case of multi-step conversion, we first generate conversion operations
5678 to the intermediate types, and then from those types to the final one.
5679 We create vector destinations for the intermediate type (TYPES) received
5680 from supportable_*_operation, and store them in the correct order
5681 for future use in vect_create_vectorized_*_stmts (). */
5682 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5683 bool widen_or_narrow_float_p
5684 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5685 vec_dest = vect_create_destination_var (scalar_dest,
5686 widen_or_narrow_float_p
5687 ? cvt_type : vectype_out);
5688 vec_dsts.quick_push (vec_dest);
5690 if (multi_step_cvt)
5692 for (i = interm_types.length () - 1;
5693 interm_types.iterate (i, &intermediate_type); i--)
5695 vec_dest = vect_create_destination_var (scalar_dest,
5696 intermediate_type);
5697 vec_dsts.quick_push (vec_dest);
5701 if (cvt_type)
5702 vec_dest = vect_create_destination_var (scalar_dest,
5703 widen_or_narrow_float_p
5704 ? vectype_out : cvt_type);
5706 int ninputs = 1;
5707 if (!slp_node)
5709 if (modifier == WIDEN)
5711 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5713 if (multi_step_cvt)
5714 ninputs = vect_pow2 (multi_step_cvt);
5715 ninputs *= 2;
5719 switch (modifier)
5721 case NONE:
5722 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5723 op0, vectype_in, &vec_oprnds0);
5724 /* vec_dest is the intermediate-type operand when multi_step_cvt is set. */
5725 if (multi_step_cvt)
5727 cvt_op = vec_dest;
5728 vec_dest = vec_dsts[0];
5731 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5733 /* Arguments are ready, create the new vector stmt. */
5734 gimple* new_stmt;
5735 if (multi_step_cvt)
5737 gcc_assert (multi_step_cvt == 1);
5738 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5739 new_temp = make_ssa_name (cvt_op, new_stmt);
5740 gimple_assign_set_lhs (new_stmt, new_temp);
5741 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5742 vop0 = new_temp;
5744 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5745 new_temp = make_ssa_name (vec_dest, new_stmt);
5746 gimple_set_lhs (new_stmt, new_temp);
5747 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5749 if (slp_node)
5750 slp_node->push_vec_def (new_stmt);
5751 else
5752 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5754 break;
5756 case WIDEN:
5757 /* In case the vectorization factor (VF) is bigger than the number
5758 of elements that we can fit in a vectype (nunits), we have to
5759 generate more than one vector stmt - i.e., we need to "unroll"
5760 the vector stmt by a factor of VF/nunits. */
5761 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5762 op0, vectype_in, &vec_oprnds0,
5763 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5764 vectype_in, &vec_oprnds1);
5765 if (code == WIDEN_LSHIFT_EXPR)
5767 int oprnds_size = vec_oprnds0.length ();
5768 vec_oprnds1.create (oprnds_size);
5769 for (i = 0; i < oprnds_size; ++i)
5770 vec_oprnds1.quick_push (op1);
5772 /* Arguments are ready. Create the new vector stmts. */
5773 for (i = multi_step_cvt; i >= 0; i--)
5775 tree this_dest = vec_dsts[i];
5776 code_helper c1 = code1, c2 = code2;
5777 if (i == 0 && codecvt2 != ERROR_MARK)
5779 c1 = codecvt1;
5780 c2 = codecvt2;
5782 if (known_eq (nunits_out, nunits_in))
5783 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5784 stmt_info, this_dest, gsi, c1,
5785 op_type);
5786 else
5787 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5788 &vec_oprnds1, stmt_info,
5789 this_dest, gsi,
5790 c1, c2, op_type);
5793 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5795 gimple *new_stmt;
5796 if (cvt_type)
5798 new_temp = make_ssa_name (vec_dest);
5799 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5800 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5802 else
5803 new_stmt = SSA_NAME_DEF_STMT (vop0);
5805 if (slp_node)
5806 slp_node->push_vec_def (new_stmt);
5807 else
5808 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5810 break;
5812 case NARROW_SRC:
5813 case NARROW_DST:
5814 /* In case the vectorization factor (VF) is bigger than the number
5815 of elements that we can fit in a vectype (nunits), we have to
5816 generate more than one vector stmt - i.e., we need to "unroll"
5817 the vector stmt by a factor of VF/nunits. */
5818 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5819 op0, vectype_in, &vec_oprnds0);
5820 /* Arguments are ready. Create the new vector stmts. */
5821 if (cvt_type && modifier == NARROW_DST)
5822 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5824 new_temp = make_ssa_name (vec_dest);
5825 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5826 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5827 vec_oprnds0[i] = new_temp;
5830 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5831 multi_step_cvt,
5832 stmt_info, vec_dsts, gsi,
5833 slp_node, code1,
5834 modifier == NARROW_SRC);
5835 /* After demoting op0 to cvt_type, convert it to dest. */
5836 if (cvt_type && code == FLOAT_EXPR)
5838 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5840 /* Arguments are ready, create the new vector stmt. */
5841 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5842 gimple *new_stmt
5843 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5844 new_temp = make_ssa_name (vec_dest, new_stmt);
5845 gimple_set_lhs (new_stmt, new_temp);
5846 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5848 /* This is the last step of the conversion sequence. Store the
5849 vectors in SLP_NODE or in vector info of the scalar statement
5850 (or in STMT_VINFO_RELATED_STMT chain). */
5851 if (slp_node)
5852 slp_node->push_vec_def (new_stmt);
5853 else
5854 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5857 break;
5859 if (!slp_node)
5860 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5862 vec_oprnds0.release ();
5863 vec_oprnds1.release ();
5864 interm_types.release ();
5866 return true;
5869 /* Return true if we can assume from the scalar form of STMT_INFO that
5870 neither the scalar nor the vector forms will generate code. STMT_INFO
5871 is known not to involve a data reference. */
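/* For example (a sketch): a plain copy such as _2 = _1, a
   VIEW_CONVERT_EXPR between same-sized types, or an int -> unsigned int
   conversion all satisfy this predicate, since neither the scalar nor
   the vectorized form needs any instruction.  */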
5873 bool
5874 vect_nop_conversion_p (stmt_vec_info stmt_info)
5876 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5877 if (!stmt)
5878 return false;
5880 tree lhs = gimple_assign_lhs (stmt);
5881 tree_code code = gimple_assign_rhs_code (stmt);
5882 tree rhs = gimple_assign_rhs1 (stmt);
5884 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5885 return true;
5887 if (CONVERT_EXPR_CODE_P (code))
5888 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5890 return false;
5893 /* Function vectorizable_assignment.
5895 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5896 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5897 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5898 Return true if STMT_INFO is vectorizable in this way. */
5900 static bool
5901 vectorizable_assignment (vec_info *vinfo,
5902 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5903 gimple **vec_stmt, slp_tree slp_node,
5904 stmt_vector_for_cost *cost_vec)
5906 tree vec_dest;
5907 tree scalar_dest;
5908 tree op;
5909 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5910 tree new_temp;
5911 enum vect_def_type dt[1] = {vect_unknown_def_type};
5912 int ndts = 1;
5913 int ncopies;
5914 int i;
5915 vec<tree> vec_oprnds = vNULL;
5916 tree vop;
5917 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5918 enum tree_code code;
5919 tree vectype_in;
5921 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5922 return false;
5924 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5925 && ! vec_stmt)
5926 return false;
5928 /* Is STMT_INFO a vectorizable assignment? */
5929 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5930 if (!stmt)
5931 return false;
5933 scalar_dest = gimple_assign_lhs (stmt);
5934 if (TREE_CODE (scalar_dest) != SSA_NAME)
5935 return false;
5937 if (STMT_VINFO_DATA_REF (stmt_info))
5938 return false;
5940 code = gimple_assign_rhs_code (stmt);
5941 if (!(gimple_assign_single_p (stmt)
5942 || code == PAREN_EXPR
5943 || CONVERT_EXPR_CODE_P (code)))
5944 return false;
5946 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5947 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5949 /* Multiple types in SLP are handled by creating the appropriate number of
5950 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5951 case of SLP. */
5952 if (slp_node)
5953 ncopies = 1;
5954 else
5955 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5957 gcc_assert (ncopies >= 1);
5959 slp_tree slp_op;
5960 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5961 &dt[0], &vectype_in))
5963 if (dump_enabled_p ())
5964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5965 "use not simple.\n");
5966 return false;
5968 if (!vectype_in)
5969 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5971 /* We can handle VIEW_CONVERT conversions that do not change the number
5972 of elements or the vector size or other conversions when the component
5973 types are nop-convertible. */
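/* For example (a sketch): a VIEW_CONVERT_EXPR from V4SI to V4SF (same
   number of lanes, same vector size) is accepted here, as is an
   int -> unsigned int conversion whose scalar types are nop-convertible;
   a conversion that changes the number of lanes or the bit pattern is
   not handled here.  */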
5974 if (!vectype_in
5975 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5976 || (code == VIEW_CONVERT_EXPR
5977 && maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5978 GET_MODE_SIZE (TYPE_MODE (vectype_in))))
5979 || (CONVERT_EXPR_CODE_P (code)
5980 && !tree_nop_conversion_p (TREE_TYPE (vectype),
5981 TREE_TYPE (vectype_in))))
5982 return false;
5984 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5986 if (dump_enabled_p ())
5987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5988 "can't convert between boolean and non "
5989 "boolean vectors %T\n", TREE_TYPE (op));
5991 return false;
5994 /* We do not handle bit-precision changes. */
5995 if ((CONVERT_EXPR_CODE_P (code)
5996 || code == VIEW_CONVERT_EXPR)
5997 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5998 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5999 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
6000 && !type_has_mode_precision_p (TREE_TYPE (op))))
6001 /* But a conversion that does not change the bit-pattern is ok. */
6002 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6003 && INTEGRAL_TYPE_P (TREE_TYPE (op))
6004 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
6005 > TYPE_PRECISION (TREE_TYPE (op)))
6006 && TYPE_UNSIGNED (TREE_TYPE (op)))
6007 || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
6008 == TYPE_PRECISION (TREE_TYPE (op))))))
6010 if (dump_enabled_p ())
6011 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6012 "type conversion to/from bit-precision "
6013 "unsupported.\n");
6014 return false;
6017 if (!vec_stmt) /* transformation not required. */
6019 if (slp_node
6020 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6022 if (dump_enabled_p ())
6023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6024 "incompatible vector types for invariants\n");
6025 return false;
6027 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6028 DUMP_VECT_SCOPE ("vectorizable_assignment");
6029 if (!vect_nop_conversion_p (stmt_info))
6030 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
6031 cost_vec);
6032 return true;
6035 /* Transform. */
6036 if (dump_enabled_p ())
6037 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6039 /* Handle def. */
6040 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6042 /* Handle use. */
6043 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
6045 /* Arguments are ready. Create the new vector stmt. */
6046 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6048 if (CONVERT_EXPR_CODE_P (code)
6049 || code == VIEW_CONVERT_EXPR)
6050 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6051 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6052 new_temp = make_ssa_name (vec_dest, new_stmt);
6053 gimple_assign_set_lhs (new_stmt, new_temp);
6054 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6055 if (slp_node)
6056 slp_node->push_vec_def (new_stmt);
6057 else
6058 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6060 if (!slp_node)
6061 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6063 vec_oprnds.release ();
6064 return true;
6068 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6069 either as shift by a scalar or by a vector. */
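/* For example (a sketch): the function below first queries the optab for
   a vector shifted by a single scalar amount (optab_scalar) and, if the
   target lacks that, retries with the per-lane variant (optab_vector)
   before checking that the chosen optab has a handler for the mode.  */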
6071 bool
6072 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6075 machine_mode vec_mode;
6076 optab optab;
6077 int icode;
6078 tree vectype;
6080 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6081 if (!vectype)
6082 return false;
6084 optab = optab_for_tree_code (code, vectype, optab_scalar);
6085 if (!optab
6086 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6088 optab = optab_for_tree_code (code, vectype, optab_vector);
6089 if (!optab
6090 || (optab_handler (optab, TYPE_MODE (vectype))
6091 == CODE_FOR_nothing))
6092 return false;
6095 vec_mode = TYPE_MODE (vectype);
6096 icode = (int) optab_handler (optab, vec_mode);
6097 if (icode == CODE_FOR_nothing)
6098 return false;
6100 return true;
6104 /* Function vectorizable_shift.
6106 Check if STMT_INFO performs a shift operation that can be vectorized.
6107 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6108 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6109 Return true if STMT_INFO is vectorizable in this way. */
6111 static bool
6112 vectorizable_shift (vec_info *vinfo,
6113 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6114 gimple **vec_stmt, slp_tree slp_node,
6115 stmt_vector_for_cost *cost_vec)
6117 tree vec_dest;
6118 tree scalar_dest;
6119 tree op0, op1 = NULL;
6120 tree vec_oprnd1 = NULL_TREE;
6121 tree vectype;
6122 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6123 enum tree_code code;
6124 machine_mode vec_mode;
6125 tree new_temp;
6126 optab optab;
6127 int icode;
6128 machine_mode optab_op2_mode;
6129 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6130 int ndts = 2;
6131 poly_uint64 nunits_in;
6132 poly_uint64 nunits_out;
6133 tree vectype_out;
6134 tree op1_vectype;
6135 int ncopies;
6136 int i;
6137 vec<tree> vec_oprnds0 = vNULL;
6138 vec<tree> vec_oprnds1 = vNULL;
6139 tree vop0, vop1;
6140 unsigned int k;
6141 bool scalar_shift_arg = true;
6142 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6143 bool incompatible_op1_vectype_p = false;
6145 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6146 return false;
6148 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6149 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6150 && ! vec_stmt)
6151 return false;
6153 /* Is STMT a vectorizable shift operation? */
6154 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6155 if (!stmt)
6156 return false;
6158 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6159 return false;
6161 code = gimple_assign_rhs_code (stmt);
6163 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6164 || code == RROTATE_EXPR))
6165 return false;
6167 scalar_dest = gimple_assign_lhs (stmt);
6168 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6169 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6171 if (dump_enabled_p ())
6172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6173 "bit-precision shifts not supported.\n");
6174 return false;
6177 slp_tree slp_op0;
6178 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6179 0, &op0, &slp_op0, &dt[0], &vectype))
6181 if (dump_enabled_p ())
6182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6183 "use not simple.\n");
6184 return false;
6186 /* If op0 is an external or constant def, infer the vector type
6187 from the scalar type. */
6188 if (!vectype)
6189 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6190 if (vec_stmt)
6191 gcc_assert (vectype);
6192 if (!vectype)
6194 if (dump_enabled_p ())
6195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6196 "no vectype for scalar type\n");
6197 return false;
6200 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6201 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6202 if (maybe_ne (nunits_out, nunits_in))
6203 return false;
6205 stmt_vec_info op1_def_stmt_info;
6206 slp_tree slp_op1;
6207 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6208 &dt[1], &op1_vectype, &op1_def_stmt_info))
6210 if (dump_enabled_p ())
6211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6212 "use not simple.\n");
6213 return false;
6216 /* Multiple types in SLP are handled by creating the appropriate number of
6217 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6218 case of SLP. */
6219 if (slp_node)
6220 ncopies = 1;
6221 else
6222 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6224 gcc_assert (ncopies >= 1);
6226 /* Determine whether the shift amount is a vector, or scalar. If the
6227 shift/rotate amount is a vector, use the vector/vector shift optabs. */
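/* For example (a sketch): in
       for (i = 0; i < n; i++) a[i] = b[i] << k;
   the shift amount K is loop-invariant, so a vector/scalar shift can be
   used; in a[i] = b[i] << c[i] the amount varies per lane and only the
   vector/vector form applies.  */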
6229 if ((dt[1] == vect_internal_def
6230 || dt[1] == vect_induction_def
6231 || dt[1] == vect_nested_cycle)
6232 && (!slp_node || SLP_TREE_LANES (slp_node) == 1))
6233 scalar_shift_arg = false;
6234 else if (dt[1] == vect_constant_def
6235 || dt[1] == vect_external_def
6236 || dt[1] == vect_internal_def)
6238 /* In SLP we need to check whether the shift count is the same
6239 in all lanes; in loops, if it is a constant or invariant, it is
6240 always a scalar shift. */
6241 if (slp_node)
6243 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6244 stmt_vec_info slpstmt_info;
6246 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6247 if (slpstmt_info)
6249 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6250 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6251 scalar_shift_arg = false;
6254 /* For internal SLP defs we have to make sure we see scalar stmts
6255 for all vector elements.
6256 ??? For different vectors we could resort to a different
6257 scalar shift operand but code-generation below simply always
6258 takes the first. */
6259 if (dt[1] == vect_internal_def
6260 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6261 stmts.length ()))
6262 scalar_shift_arg = false;
6265 /* If the shift amount is computed by a pattern stmt we cannot
6266 use the scalar amount directly thus give up and use a vector
6267 shift. */
6268 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6269 scalar_shift_arg = false;
6271 else
6273 if (dump_enabled_p ())
6274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6275 "operand mode requires invariant argument.\n");
6276 return false;
6279 /* Vector shifted by vector. */
6280 bool was_scalar_shift_arg = scalar_shift_arg;
6281 if (!scalar_shift_arg)
6283 optab = optab_for_tree_code (code, vectype, optab_vector);
6284 if (dump_enabled_p ())
6285 dump_printf_loc (MSG_NOTE, vect_location,
6286 "vector/vector shift/rotate found.\n");
6288 if (!op1_vectype)
6289 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6290 slp_op1);
6291 incompatible_op1_vectype_p
6292 = (op1_vectype == NULL_TREE
6293 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6294 TYPE_VECTOR_SUBPARTS (vectype))
6295 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6296 if (incompatible_op1_vectype_p
6297 && (!slp_node
6298 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6299 || slp_op1->refcnt != 1))
6301 if (dump_enabled_p ())
6302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6303 "unusable type for last operand in"
6304 " vector/vector shift/rotate.\n");
6305 return false;
6308 /* See if the machine has a vector shifted by scalar insn and if not
6309 then see if it has a vector shifted by vector insn. */
6310 else
6312 optab = optab_for_tree_code (code, vectype, optab_scalar);
6313 if (optab
6314 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6316 if (dump_enabled_p ())
6317 dump_printf_loc (MSG_NOTE, vect_location,
6318 "vector/scalar shift/rotate found.\n");
6320 else
6322 optab = optab_for_tree_code (code, vectype, optab_vector);
6323 if (optab
6324 && (optab_handler (optab, TYPE_MODE (vectype))
6325 != CODE_FOR_nothing))
6327 scalar_shift_arg = false;
6329 if (dump_enabled_p ())
6330 dump_printf_loc (MSG_NOTE, vect_location,
6331 "vector/vector shift/rotate found.\n");
6333 if (!op1_vectype)
6334 op1_vectype = get_vectype_for_scalar_type (vinfo,
6335 TREE_TYPE (op1),
6336 slp_op1);
6338 /* Unlike the other binary operators, shifts/rotates have
6339 the rhs being int, instead of the same type as the lhs,
6340 so make sure the scalar is the right type if we are
6341 dealing with vectors of long long/long/short/char. */
6342 incompatible_op1_vectype_p
6343 = (!op1_vectype
6344 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6345 TREE_TYPE (op1)));
6346 if (incompatible_op1_vectype_p
6347 && dt[1] == vect_internal_def)
6349 if (dump_enabled_p ())
6350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6351 "unusable type for last operand in"
6352 " vector/vector shift/rotate.\n");
6353 return false;
6359 /* Supportable by target? */
6360 if (!optab)
6362 if (dump_enabled_p ())
6363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6364 "no optab.\n");
6365 return false;
6367 vec_mode = TYPE_MODE (vectype);
6368 icode = (int) optab_handler (optab, vec_mode);
6369 if (icode == CODE_FOR_nothing)
6371 if (dump_enabled_p ())
6372 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6373 "op not supported by target.\n");
6374 return false;
6376 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
6377 if (vect_emulated_vector_p (vectype))
6378 return false;
6380 if (!vec_stmt) /* transformation not required. */
6382 if (slp_node
6383 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6384 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6385 && (!incompatible_op1_vectype_p
6386 || dt[1] == vect_constant_def)
6387 && !vect_maybe_update_slp_op_vectype
6388 (slp_op1,
6389 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6391 if (dump_enabled_p ())
6392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6393 "incompatible vector types for invariants\n");
6394 return false;
6396 /* Now adjust the constant shift amount in place. */
6397 if (slp_node
6398 && incompatible_op1_vectype_p
6399 && dt[1] == vect_constant_def)
6401 for (unsigned i = 0;
6402 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6404 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6405 = fold_convert (TREE_TYPE (vectype),
6406 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6407 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6408 == INTEGER_CST));
6411 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6412 DUMP_VECT_SCOPE ("vectorizable_shift");
6413 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6414 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6415 return true;
6418 /* Transform. */
6420 if (dump_enabled_p ())
6421 dump_printf_loc (MSG_NOTE, vect_location,
6422 "transform binary/unary operation.\n");
6424 if (incompatible_op1_vectype_p && !slp_node)
6426 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6427 op1 = fold_convert (TREE_TYPE (vectype), op1);
6428 if (dt[1] != vect_constant_def)
6429 op1 = vect_init_vector (vinfo, stmt_info, op1,
6430 TREE_TYPE (vectype), NULL);
6433 /* Handle def. */
6434 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6436 if (scalar_shift_arg && dt[1] != vect_internal_def)
6438 /* Vector shl and shr insn patterns can be defined with scalar
6439 operand 2 (shift operand). In this case, use constant or loop
6440 invariant op1 directly, without extending it to vector mode
6441 first. */
6442 optab_op2_mode = insn_data[icode].operand[2].mode;
6443 if (!VECTOR_MODE_P (optab_op2_mode))
6445 if (dump_enabled_p ())
6446 dump_printf_loc (MSG_NOTE, vect_location,
6447 "operand 1 using scalar mode.\n");
6448 vec_oprnd1 = op1;
6449 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6450 vec_oprnds1.quick_push (vec_oprnd1);
6451 /* Store vec_oprnd1 for every vector stmt to be created.
6452 We check during the analysis that all the shift arguments
6453 are the same.
6454 TODO: Allow different constants for different vector
6455 stmts generated for an SLP instance. */
6456 for (k = 0;
6457 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6458 vec_oprnds1.quick_push (vec_oprnd1);
6461 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6463 if (was_scalar_shift_arg)
6465 /* If the argument was the same in all lanes create
6466 the correctly typed vector shift amount directly. */
6467 op1 = fold_convert (TREE_TYPE (vectype), op1);
6468 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6469 !loop_vinfo ? gsi : NULL);
6470 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6471 !loop_vinfo ? gsi : NULL);
6472 vec_oprnds1.create (slp_node->vec_stmts_size);
6473 for (k = 0; k < slp_node->vec_stmts_size; k++)
6474 vec_oprnds1.quick_push (vec_oprnd1);
6476 else if (dt[1] == vect_constant_def)
6477 /* The constant shift amount has been adjusted in place. */
6479 else
6480 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6483 /* vec_oprnd1 is available if operand 1 should be of a scalar type
6484 (a special case for certain kinds of vector shifts); otherwise,
6485 operand 1 should be of a vector type (the usual case). */
6486 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6487 op0, &vec_oprnds0,
6488 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6490 /* Arguments are ready. Create the new vector stmt. */
6491 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6493 /* For internal defs where we need to use a scalar shift arg
6494 extract the first lane. */
6495 if (scalar_shift_arg && dt[1] == vect_internal_def)
6497 vop1 = vec_oprnds1[0];
6498 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6499 gassign *new_stmt
6500 = gimple_build_assign (new_temp,
6501 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6502 vop1,
6503 TYPE_SIZE (TREE_TYPE (new_temp)),
6504 bitsize_zero_node));
6505 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6506 vop1 = new_temp;
6508 else
6509 vop1 = vec_oprnds1[i];
6510 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6511 new_temp = make_ssa_name (vec_dest, new_stmt);
6512 gimple_assign_set_lhs (new_stmt, new_temp);
6513 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6514 if (slp_node)
6515 slp_node->push_vec_def (new_stmt);
6516 else
6517 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6520 if (!slp_node)
6521 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6523 vec_oprnds0.release ();
6524 vec_oprnds1.release ();
6526 return true;
6529 /* Function vectorizable_operation.
6531 Check if STMT_INFO performs a binary, unary or ternary operation that can
6532 be vectorized.
6533 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6534 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6535 Return true if STMT_INFO is vectorizable in this way. */
6537 static bool
6538 vectorizable_operation (vec_info *vinfo,
6539 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6540 gimple **vec_stmt, slp_tree slp_node,
6541 stmt_vector_for_cost *cost_vec)
6543 tree vec_dest;
6544 tree scalar_dest;
6545 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6546 tree vectype;
6547 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6548 enum tree_code code, orig_code;
6549 machine_mode vec_mode;
6550 tree new_temp;
6551 int op_type;
6552 optab optab;
6553 bool target_support_p;
6554 enum vect_def_type dt[3]
6555 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6556 int ndts = 3;
6557 poly_uint64 nunits_in;
6558 poly_uint64 nunits_out;
6559 tree vectype_out;
6560 unsigned int ncopies;
6561 int vec_num;
6562 int i;
6563 vec<tree> vec_oprnds0 = vNULL;
6564 vec<tree> vec_oprnds1 = vNULL;
6565 vec<tree> vec_oprnds2 = vNULL;
6566 tree vop0, vop1, vop2;
6567 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6569 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6570 return false;
6572 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6573 && ! vec_stmt)
6574 return false;
6576 /* Is STMT a vectorizable binary/unary operation? */
6577 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6578 if (!stmt)
6579 return false;
6581 /* Loads and stores are handled in vectorizable_{load,store}. */
6582 if (STMT_VINFO_DATA_REF (stmt_info))
6583 return false;
6585 orig_code = code = gimple_assign_rhs_code (stmt);
6587 /* Shifts are handled in vectorizable_shift. */
6588 if (code == LSHIFT_EXPR
6589 || code == RSHIFT_EXPR
6590 || code == LROTATE_EXPR
6591 || code == RROTATE_EXPR)
6592 return false;
6594 /* Comparisons are handled in vectorizable_comparison. */
6595 if (TREE_CODE_CLASS (code) == tcc_comparison)
6596 return false;
6598 /* Conditions are handled in vectorizable_condition. */
6599 if (code == COND_EXPR)
6600 return false;
6602 /* For pointer addition and subtraction, we should use the normal
6603 plus and minus for the vector operation. */
6604 if (code == POINTER_PLUS_EXPR)
6605 code = PLUS_EXPR;
6606 if (code == POINTER_DIFF_EXPR)
6607 code = MINUS_EXPR;
6609 /* Support only unary or binary operations. */
6610 op_type = TREE_CODE_LENGTH (code);
6611 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6613 if (dump_enabled_p ())
6614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6615 "num. args = %d (not unary/binary/ternary op).\n",
6616 op_type);
6617 return false;
6620 scalar_dest = gimple_assign_lhs (stmt);
6621 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6623 /* Most operations cannot handle bit-precision types without extra
6624 truncations. */
6625 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6626 if (!mask_op_p
6627 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6628 /* Exception are bitwise binary operations. */
6629 && code != BIT_IOR_EXPR
6630 && code != BIT_XOR_EXPR
6631 && code != BIT_AND_EXPR)
6633 if (dump_enabled_p ())
6634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6635 "bit-precision arithmetic not supported.\n");
6636 return false;
6639 slp_tree slp_op0;
6640 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6641 0, &op0, &slp_op0, &dt[0], &vectype))
6643 if (dump_enabled_p ())
6644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6645 "use not simple.\n");
6646 return false;
6648 bool is_invariant = (dt[0] == vect_external_def
6649 || dt[0] == vect_constant_def);
6650 /* If op0 is an external or constant def, infer the vector type
6651 from the scalar type. */
6652 if (!vectype)
6654 /* For a boolean type we cannot determine the vectype from an
6655 invariant value (we don't know whether it is a vector
6656 of booleans or a vector of integers). We use the output
6657 vectype because operations on booleans don't change
6658 the type. */
6659 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6661 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6663 if (dump_enabled_p ())
6664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6665 "not supported operation on bool value.\n");
6666 return false;
6668 vectype = vectype_out;
6670 else
6671 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6672 slp_node);
6674 if (vec_stmt)
6675 gcc_assert (vectype);
6676 if (!vectype)
6678 if (dump_enabled_p ())
6679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6680 "no vectype for scalar type %T\n",
6681 TREE_TYPE (op0));
6683 return false;
6686 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6687 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6688 if (maybe_ne (nunits_out, nunits_in)
6689 || !tree_nop_conversion_p (TREE_TYPE (vectype_out), TREE_TYPE (vectype)))
6690 return false;
6692 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6693 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6694 if (op_type == binary_op || op_type == ternary_op)
6696 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6697 1, &op1, &slp_op1, &dt[1], &vectype2))
6699 if (dump_enabled_p ())
6700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6701 "use not simple.\n");
6702 return false;
6704 is_invariant &= (dt[1] == vect_external_def
6705 || dt[1] == vect_constant_def);
6706 if (vectype2
6707 && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2))
6708 || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6709 TREE_TYPE (vectype2))))
6710 return false;
6712 if (op_type == ternary_op)
6714 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6715 2, &op2, &slp_op2, &dt[2], &vectype3))
6717 if (dump_enabled_p ())
6718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6719 "use not simple.\n");
6720 return false;
6722 is_invariant &= (dt[2] == vect_external_def
6723 || dt[2] == vect_constant_def);
6724 if (vectype3
6725 && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3))
6726 || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6727 TREE_TYPE (vectype3))))
6728 return false;
6731 /* Multiple types in SLP are handled by creating the appropriate number of
6732 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6733 case of SLP. */
6734 if (slp_node)
6736 ncopies = 1;
6737 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6739 else
6741 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6742 vec_num = 1;
6745 gcc_assert (ncopies >= 1);
6747 /* Reject attempts to combine mask types with nonmask types, e.g. if
6748 we have an AND between a (nonmask) boolean loaded from memory and
6749 a (mask) boolean result of a comparison.
6751 TODO: We could easily fix these cases up using pattern statements. */
6752 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6753 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6754 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6756 if (dump_enabled_p ())
6757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6758 "mixed mask and nonmask vector types\n");
6759 return false;
6762 /* Supportable by target? */
6764 vec_mode = TYPE_MODE (vectype);
6765 if (code == MULT_HIGHPART_EXPR)
6766 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6767 else
6769 optab = optab_for_tree_code (code, vectype, optab_default);
6770 if (!optab)
6772 if (dump_enabled_p ())
6773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6774 "no optab.\n");
6775 return false;
6777 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6778 || optab_libfunc (optab, vec_mode));
6781 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6782 if (!target_support_p || using_emulated_vectors_p)
6784 if (dump_enabled_p ())
6785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6786 "op not supported by target.\n");
6787 /* When vec_mode is not a vector mode and we have verified that the
6788 ops we do not have to lower (like AND) are natively supported, let
6789 those through even when the mode isn't word_mode. For
6790 ops we do have to lower, the lowering code assumes we are
6791 dealing with word_mode. */
6792 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
6793 || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6794 || !target_support_p)
6795 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6796 /* Check only during analysis. */
6797 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6799 if (dump_enabled_p ())
6800 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6801 return false;
6803 if (dump_enabled_p ())
6804 dump_printf_loc (MSG_NOTE, vect_location,
6805 "proceeding using word mode.\n");
6806 using_emulated_vectors_p = true;
6809 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6810 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6811 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6812 internal_fn cond_fn = get_conditional_internal_fn (code);
6813 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6815 /* If operating on inactive elements could generate spurious traps,
6816 we need to restrict the operation to active lanes. Note that this
6817 specifically doesn't apply to unhoisted invariants, since they
6818 operate on the same value for every lane.
6820 Similarly, if this operation is part of a reduction, a fully-masked
6821 loop should only change the active lanes of the reduction chain,
6822 keeping the inactive lanes as-is. */
6823 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6824 || reduc_idx >= 0);
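/* Illustrative sketch (not from this file): with a fully-masked loop a
   trapping operation such as a division is emitted through its
   conditional internal function, e.g.
       vect_res = .COND_DIV (loop_mask, vop0, vop1, else_value);
   so that inactive lanes neither trap nor disturb a reduction chain;
   the len variant (.COND_LEN_*) additionally takes a length and bias.  */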
6826 if (!vec_stmt) /* transformation not required. */
6828 if (loop_vinfo
6829 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6830 && mask_out_inactive)
6832 if (cond_len_fn != IFN_LAST
6833 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6834 OPTIMIZE_FOR_SPEED))
6835 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6837 else if (cond_fn != IFN_LAST
6838 && direct_internal_fn_supported_p (cond_fn, vectype,
6839 OPTIMIZE_FOR_SPEED))
6840 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6841 vectype, NULL);
6842 else
6844 if (dump_enabled_p ())
6845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6846 "can't use a fully-masked loop because no"
6847 " conditional operation is available.\n");
6848 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6852 /* Put types on constant and invariant SLP children. */
6853 if (slp_node
6854 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6855 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6856 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6858 if (dump_enabled_p ())
6859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6860 "incompatible vector types for invariants\n");
6861 return false;
6864 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6865 DUMP_VECT_SCOPE ("vectorizable_operation");
6866 vect_model_simple_cost (vinfo, stmt_info,
6867 ncopies, dt, ndts, slp_node, cost_vec);
6868 if (using_emulated_vectors_p)
6870 /* The above vect_model_simple_cost call handles constants
6871 in the prologue and (mis-)costs one of the stmts as
6872 vector stmt. See below for the actual lowering that will
6873 be applied. */
6874 unsigned n
6875 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6876 switch (code)
6878 case PLUS_EXPR:
6879 n *= 5;
6880 break;
6881 case MINUS_EXPR:
6882 n *= 6;
6883 break;
6884 case NEGATE_EXPR:
6885 n *= 4;
6886 break;
6887 default:
6888 /* Bit operations do not have extra cost and are accounted
6889 as vector stmt by vect_model_simple_cost. */
6890 n = 0;
6891 break;
6893 if (n != 0)
6895 /* We also need to materialize two large constants. */
6896 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6897 0, vect_prologue);
6898 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6899 0, vect_body);
6902 return true;
6905 /* Transform. */
6907 if (dump_enabled_p ())
6908 dump_printf_loc (MSG_NOTE, vect_location,
6909 "transform binary/unary operation.\n");
6911 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6912 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6914 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6915 vectors with unsigned elements, but the result is signed. So, we
6916 need to compute the MINUS_EXPR into vectype temporary and
6917 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6918 tree vec_cvt_dest = NULL_TREE;
6919 if (orig_code == POINTER_DIFF_EXPR)
6921 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6922 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6924 /* Handle def. */
6925 else
6926 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6928 /* In case the vectorization factor (VF) is bigger than the number
6929 of elements that we can fit in a vectype (nunits), we have to generate
6930 more than one vector stmt - i.e., we need to "unroll" the
6931 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6932 from one copy of the vector stmt to the next, in the field
6933 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6934 stages to find the correct vector defs to be used when vectorizing
6935 stmts that use the defs of the current stmt. The example below
6936 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6937 we need to create 4 vectorized stmts):
6939 before vectorization:
6940 RELATED_STMT VEC_STMT
6941 S1: x = memref - -
6942 S2: z = x + 1 - -
6944 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6945 there):
6946 RELATED_STMT VEC_STMT
6947 VS1_0: vx0 = memref0 VS1_1 -
6948 VS1_1: vx1 = memref1 VS1_2 -
6949 VS1_2: vx2 = memref2 VS1_3 -
6950 VS1_3: vx3 = memref3 - -
6951 S1: x = load - VS1_0
6952 S2: z = x + 1 - -
6954 step2: vectorize stmt S2 (done here):
6955 To vectorize stmt S2 we first need to find the relevant vector
6956 def for the first operand 'x'. This is, as usual, obtained from
6957 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6958 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6959 relevant vector def 'vx0'. Having found 'vx0' we can generate
6960 the vector stmt VS2_0, and as usual, record it in the
6961 STMT_VINFO_VEC_STMT of stmt S2.
6962 When creating the second copy (VS2_1), we obtain the relevant vector
6963 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6964 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6965 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6966 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6967 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6968 chain of stmts and pointers:
6969 RELATED_STMT VEC_STMT
6970 VS1_0: vx0 = memref0 VS1_1 -
6971 VS1_1: vx1 = memref1 VS1_2 -
6972 VS1_2: vx2 = memref2 VS1_3 -
6973 VS1_3: vx3 = memref3 - -
6974 S1: x = load - VS1_0
6975 VS2_0: vz0 = vx0 + v1 VS2_1 -
6976 VS2_1: vz1 = vx1 + v1 VS2_2 -
6977 VS2_2: vz2 = vx2 + v1 VS2_3 -
6978 VS2_3: vz3 = vx3 + v1 - -
6979 S2: z = x + 1 - VS2_0 */
6981 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6982 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6983 /* Arguments are ready. Create the new vector stmt. */
6984 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6986 gimple *new_stmt = NULL;
6987 vop1 = ((op_type == binary_op || op_type == ternary_op)
6988 ? vec_oprnds1[i] : NULL_TREE);
6989 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6990 if (using_emulated_vectors_p
6991 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6993 /* Lower the operation. This follows vector lowering. */
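/* Hedged worked example of the lowering below, assuming a 32-bit word
   containing two 16-bit elements (width == 16):
     low_bits  == 0x7fff7fff  (every bit of each element except its MSB)
     high_bits == 0x80008000  (only the MSB of each element)
   For PLUS_EXPR the per-element sums are formed as
     result_low = (a & low_bits) + (b & low_bits)
   which cannot carry across an element boundary, and the discarded sign
   bits are patched back in afterwards by the XOR with SIGNS.  */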
6994 unsigned int width = vector_element_bits (vectype);
6995 tree inner_type = TREE_TYPE (vectype);
6996 tree word_type
6997 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6998 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6999 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
7000 tree high_bits
7001 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
7002 tree wvop0 = make_ssa_name (word_type);
7003 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
7004 build1 (VIEW_CONVERT_EXPR,
7005 word_type, vop0));
7006 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7007 tree result_low, signs;
7008 if (code == PLUS_EXPR || code == MINUS_EXPR)
7010 tree wvop1 = make_ssa_name (word_type);
7011 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
7012 build1 (VIEW_CONVERT_EXPR,
7013 word_type, vop1));
7014 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7015 signs = make_ssa_name (word_type);
7016 new_stmt = gimple_build_assign (signs,
7017 BIT_XOR_EXPR, wvop0, wvop1);
7018 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7019 tree b_low = make_ssa_name (word_type);
7020 new_stmt = gimple_build_assign (b_low,
7021 BIT_AND_EXPR, wvop1, low_bits);
7022 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7023 tree a_low = make_ssa_name (word_type);
7024 if (code == PLUS_EXPR)
7025 new_stmt = gimple_build_assign (a_low,
7026 BIT_AND_EXPR, wvop0, low_bits);
7027 else
7028 new_stmt = gimple_build_assign (a_low,
7029 BIT_IOR_EXPR, wvop0, high_bits);
7030 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7031 if (code == MINUS_EXPR)
7033 new_stmt = gimple_build_assign (NULL_TREE,
7034 BIT_NOT_EXPR, signs);
7035 signs = make_ssa_name (word_type);
7036 gimple_assign_set_lhs (new_stmt, signs);
7037 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7039 new_stmt = gimple_build_assign (NULL_TREE,
7040 BIT_AND_EXPR, signs, high_bits);
7041 signs = make_ssa_name (word_type);
7042 gimple_assign_set_lhs (new_stmt, signs);
7043 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7044 result_low = make_ssa_name (word_type);
7045 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7046 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7048 else
7050 tree a_low = make_ssa_name (word_type);
7051 new_stmt = gimple_build_assign (a_low,
7052 BIT_AND_EXPR, wvop0, low_bits);
7053 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7054 signs = make_ssa_name (word_type);
7055 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7056 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7057 new_stmt = gimple_build_assign (NULL_TREE,
7058 BIT_AND_EXPR, signs, high_bits);
7059 signs = make_ssa_name (word_type);
7060 gimple_assign_set_lhs (new_stmt, signs);
7061 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7062 result_low = make_ssa_name (word_type);
7063 new_stmt = gimple_build_assign (result_low,
7064 MINUS_EXPR, high_bits, a_low);
7065 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7067 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7068 signs);
7069 result_low = make_ssa_name (word_type);
7070 gimple_assign_set_lhs (new_stmt, result_low);
7071 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7072 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7073 build1 (VIEW_CONVERT_EXPR,
7074 vectype, result_low));
7075 new_temp = make_ssa_name (vectype);
7076 gimple_assign_set_lhs (new_stmt, new_temp);
7077 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7079 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7081 tree mask;
7082 if (masked_loop_p)
7083 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7084 vec_num * ncopies, vectype, i);
7085 else
7086 /* Dummy mask. */
7087 mask = build_minus_one_cst (truth_type_for (vectype));
7088 auto_vec<tree> vops (6);
7089 vops.quick_push (mask);
7090 vops.quick_push (vop0);
7091 if (vop1)
7092 vops.quick_push (vop1);
7093 if (vop2)
7094 vops.quick_push (vop2);
7095 if (reduc_idx >= 0)
7097 /* Perform the operation on active elements only and take
7098 inactive elements from the reduction chain input. */
7099 gcc_assert (!vop2);
7100 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7102 else
7104 auto else_value = targetm.preferred_else_value
7105 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7106 vops.quick_push (else_value);
7108 if (len_loop_p)
7110 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7111 vec_num * ncopies, vectype, i, 1);
7112 signed char biasval
7113 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7114 tree bias = build_int_cst (intQI_type_node, biasval);
7115 vops.quick_push (len);
7116 vops.quick_push (bias);
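/* E.g. for a PLUS_EXPR this builds
     new_temp = .COND_ADD (mask, vop0, vop1, else_value);
   or, with lengths,
     new_temp = .COND_LEN_ADD (mask, vop0, vop1, else_value, len, bias);
   where else_value is either the reduction chain input or the target's
   preferred else value, taken by the inactive lanes.  */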
7118 gcall *call
7119 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7120 : cond_len_fn,
7121 vops);
7122 new_temp = make_ssa_name (vec_dest, call);
7123 gimple_call_set_lhs (call, new_temp);
7124 gimple_call_set_nothrow (call, true);
7125 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7126 new_stmt = call;
7128 else
7130 tree mask = NULL_TREE;
7131 /* When combining two masks, check whether either of them is elsewhere
7132 combined with a loop mask; if so, we can mark the new combined mask
7133 as not needing to be combined with a loop mask again. */
7134 if (masked_loop_p
7135 && code == BIT_AND_EXPR
7136 && VECTOR_BOOLEAN_TYPE_P (vectype))
7138 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7139 ncopies}))
7141 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7142 vec_num * ncopies, vectype, i);
7144 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7145 vop0, gsi);
7148 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7149 ncopies }))
7151 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7152 vec_num * ncopies, vectype, i);
7154 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7155 vop1, gsi);
7159 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7160 new_temp = make_ssa_name (vec_dest, new_stmt);
7161 gimple_assign_set_lhs (new_stmt, new_temp);
7162 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7163 if (using_emulated_vectors_p)
7164 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7166 /* Enter the combined value into the vector cond hash so we don't
7167 AND it with a loop mask again. */
7168 if (mask)
7169 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7172 if (vec_cvt_dest)
7174 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7175 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7176 new_temp);
7177 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7178 gimple_assign_set_lhs (new_stmt, new_temp);
7179 vect_finish_stmt_generation (vinfo, stmt_info,
7180 new_stmt, gsi);
7183 if (slp_node)
7184 slp_node->push_vec_def (new_stmt);
7185 else
7186 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7189 if (!slp_node)
7190 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7192 vec_oprnds0.release ();
7193 vec_oprnds1.release ();
7194 vec_oprnds2.release ();
7196 return true;
7199 /* A helper function to ensure data reference DR_INFO's base alignment. */
7201 static void
7202 ensure_base_align (dr_vec_info *dr_info)
7204 /* Alignment is only analyzed for the first element of a DR group,
7205 so use that to determine the base alignment we need to enforce. */
7206 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7207 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7209 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7211 if (dr_info->base_misaligned)
7213 tree base_decl = dr_info->base_decl;
7215 // We should only be able to increase the alignment of a base object if
7216 // we know what its new alignment should be at compile time.
7217 unsigned HOST_WIDE_INT align_base_to =
7218 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7220 if (decl_in_symtab_p (base_decl))
7221 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7222 else if (DECL_ALIGN (base_decl) < align_base_to)
7224 SET_DECL_ALIGN (base_decl, align_base_to);
7225 DECL_USER_ALIGN (base_decl) = 1;
7227 dr_info->base_misaligned = false;
7232 /* Function get_group_alias_ptr_type.
7234 Return the alias type for the group starting at FIRST_STMT_INFO. */
7236 static tree
7237 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7239 struct data_reference *first_dr, *next_dr;
7241 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7242 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7243 while (next_stmt_info)
7245 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7246 if (get_alias_set (DR_REF (first_dr))
7247 != get_alias_set (DR_REF (next_dr)))
7249 if (dump_enabled_p ())
7250 dump_printf_loc (MSG_NOTE, vect_location,
7251 "conflicting alias set types.\n");
7252 return ptr_type_node;
7254 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7256 return reference_alias_ptr_type (DR_REF (first_dr));
7260 /* Function scan_operand_equal_p.
7262 Helper function for check_scan_store. Compare two references
7263 with .GOMP_SIMD_LANE bases. */
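/* For example, in the combiner pattern matched by check_scan_store both
   the store D.2043[_25] = _28 and the load _26 = D.2043[_25] (illustrative
   names) refer to the same element of the same "omp simd array" and are
   considered equal here.  */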
7265 static bool
7266 scan_operand_equal_p (tree ref1, tree ref2)
7268 tree ref[2] = { ref1, ref2 };
7269 poly_int64 bitsize[2], bitpos[2];
7270 tree offset[2], base[2];
7271 for (int i = 0; i < 2; ++i)
7273 machine_mode mode;
7274 int unsignedp, reversep, volatilep = 0;
7275 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7276 &offset[i], &mode, &unsignedp,
7277 &reversep, &volatilep);
7278 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7279 return false;
7280 if (TREE_CODE (base[i]) == MEM_REF
7281 && offset[i] == NULL_TREE
7282 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7284 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7285 if (is_gimple_assign (def_stmt)
7286 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7287 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7288 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7290 if (maybe_ne (mem_ref_offset (base[i]), 0))
7291 return false;
7292 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7293 offset[i] = gimple_assign_rhs2 (def_stmt);
7298 if (!operand_equal_p (base[0], base[1], 0))
7299 return false;
7300 if (maybe_ne (bitsize[0], bitsize[1]))
7301 return false;
7302 if (offset[0] != offset[1])
7304 if (!offset[0] || !offset[1])
7305 return false;
7306 if (!operand_equal_p (offset[0], offset[1], 0))
7308 tree step[2];
7309 for (int i = 0; i < 2; ++i)
7311 step[i] = integer_one_node;
7312 if (TREE_CODE (offset[i]) == SSA_NAME)
7314 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7315 if (is_gimple_assign (def_stmt)
7316 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7317 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7318 == INTEGER_CST))
7320 step[i] = gimple_assign_rhs2 (def_stmt);
7321 offset[i] = gimple_assign_rhs1 (def_stmt);
7324 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7326 step[i] = TREE_OPERAND (offset[i], 1);
7327 offset[i] = TREE_OPERAND (offset[i], 0);
7329 tree rhs1 = NULL_TREE;
7330 if (TREE_CODE (offset[i]) == SSA_NAME)
7332 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7333 if (gimple_assign_cast_p (def_stmt))
7334 rhs1 = gimple_assign_rhs1 (def_stmt);
7336 else if (CONVERT_EXPR_P (offset[i]))
7337 rhs1 = TREE_OPERAND (offset[i], 0);
7338 if (rhs1
7339 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7340 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7341 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7342 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7343 offset[i] = rhs1;
7345 if (!operand_equal_p (offset[0], offset[1], 0)
7346 || !operand_equal_p (step[0], step[1], 0))
7347 return false;
7350 return true;
7354 enum scan_store_kind {
7355 /* Normal permutation. */
7356 scan_store_kind_perm,
7358 /* Whole vector left shift permutation with zero init. */
7359 scan_store_kind_lshift_zero,
7361 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7362 scan_store_kind_lshift_cond
7365 /* Function scan_store_can_perm_p.
7367 Verify if we can perform the needed permutations or whole vector shifts.
7368 Return -1 on failure, otherwise exact log2 of vectype's nunits.
7369 USE_WHOLE_VECTOR, if non-NULL, records which scan_store_kind
7370 operation to use at each step. */
7372 static int
7373 scan_store_can_perm_p (tree vectype, tree init,
7374 vec<enum scan_store_kind> *use_whole_vector = NULL)
7376 enum machine_mode vec_mode = TYPE_MODE (vectype);
7377 unsigned HOST_WIDE_INT nunits;
7378 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7379 return -1;
7380 int units_log2 = exact_log2 (nunits);
7381 if (units_log2 <= 0)
7382 return -1;
7384 int i;
7385 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7386 for (i = 0; i <= units_log2; ++i)
7388 unsigned HOST_WIDE_INT j, k;
7389 enum scan_store_kind kind = scan_store_kind_perm;
7390 vec_perm_builder sel (nunits, nunits, 1);
7391 sel.quick_grow (nunits);
7392 if (i == units_log2)
7394 for (j = 0; j < nunits; ++j)
7395 sel[j] = nunits - 1;
7397 else
7399 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7400 sel[j] = j;
7401 for (k = 0; j < nunits; ++j, ++k)
7402 sel[j] = nunits + k;
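/* For example, with nunits == 8, i == 1 builds the selector
   { 0, 1, 8, 9, 10, 11, 12, 13 } and i == units_log2 builds the
   broadcast selector { 7, 7, 7, 7, 7, 7, 7, 7 }.  */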
7404 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7405 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7407 if (i == units_log2)
7408 return -1;
7410 if (whole_vector_shift_kind == scan_store_kind_perm)
7412 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7413 return -1;
7414 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7415 /* Whole vector shifts shift in zeros, so if init is an all-zero
7416 constant, there is no need to do anything further. */
7417 if ((TREE_CODE (init) != INTEGER_CST
7418 && TREE_CODE (init) != REAL_CST)
7419 || !initializer_zerop (init))
7421 tree masktype = truth_type_for (vectype);
7422 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7423 return -1;
7424 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7427 kind = whole_vector_shift_kind;
7429 if (use_whole_vector)
7431 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7432 use_whole_vector->safe_grow_cleared (i, true);
7433 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7434 use_whole_vector->safe_push (kind);
7438 return units_log2;
7442 /* Function check_scan_store.
7444 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7446 static bool
7447 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7448 enum vect_def_type rhs_dt, slp_tree slp_node, tree mask,
7449 vect_memory_access_type memory_access_type)
7451 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7452 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7453 tree ref_type;
7455 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7456 if ((slp_node && SLP_TREE_LANES (slp_node) > 1)
7457 || mask
7458 || memory_access_type != VMAT_CONTIGUOUS
7459 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7460 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7461 || loop_vinfo == NULL
7462 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7463 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7464 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7465 || !integer_zerop (DR_INIT (dr_info->dr))
7466 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7467 || !alias_sets_conflict_p (get_alias_set (vectype),
7468 get_alias_set (TREE_TYPE (ref_type))))
7470 if (dump_enabled_p ())
7471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7472 "unsupported OpenMP scan store.\n");
7473 return false;
7476 /* We need to pattern match code built by OpenMP lowering and simplified
7477 by subsequent optimizations into something we can handle.
7478 #pragma omp simd reduction(inscan,+:r)
7479 for (...)
7481 r += something ();
7482 #pragma omp scan inclusive (r)
7483 use (r);
7485 shall have body with:
7486 // Initialization for input phase, store the reduction initializer:
7487 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7488 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7489 D.2042[_21] = 0;
7490 // Actual input phase:
7492 r.0_5 = D.2042[_20];
7493 _6 = _4 + r.0_5;
7494 D.2042[_20] = _6;
7495 // Initialization for scan phase:
7496 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7497 _26 = D.2043[_25];
7498 _27 = D.2042[_25];
7499 _28 = _26 + _27;
7500 D.2043[_25] = _28;
7501 D.2042[_25] = _28;
7502 // Actual scan phase:
7504 r.1_8 = D.2042[_20];
7506 The "omp simd array" variable D.2042 holds the privatized copy used
7507 inside of the loop and D.2043 is another one that holds copies of
7508 the current original list item. The separate GOMP_SIMD_LANE ifn
7509 kinds are there in order to allow optimizing the initializer store
7510 and combiner sequence, e.g. if it is originally some C++ish user
7511 defined reduction, while still allowing the vectorizer to pattern
7512 recognize it and turn it into the appropriate vectorized scan.
7514 For exclusive scan, this is slightly different:
7515 #pragma omp simd reduction(inscan,+:r)
7516 for (...)
7518 use (r);
7519 #pragma omp scan exclusive (r)
7520 r += something ();
7522 shall have body with:
7523 // Initialization for input phase, store the reduction initializer:
7524 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7525 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7526 D.2042[_21] = 0;
7527 // Actual input phase:
7529 r.0_5 = D.2042[_20];
7530 _6 = _4 + r.0_5;
7531 D.2042[_20] = _6;
7532 // Initialization for scan phase:
7533 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7534 _26 = D.2043[_25];
7535 D.2044[_25] = _26;
7536 _27 = D.2042[_25];
7537 _28 = _26 + _27;
7538 D.2043[_25] = _28;
7539 // Actual scan phase:
7541 r.1_8 = D.2044[_20];
7542 ... */
7544 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7546 /* Match the D.2042[_21] = 0; store above. Just require that
7547 it is a constant or external definition store. */
7548 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7550 fail_init:
7551 if (dump_enabled_p ())
7552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7553 "unsupported OpenMP scan initializer store.\n");
7554 return false;
7557 if (! loop_vinfo->scan_map)
7558 loop_vinfo->scan_map = new hash_map<tree, tree>;
7559 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7560 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7561 if (cached)
7562 goto fail_init;
7563 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7565 /* These stores can be vectorized normally. */
7566 return true;
7569 if (rhs_dt != vect_internal_def)
7571 fail:
7572 if (dump_enabled_p ())
7573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7574 "unsupported OpenMP scan combiner pattern.\n");
7575 return false;
7578 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7579 tree rhs = gimple_assign_rhs1 (stmt);
7580 if (TREE_CODE (rhs) != SSA_NAME)
7581 goto fail;
7583 gimple *other_store_stmt = NULL;
7584 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7585 bool inscan_var_store
7586 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7588 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7590 if (!inscan_var_store)
7592 use_operand_p use_p;
7593 imm_use_iterator iter;
7594 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7596 gimple *use_stmt = USE_STMT (use_p);
7597 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7598 continue;
7599 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7600 || !is_gimple_assign (use_stmt)
7601 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7602 || other_store_stmt
7603 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7604 goto fail;
7605 other_store_stmt = use_stmt;
7607 if (other_store_stmt == NULL)
7608 goto fail;
7609 rhs = gimple_assign_lhs (other_store_stmt);
7610 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7611 goto fail;
7614 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7616 use_operand_p use_p;
7617 imm_use_iterator iter;
7618 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7620 gimple *use_stmt = USE_STMT (use_p);
7621 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7622 continue;
7623 if (other_store_stmt)
7624 goto fail;
7625 other_store_stmt = use_stmt;
7628 else
7629 goto fail;
7631 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7632 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7633 || !is_gimple_assign (def_stmt)
7634 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7635 goto fail;
7637 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7638 /* For pointer addition, we should use the normal plus for the vector
7639 operation. */
7640 switch (code)
7642 case POINTER_PLUS_EXPR:
7643 code = PLUS_EXPR;
7644 break;
7645 case MULT_HIGHPART_EXPR:
7646 goto fail;
7647 default:
7648 break;
7650 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7651 goto fail;
7653 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7654 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7655 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7656 goto fail;
7658 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7659 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7660 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7661 || !gimple_assign_load_p (load1_stmt)
7662 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7663 || !gimple_assign_load_p (load2_stmt))
7664 goto fail;
7666 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7667 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7668 if (load1_stmt_info == NULL
7669 || load2_stmt_info == NULL
7670 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7671 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7672 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7673 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7674 goto fail;
7676 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7678 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7679 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7680 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7681 goto fail;
7682 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7683 tree lrhs;
7684 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7685 lrhs = rhs1;
7686 else
7687 lrhs = rhs2;
7688 use_operand_p use_p;
7689 imm_use_iterator iter;
7690 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7692 gimple *use_stmt = USE_STMT (use_p);
7693 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7694 continue;
7695 if (other_store_stmt)
7696 goto fail;
7697 other_store_stmt = use_stmt;
7701 if (other_store_stmt == NULL)
7702 goto fail;
7703 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7704 || !gimple_store_p (other_store_stmt))
7705 goto fail;
7707 stmt_vec_info other_store_stmt_info
7708 = loop_vinfo->lookup_stmt (other_store_stmt);
7709 if (other_store_stmt_info == NULL
7710 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7711 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7712 goto fail;
7714 gimple *stmt1 = stmt;
7715 gimple *stmt2 = other_store_stmt;
7716 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7717 std::swap (stmt1, stmt2);
7718 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7719 gimple_assign_rhs1 (load2_stmt)))
7721 std::swap (rhs1, rhs2);
7722 std::swap (load1_stmt, load2_stmt);
7723 std::swap (load1_stmt_info, load2_stmt_info);
7725 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7726 gimple_assign_rhs1 (load1_stmt)))
7727 goto fail;
7729 tree var3 = NULL_TREE;
7730 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7731 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7732 gimple_assign_rhs1 (load2_stmt)))
7733 goto fail;
7734 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7736 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7737 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7738 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7739 goto fail;
7740 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7741 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7742 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7743 || lookup_attribute ("omp simd inscan exclusive",
7744 DECL_ATTRIBUTES (var3)))
7745 goto fail;
7748 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7749 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7750 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7751 goto fail;
7753 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7754 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7755 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7756 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7757 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7758 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7759 goto fail;
7761 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7762 std::swap (var1, var2);
7764 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7766 if (!lookup_attribute ("omp simd inscan exclusive",
7767 DECL_ATTRIBUTES (var1)))
7768 goto fail;
7769 var1 = var3;
7772 if (loop_vinfo->scan_map == NULL)
7773 goto fail;
7774 tree *init = loop_vinfo->scan_map->get (var1);
7775 if (init == NULL)
7776 goto fail;
7778 /* The IL is as expected; now check if we can actually vectorize it.
7779 Inclusive scan:
7780 _26 = D.2043[_25];
7781 _27 = D.2042[_25];
7782 _28 = _26 + _27;
7783 D.2043[_25] = _28;
7784 D.2042[_25] = _28;
7785 should be vectorized as (where _40 is the vectorized rhs
7786 from the D.2042[_21] = 0; store):
7787 _30 = MEM <vector(8) int> [(int *)&D.2043];
7788 _31 = MEM <vector(8) int> [(int *)&D.2042];
7789 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7790 _33 = _31 + _32;
7791 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7792 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7793 _35 = _33 + _34;
7794 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7795 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7796 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7797 _37 = _35 + _36;
7798 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7799 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7800 _38 = _30 + _37;
7801 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7802 MEM <vector(8) int> [(int *)&D.2043] = _39;
7803 MEM <vector(8) int> [(int *)&D.2042] = _38;
7804 Exclusive scan:
7805 _26 = D.2043[_25];
7806 D.2044[_25] = _26;
7807 _27 = D.2042[_25];
7808 _28 = _26 + _27;
7809 D.2043[_25] = _28;
7810 should be vectorized as (where _40 is the vectorized rhs
7811 from the D.2042[_21] = 0; store):
7812 _30 = MEM <vector(8) int> [(int *)&D.2043];
7813 _31 = MEM <vector(8) int> [(int *)&D.2042];
7814 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7815 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7816 _34 = _32 + _33;
7817 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7818 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7819 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7820 _36 = _34 + _35;
7821 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7822 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7823 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7824 _38 = _36 + _37;
7825 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7826 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7827 _39 = _30 + _38;
7828 _50 = _31 + _39;
7829 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7830 MEM <vector(8) int> [(int *)&D.2044] = _39;
7831 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7832 enum machine_mode vec_mode = TYPE_MODE (vectype);
7833 optab optab = optab_for_tree_code (code, vectype, optab_default);
7834 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7835 goto fail;
7837 int units_log2 = scan_store_can_perm_p (vectype, *init);
7838 if (units_log2 == -1)
7839 goto fail;
7841 return true;
7845 /* Function vectorizable_scan_store.
7847 Helper of vectorizable_store; arguments are as for vectorizable_store.
7848 Handle only the transformation, checking is done in check_scan_store. */
7850 static bool
7851 vectorizable_scan_store (vec_info *vinfo, stmt_vec_info stmt_info,
7852 slp_tree slp_node, gimple_stmt_iterator *gsi,
7853 gimple **vec_stmt, int ncopies)
7855 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7856 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7857 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7858 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7860 if (dump_enabled_p ())
7861 dump_printf_loc (MSG_NOTE, vect_location,
7862 "transform scan store. ncopies = %d\n", ncopies);
7864 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7865 tree rhs = gimple_assign_rhs1 (stmt);
7866 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7868 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7869 bool inscan_var_store
7870 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7872 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7874 use_operand_p use_p;
7875 imm_use_iterator iter;
7876 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7878 gimple *use_stmt = USE_STMT (use_p);
7879 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7880 continue;
7881 rhs = gimple_assign_lhs (use_stmt);
7882 break;
7886 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7887 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7888 if (code == POINTER_PLUS_EXPR)
7889 code = PLUS_EXPR;
7890 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7891 && commutative_tree_code (code));
7892 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7893 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7894 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7895 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7896 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7897 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7898 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7899 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7900 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7901 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7902 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7904 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7906 std::swap (rhs1, rhs2);
7907 std::swap (var1, var2);
7908 std::swap (load1_dr_info, load2_dr_info);
7911 tree *init = loop_vinfo->scan_map->get (var1);
7912 gcc_assert (init);
7914 unsigned HOST_WIDE_INT nunits;
7915 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7916 gcc_unreachable ();
7917 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7918 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7919 gcc_assert (units_log2 > 0);
7920 auto_vec<tree, 16> perms;
7921 perms.quick_grow (units_log2 + 1);
7922 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7923 for (int i = 0; i <= units_log2; ++i)
7925 unsigned HOST_WIDE_INT j, k;
7926 vec_perm_builder sel (nunits, nunits, 1);
7927 sel.quick_grow (nunits);
7928 if (i == units_log2)
7929 for (j = 0; j < nunits; ++j)
7930 sel[j] = nunits - 1;
7931 else
7933 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7934 sel[j] = j;
7935 for (k = 0; j < nunits; ++j, ++k)
7936 sel[j] = nunits + k;
7938 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7939 if (!use_whole_vector.is_empty ()
7940 && use_whole_vector[i] != scan_store_kind_perm)
7942 if (zero_vec == NULL_TREE)
7943 zero_vec = build_zero_cst (vectype);
7944 if (masktype == NULL_TREE
7945 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7946 masktype = truth_type_for (vectype);
7947 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7949 else
7950 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7953 tree vec_oprnd1 = NULL_TREE;
7954 tree vec_oprnd2 = NULL_TREE;
7955 tree vec_oprnd3 = NULL_TREE;
7956 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7957 tree dataref_offset = build_int_cst (ref_type, 0);
7958 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7959 vectype, VMAT_CONTIGUOUS);
7960 tree ldataref_ptr = NULL_TREE;
7961 tree orig = NULL_TREE;
7962 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7963 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7964 /* The initialization is invariant. */
7965 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, *init, vectype, NULL);
7966 auto_vec<tree> vec_oprnds2;
7967 auto_vec<tree> vec_oprnds3;
7968 if (ldataref_ptr == NULL)
7970 /* We want to look up the vector operands of the reduction, not those
7971 of the store - for SLP we have to use the proper SLP node for the
7972 lookup, which should be the single child of the scan store. */
7973 vect_get_vec_defs (vinfo, stmt_info, SLP_TREE_CHILDREN (slp_node)[0],
7974 ncopies, rhs1, &vec_oprnds2, rhs2, &vec_oprnds3);
7975 /* ??? For SLP we do not key the def on 'rhs1' or 'rhs2' but get
7976 them in SLP child order. So we have to swap here with logic
7977 similar to above. */
7978 stmt_vec_info load
7979 = SLP_TREE_SCALAR_STMTS (SLP_TREE_CHILDREN
7980 (SLP_TREE_CHILDREN (slp_node)[0])[0])[0];
7981 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (load);
7982 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7983 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)))
7984 for (unsigned i = 0; i < vec_oprnds2.length (); ++i)
7985 std::swap (vec_oprnds2[i], vec_oprnds3[i]);
7987 else
7988 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
7989 rhs2, &vec_oprnds3);
7990 for (unsigned j = 0; j < vec_oprnds3.length (); j++)
7992 if (ldataref_ptr == NULL)
7993 vec_oprnd2 = vec_oprnds2[j];
7994 vec_oprnd3 = vec_oprnds3[j];
7995 if (j == 0)
7996 orig = vec_oprnd3;
7997 else if (!inscan_var_store)
7998 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8000 if (ldataref_ptr)
8002 vec_oprnd2 = make_ssa_name (vectype);
8003 tree data_ref = fold_build2 (MEM_REF, vectype,
8004 unshare_expr (ldataref_ptr),
8005 dataref_offset);
8006 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
8007 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
8008 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8009 if (! slp_node)
8011 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8012 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8016 tree v = vec_oprnd2;
8017 for (int i = 0; i < units_log2; ++i)
8019 tree new_temp = make_ssa_name (vectype);
8020 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
8021 (zero_vec
8022 && (use_whole_vector[i]
8023 != scan_store_kind_perm))
8024 ? zero_vec : vec_oprnd1, v,
8025 perms[i]);
8026 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8027 if (! slp_node)
8029 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8030 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8033 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
8035 /* The whole vector shift shifted in zeros, but if *init
8036 is not initializer_zerop, we need to replace those elements
8037 with elements from vec_oprnd1. */
8038 tree_vector_builder vb (masktype, nunits, 1);
8039 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8040 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8041 ? boolean_false_node : boolean_true_node);
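/* E.g. for i == 1 and nunits == 8 the mask built here is
   { 0, 0, 1, 1, 1, 1, 1, 1 }, so the first two lanes are taken from
   vec_oprnd1 (the invariant initializer) and the remaining lanes from
   the shifted vector.  */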
8043 tree new_temp2 = make_ssa_name (vectype);
8044 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8045 new_temp, vec_oprnd1);
8046 vect_finish_stmt_generation (vinfo, stmt_info,
8047 g, gsi);
8048 if (! slp_node)
8049 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8050 new_temp = new_temp2;
8053 /* For exclusive scan, perform the perms[i] permutation once
8054 more. */
8055 if (i == 0
8056 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8057 && v == vec_oprnd2)
8059 v = new_temp;
8060 --i;
8061 continue;
8064 tree new_temp2 = make_ssa_name (vectype);
8065 g = gimple_build_assign (new_temp2, code, v, new_temp);
8066 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8067 if (! slp_node)
8068 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8070 v = new_temp2;
8073 tree new_temp = make_ssa_name (vectype);
8074 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8075 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8076 if (! slp_node)
8077 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8079 tree last_perm_arg = new_temp;
8080 /* For exclusive scan, new_temp computed above is the exclusive scan
8081 prefix sum.  Turn it into an inclusive prefix sum for the broadcast
8082 of the last element into orig. */
8083 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8085 last_perm_arg = make_ssa_name (vectype);
8086 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8087 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8088 if (! slp_node)
8089 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8092 orig = make_ssa_name (vectype);
8093 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8094 last_perm_arg, perms[units_log2]);
8095 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8096 if (! slp_node)
8097 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8099 if (!inscan_var_store)
8101 tree data_ref = fold_build2 (MEM_REF, vectype,
8102 unshare_expr (dataref_ptr),
8103 dataref_offset);
8104 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8105 g = gimple_build_assign (data_ref, new_temp);
8106 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8107 if (! slp_node)
8108 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8112 if (inscan_var_store)
8113 for (unsigned j = 0; j < vec_oprnds3.length (); j++)
8115 if (j != 0)
8116 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8118 tree data_ref = fold_build2 (MEM_REF, vectype,
8119 unshare_expr (dataref_ptr),
8120 dataref_offset);
8121 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8122 gimple *g = gimple_build_assign (data_ref, orig);
8123 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8124 if (! slp_node)
8125 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8127 return true;
8131 /* Function vectorizable_store.
8133 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8134 that can be vectorized.
8135 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8136 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8137 Return true if STMT_INFO is vectorizable in this way. */
8139 static bool
8140 vectorizable_store (vec_info *vinfo,
8141 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8142 gimple **vec_stmt, slp_tree slp_node,
8143 stmt_vector_for_cost *cost_vec)
8145 tree data_ref;
8146 tree vec_oprnd = NULL_TREE;
8147 tree elem_type;
8148 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8149 class loop *loop = NULL;
8150 machine_mode vec_mode;
8151 tree dummy;
8152 enum vect_def_type rhs_dt = vect_unknown_def_type;
8153 enum vect_def_type mask_dt = vect_unknown_def_type;
8154 tree dataref_ptr = NULL_TREE;
8155 tree dataref_offset = NULL_TREE;
8156 gimple *ptr_incr = NULL;
8157 int ncopies;
8158 int j;
8159 stmt_vec_info first_stmt_info;
8160 bool grouped_store;
8161 unsigned int group_size, i;
8162 bool slp = (slp_node != NULL);
8163 unsigned int vec_num;
8164 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8165 tree aggr_type;
8166 gather_scatter_info gs_info;
8167 poly_uint64 vf;
8168 vec_load_store_type vls_type;
8169 tree ref_type;
8171 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8172 return false;
8174 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8175 && ! vec_stmt)
8176 return false;
8178 /* Is vectorizable store? */
8180 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8181 slp_tree mask_node = NULL;
8182 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8184 tree scalar_dest = gimple_assign_lhs (assign);
8185 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8186 && is_pattern_stmt_p (stmt_info))
8187 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8188 if (TREE_CODE (scalar_dest) != ARRAY_REF
8189 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8190 && TREE_CODE (scalar_dest) != INDIRECT_REF
8191 && TREE_CODE (scalar_dest) != COMPONENT_REF
8192 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8193 && TREE_CODE (scalar_dest) != REALPART_EXPR
8194 && TREE_CODE (scalar_dest) != MEM_REF)
8195 return false;
8197 else
8199 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8200 if (!call || !gimple_call_internal_p (call))
8201 return false;
8203 internal_fn ifn = gimple_call_internal_fn (call);
8204 if (!internal_store_fn_p (ifn))
8205 return false;
8207 int mask_index = internal_fn_mask_index (ifn);
8208 if (mask_index >= 0 && slp_node)
8209 mask_index = vect_slp_child_index_for_operand
8210 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8211 if (mask_index >= 0
8212 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8213 &mask, &mask_node, &mask_dt,
8214 &mask_vectype))
8215 return false;
8218 /* Cannot have hybrid store SLP -- that would mean storing to the
8219 same location twice. */
8220 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8222 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8223 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8225 if (loop_vinfo)
8227 loop = LOOP_VINFO_LOOP (loop_vinfo);
8228 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8230 else
8231 vf = 1;
8233 /* Multiple types in SLP are handled by creating the appropriate number of
8234 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8235 case of SLP. */
8236 if (slp)
8237 ncopies = 1;
8238 else
8239 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8241 gcc_assert (ncopies >= 1);
8243 /* FORNOW. This restriction should be relaxed. */
8244 if (loop
8245 && nested_in_vect_loop_p (loop, stmt_info)
8246 && (ncopies > 1 || (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)))
8248 if (dump_enabled_p ())
8249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8250 "multiple types in nested loop.\n");
8251 return false;
8254 tree op;
8255 slp_tree op_node;
8256 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8257 &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8258 return false;
8260 elem_type = TREE_TYPE (vectype);
8261 vec_mode = TYPE_MODE (vectype);
8263 if (!STMT_VINFO_DATA_REF (stmt_info))
8264 return false;
8266 vect_memory_access_type memory_access_type;
8267 enum dr_alignment_support alignment_support_scheme;
8268 int misalignment;
8269 poly_int64 poffset;
8270 internal_fn lanes_ifn;
8271 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8272 ncopies, &memory_access_type, &poffset,
8273 &alignment_support_scheme, &misalignment, &gs_info,
8274 &lanes_ifn))
8275 return false;
8277 if (slp_node
8278 && slp_node->ldst_lanes
8279 && memory_access_type != VMAT_LOAD_STORE_LANES)
8281 if (dump_enabled_p ())
8282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8283 "discovered store-lane but cannot use it.\n");
8284 return false;
8287 if (mask)
8289 if (memory_access_type == VMAT_CONTIGUOUS)
8291 if (!VECTOR_MODE_P (vec_mode)
8292 || !can_vec_mask_load_store_p (vec_mode,
8293 TYPE_MODE (mask_vectype), false))
8294 return false;
8296 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8297 && (memory_access_type != VMAT_GATHER_SCATTER
8298 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8300 if (dump_enabled_p ())
8301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8302 "unsupported access type for masked store.\n");
8303 return false;
8305 else if (memory_access_type == VMAT_GATHER_SCATTER
8306 && gs_info.ifn == IFN_LAST
8307 && !gs_info.decl)
8309 if (dump_enabled_p ())
8310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8311 "unsupported masked emulated scatter.\n");
8312 return false;
8315 else
8317 /* FORNOW. In some cases can vectorize even if data-type not supported
8318 (e.g. - array initialization with 0). */
8319 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8320 return false;
8323 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8324 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8325 && memory_access_type != VMAT_GATHER_SCATTER
8326 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8327 if (grouped_store)
8329 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8330 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8331 group_size = DR_GROUP_SIZE (first_stmt_info);
8333 else
8335 first_stmt_info = stmt_info;
8336 first_dr_info = dr_info;
8337 group_size = vec_num = 1;
8340 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8342 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node, mask,
8343 memory_access_type))
8344 return false;
8347 bool costing_p = !vec_stmt;
8348 if (costing_p) /* transformation not required. */
8350 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8352 if (loop_vinfo
8353 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8354 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8355 vls_type, group_size,
8356 memory_access_type, &gs_info,
8357 mask);
8359 if (slp_node
8360 && (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8361 || (mask
8362 && !vect_maybe_update_slp_op_vectype (mask_node,
8363 mask_vectype))))
8365 if (dump_enabled_p ())
8366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8367 "incompatible vector types for invariants\n");
8368 return false;
8371 if (dump_enabled_p ()
8372 && memory_access_type != VMAT_ELEMENTWISE
8373 && memory_access_type != VMAT_GATHER_SCATTER
8374 && alignment_support_scheme != dr_aligned)
8375 dump_printf_loc (MSG_NOTE, vect_location,
8376 "Vectorizing an unaligned access.\n");
8378 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8380 /* As function vect_transform_stmt shows, for interleaving stores
8381 the whole chain is vectorized when the last store in the chain
8382 is reached; the other stores in the group are skipped. So we
8383 only want to cost one store per group here. Since it is not
8384 trivial to get hold of the last one and costing the first one
8385 is equivalent, use the first one instead. */
8386 if (grouped_store
8387 && !slp
8388 && first_stmt_info != stmt_info)
8389 return true;
8391 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8393 /* Transform. */
8395 ensure_base_align (dr_info);
8397 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8399 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8400 gcc_assert (!slp || SLP_TREE_LANES (slp_node) == 1);
8401 if (costing_p)
8403 unsigned int inside_cost = 0, prologue_cost = 0;
8404 if (vls_type == VLS_STORE_INVARIANT)
8405 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8406 stmt_info, 0, vect_prologue);
8407 vect_get_store_cost (vinfo, stmt_info, ncopies,
8408 alignment_support_scheme, misalignment,
8409 &inside_cost, cost_vec);
8411 if (dump_enabled_p ())
8412 dump_printf_loc (MSG_NOTE, vect_location,
8413 "vect_model_store_cost: inside_cost = %d, "
8414 "prologue_cost = %d .\n",
8415 inside_cost, prologue_cost);
8417 return true;
8419 return vectorizable_scan_store (vinfo, stmt_info, slp_node,
8420 gsi, vec_stmt, ncopies);
8423 if (grouped_store || slp)
8425 /* FORNOW */
8426 gcc_assert (!grouped_store
8427 || !loop
8428 || !nested_in_vect_loop_p (loop, stmt_info));
8430 if (slp)
8432 grouped_store = false;
8433 /* VEC_NUM is the number of vect stmts to be created for this
8434 group. */
8435 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8436 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8437 gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info)
8438 || (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8439 == first_stmt_info));
8440 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8441 op = vect_get_store_rhs (first_stmt_info);
8443 else
8444 /* VEC_NUM is the number of vect stmts to be created for this
8445 group. */
8446 vec_num = group_size;
8448 ref_type = get_group_alias_ptr_type (first_stmt_info);
8450 else
8451 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8453 if (!costing_p && dump_enabled_p ())
8454 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8455 ncopies);
8457 /* Check whether we need to update the prologue cost for an
8458 invariant, and update it accordingly if so. If this is not an
8459 interleaving store, we can just check vls_type; but for an
8460 interleaving store we need to check the def_type of the stored
8461 value, since the current vls_type only reflects
8462 first_stmt_info. */
8463 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8465 gcc_assert (costing_p);
8466 if (slp)
8467 return;
8468 if (grouped_store)
8470 gcc_assert (store_rhs);
8471 enum vect_def_type cdt;
8472 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8473 if (cdt != vect_constant_def && cdt != vect_external_def)
8474 return;
8476 else if (vls_type != VLS_STORE_INVARIANT)
8477 return;
8478 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8479 0, vect_prologue);
8482 if (memory_access_type == VMAT_ELEMENTWISE
8483 || memory_access_type == VMAT_STRIDED_SLP)
8485 unsigned inside_cost = 0, prologue_cost = 0;
8486 gimple_stmt_iterator incr_gsi;
8487 bool insert_after;
8488 gimple *incr;
8489 tree offvar;
8490 tree ivstep;
8491 tree running_off;
8492 tree stride_base, stride_step, alias_off;
8493 tree vec_oprnd = NULL_TREE;
8494 tree dr_offset;
8495 unsigned int g;
8496 /* Checked by get_load_store_type. */
8497 unsigned int const_nunits = nunits.to_constant ();
8499 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8500 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8502 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8503 stride_base
8504 = fold_build_pointer_plus
8505 (DR_BASE_ADDRESS (first_dr_info->dr),
8506 size_binop (PLUS_EXPR,
8507 convert_to_ptrofftype (dr_offset),
8508 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8509 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8511 /* For a store with loop-invariant (but other than power-of-2)
8512 stride (i.e. not a grouped access) like so:
8514 for (i = 0; i < n; i += stride)
8515 array[i] = ...;
8517 we generate a new induction variable and new stores from
8518 the components of the (vectorized) rhs:
8520 for (j = 0; ; j += VF*stride)
8521 vectemp = ...;
8522 tmp1 = vectemp[0];
8523 array[j] = tmp1;
8524 tmp2 = vectemp[1];
8525 array[j + stride] = tmp2;
8529 unsigned nstores = const_nunits;
8530 unsigned lnel = 1;
8531 tree ltype = elem_type;
8532 tree lvectype = vectype;
8533 if (slp)
8535 HOST_WIDE_INT n = gcd (group_size, const_nunits);
8536 if (n == const_nunits)
8538 int mis_align = dr_misalignment (first_dr_info, vectype);
8539 dr_alignment_support dr_align
8540 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8541 mis_align);
8542 if (dr_align == dr_aligned
8543 || dr_align == dr_unaligned_supported)
8545 nstores = 1;
8546 lnel = const_nunits;
8547 ltype = vectype;
8548 lvectype = vectype;
8549 alignment_support_scheme = dr_align;
8550 misalignment = mis_align;
8553 else if (n > 1)
8555 nstores = const_nunits / n;
8556 lnel = n;
8557 ltype = build_vector_type (elem_type, n);
8558 lvectype = vectype;
8560 /* First check if vec_extract optab doesn't support extraction
8561 of vector elts directly. */
8562 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8563 machine_mode vmode;
8564 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8565 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8566 n).exists (&vmode)
8567 || (convert_optab_handler (vec_extract_optab,
8568 TYPE_MODE (vectype), vmode)
8569 == CODE_FOR_nothing))
8571 /* Try to avoid emitting an extract of vector elements
8572 by performing the extracts using an integer type of the
8573 same size, extracting from a vector of those and then
8574 re-interpreting it as the original vector type if
8575 supported. */
8576 unsigned lsize
8577 = n * GET_MODE_BITSIZE (elmode);
8578 unsigned int lnunits = const_nunits / n;
8579 /* If we can't construct such a vector fall back to
8580 element extracts from the original vector type and
8581 element size stores. */
8582 if (int_mode_for_size (lsize, 0).exists (&elmode)
8583 && VECTOR_MODE_P (TYPE_MODE (vectype))
8584 && related_vector_mode (TYPE_MODE (vectype), elmode,
8585 lnunits).exists (&vmode)
8586 && (convert_optab_handler (vec_extract_optab,
8587 vmode, elmode)
8588 != CODE_FOR_nothing))
8590 nstores = lnunits;
8591 lnel = n;
8592 ltype = build_nonstandard_integer_type (lsize, 1);
8593 lvectype = build_vector_type (ltype, nstores);
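/* As an illustration (assuming the target can extract SImode elements
   from V4SI but not V4QI subvectors directly): for a V16QI store with
   group_size 4 this extracts four 32-bit integers from a V4SI view of
   the vector and emits four integer stores instead of sixteen char
   stores.  */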
8595 /* Else fall back to vector extraction anyway.
8596 Fewer stores are more important than avoiding spilling
8597 of the vector we extract from. Compared to the
8598 construction case in vectorizable_load no store-forwarding
8599 issue exists here for reasonable archs. */
8602 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8603 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8606 if (!costing_p)
8608 ivstep = stride_step;
8609 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8610 build_int_cst (TREE_TYPE (ivstep), vf));
8612 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8614 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8615 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8616 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8617 insert_after, &offvar, NULL);
8618 incr = gsi_stmt (incr_gsi);
8620 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8623 alias_off = build_int_cst (ref_type, 0);
8624 stmt_vec_info next_stmt_info = first_stmt_info;
8625 auto_vec<tree> vec_oprnds;
8626 /* For costing some adjacent vector stores, we'd like to cost them
8627 once using the total number instead of costing each one by one. */
8628 unsigned int n_adjacent_stores = 0;
8629 for (g = 0; g < group_size; g++)
8631 running_off = offvar;
8632 if (!costing_p)
8634 if (g)
8636 tree size = TYPE_SIZE_UNIT (ltype);
8637 tree pos
8638 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8639 tree newoff = copy_ssa_name (running_off, NULL);
8640 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8641 running_off, pos);
8642 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8643 running_off = newoff;
8646 if (!slp)
8647 op = vect_get_store_rhs (next_stmt_info);
8648 if (!costing_p)
8649 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8650 &vec_oprnds);
8651 else
8652 update_prologue_cost (&prologue_cost, op);
8653 unsigned int group_el = 0;
8654 unsigned HOST_WIDE_INT
8655 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8656 for (j = 0; j < ncopies; j++)
8658 if (!costing_p)
8660 vec_oprnd = vec_oprnds[j];
8661 /* Pun the vector to extract from if necessary. */
8662 if (lvectype != vectype)
8664 tree tem = make_ssa_name (lvectype);
8665 tree cvt
8666 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8667 gimple *pun = gimple_build_assign (tem, cvt);
8668 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8669 vec_oprnd = tem;
8672 for (i = 0; i < nstores; i++)
8674 if (costing_p)
8676 /* We only need vector extraction when there is more
8677 than one store. */
8678 if (nstores > 1)
8679 inside_cost
8680 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8681 stmt_info, 0, vect_body);
8682 /* Treat a single-lane vector type store as a scalar
8683 store to avoid an ICE like PR110776. */
8684 if (VECTOR_TYPE_P (ltype)
8685 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8686 n_adjacent_stores++;
8687 else
8688 inside_cost
8689 += record_stmt_cost (cost_vec, 1, scalar_store,
8690 stmt_info, 0, vect_body);
8691 continue;
8693 tree newref, newoff;
8694 gimple *incr, *assign;
8695 tree size = TYPE_SIZE (ltype);
8696 /* Extract the i'th component. */
8697 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8698 bitsize_int (i), size);
8699 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8700 size, pos);
8702 elem = force_gimple_operand_gsi (gsi, elem, true,
8703 NULL_TREE, true,
8704 GSI_SAME_STMT);
8706 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8707 group_el * elsz);
8708 newref = build2 (MEM_REF, ltype,
8709 running_off, this_off);
8710 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8712 /* And store it to *running_off. */
8713 assign = gimple_build_assign (newref, elem);
8714 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8716 group_el += lnel;
8717 if (! slp
8718 || group_el == group_size)
8720 newoff = copy_ssa_name (running_off, NULL);
8721 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8722 running_off, stride_step);
8723 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8725 running_off = newoff;
8726 group_el = 0;
8728 if (g == group_size - 1
8729 && !slp)
8731 if (j == 0 && i == 0)
8732 *vec_stmt = assign;
8733 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8737 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8738 vec_oprnds.truncate (0);
8739 if (slp)
8740 break;
8743 if (costing_p)
8745 if (n_adjacent_stores > 0)
8746 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8747 alignment_support_scheme, misalignment,
8748 &inside_cost, cost_vec);
8749 if (dump_enabled_p ())
8750 dump_printf_loc (MSG_NOTE, vect_location,
8751 "vect_model_store_cost: inside_cost = %d, "
8752 "prologue_cost = %d .\n",
8753 inside_cost, prologue_cost);
8756 return true;
8759 gcc_assert (alignment_support_scheme);
8760 vec_loop_masks *loop_masks
8761 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8762 ? &LOOP_VINFO_MASKS (loop_vinfo)
8763 : NULL);
8764 vec_loop_lens *loop_lens
8765 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8766 ? &LOOP_VINFO_LENS (loop_vinfo)
8767 : NULL);
8769   /* Both vect_transform_stmt and vect_analyze_stmt reach here, but there
8770      are some differences.  We cannot enable both lens and masks
8771      during transform, though that is allowed during analysis.
8772      Shouldn't go with the length-based approach if fully masked. */
8773 if (cost_vec == NULL)
8774     /* The cost_vec is NULL during transform. */
8775 gcc_assert ((!loop_lens || !loop_masks));
8777 /* Targets with store-lane instructions must not require explicit
8778 realignment. vect_supportable_dr_alignment always returns either
8779 dr_aligned or dr_unaligned_supported for masked operations. */
8780 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8781 && !mask
8782 && !loop_masks)
8783 || alignment_support_scheme == dr_aligned
8784 || alignment_support_scheme == dr_unaligned_supported);
8786 tree offset = NULL_TREE;
8787 if (!known_eq (poffset, 0))
8788 offset = size_int (poffset);
8790 tree bump;
8791 tree vec_offset = NULL_TREE;
8792 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8794 aggr_type = NULL_TREE;
8795 bump = NULL_TREE;
8797 else if (memory_access_type == VMAT_GATHER_SCATTER)
8799 aggr_type = elem_type;
8800 if (!costing_p)
8801 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8802 &bump, &vec_offset, loop_lens);
8804 else
8806 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8807 aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
8808 else
8809 aggr_type = vectype;
8810 if (!costing_p)
8811 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8812 memory_access_type, loop_lens);
8815 if (mask && !costing_p)
8816 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8818 /* In case the vectorization factor (VF) is bigger than the number
8819 of elements that we can fit in a vectype (nunits), we have to generate
8820 more than one vector stmt - i.e - we need to "unroll" the
8821 vector stmt by a factor VF/nunits. */
8823 /* In case of interleaving (non-unit grouped access):
8825 S1: &base + 2 = x2
8826 S2: &base = x0
8827 S3: &base + 1 = x1
8828 S4: &base + 3 = x3
8830 We create vectorized stores starting from base address (the access of the
8831 first stmt in the chain (S2 in the above example), when the last store stmt
8832 of the chain (S4) is reached:
8834 VS1: &base = vx2
8835 VS2: &base + vec_size*1 = vx0
8836 VS3: &base + vec_size*2 = vx1
8837 VS4: &base + vec_size*3 = vx3
8839 Then permutation statements are generated:
8841 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8842 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8845 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8846 (the order of the data-refs in the output of vect_permute_store_chain
8847 corresponds to the order of scalar stmts in the interleaving chain - see
8848 the documentation of vect_permute_store_chain()).
8850 In case of both multiple types and interleaving, above vector stores and
8851 permutation stmts are created for every copy. The result vector stmts are
8852 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8853 STMT_VINFO_RELATED_STMT for the next copies.
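     For illustration (assuming 8-element vectors, so VEC_PERM_EXPR indices
     0-7 select from the first operand and 8-15 from the second), the mask
     {0, 8, 1, 9, 2, 10, 3, 11} in VS5 interleaves the low halves of its two
     inputs lane by lane, while the mask in VS6 does the same for the high
     halves.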
8856 auto_vec<tree> dr_chain (group_size);
8857 auto_vec<tree> vec_masks;
8858 tree vec_mask = NULL;
8859 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8860 for (i = 0; i < group_size; i++)
8861 gvec_oprnds.quick_push (new auto_vec<tree> ());
8863 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8865 if (costing_p && slp_node)
8866         /* Update all incoming store operand nodes; the general handling
8867            above only handles the mask and the first store operand node. */
8868 for (slp_tree child : SLP_TREE_CHILDREN (slp_node))
8869 if (child != mask_node
8870 && !vect_maybe_update_slp_op_vectype (child, vectype))
8872 if (dump_enabled_p ())
8873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8874 "incompatible vector types for invariants\n");
8875 return false;
8877 unsigned inside_cost = 0, prologue_cost = 0;
8878 /* For costing some adjacent vector stores, we'd like to cost with
8879          the total number of them once instead of costing each one by one. */
8880 unsigned int n_adjacent_stores = 0;
8881 if (slp)
8882 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) / group_size;
8883 for (j = 0; j < ncopies; j++)
8885 gimple *new_stmt;
8886 if (j == 0)
8888 /* For interleaved stores we collect vectorized defs for all
8889 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8890 as an input to vect_permute_store_chain(). */
8891 stmt_vec_info next_stmt_info = first_stmt_info;
8892 for (i = 0; i < group_size; i++)
8894 /* Since gaps are not supported for interleaved stores,
8895 DR_GROUP_SIZE is the exact number of stmts in the
8896 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8897 op = vect_get_store_rhs (next_stmt_info);
8898 if (costing_p)
8899 update_prologue_cost (&prologue_cost, op);
8900 else if (!slp)
8902 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8903 ncopies, op,
8904 gvec_oprnds[i]);
8905 vec_oprnd = (*gvec_oprnds[i])[0];
8906 dr_chain.quick_push (vec_oprnd);
8908 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8911 if (!costing_p)
8913 if (mask)
8915 if (slp_node)
8916 vect_get_slp_defs (mask_node, &vec_masks);
8917 else
8918 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8919 mask, &vec_masks,
8920 mask_vectype);
8921 vec_mask = vec_masks[0];
8924 dataref_ptr
8925 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8926 aggr_type, NULL, offset, &dummy,
8927 gsi, &ptr_incr, false, bump);
8930 else if (!costing_p)
8932 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8933 /* DR_CHAIN is then used as an input to
8934 vect_permute_store_chain(). */
8935 if (!slp)
8937 /* We should have caught mismatched types earlier. */
8938 gcc_assert (
8939 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8940 for (i = 0; i < group_size; i++)
8942 vec_oprnd = (*gvec_oprnds[i])[j];
8943 dr_chain[i] = vec_oprnd;
8946 if (mask)
8947 vec_mask = vec_masks[j];
8948 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8949 stmt_info, bump);
8952 if (costing_p)
8954 n_adjacent_stores += group_size;
8955 continue;
8958 /* Get an array into which we can store the individual vectors. */
8959 tree vec_array = create_vector_array (vectype, group_size);
8961 /* Invalidate the current contents of VEC_ARRAY. This should
8962 become an RTL clobber too, which prevents the vector registers
8963 from being upward-exposed. */
8964 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8966 /* Store the individual vectors into the array. */
8967 for (i = 0; i < group_size; i++)
8969 if (slp)
8971 slp_tree child;
8972 if (i == 0 || !mask_node)
8973 child = SLP_TREE_CHILDREN (slp_node)[i];
8974 else
8975 child = SLP_TREE_CHILDREN (slp_node)[i + 1];
8976 vec_oprnd = SLP_TREE_VEC_DEFS (child)[j];
8978 else
8979 vec_oprnd = dr_chain[i];
8980 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8984 tree final_mask = NULL;
8985 tree final_len = NULL;
8986 tree bias = NULL;
8987 if (loop_masks)
8988 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8989 ncopies, vectype, j);
8990 if (vec_mask)
8991 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8992 vec_mask, gsi);
8994 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8996 if (loop_lens)
8997 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8998 ncopies, vectype, j, 1);
8999 else
9000 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9001 signed char biasval
9002 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9003 bias = build_int_cst (intQI_type_node, biasval);
9004 if (!final_mask)
9006 mask_vectype = truth_type_for (vectype);
9007 final_mask = build_minus_one_cst (mask_vectype);
9011 gcall *call;
9012 if (final_len && final_mask)
9014 /* Emit:
9015 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
9016 LEN, BIAS, VEC_ARRAY). */
9017 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9018 tree alias_ptr = build_int_cst (ref_type, align);
9019 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
9020 dataref_ptr, alias_ptr,
9021 final_mask, final_len, bias,
9022 vec_array);
9024 else if (final_mask)
9026 /* Emit:
9027 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
9028 VEC_ARRAY). */
9029 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9030 tree alias_ptr = build_int_cst (ref_type, align);
9031 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
9032 dataref_ptr, alias_ptr,
9033 final_mask, vec_array);
9035 else
9037 /* Emit:
9038 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
9039 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9040 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
9041 gimple_call_set_lhs (call, data_ref);
9043 gimple_call_set_nothrow (call, true);
9044 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9045 new_stmt = call;
9047 /* Record that VEC_ARRAY is now dead. */
9048 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9049 if (j == 0 && !slp)
9050 *vec_stmt = new_stmt;
9051 if (!slp)
9052 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9055 if (costing_p)
9057 if (n_adjacent_stores > 0)
9058 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9059 alignment_support_scheme, misalignment,
9060 &inside_cost, cost_vec);
9061 if (dump_enabled_p ())
9062 dump_printf_loc (MSG_NOTE, vect_location,
9063 "vect_model_store_cost: inside_cost = %d, "
9064 "prologue_cost = %d .\n",
9065 inside_cost, prologue_cost);
9068 return true;
9071 if (memory_access_type == VMAT_GATHER_SCATTER)
9073 gcc_assert (!grouped_store);
9074 auto_vec<tree> vec_offsets;
9075 unsigned int inside_cost = 0, prologue_cost = 0;
9076 for (j = 0; j < ncopies; j++)
9078 gimple *new_stmt;
9079 if (j == 0)
9081 if (costing_p && vls_type == VLS_STORE_INVARIANT)
9082 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
9083 stmt_info, 0, vect_prologue);
9084 else if (!costing_p)
9086 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
9087 DR_CHAIN is of size 1. */
9088 gcc_assert (group_size == 1);
9089 if (slp_node)
9090 vect_get_slp_defs (op_node, gvec_oprnds[0]);
9091 else
9092 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
9093 ncopies, op, gvec_oprnds[0]);
9094 if (mask)
9096 if (slp_node)
9097 vect_get_slp_defs (mask_node, &vec_masks);
9098 else
9099 vect_get_vec_defs_for_operand (vinfo, stmt_info,
9100 ncopies,
9101 mask, &vec_masks,
9102 mask_vectype);
9105 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9106 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9107 slp_node, &gs_info,
9108 &dataref_ptr, &vec_offsets);
9109 else
9110 dataref_ptr
9111 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
9112 aggr_type, NULL, offset,
9113 &dummy, gsi, &ptr_incr, false,
9114 bump);
9117 else if (!costing_p)
9119 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9120 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9121 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9122 gsi, stmt_info, bump);
9125 new_stmt = NULL;
9126 for (i = 0; i < vec_num; ++i)
9128 if (!costing_p)
9130 vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i];
9131 if (mask)
9132 vec_mask = vec_masks[vec_num * j + i];
9133                 /* We should have caught mismatched types earlier. */
9134 gcc_assert (useless_type_conversion_p (vectype,
9135 TREE_TYPE (vec_oprnd)));
9137 unsigned HOST_WIDE_INT align;
9138 tree final_mask = NULL_TREE;
9139 tree final_len = NULL_TREE;
9140 tree bias = NULL_TREE;
9141 if (!costing_p)
9143 if (loop_masks)
9144 final_mask = vect_get_loop_mask (loop_vinfo, gsi,
9145 loop_masks, ncopies,
9146 vectype, j);
9147 if (vec_mask)
9148 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9149 final_mask, vec_mask, gsi);
9152 if (gs_info.ifn != IFN_LAST)
9154 if (costing_p)
9156 unsigned int cnunits = vect_nunits_for_cost (vectype);
9157 inside_cost
9158 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9159 stmt_info, 0, vect_body);
9160 continue;
9163 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9164 vec_offset = vec_offsets[vec_num * j + i];
9165 tree scale = size_int (gs_info.scale);
9167 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9169 if (loop_lens)
9170 final_len = vect_get_loop_len (loop_vinfo, gsi,
9171 loop_lens, ncopies,
9172 vectype, j, 1);
9173 else
9174 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9175 signed char biasval
9176 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9177 bias = build_int_cst (intQI_type_node, biasval);
9178 if (!final_mask)
9180 mask_vectype = truth_type_for (vectype);
9181 final_mask = build_minus_one_cst (mask_vectype);
9185 gcall *call;
9186 if (final_len && final_mask)
9187 call = gimple_build_call_internal
9188 (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
9189 vec_offset, scale, vec_oprnd, final_mask,
9190 final_len, bias);
9191 else if (final_mask)
9192 call = gimple_build_call_internal
9193 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
9194 vec_offset, scale, vec_oprnd, final_mask);
9195 else
9196 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9197 dataref_ptr, vec_offset,
9198 scale, vec_oprnd);
9199 gimple_call_set_nothrow (call, true);
9200 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9201 new_stmt = call;
9203 else if (gs_info.decl)
9205 /* The builtin decls path for scatter is legacy, x86 only. */
9206 gcc_assert (nunits.is_constant ()
9207 && (!final_mask
9208 || SCALAR_INT_MODE_P
9209 (TYPE_MODE (TREE_TYPE (final_mask)))));
9210 if (costing_p)
9212 unsigned int cnunits = vect_nunits_for_cost (vectype);
9213 inside_cost
9214 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9215 stmt_info, 0, vect_body);
9216 continue;
9218 poly_uint64 offset_nunits
9219 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
9220 if (known_eq (nunits, offset_nunits))
9222 new_stmt = vect_build_one_scatter_store_call
9223 (vinfo, stmt_info, gsi, &gs_info,
9224 dataref_ptr, vec_offsets[vec_num * j + i],
9225 vec_oprnd, final_mask);
9226 vect_finish_stmt_generation (vinfo, stmt_info,
9227 new_stmt, gsi);
9229 else if (known_eq (nunits, offset_nunits * 2))
9231                     /* We have an offset vector with half the number of
9232 lanes but the builtins will store full vectype
9233 data from the lower lanes. */
9234 new_stmt = vect_build_one_scatter_store_call
9235 (vinfo, stmt_info, gsi, &gs_info,
9236 dataref_ptr,
9237 vec_offsets[2 * vec_num * j + 2 * i],
9238 vec_oprnd, final_mask);
9239 vect_finish_stmt_generation (vinfo, stmt_info,
9240 new_stmt, gsi);
9241 int count = nunits.to_constant ();
9242 vec_perm_builder sel (count, count, 1);
9243 sel.quick_grow (count);
9244 for (int i = 0; i < count; ++i)
9245 sel[i] = i | (count / 2);
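		      /* E.g. for COUNT == 8 the selector is
			 {4, 5, 6, 7, 4, 5, 6, 7}, i.e. the high half of
			 VEC_OPRND is moved into the low lanes for the
			 second builtin call below (illustrative values).  */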
9246 vec_perm_indices indices (sel, 2, count);
9247 tree perm_mask
9248 = vect_gen_perm_mask_checked (vectype, indices);
9249 new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9250 vec_oprnd, vec_oprnd,
9251 perm_mask);
9252 vec_oprnd = make_ssa_name (vectype);
9253 gimple_set_lhs (new_stmt, vec_oprnd);
9254 vect_finish_stmt_generation (vinfo, stmt_info,
9255 new_stmt, gsi);
9256 if (final_mask)
9258 new_stmt = gimple_build_assign (NULL_TREE,
9259 VEC_UNPACK_HI_EXPR,
9260 final_mask);
9261 final_mask = make_ssa_name
9262 (truth_type_for (gs_info.offset_vectype));
9263 gimple_set_lhs (new_stmt, final_mask);
9264 vect_finish_stmt_generation (vinfo, stmt_info,
9265 new_stmt, gsi);
9267 new_stmt = vect_build_one_scatter_store_call
9268 (vinfo, stmt_info, gsi, &gs_info,
9269 dataref_ptr,
9270 vec_offsets[2 * vec_num * j + 2 * i + 1],
9271 vec_oprnd, final_mask);
9272 vect_finish_stmt_generation (vinfo, stmt_info,
9273 new_stmt, gsi);
9275 else if (known_eq (nunits * 2, offset_nunits))
9277                     /* We have an offset vector with double the number of
9278 lanes. Select the low/high part accordingly. */
9279 vec_offset = vec_offsets[(vec_num * j + i) / 2];
9280 if ((vec_num * j + i) & 1)
9282 int count = offset_nunits.to_constant ();
9283 vec_perm_builder sel (count, count, 1);
9284 sel.quick_grow (count);
9285 for (int i = 0; i < count; ++i)
9286 sel[i] = i | (count / 2);
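			  /* As above: e.g. for COUNT == 4 this yields
			     {2, 3, 2, 3}, selecting the high half of the
			     offset vector (illustrative values).  */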
9287 vec_perm_indices indices (sel, 2, count);
9288 tree perm_mask = vect_gen_perm_mask_checked
9289 (TREE_TYPE (vec_offset), indices);
9290 new_stmt = gimple_build_assign (NULL_TREE,
9291 VEC_PERM_EXPR,
9292 vec_offset,
9293 vec_offset,
9294 perm_mask);
9295 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9296 gimple_set_lhs (new_stmt, vec_offset);
9297 vect_finish_stmt_generation (vinfo, stmt_info,
9298 new_stmt, gsi);
9300 new_stmt = vect_build_one_scatter_store_call
9301 (vinfo, stmt_info, gsi, &gs_info,
9302 dataref_ptr, vec_offset,
9303 vec_oprnd, final_mask);
9304 vect_finish_stmt_generation (vinfo, stmt_info,
9305 new_stmt, gsi);
9307 else
9308 gcc_unreachable ();
9310 else
9312 /* Emulated scatter. */
9313 gcc_assert (!final_mask);
9314 if (costing_p)
9316 unsigned int cnunits = vect_nunits_for_cost (vectype);
9317                 /* For an emulated scatter, N offset vector element extracts
9318                    (we assume the scalar scaling and ptr + offset add is
9319                    consumed by the store). */
9320 inside_cost
9321 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9322 stmt_info, 0, vect_body);
9323 /* N scalar stores plus extracting the elements. */
9324 inside_cost
9325 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9326 stmt_info, 0, vect_body);
9327 inside_cost
9328 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9329 stmt_info, 0, vect_body);
9330 continue;
9333 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9334 unsigned HOST_WIDE_INT const_offset_nunits
9335 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9336 vec<constructor_elt, va_gc> *ctor_elts;
9337 vec_alloc (ctor_elts, const_nunits);
9338 gimple_seq stmts = NULL;
9339 tree elt_type = TREE_TYPE (vectype);
9340 unsigned HOST_WIDE_INT elt_size
9341 = tree_to_uhwi (TYPE_SIZE (elt_type));
9342 /* We support offset vectors with more elements
9343 than the data vector for now. */
9344 unsigned HOST_WIDE_INT factor
9345 = const_offset_nunits / const_nunits;
9346 vec_offset = vec_offsets[(vec_num * j + i) / factor];
9347 unsigned elt_offset
9348 = ((vec_num * j + i) % factor) * const_nunits;
9349 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9350 tree scale = size_int (gs_info.scale);
9351 align = get_object_alignment (DR_REF (first_dr_info->dr));
9352 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
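	      /* A sketch of what the loop below emits for each element K
		 (names are illustrative only):

		   idx_K = BIT_FIELD_REF <vec_offset, idx_size, boff_K>;
		   off_K = (sizetype) idx_K * SCALE;
		   ptr_K = DATAREF_PTR + off_K;
		   elt_K = BIT_FIELD_REF <vec_oprnd, elt_size, K * elt_size>;
		   MEM[(ltype *) ptr_K] = elt_K;  */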
9353 for (unsigned k = 0; k < const_nunits; ++k)
9355 /* Compute the offsetted pointer. */
9356 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9357 bitsize_int (k + elt_offset));
9358 tree idx
9359 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
9360 vec_offset, TYPE_SIZE (idx_type), boff);
9361 idx = gimple_convert (&stmts, sizetype, idx);
9362 idx = gimple_build (&stmts, MULT_EXPR, sizetype,
9363 idx, scale);
9364 tree ptr
9365 = gimple_build (&stmts, PLUS_EXPR,
9366 TREE_TYPE (dataref_ptr),
9367 dataref_ptr, idx);
9368 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9369 /* Extract the element to be stored. */
9370 tree elt
9371 = gimple_build (&stmts, BIT_FIELD_REF,
9372 TREE_TYPE (vectype),
9373 vec_oprnd, TYPE_SIZE (elt_type),
9374 bitsize_int (k * elt_size));
9375 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9376 stmts = NULL;
9377 tree ref
9378 = build2 (MEM_REF, ltype, ptr,
9379 build_int_cst (ref_type, 0));
9380 new_stmt = gimple_build_assign (ref, elt);
9381 vect_finish_stmt_generation (vinfo, stmt_info,
9382 new_stmt, gsi);
9384 if (slp)
9385 slp_node->push_vec_def (new_stmt);
9388 if (!slp && !costing_p)
9389 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9392 if (!slp && !costing_p)
9393 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9395 if (costing_p && dump_enabled_p ())
9396 dump_printf_loc (MSG_NOTE, vect_location,
9397 "vect_model_store_cost: inside_cost = %d, "
9398 "prologue_cost = %d .\n",
9399 inside_cost, prologue_cost);
9401 return true;
9404 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9405 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9406 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9407 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9409 unsigned inside_cost = 0, prologue_cost = 0;
9410 /* For costing some adjacent vector stores, we'd like to cost with
9411      the total number of them once instead of costing each one by one. */
9412 unsigned int n_adjacent_stores = 0;
9413 auto_vec<tree> result_chain (group_size);
9414 auto_vec<tree, 1> vec_oprnds;
9415 for (j = 0; j < ncopies; j++)
9417 gimple *new_stmt;
9418 if (j == 0)
9420 if (slp && !costing_p)
9422 /* Get vectorized arguments for SLP_NODE. */
9423 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9424 &vec_oprnds, mask, &vec_masks);
9425 vec_oprnd = vec_oprnds[0];
9426 if (mask)
9427 vec_mask = vec_masks[0];
9429 else
9431 /* For interleaved stores we collect vectorized defs for all the
9432 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9433 input to vect_permute_store_chain().
9435 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9436 is of size 1. */
9437 stmt_vec_info next_stmt_info = first_stmt_info;
9438 for (i = 0; i < group_size; i++)
9440 /* Since gaps are not supported for interleaved stores,
9441 DR_GROUP_SIZE is the exact number of stmts in the chain.
9442 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9443 that there is no interleaving, DR_GROUP_SIZE is 1,
9444 and only one iteration of the loop will be executed. */
9445 op = vect_get_store_rhs (next_stmt_info);
9446 if (costing_p)
9447 update_prologue_cost (&prologue_cost, op);
9448 else
9450 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9451 ncopies, op,
9452 gvec_oprnds[i]);
9453 vec_oprnd = (*gvec_oprnds[i])[0];
9454 dr_chain.quick_push (vec_oprnd);
9456 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9458 if (mask && !costing_p)
9460 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9461 mask, &vec_masks,
9462 mask_vectype);
9463 vec_mask = vec_masks[0];
9467           /* We should have caught mismatched types earlier. */
9468 gcc_assert (costing_p
9469 || useless_type_conversion_p (vectype,
9470 TREE_TYPE (vec_oprnd)));
9471 bool simd_lane_access_p
9472 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9473 if (!costing_p
9474 && simd_lane_access_p
9475 && !loop_masks
9476 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9477 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9478 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9479 && integer_zerop (DR_INIT (first_dr_info->dr))
9480 && alias_sets_conflict_p (get_alias_set (aggr_type),
9481 get_alias_set (TREE_TYPE (ref_type))))
9483 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9484 dataref_offset = build_int_cst (ref_type, 0);
9486 else if (!costing_p)
9487 dataref_ptr
9488 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9489 simd_lane_access_p ? loop : NULL,
9490 offset, &dummy, gsi, &ptr_incr,
9491 simd_lane_access_p, bump);
9493 else if (!costing_p)
9495 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9496 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9497 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9498 of size 1. */
9499 for (i = 0; i < group_size; i++)
9501 vec_oprnd = (*gvec_oprnds[i])[j];
9502 dr_chain[i] = vec_oprnd;
9504 if (mask)
9505 vec_mask = vec_masks[j];
9506 if (dataref_offset)
9507 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9508 else
9509 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9510 stmt_info, bump);
9513 new_stmt = NULL;
9514 if (grouped_store)
9516 /* Permute. */
9517 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9518 if (costing_p)
9520 int group_size = DR_GROUP_SIZE (first_stmt_info);
9521 int nstmts = ceil_log2 (group_size) * group_size;
9522 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9523 stmt_info, 0, vect_body);
9524 if (dump_enabled_p ())
9525 dump_printf_loc (MSG_NOTE, vect_location,
9526 "vect_model_store_cost: "
9527 "strided group_size = %d .\n",
9528 group_size);
9530 else
9531 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9532 gsi, &result_chain);
9535 stmt_vec_info next_stmt_info = first_stmt_info;
9536 for (i = 0; i < vec_num; i++)
9538 if (!costing_p)
9540 if (slp)
9541 vec_oprnd = vec_oprnds[i];
9542 else if (grouped_store)
9543 /* For grouped stores vectorized defs are interleaved in
9544 vect_permute_store_chain(). */
9545 vec_oprnd = result_chain[i];
9548 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9550 if (costing_p)
9551 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9552 stmt_info, 0, vect_body);
9553 else
9555 tree perm_mask = perm_mask_for_reverse (vectype);
9556 tree perm_dest = vect_create_destination_var (
9557 vect_get_store_rhs (stmt_info), vectype);
9558 tree new_temp = make_ssa_name (perm_dest);
9560 /* Generate the permute statement. */
9561 gimple *perm_stmt
9562 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9563 vec_oprnd, perm_mask);
9564 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9565 gsi);
9567 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9568 vec_oprnd = new_temp;
9572 if (costing_p)
9574 n_adjacent_stores++;
9576 if (!slp)
9578 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9579 if (!next_stmt_info)
9580 break;
9583 continue;
9586 tree final_mask = NULL_TREE;
9587 tree final_len = NULL_TREE;
9588 tree bias = NULL_TREE;
9589 if (loop_masks)
9590 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9591 vec_num * ncopies, vectype,
9592 vec_num * j + i);
9593 if (slp && vec_mask)
9594 vec_mask = vec_masks[i];
9595 if (vec_mask)
9596 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9597 vec_mask, gsi);
9599 if (i > 0)
9600 /* Bump the vector pointer. */
9601 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9602 stmt_info, bump);
9604 unsigned misalign;
9605 unsigned HOST_WIDE_INT align;
9606 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9607 if (alignment_support_scheme == dr_aligned)
9608 misalign = 0;
9609 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9611 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9612 misalign = 0;
9614 else
9615 misalign = misalignment;
9616 if (dataref_offset == NULL_TREE
9617 && TREE_CODE (dataref_ptr) == SSA_NAME)
9618 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9619 misalign);
9620 align = least_bit_hwi (misalign | align);
9622       /* Compute the IFN when LOOP_LENS or FINAL_MASK is valid. */
9623 machine_mode vmode = TYPE_MODE (vectype);
9624 machine_mode new_vmode = vmode;
9625 internal_fn partial_ifn = IFN_LAST;
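	  /* Illustrative note (assuming a target whose only length-controlled
	     store works on byte vectors): get_len_load_store_mode then
	     returns a VnQImode, the length is counted in bytes rather than
	     lanes, so FACTOR becomes the element size for vect_get_loop_len
	     to scale with, and VEC_OPRND is VIEW_CONVERTed to the byte
	     vector type further below before emitting the IFN.  */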
9626 if (loop_lens)
9628 opt_machine_mode new_ovmode
9629 = get_len_load_store_mode (vmode, false, &partial_ifn);
9630 new_vmode = new_ovmode.require ();
9631 unsigned factor
9632 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9633 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9634 vec_num * ncopies, vectype,
9635 vec_num * j + i, factor);
9637 else if (final_mask)
9639 if (!can_vec_mask_load_store_p (
9640 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9641 &partial_ifn))
9642 gcc_unreachable ();
9645 if (partial_ifn == IFN_MASK_LEN_STORE)
9647 if (!final_len)
9649               /* Pass the VF value to the 'len' argument of
9650                  MASK_LEN_STORE if LOOP_LENS is invalid. */
9651 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9653 if (!final_mask)
9655               /* Pass an all-ones value to the 'mask' argument of
9656                  MASK_LEN_STORE if final_mask is invalid. */
9657 mask_vectype = truth_type_for (vectype);
9658 final_mask = build_minus_one_cst (mask_vectype);
9661 if (final_len)
9663 signed char biasval
9664 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9666 bias = build_int_cst (intQI_type_node, biasval);
9669 /* Arguments are ready. Create the new vector stmt. */
9670 if (final_len)
9672 gcall *call;
9673 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9674 /* Need conversion if it's wrapped with VnQI. */
9675 if (vmode != new_vmode)
9677 tree new_vtype
9678 = build_vector_type_for_mode (unsigned_intQI_type_node,
9679 new_vmode);
9680 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9681 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9682 gassign *new_stmt
9683 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9684 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9685 vec_oprnd = var;
9688 if (partial_ifn == IFN_MASK_LEN_STORE)
9689 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9690 dataref_ptr, ptr, final_mask,
9691 final_len, bias, vec_oprnd);
9692 else
9693 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9694 dataref_ptr, ptr, final_len,
9695 bias, vec_oprnd);
9696 gimple_call_set_nothrow (call, true);
9697 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9698 new_stmt = call;
9700 else if (final_mask)
9702 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9703 gcall *call
9704 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9705 ptr, final_mask, vec_oprnd);
9706 gimple_call_set_nothrow (call, true);
9707 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9708 new_stmt = call;
9710 else
9712 data_ref
9713 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9714 dataref_offset ? dataref_offset
9715 : build_int_cst (ref_type, 0));
9716 if (alignment_support_scheme == dr_aligned)
9718 else
9719 TREE_TYPE (data_ref)
9720 = build_aligned_type (TREE_TYPE (data_ref),
9721 align * BITS_PER_UNIT);
9722 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9723 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9724 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9727 if (slp)
9728 continue;
9730 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9731 if (!next_stmt_info)
9732 break;
9734 if (!slp && !costing_p)
9736 if (j == 0)
9737 *vec_stmt = new_stmt;
9738 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9742 if (costing_p)
9744 if (n_adjacent_stores > 0)
9745 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9746 alignment_support_scheme, misalignment,
9747 &inside_cost, cost_vec);
9749 /* When vectorizing a store into the function result assign
9750 a penalty if the function returns in a multi-register location.
9751          In this case we assume we'll end up having to spill the
9752 vector result and do piecewise loads as a conservative estimate. */
9753 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9754 if (base
9755 && (TREE_CODE (base) == RESULT_DECL
9756 || (DECL_P (base) && cfun_returns (base)))
9757 && !aggregate_value_p (base, cfun->decl))
9759 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9760 /* ??? Handle PARALLEL in some way. */
9761 if (REG_P (reg))
9763 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9764 /* Assume that a single reg-reg move is possible and cheap,
9765 do not account for vector to gp register move cost. */
9766 if (nregs > 1)
9768 /* Spill. */
9769 prologue_cost
9770 += record_stmt_cost (cost_vec, ncopies, vector_store,
9771 stmt_info, 0, vect_epilogue);
9772 /* Loads. */
9773 prologue_cost
9774 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9775 stmt_info, 0, vect_epilogue);
9779 if (dump_enabled_p ())
9780 dump_printf_loc (MSG_NOTE, vect_location,
9781 "vect_model_store_cost: inside_cost = %d, "
9782 "prologue_cost = %d .\n",
9783 inside_cost, prologue_cost);
9786 return true;
9789 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9790 VECTOR_CST mask. No checks are made that the target platform supports the
9791 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9792 vect_gen_perm_mask_checked. */
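/* For illustration, a hedged usage sketch mirroring how callers in this
   file build selectors (the concrete values are made up):

     int count = 4;
     vec_perm_builder sel (count, count, 1);
     sel.quick_grow (count);
     for (int i = 0; i < count; ++i)
       sel[i] = count - 1 - i;
     vec_perm_indices indices (sel, 1, count);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   yields a VECTOR_CST mask {3, 2, 1, 0} that reverses the lanes of a
   single four-lane input.  */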
9794 tree
9795 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9797 tree mask_type;
9799 poly_uint64 nunits = sel.length ();
9800 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9802 mask_type = build_vector_type (ssizetype, nunits);
9803 return vec_perm_indices_to_tree (mask_type, sel);
9806 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9807 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9809 tree
9810 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9812 machine_mode vmode = TYPE_MODE (vectype);
9813 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9814 return vect_gen_perm_mask_any (vectype, sel);
9817 /* Given vector variables X and Y that were generated for the scalar
9818 STMT_INFO, generate instructions to permute the vector elements of X and Y
9819 using permutation mask MASK_VEC, insert them at *GSI and return the
9820 permuted vector variable. */
9822 static tree
9823 permute_vec_elements (vec_info *vinfo,
9824 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9825 gimple_stmt_iterator *gsi)
9827 tree vectype = TREE_TYPE (x);
9828 tree perm_dest, data_ref;
9829 gimple *perm_stmt;
9831 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9832 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9833 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9834 else
9835 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9836 data_ref = make_ssa_name (perm_dest);
9838 /* Generate the permute statement. */
9839 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9840 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9842 return data_ref;
9845 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
9846    inserting them on the loop's preheader edge.  Returns true if we
9847    were successful in doing so (and thus STMT can then be moved),
9848    otherwise returns false.  HOIST_P indicates whether we want to hoist
9849    the definitions of all SSA uses; it is false when we are only costing. */
9851 static bool
9852 hoist_defs_of_uses (gimple *stmt, class loop *loop, bool hoist_p)
9854 ssa_op_iter i;
9855 use_operand_p use_p;
9856 auto_vec<use_operand_p, 8> to_hoist;
9858 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, i, SSA_OP_USE)
9860 gimple *def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
9861 if (!gimple_nop_p (def_stmt)
9862 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9864 /* Make sure we don't need to recurse. While we could do
9865            so in simple cases, for more complex use webs
9866 we don't have an easy way to preserve stmt order to fulfil
9867 dependencies within them. */
9868 tree op2;
9869 ssa_op_iter i2;
9870 if (gimple_code (def_stmt) == GIMPLE_PHI
9871 || (single_ssa_def_operand (def_stmt, SSA_OP_DEF)
9872 == NULL_DEF_OPERAND_P))
9873 return false;
9874 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9876 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9877 if (!gimple_nop_p (def_stmt2)
9878 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9879 return false;
9881 to_hoist.safe_push (use_p);
9885 if (to_hoist.is_empty ())
9886 return true;
9888 if (!hoist_p)
9889 return true;
9891 /* Instead of moving defs we copy them so we can zero their UID to not
9892 confuse dominance queries in the preheader. */
9893 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
9894 for (use_operand_p use_p : to_hoist)
9896 gimple *def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
9897 gimple *copy = gimple_copy (def_stmt);
9898 gimple_set_uid (copy, 0);
9899 def_operand_p def_p = single_ssa_def_operand (def_stmt, SSA_OP_DEF);
9900 tree new_def = duplicate_ssa_name (DEF_FROM_PTR (def_p), copy);
9901 update_stmt (copy);
9902 def_p = single_ssa_def_operand (copy, SSA_OP_DEF);
9903 SET_DEF (def_p, new_def);
9904 SET_USE (use_p, new_def);
9905 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
9908 return true;
9911 /* vectorizable_load.
9913    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9914 that can be vectorized.
9915 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9916 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9917 Return true if STMT_INFO is vectorizable in this way. */
9919 static bool
9920 vectorizable_load (vec_info *vinfo,
9921 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9922 gimple **vec_stmt, slp_tree slp_node,
9923 stmt_vector_for_cost *cost_vec)
9925 tree scalar_dest;
9926 tree vec_dest = NULL;
9927 tree data_ref = NULL;
9928 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9929 class loop *loop = NULL;
9930 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9931 bool nested_in_vect_loop = false;
9932 tree elem_type;
9933 /* Avoid false positive uninitialized warning, see PR110652. */
9934 tree new_temp = NULL_TREE;
9935 machine_mode mode;
9936 tree dummy;
9937 tree dataref_ptr = NULL_TREE;
9938 tree dataref_offset = NULL_TREE;
9939 gimple *ptr_incr = NULL;
9940 int ncopies;
9941 int i, j;
9942 unsigned int group_size;
9943 poly_uint64 group_gap_adj;
9944 tree msq = NULL_TREE, lsq;
9945 tree realignment_token = NULL_TREE;
9946 gphi *phi = NULL;
9947 vec<tree> dr_chain = vNULL;
9948 bool grouped_load = false;
9949 stmt_vec_info first_stmt_info;
9950 stmt_vec_info first_stmt_info_for_drptr = NULL;
9951 bool compute_in_loop = false;
9952 class loop *at_loop;
9953 int vec_num;
9954 bool slp = (slp_node != NULL);
9955 bool slp_perm = false;
9956 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9957 poly_uint64 vf;
9958 tree aggr_type;
9959 gather_scatter_info gs_info;
9960 tree ref_type;
9961 enum vect_def_type mask_dt = vect_unknown_def_type;
9963 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9964 return false;
9966 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9967 && ! vec_stmt)
9968 return false;
9970 if (!STMT_VINFO_DATA_REF (stmt_info))
9971 return false;
9973 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9974 int mask_index = -1;
9975 slp_tree slp_op = NULL;
9976 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9978 scalar_dest = gimple_assign_lhs (assign);
9979 if (TREE_CODE (scalar_dest) != SSA_NAME)
9980 return false;
9982 tree_code code = gimple_assign_rhs_code (assign);
9983 if (code != ARRAY_REF
9984 && code != BIT_FIELD_REF
9985 && code != INDIRECT_REF
9986 && code != COMPONENT_REF
9987 && code != IMAGPART_EXPR
9988 && code != REALPART_EXPR
9989 && code != MEM_REF
9990 && TREE_CODE_CLASS (code) != tcc_declaration)
9991 return false;
9993 else
9995 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9996 if (!call || !gimple_call_internal_p (call))
9997 return false;
9999 internal_fn ifn = gimple_call_internal_fn (call);
10000 if (!internal_load_fn_p (ifn))
10001 return false;
10003 scalar_dest = gimple_call_lhs (call);
10004 if (!scalar_dest)
10005 return false;
10007 mask_index = internal_fn_mask_index (ifn);
10008 if (mask_index >= 0 && slp_node)
10009 mask_index = vect_slp_child_index_for_operand
10010 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
10011 if (mask_index >= 0
10012 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
10013 &mask, &slp_op, &mask_dt, &mask_vectype))
10014 return false;
10017 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10018 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10020 if (loop_vinfo)
10022 loop = LOOP_VINFO_LOOP (loop_vinfo);
10023 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
10024 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
10026 else
10027 vf = 1;
10029 /* Multiple types in SLP are handled by creating the appropriate number of
10030 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
10031 case of SLP. */
10032 if (slp)
10033 ncopies = 1;
10034 else
10035 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10037 gcc_assert (ncopies >= 1);
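  /* For example (illustration only): with a vectorization factor of 8 and
     a four-lane vectype such as V4SI, NCOPIES is 8 / 4 = 2, i.e. two
     vector load stmts are generated for the scalar load in the non-SLP
     case.  */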
10039 /* FORNOW. This restriction should be relaxed. */
10040 if (nested_in_vect_loop
10041 && (ncopies > 1 || (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)))
10043 if (dump_enabled_p ())
10044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10045 "multiple types in nested loop.\n");
10046 return false;
10049 /* Invalidate assumptions made by dependence analysis when vectorization
10050 on the unrolled body effectively re-orders stmts. */
10051 if (ncopies > 1
10052 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
10053 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
10054 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
10056 if (dump_enabled_p ())
10057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10058 "cannot perform implicit CSE when unrolling "
10059 "with negative dependence distance\n");
10060 return false;
10063 elem_type = TREE_TYPE (vectype);
10064 mode = TYPE_MODE (vectype);
10066 /* FORNOW. In some cases can vectorize even if data-type not supported
10067 (e.g. - data copies). */
10068 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
10070 if (dump_enabled_p ())
10071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10072 "Aligned load, but unsupported type.\n");
10073 return false;
10076 /* Check if the load is a part of an interleaving chain. */
10077 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
10079 grouped_load = true;
10080 /* FORNOW */
10081 gcc_assert (!nested_in_vect_loop);
10082 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
10084 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10085 group_size = DR_GROUP_SIZE (first_stmt_info);
10087 /* Refuse non-SLP vectorization of SLP-only groups. */
10088 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
10090 if (dump_enabled_p ())
10091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10092 "cannot vectorize load in non-SLP mode.\n");
10093 return false;
10096 /* Invalidate assumptions made by dependence analysis when vectorization
10097 on the unrolled body effectively re-orders stmts. */
10098 if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
10099 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
10100 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
10102 if (dump_enabled_p ())
10103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10104 "cannot perform implicit CSE when performing "
10105 "group loads with negative dependence distance\n");
10106 return false;
10109 else
10110 group_size = 1;
10112 vect_memory_access_type memory_access_type;
10113 enum dr_alignment_support alignment_support_scheme;
10114 int misalignment;
10115 poly_int64 poffset;
10116 internal_fn lanes_ifn;
10117 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
10118 ncopies, &memory_access_type, &poffset,
10119 &alignment_support_scheme, &misalignment, &gs_info,
10120 &lanes_ifn))
10121 return false;
10123 /* ??? The following checks should really be part of
10124 get_group_load_store_type. */
10125 if (slp
10126 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
10127 && !((memory_access_type == VMAT_ELEMENTWISE
10128 || memory_access_type == VMAT_GATHER_SCATTER)
10129 && SLP_TREE_LANES (slp_node) == 1))
10131 slp_perm = true;
10133 if (!loop_vinfo)
10135 /* In BB vectorization we may not actually use a loaded vector
10136            that accesses elements in excess of DR_GROUP_SIZE. */
10137 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10138 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
10139 unsigned HOST_WIDE_INT nunits;
10140 unsigned j, k, maxk = 0;
10141 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
10142 if (k > maxk)
10143 maxk = k;
10144 tree vectype = SLP_TREE_VECTYPE (slp_node);
10145 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
10146 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
10148 if (dump_enabled_p ())
10149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10150 "BB vectorization with gaps at the end of "
10151 "a load is not supported\n");
10152 return false;
10156 auto_vec<tree> tem;
10157 unsigned n_perms;
10158 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
10159 true, &n_perms))
10161 if (dump_enabled_p ())
10162 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
10163 vect_location,
10164 "unsupported load permutation\n");
10165 return false;
10169 if (slp_node
10170 && slp_node->ldst_lanes
10171 && memory_access_type != VMAT_LOAD_STORE_LANES)
10173 if (dump_enabled_p ())
10174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10175 "discovered load-lane but cannot use it.\n");
10176 return false;
10179 if (mask)
10181 if (memory_access_type == VMAT_CONTIGUOUS)
10183 machine_mode vec_mode = TYPE_MODE (vectype);
10184 if (!VECTOR_MODE_P (vec_mode)
10185 || !can_vec_mask_load_store_p (vec_mode,
10186 TYPE_MODE (mask_vectype), true))
10187 return false;
10189 else if (memory_access_type != VMAT_LOAD_STORE_LANES
10190 && memory_access_type != VMAT_GATHER_SCATTER)
10192 if (dump_enabled_p ())
10193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10194 "unsupported access type for masked load.\n");
10195 return false;
10197 else if (memory_access_type == VMAT_GATHER_SCATTER
10198 && gs_info.ifn == IFN_LAST
10199 && !gs_info.decl)
10201 if (dump_enabled_p ())
10202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10203 "unsupported masked emulated gather.\n");
10204 return false;
10206 else if (memory_access_type == VMAT_ELEMENTWISE
10207 || memory_access_type == VMAT_STRIDED_SLP)
10209 if (dump_enabled_p ())
10210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10211 "unsupported masked strided access.\n");
10212 return false;
10216 bool costing_p = !vec_stmt;
10218 if (costing_p) /* transformation not required. */
10220 if (slp_node
10221 && mask
10222 && !vect_maybe_update_slp_op_vectype (slp_op,
10223 mask_vectype))
10225 if (dump_enabled_p ())
10226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10227 "incompatible vector types for invariants\n");
10228 return false;
10231 if (!slp)
10232 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10234 if (loop_vinfo
10235 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10236 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10237 VLS_LOAD, group_size,
10238 memory_access_type, &gs_info,
10239 mask);
10241 if (dump_enabled_p ()
10242 && memory_access_type != VMAT_ELEMENTWISE
10243 && memory_access_type != VMAT_GATHER_SCATTER
10244 && alignment_support_scheme != dr_aligned)
10245 dump_printf_loc (MSG_NOTE, vect_location,
10246 "Vectorizing an unaligned access.\n");
10248 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10249 vinfo->any_known_not_updated_vssa = true;
10251 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10254 if (!slp)
10255 gcc_assert (memory_access_type
10256 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10258 if (dump_enabled_p () && !costing_p)
10259 dump_printf_loc (MSG_NOTE, vect_location,
10260 "transform load. ncopies = %d\n", ncopies);
10262 /* Transform. */
10264 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10265 ensure_base_align (dr_info);
10267 if (memory_access_type == VMAT_INVARIANT)
10269 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10270 /* If we have versioned for aliasing or the loop doesn't
10271 have any data dependencies that would preclude this,
10272 then we are sure this is a loop invariant load and
10273 thus we can insert it on the preheader edge.
10274 TODO: hoist_defs_of_uses should ideally be computed
10275          once at analysis time, remembered and used at
10276 transform time. */
10277 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10278 && !nested_in_vect_loop
10279 && hoist_defs_of_uses (stmt_info->stmt, loop, false));
10280 if (costing_p)
10282 enum vect_cost_model_location cost_loc
10283 = hoist_p ? vect_prologue : vect_body;
10284 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10285 stmt_info, 0, cost_loc);
10286 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10287 cost_loc);
10288 unsigned int prologue_cost = hoist_p ? cost : 0;
10289 unsigned int inside_cost = hoist_p ? 0 : cost;
10290 if (dump_enabled_p ())
10291 dump_printf_loc (MSG_NOTE, vect_location,
10292 "vect_model_load_cost: inside_cost = %d, "
10293 "prologue_cost = %d .\n",
10294 inside_cost, prologue_cost);
10295 return true;
10297 if (hoist_p)
10299 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10300 if (dump_enabled_p ())
10301 dump_printf_loc (MSG_NOTE, vect_location,
10302 "hoisting out of the vectorized loop: %G",
10303 (gimple *) stmt);
10304 scalar_dest = copy_ssa_name (scalar_dest);
10305 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10306 edge pe = loop_preheader_edge (loop);
10307 gphi *vphi = get_virtual_phi (loop->header);
10308 tree vuse;
10309 if (vphi)
10310 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10311 else
10312 vuse = gimple_vuse (gsi_stmt (*gsi));
10313 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10314 gimple_set_vuse (new_stmt, vuse);
10315 gsi_insert_on_edge_immediate (pe, new_stmt);
10316 hoist_defs_of_uses (new_stmt, loop, true);
10318 /* These copies are all equivalent. */
10319 if (hoist_p)
10320 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10321 vectype, NULL);
10322 else
10324 gimple_stmt_iterator gsi2 = *gsi;
10325 gsi_next (&gsi2);
10326 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10327 vectype, &gsi2);
10329 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10330 if (slp)
10331 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10332 slp_node->push_vec_def (new_stmt);
10333 else
10335 for (j = 0; j < ncopies; ++j)
10336 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10337 *vec_stmt = new_stmt;
10339 return true;
10342 if (memory_access_type == VMAT_ELEMENTWISE
10343 || memory_access_type == VMAT_STRIDED_SLP)
10345 gimple_stmt_iterator incr_gsi;
10346 bool insert_after;
10347 tree offvar;
10348 tree ivstep;
10349 tree running_off;
10350 vec<constructor_elt, va_gc> *v = NULL;
10351 tree stride_base, stride_step, alias_off;
10352 /* Checked by get_load_store_type. */
10353 unsigned int const_nunits = nunits.to_constant ();
10354 unsigned HOST_WIDE_INT cst_offset = 0;
10355 tree dr_offset;
10356 unsigned int inside_cost = 0;
10358 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10359 gcc_assert (!nested_in_vect_loop);
10361 if (grouped_load)
10363 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10364 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10366 else
10368 first_stmt_info = stmt_info;
10369 first_dr_info = dr_info;
10372 if (slp && grouped_load
10373 && memory_access_type == VMAT_STRIDED_SLP)
10375 group_size = DR_GROUP_SIZE (first_stmt_info);
10376 ref_type = get_group_alias_ptr_type (first_stmt_info);
10378 else
10380 if (grouped_load)
10381 cst_offset
10382 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10383 * vect_get_place_in_interleaving_chain (stmt_info,
10384 first_stmt_info));
10385 group_size = 1;
10386 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10389 if (!costing_p)
10391 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10392 stride_base = fold_build_pointer_plus (
10393 DR_BASE_ADDRESS (first_dr_info->dr),
10394 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10395 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10396 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10398 /* For a load with loop-invariant (but other than power-of-2)
10399 stride (i.e. not a grouped access) like so:
10401 for (i = 0; i < n; i += stride)
10402 ... = array[i];
10404 we generate a new induction variable and new accesses to
10405 form a new vector (or vectors, depending on ncopies):
10407 for (j = 0; ; j += VF*stride)
10408 tmp1 = array[j];
10409 tmp2 = array[j + stride];
10411 vectemp = {tmp1, tmp2, ...}
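	 For instance (illustration only), with VF == 4 and scalar stride S
	 the new IV advances by 4*S per vector iteration, so one copy loads
	 array[j], array[j + S], array[j + 2*S] and array[j + 3*S] and
	 builds vectemp from those four scalars.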
10414 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10415 build_int_cst (TREE_TYPE (stride_step), vf));
10417 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10419 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10420 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10421 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10422 loop, &incr_gsi, insert_after,
10423 &offvar, NULL);
10425 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10428 running_off = offvar;
10429 alias_off = build_int_cst (ref_type, 0);
10430 int nloads = const_nunits;
10431 int lnel = 1;
10432 tree ltype = TREE_TYPE (vectype);
10433 tree lvectype = vectype;
10434 auto_vec<tree> dr_chain;
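      /* For VMAT_STRIDED_SLP the code below picks the largest piece that
	 divides both the group size and the number of vector lanes.  As a
	 worked example (illustrative numbers only): with an 8-lane vectype
	 and a group size of 6, n = gcd (6, 8) = 2, so each vector is
	 composed from nloads = 4 loads of two-element pieces (lnel = 2),
	 provided the target supports the required composition type.  */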
10435 if (memory_access_type == VMAT_STRIDED_SLP)
10437 HOST_WIDE_INT n = gcd (group_size, const_nunits);
10438 /* Use the target vector type if the group size is a multiple
10439 of it. */
10440 if (n == const_nunits)
10442 nloads = 1;
10443 lnel = const_nunits;
10444 ltype = vectype;
10446       /* Else use the biggest vector with which we can load the group without
10447 accessing excess elements. */
10448 else if (n > 1)
10450 tree ptype;
10451 tree vtype
10452 = vector_vector_composition_type (vectype, const_nunits / n,
10453 &ptype);
10454 if (vtype != NULL_TREE)
10456 nloads = const_nunits / n;
10457 lnel = n;
10458 lvectype = vtype;
10459 ltype = ptype;
10462 /* Else fall back to the default element-wise access. */
10463 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10465     /* Load vector(1) scalar_type directly if the vectype has just one element. */
10466 else if (nloads == 1)
10467 ltype = vectype;
10469 if (slp)
10471 /* For SLP permutation support we need to load the whole group,
10472 not only the number of vector stmts the permutation result
10473 fits in. */
10474 if (slp_perm)
10476 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10477 variable VF. */
10478 unsigned int const_vf = vf.to_constant ();
10479 ncopies = CEIL (group_size * const_vf, const_nunits);
10480 dr_chain.create (ncopies);
10482 else
10483 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10485 unsigned int group_el = 0;
10486 unsigned HOST_WIDE_INT
10487 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10488 unsigned int n_groups = 0;
10489 /* For costing some adjacent vector loads, we'd like to cost with
10490        the total number of them once instead of costing each one by one. */
10491 unsigned int n_adjacent_loads = 0;
10492 for (j = 0; j < ncopies; j++)
10494 if (nloads > 1 && !costing_p)
10495 vec_alloc (v, nloads);
10496 gimple *new_stmt = NULL;
10497 for (i = 0; i < nloads; i++)
10499 if (costing_p)
10501 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10502                  avoid an ICE, see PR110776. */
10503 if (VECTOR_TYPE_P (ltype)
10504 && memory_access_type != VMAT_ELEMENTWISE)
10505 n_adjacent_loads++;
10506 else
10507 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10508 stmt_info, 0, vect_body);
10509 continue;
10511 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10512 group_el * elsz + cst_offset);
10513 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10514 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10515 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10516 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10517 if (nloads > 1)
10518 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10519 gimple_assign_lhs (new_stmt));
10521 group_el += lnel;
10522 if (! slp
10523 || group_el == group_size)
10525 n_groups++;
10526 /* When doing SLP make sure to not load elements from
10527                  the next vector iteration; those will not be accessed,
10528 so just use the last element again. See PR107451. */
10529 if (!slp || known_lt (n_groups, vf))
10531 tree newoff = copy_ssa_name (running_off);
10532 gimple *incr
10533 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10534 running_off, stride_step);
10535 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10536 running_off = newoff;
10538 group_el = 0;
10542 if (nloads > 1)
10544 if (costing_p)
10545 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10546 stmt_info, 0, vect_body);
10547 else
10549 tree vec_inv = build_constructor (lvectype, v);
10550 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10551 lvectype, gsi);
10552 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10553 if (lvectype != vectype)
10555 new_stmt
10556 = gimple_build_assign (make_ssa_name (vectype),
10557 VIEW_CONVERT_EXPR,
10558 build1 (VIEW_CONVERT_EXPR,
10559 vectype, new_temp));
10560 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10561 gsi);
10566 if (!costing_p)
10568 if (slp)
10570 if (slp_perm)
10571 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10572 else
10573 slp_node->push_vec_def (new_stmt);
10575 else
10577 if (j == 0)
10578 *vec_stmt = new_stmt;
10579 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10583 if (slp_perm)
10585 unsigned n_perms;
10586 if (costing_p)
10588 unsigned n_loads;
10589 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10590 true, &n_perms, &n_loads);
10591 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10592 first_stmt_info, 0, vect_body);
10594 else
10595 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10596 false, &n_perms);
10599 if (costing_p)
10601 if (n_adjacent_loads > 0)
10602 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10603 alignment_support_scheme, misalignment, false,
10604 &inside_cost, nullptr, cost_vec, cost_vec,
10605 true);
10606 if (dump_enabled_p ())
10607 dump_printf_loc (MSG_NOTE, vect_location,
10608 "vect_model_load_cost: inside_cost = %u, "
10609 "prologue_cost = 0 .\n",
10610 inside_cost);
10613 return true;
10616 if (memory_access_type == VMAT_GATHER_SCATTER
10617 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10618 grouped_load = false;
10620 if (grouped_load
10621 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10623 if (grouped_load)
10625 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10626 group_size = DR_GROUP_SIZE (first_stmt_info);
10628 else
10630 first_stmt_info = stmt_info;
10631 group_size = 1;
10633 /* For SLP vectorization we directly vectorize a subchain
10634 without permutation. */
10635 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10636 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10637 /* For BB vectorization always use the first stmt to base
10638 the data ref pointer on. */
10639 if (bb_vinfo)
10640 first_stmt_info_for_drptr
10641 = vect_find_first_scalar_stmt_in_slp (slp_node);
10643 /* Check if the chain of loads is already vectorized. */
10644 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10645 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10646 ??? But we can only do so if there is exactly one
10647 as we have no way to get at the rest. Leave the CSE
10648 opportunity alone.
10649 ??? With the group load eventually participating
10650 in multiple different permutations (having multiple
10651 slp nodes which refer to the same group) the CSE
10652 is even wrong code. See PR56270. */
10653 && !slp)
10655 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10656 return true;
10658 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10659 group_gap_adj = 0;
10661 /* VEC_NUM is the number of vect stmts to be created for this group. */
10662 if (slp)
10664 grouped_load = false;
10665 /* If an SLP permutation is from N elements to N elements,
10666 and if one vector holds a whole number of N, we can load
10667 the inputs to the permutation in the same way as an
10668 unpermuted sequence. In other cases we need to load the
10669 whole group, not only the number of vector stmts the
10670 permutation result fits in. */
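 /* Illustrative example (assumed types): a 4-lane SLP node permuting a
    group of size 4 with V8SI vectors has nunits (8) a multiple of the
    group size, so the permutation inputs can be loaded like an
    unpermuted sequence; with a group of size 3 and V8SI they cannot,
    and the whole group is loaded below.  */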
10671 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10672 if (nested_in_vect_loop)
10673 /* We do not support grouped accesses in a nested loop,
10674 instead the access is contiguous but it might be
10675 permuted. No gap adjustment is needed though. */
10676 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10677 else if (slp_perm
10678 && (group_size != scalar_lanes
10679 || !multiple_p (nunits, group_size)))
10681 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10682 variable VF; see vect_transform_slp_perm_load. */
10683 unsigned int const_vf = vf.to_constant ();
10684 unsigned int const_nunits = nunits.to_constant ();
10685 vec_num = CEIL (group_size * const_vf, const_nunits);
10686 group_gap_adj = vf * group_size - nunits * vec_num;
10688 else
10690 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10691 group_gap_adj
10692 = group_size - scalar_lanes;
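 /* For example (illustrative numbers): a group of size 4 of which this
    SLP node uses only 2 lanes gives group_gap_adj == 2, i.e. two unused
    elements to skip after each fully loaded group.  */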
10695 else
10696 vec_num = group_size;
10698 ref_type = get_group_alias_ptr_type (first_stmt_info);
10700 else
10702 first_stmt_info = stmt_info;
10703 first_dr_info = dr_info;
10704 group_size = vec_num = 1;
10705 group_gap_adj = 0;
10706 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10707 if (slp)
10708 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10711 gcc_assert (alignment_support_scheme);
10712 vec_loop_masks *loop_masks
10713 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10714 ? &LOOP_VINFO_MASKS (loop_vinfo)
10715 : NULL);
10716 vec_loop_lens *loop_lens
10717 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10718 ? &LOOP_VINFO_LENS (loop_vinfo)
10719 : NULL);
10721 /* Both vect_analyze_stmt and vect_transform_stmt reach this point, but
10722 with one difference: we cannot have both the lens and the masks
10723 enabled during transform, while that is allowed during analysis.
10724 We shouldn't go with a length-based approach if fully masked. */
10725 if (cost_vec == NULL)
10726 /* The cost_vec is NULL during transform. */
10727 gcc_assert ((!loop_lens || !loop_masks));
10729 /* Targets with load-lanes instructions must not require explicit
10730 realignment. vect_supportable_dr_alignment always returns either
10731 dr_aligned or dr_unaligned_supported for masked operations. */
10732 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10733 && !mask
10734 && !loop_masks)
10735 || alignment_support_scheme == dr_aligned
10736 || alignment_support_scheme == dr_unaligned_supported);
10738 /* In case the vectorization factor (VF) is bigger than the number
10739 of elements that we can fit in a vectype (nunits), we have to generate
10740 more than one vector stmt - i.e - we need to "unroll" the
10741 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10742 from one copy of the vector stmt to the next, in the field
10743 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10744 stages to find the correct vector defs to be used when vectorizing
10745 stmts that use the defs of the current stmt. The example below
10746 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10747 need to create 4 vectorized stmts):
10749 before vectorization:
10750 RELATED_STMT VEC_STMT
10751 S1: x = memref - -
10752 S2: z = x + 1 - -
10754 step 1: vectorize stmt S1:
10755 We first create the vector stmt VS1_0, and, as usual, record a
10756 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10757 Next, we create the vector stmt VS1_1, and record a pointer to
10758 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10759 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10760 stmts and pointers:
10761 RELATED_STMT VEC_STMT
10762 VS1_0: vx0 = memref0 VS1_1 -
10763 VS1_1: vx1 = memref1 VS1_2 -
10764 VS1_2: vx2 = memref2 VS1_3 -
10765 VS1_3: vx3 = memref3 - -
10766 S1: x = load - VS1_0
10767 S2: z = x + 1 - -
10770 /* In case of interleaving (non-unit grouped access):
10772 S1: x2 = &base + 2
10773 S2: x0 = &base
10774 S3: x1 = &base + 1
10775 S4: x3 = &base + 3
10777 Vectorized loads are created in the order of memory accesses
10778 starting from the access of the first stmt of the chain:
10780 VS1: vx0 = &base
10781 VS2: vx1 = &base + vec_size*1
10782 VS3: vx2 = &base + vec_size*2
10783 VS4: vx3 = &base + vec_size*3
10785 Then permutation statements are generated:
10787 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10788 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10791 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10792 (the order of the data-refs in the output of vect_permute_load_chain
10793 corresponds to the order of scalar stmts in the interleaving chain - see
10794 the documentation of vect_permute_load_chain()).
10795 The generation of permutation stmts and recording them in
10796 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10798 In case of both multiple types and interleaving, the vector loads and
10799 permutation stmts above are created for every copy. The result vector
10800 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10801 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10803 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10804 on a target that supports unaligned accesses (dr_unaligned_supported)
10805 we generate the following code:
10806 p = initial_addr;
10807 indx = 0;
10808 loop {
10809 p = p + indx * vectype_size;
10810 vec_dest = *(p);
10811 indx = indx + 1;
10814 Otherwise, the data reference is potentially unaligned on a target that
10815 does not support unaligned accesses (dr_explicit_realign_optimized) -
10816 then generate the following code, in which the data in each iteration is
10817 obtained by two vector loads, one from the previous iteration, and one
10818 from the current iteration:
10819 p1 = initial_addr;
10820 msq_init = *(floor(p1))
10821 p2 = initial_addr + VS - 1;
10822 realignment_token = call target_builtin;
10823 indx = 0;
10824 loop {
10825 p2 = p2 + indx * vectype_size
10826 lsq = *(floor(p2))
10827 vec_dest = realign_load (msq, lsq, realignment_token)
10828 indx = indx + 1;
10829 msq = lsq;
10830 } */
10832 /* If the misalignment remains the same throughout the execution of the
10833 loop, we can create the init_addr and permutation mask at the loop
10834 preheader. Otherwise, it needs to be created inside the loop.
10835 This can only occur when vectorizing memory accesses in the inner-loop
10836 nested within an outer-loop that is being vectorized. */
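 /* A sketch of such a case (assumptions only): an inner-loop data
    reference whose DR_STEP is not a multiple of the vector size has a
    misalignment that changes from one outer-loop iteration to the next,
    so the realignment data cannot be hoisted to the preheader and must
    be computed inside the loop.  */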
10838 if (nested_in_vect_loop
10839 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10840 GET_MODE_SIZE (TYPE_MODE (vectype))))
10842 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10843 compute_in_loop = true;
10846 bool diff_first_stmt_info
10847 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10849 tree offset = NULL_TREE;
10850 if ((alignment_support_scheme == dr_explicit_realign_optimized
10851 || alignment_support_scheme == dr_explicit_realign)
10852 && !compute_in_loop)
10854 /* If we have different first_stmt_info, we can't set up realignment
10855 here, since we can't guarantee first_stmt_info DR has been
10856 initialized yet, use first_stmt_info_for_drptr DR by bumping the
10857 distance from first_stmt_info DR instead as below. */
10858 if (!costing_p)
10860 if (!diff_first_stmt_info)
10861 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10862 &realignment_token,
10863 alignment_support_scheme, NULL_TREE,
10864 &at_loop);
10865 if (alignment_support_scheme == dr_explicit_realign_optimized)
10867 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10868 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10869 size_one_node);
10870 gcc_assert (!first_stmt_info_for_drptr);
10874 else
10875 at_loop = loop;
10877 if (!known_eq (poffset, 0))
10878 offset = (offset
10879 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10880 : size_int (poffset));
10882 tree bump;
10883 tree vec_offset = NULL_TREE;
10884 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10886 aggr_type = NULL_TREE;
10887 bump = NULL_TREE;
10889 else if (memory_access_type == VMAT_GATHER_SCATTER)
10891 aggr_type = elem_type;
10892 if (!costing_p)
10893 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10894 &bump, &vec_offset, loop_lens);
10896 else
10898 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10899 aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
10900 else
10901 aggr_type = vectype;
10902 if (!costing_p)
10903 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10904 memory_access_type, loop_lens);
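 /* Illustrative example (assumed types): for IFN_LOAD_LANES with
    group_size 2 and V4SI vectors AGGR_TYPE is int[8], one array
    covering both vectors of the load-lanes result; otherwise AGGR_TYPE
    is simply the vector type itself.  */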
10907 auto_vec<tree> vec_offsets;
10908 auto_vec<tree> vec_masks;
10909 if (mask && !costing_p)
10911 if (slp_node)
10912 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10913 &vec_masks);
10914 else
10915 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10916 &vec_masks, mask_vectype);
10919 tree vec_mask = NULL_TREE;
10920 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10922 gcc_assert (alignment_support_scheme == dr_aligned
10923 || alignment_support_scheme == dr_unaligned_supported);
10925 unsigned int inside_cost = 0, prologue_cost = 0;
10926 /* When costing some adjacent vector loads, we'd like to cost them
10927 once with their total number instead of costing each one individually. */
10928 unsigned int n_adjacent_loads = 0;
10929 if (slp_node)
10930 ncopies = slp_node->vec_stmts_size / group_size;
10931 for (j = 0; j < ncopies; j++)
10933 if (costing_p)
10935 /* An IFN_LOAD_LANES will load all its vector results,
10936 regardless of which ones we actually need. Account
10937 for the cost of unused results. */
10938 if (first_stmt_info == stmt_info)
10940 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10941 stmt_vec_info next_stmt_info = first_stmt_info;
10944 gaps -= 1;
10945 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10947 while (next_stmt_info);
10948 if (gaps)
10950 if (dump_enabled_p ())
10951 dump_printf_loc (MSG_NOTE, vect_location,
10952 "vect_model_load_cost: %d "
10953 "unused vectors.\n",
10954 gaps);
10955 vect_get_load_cost (vinfo, stmt_info, gaps,
10956 alignment_support_scheme,
10957 misalignment, false, &inside_cost,
10958 &prologue_cost, cost_vec, cost_vec,
10959 true);
10962 n_adjacent_loads++;
10963 continue;
10966 /* 1. Create the vector or array pointer update chain. */
10967 if (j == 0)
10968 dataref_ptr
10969 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10970 at_loop, offset, &dummy, gsi,
10971 &ptr_incr, false, bump);
10972 else
10974 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10975 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10976 stmt_info, bump);
10978 if (mask)
10979 vec_mask = vec_masks[j];
10981 tree vec_array = create_vector_array (vectype, group_size);
10983 tree final_mask = NULL_TREE;
10984 tree final_len = NULL_TREE;
10985 tree bias = NULL_TREE;
10986 if (loop_masks)
10987 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10988 ncopies, vectype, j);
10989 if (vec_mask)
10990 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10991 vec_mask, gsi);
10993 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10995 if (loop_lens)
10996 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10997 ncopies, vectype, j, 1);
10998 else
10999 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11000 signed char biasval
11001 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11002 bias = build_int_cst (intQI_type_node, biasval);
11003 if (!final_mask)
11005 mask_vectype = truth_type_for (vectype);
11006 final_mask = build_minus_one_cst (mask_vectype);
11010 gcall *call;
11011 if (final_len && final_mask)
11013 /* Emit:
11014 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
11015 VEC_MASK, LEN, BIAS). */
11016 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
11017 tree alias_ptr = build_int_cst (ref_type, align);
11018 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
11019 dataref_ptr, alias_ptr,
11020 final_mask, final_len, bias);
11022 else if (final_mask)
11024 /* Emit:
11025 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
11026 VEC_MASK). */
11027 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
11028 tree alias_ptr = build_int_cst (ref_type, align);
11029 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
11030 dataref_ptr, alias_ptr,
11031 final_mask);
11033 else
11035 /* Emit:
11036 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
11037 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
11038 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
11040 gimple_call_set_lhs (call, vec_array);
11041 gimple_call_set_nothrow (call, true);
11042 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
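	  /* For example (hypothetical SSA names), with group_size 2 and V4SI
	     the emitted statement dumps roughly as
	       vec_array = .LOAD_LANES (MEM <int[8]> [(int *)dataref_ptr]);
	     or, in the masked length-controlled case,
	       vec_array = .MASK_LEN_LOAD_LANES (dataref_ptr_123, 32B, mask, len, bias);
	     after which the individual vectors are extracted below.  */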
11044 if (!slp)
11045 dr_chain.create (group_size);
11046 /* Extract each vector into an SSA_NAME. */
11047 for (unsigned i = 0; i < group_size; i++)
11049 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
11050 vec_array, i);
11051 if (slp)
11052 slp_node->push_vec_def (new_temp);
11053 else
11054 dr_chain.quick_push (new_temp);
11057 if (!slp)
11058 /* Record the mapping between SSA_NAMEs and statements. */
11059 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
11061 /* Record that VEC_ARRAY is now dead. */
11062 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
11064 if (!slp)
11065 dr_chain.release ();
11067 if (!slp_node)
11068 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11071 if (costing_p)
11073 if (n_adjacent_loads > 0)
11074 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
11075 alignment_support_scheme, misalignment, false,
11076 &inside_cost, &prologue_cost, cost_vec,
11077 cost_vec, true);
11078 if (dump_enabled_p ())
11079 dump_printf_loc (MSG_NOTE, vect_location,
11080 "vect_model_load_cost: inside_cost = %u, "
11081 "prologue_cost = %u .\n",
11082 inside_cost, prologue_cost);
11085 return true;
11088 if (memory_access_type == VMAT_GATHER_SCATTER)
11090 gcc_assert (alignment_support_scheme == dr_aligned
11091 || alignment_support_scheme == dr_unaligned_supported);
11092 gcc_assert (!grouped_load && !slp_perm);
11094 unsigned int inside_cost = 0, prologue_cost = 0;
11095 for (j = 0; j < ncopies; j++)
11097 /* 1. Create the vector or array pointer update chain. */
11098 if (j == 0 && !costing_p)
11100 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11101 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
11102 slp_node, &gs_info, &dataref_ptr,
11103 &vec_offsets);
11104 else
11105 dataref_ptr
11106 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11107 at_loop, offset, &dummy, gsi,
11108 &ptr_incr, false, bump);
11110 else if (!costing_p)
11112 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11113 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11114 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11115 gsi, stmt_info, bump);
11118 gimple *new_stmt = NULL;
11119 for (i = 0; i < vec_num; i++)
11121 tree final_mask = NULL_TREE;
11122 tree final_len = NULL_TREE;
11123 tree bias = NULL_TREE;
11124 if (!costing_p)
11126 if (mask)
11127 vec_mask = vec_masks[vec_num * j + i];
11128 if (loop_masks)
11129 final_mask
11130 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11131 vec_num * ncopies, vectype,
11132 vec_num * j + i);
11133 if (vec_mask)
11134 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11135 final_mask, vec_mask, gsi);
11137 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11138 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11139 gsi, stmt_info, bump);
11142 /* 2. Create the vector-load in the loop. */
11143 unsigned HOST_WIDE_INT align;
11144 if (gs_info.ifn != IFN_LAST)
11146 if (costing_p)
11148 unsigned int cnunits = vect_nunits_for_cost (vectype);
11149 inside_cost
11150 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11151 stmt_info, 0, vect_body);
11152 continue;
11154 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11155 vec_offset = vec_offsets[vec_num * j + i];
11156 tree zero = build_zero_cst (vectype);
11157 tree scale = size_int (gs_info.scale);
11159 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
11161 if (loop_lens)
11162 final_len
11163 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11164 vec_num * ncopies, vectype,
11165 vec_num * j + i, 1);
11166 else
11167 final_len
11168 = build_int_cst (sizetype,
11169 TYPE_VECTOR_SUBPARTS (vectype));
11170 signed char biasval
11171 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11172 bias = build_int_cst (intQI_type_node, biasval);
11173 if (!final_mask)
11175 mask_vectype = truth_type_for (vectype);
11176 final_mask = build_minus_one_cst (mask_vectype);
11180 gcall *call;
11181 if (final_len && final_mask)
11182 call
11183 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
11184 dataref_ptr, vec_offset,
11185 scale, zero, final_mask,
11186 final_len, bias);
11187 else if (final_mask)
11188 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
11189 dataref_ptr, vec_offset,
11190 scale, zero, final_mask);
11191 else
11192 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
11193 dataref_ptr, vec_offset,
11194 scale, zero);
11195 gimple_call_set_nothrow (call, true);
11196 new_stmt = call;
11197 data_ref = NULL_TREE;
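	      /* E.g. (hypothetical names) a masked V4SI gather with scale 4
		 dumps roughly as
		   vect__1 = .MASK_GATHER_LOAD (dataref_ptr, vec_offset, 4, { 0, ... }, mask);
		 where the zero vector provides the value of inactive lanes.  */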
11199 else if (gs_info.decl)
11201 /* The builtin decls path for gather is legacy, x86 only. */
11202 gcc_assert (!final_len && nunits.is_constant ());
11203 if (costing_p)
11205 unsigned int cnunits = vect_nunits_for_cost (vectype);
11206 inside_cost
11207 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11208 stmt_info, 0, vect_body);
11209 continue;
11211 poly_uint64 offset_nunits
11212 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
11213 if (known_eq (nunits, offset_nunits))
11215 new_stmt = vect_build_one_gather_load_call
11216 (vinfo, stmt_info, gsi, &gs_info,
11217 dataref_ptr, vec_offsets[vec_num * j + i],
11218 final_mask);
11219 data_ref = NULL_TREE;
11221 else if (known_eq (nunits, offset_nunits * 2))
11223 /* We have an offset vector with half the number of
11224 lanes but the builtins will produce full vectype
11225 data with just the lower lanes filled. */
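		  /* Illustration (assumed types): for V8SF data with a V4DI
		     offset vector, two builtin calls are emitted, each filling
		     only the low half of a V8SF result; the two halves are then
		     combined with the VEC_PERM_EXPR built below.  */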
11226 new_stmt = vect_build_one_gather_load_call
11227 (vinfo, stmt_info, gsi, &gs_info,
11228 dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
11229 final_mask);
11230 tree low = make_ssa_name (vectype);
11231 gimple_set_lhs (new_stmt, low);
11232 vect_finish_stmt_generation (vinfo, stmt_info,
11233 new_stmt, gsi);
11235 /* Now put the upper half of final_mask into its lower half. */
11236 if (final_mask
11237 && !SCALAR_INT_MODE_P
11238 (TYPE_MODE (TREE_TYPE (final_mask))))
11240 int count = nunits.to_constant ();
11241 vec_perm_builder sel (count, count, 1);
11242 sel.quick_grow (count);
11243 for (int i = 0; i < count; ++i)
11244 sel[i] = i | (count / 2);
11245 vec_perm_indices indices (sel, 2, count);
11246 tree perm_mask = vect_gen_perm_mask_checked
11247 (TREE_TYPE (final_mask), indices);
11248 new_stmt = gimple_build_assign (NULL_TREE,
11249 VEC_PERM_EXPR,
11250 final_mask,
11251 final_mask,
11252 perm_mask);
11253 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11254 gimple_set_lhs (new_stmt, final_mask);
11255 vect_finish_stmt_generation (vinfo, stmt_info,
11256 new_stmt, gsi);
11258 else if (final_mask)
11260 new_stmt = gimple_build_assign (NULL_TREE,
11261 VEC_UNPACK_HI_EXPR,
11262 final_mask);
11263 final_mask = make_ssa_name
11264 (truth_type_for (gs_info.offset_vectype));
11265 gimple_set_lhs (new_stmt, final_mask);
11266 vect_finish_stmt_generation (vinfo, stmt_info,
11267 new_stmt, gsi);
11270 new_stmt = vect_build_one_gather_load_call
11271 (vinfo, stmt_info, gsi, &gs_info,
11272 dataref_ptr,
11273 vec_offsets[2 * vec_num * j + 2 * i + 1],
11274 final_mask);
11275 tree high = make_ssa_name (vectype);
11276 gimple_set_lhs (new_stmt, high);
11277 vect_finish_stmt_generation (vinfo, stmt_info,
11278 new_stmt, gsi);
11280 /* Compose low + high. */
11281 int count = nunits.to_constant ();
11282 vec_perm_builder sel (count, count, 1);
11283 sel.quick_grow (count);
11284 for (int i = 0; i < count; ++i)
11285 sel[i] = i < count / 2 ? i : i + count / 2;
11286 vec_perm_indices indices (sel, 2, count);
11287 tree perm_mask
11288 = vect_gen_perm_mask_checked (vectype, indices);
11289 new_stmt = gimple_build_assign (NULL_TREE,
11290 VEC_PERM_EXPR,
11291 low, high, perm_mask);
11292 data_ref = NULL_TREE;
11294 else if (known_eq (nunits * 2, offset_nunits))
11296 /* We have an offset vector with double the number of
11297 lanes. Select the low/high part accordingly. */
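		  /* Illustration (assumed types): V4DF data with a V8SI offset
		     vector; even vector copies use the low half of the offset
		     vector while odd copies first select its high half with the
		     VEC_PERM_EXPR generated below.  */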
11298 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11299 if ((vec_num * j + i) & 1)
11301 int count = offset_nunits.to_constant ();
11302 vec_perm_builder sel (count, count, 1);
11303 sel.quick_grow (count);
11304 for (int i = 0; i < count; ++i)
11305 sel[i] = i | (count / 2);
11306 vec_perm_indices indices (sel, 2, count);
11307 tree perm_mask = vect_gen_perm_mask_checked
11308 (TREE_TYPE (vec_offset), indices);
11309 new_stmt = gimple_build_assign (NULL_TREE,
11310 VEC_PERM_EXPR,
11311 vec_offset,
11312 vec_offset,
11313 perm_mask);
11314 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11315 gimple_set_lhs (new_stmt, vec_offset);
11316 vect_finish_stmt_generation (vinfo, stmt_info,
11317 new_stmt, gsi);
11319 new_stmt = vect_build_one_gather_load_call
11320 (vinfo, stmt_info, gsi, &gs_info,
11321 dataref_ptr, vec_offset, final_mask);
11322 data_ref = NULL_TREE;
11324 else
11325 gcc_unreachable ();
11327 else
11329 /* Emulated gather-scatter. */
11330 gcc_assert (!final_mask);
11331 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11332 if (costing_p)
11334 /* For emulated gathers, cost N offset vector element extracts
11335 (the scalar scaling and pointer-plus-offset add are assumed to be consumed by the load). */
11336 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11337 vec_to_scalar, stmt_info,
11338 0, vect_body);
11339 /* N scalar loads plus gathering them into a
11340 vector. */
11341 inside_cost
11342 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11343 stmt_info, 0, vect_body);
11344 inside_cost
11345 = record_stmt_cost (cost_vec, 1, vec_construct,
11346 stmt_info, 0, vect_body);
11347 continue;
11349 unsigned HOST_WIDE_INT const_offset_nunits
11350 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11351 .to_constant ();
11352 vec<constructor_elt, va_gc> *ctor_elts;
11353 vec_alloc (ctor_elts, const_nunits);
11354 gimple_seq stmts = NULL;
11355 /* We support offset vectors with more elements
11356 than the data vector for now. */
11357 unsigned HOST_WIDE_INT factor
11358 = const_offset_nunits / const_nunits;
11359 vec_offset = vec_offsets[(vec_num * j + i) / factor];
11360 unsigned elt_offset
11361 = ((vec_num * j + i) % factor) * const_nunits;
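		  /* For example (illustrative numbers): a V8SI offset vector
		     used with V4SI data gives factor == 2, so the first data
		     vector extracts offset lanes 0..3 (elt_offset 0) and the
		     second lanes 4..7 (elt_offset 4) from the same vec_offset.  */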
11362 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11363 tree scale = size_int (gs_info.scale);
11364 align = get_object_alignment (DR_REF (first_dr_info->dr));
11365 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11366 for (unsigned k = 0; k < const_nunits; ++k)
11368 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11369 bitsize_int (k + elt_offset));
11370 tree idx
11371 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11372 vec_offset, TYPE_SIZE (idx_type), boff);
11373 idx = gimple_convert (&stmts, sizetype, idx);
11374 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11375 scale);
11376 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11377 TREE_TYPE (dataref_ptr),
11378 dataref_ptr, idx);
11379 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11380 tree elt = make_ssa_name (TREE_TYPE (vectype));
11381 tree ref = build2 (MEM_REF, ltype, ptr,
11382 build_int_cst (ref_type, 0));
11383 new_stmt = gimple_build_assign (elt, ref);
11384 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11385 gimple_seq_add_stmt (&stmts, new_stmt);
11386 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11388 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11389 new_stmt = gimple_build_assign (
11390 NULL_TREE, build_constructor (vectype, ctor_elts));
11391 data_ref = NULL_TREE;
11394 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11395 /* DATA_REF is null if we've already built the statement. */
11396 if (data_ref)
11398 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11399 new_stmt = gimple_build_assign (vec_dest, data_ref);
11401 new_temp = make_ssa_name (vec_dest, new_stmt);
11402 gimple_set_lhs (new_stmt, new_temp);
11403 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11405 /* Store vector loads in the corresponding SLP_NODE. */
11406 if (slp)
11407 slp_node->push_vec_def (new_stmt);
11410 if (!slp && !costing_p)
11411 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11414 if (!slp && !costing_p)
11415 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11417 if (costing_p && dump_enabled_p ())
11418 dump_printf_loc (MSG_NOTE, vect_location,
11419 "vect_model_load_cost: inside_cost = %u, "
11420 "prologue_cost = %u .\n",
11421 inside_cost, prologue_cost);
11422 return true;
11425 poly_uint64 group_elt = 0;
11426 unsigned int inside_cost = 0, prologue_cost = 0;
11427 /* When costing some adjacent vector loads, we'd like to cost them
11428 once with their total number instead of costing each one individually. */
11429 unsigned int n_adjacent_loads = 0;
11430 for (j = 0; j < ncopies; j++)
11432 /* 1. Create the vector or array pointer update chain. */
11433 if (j == 0 && !costing_p)
11435 bool simd_lane_access_p
11436 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11437 if (simd_lane_access_p
11438 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11439 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11440 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11441 && integer_zerop (DR_INIT (first_dr_info->dr))
11442 && alias_sets_conflict_p (get_alias_set (aggr_type),
11443 get_alias_set (TREE_TYPE (ref_type)))
11444 && (alignment_support_scheme == dr_aligned
11445 || alignment_support_scheme == dr_unaligned_supported))
11447 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11448 dataref_offset = build_int_cst (ref_type, 0);
11450 else if (diff_first_stmt_info)
11452 dataref_ptr
11453 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11454 aggr_type, at_loop, offset, &dummy,
11455 gsi, &ptr_incr, simd_lane_access_p,
11456 bump);
11457 /* Adjust the pointer by the difference to first_stmt. */
11458 data_reference_p ptrdr
11459 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11460 tree diff
11461 = fold_convert (sizetype,
11462 size_binop (MINUS_EXPR,
11463 DR_INIT (first_dr_info->dr),
11464 DR_INIT (ptrdr)));
11465 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11466 stmt_info, diff);
11467 if (alignment_support_scheme == dr_explicit_realign)
11469 msq = vect_setup_realignment (vinfo,
11470 first_stmt_info_for_drptr, gsi,
11471 &realignment_token,
11472 alignment_support_scheme,
11473 dataref_ptr, &at_loop);
11474 gcc_assert (!compute_in_loop);
11477 else
11478 dataref_ptr
11479 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11480 at_loop,
11481 offset, &dummy, gsi, &ptr_incr,
11482 simd_lane_access_p, bump);
11484 else if (!costing_p)
11486 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11487 if (dataref_offset)
11488 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11489 bump);
11490 else
11491 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11492 stmt_info, bump);
11495 if (grouped_load || slp_perm)
11496 dr_chain.create (vec_num);
11498 gimple *new_stmt = NULL;
11499 for (i = 0; i < vec_num; i++)
11501 tree final_mask = NULL_TREE;
11502 tree final_len = NULL_TREE;
11503 tree bias = NULL_TREE;
11504 if (!costing_p)
11506 if (mask)
11507 vec_mask = vec_masks[vec_num * j + i];
11508 if (loop_masks)
11509 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11510 vec_num * ncopies, vectype,
11511 vec_num * j + i);
11512 if (vec_mask)
11513 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11514 final_mask, vec_mask, gsi);
11516 if (i > 0)
11517 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11518 gsi, stmt_info, bump);
11521 /* 2. Create the vector-load in the loop. */
11522 switch (alignment_support_scheme)
11524 case dr_aligned:
11525 case dr_unaligned_supported:
11527 if (costing_p)
11528 break;
11530 unsigned int misalign;
11531 unsigned HOST_WIDE_INT align;
11532 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11533 if (alignment_support_scheme == dr_aligned)
11534 misalign = 0;
11535 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11537 align
11538 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11539 misalign = 0;
11541 else
11542 misalign = misalignment;
11543 if (dataref_offset == NULL_TREE
11544 && TREE_CODE (dataref_ptr) == SSA_NAME)
11545 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11546 misalign);
11547 align = least_bit_hwi (misalign | align);
11549 /* Compute the IFN to use when LOOP_LENS or final_mask is valid. */
11550 machine_mode vmode = TYPE_MODE (vectype);
11551 machine_mode new_vmode = vmode;
11552 internal_fn partial_ifn = IFN_LAST;
11553 if (loop_lens)
11555 opt_machine_mode new_ovmode
11556 = get_len_load_store_mode (vmode, true, &partial_ifn);
11557 new_vmode = new_ovmode.require ();
11558 unsigned factor
11559 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11560 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11561 vec_num * ncopies, vectype,
11562 vec_num * j + i, factor);
11564 else if (final_mask)
11566 if (!can_vec_mask_load_store_p (
11567 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11568 &partial_ifn))
11569 gcc_unreachable ();
11572 if (partial_ifn == IFN_MASK_LEN_LOAD)
11574 if (!final_len)
11576 /* Pass the VF value to the 'len' argument of
11577 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11578 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11580 if (!final_mask)
11582 /* Pass an all-ones value to the 'mask' argument of
11583 MASK_LEN_LOAD if final_mask is invalid. */
11584 mask_vectype = truth_type_for (vectype);
11585 final_mask = build_minus_one_cst (mask_vectype);
11588 if (final_len)
11590 signed char biasval
11591 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11593 bias = build_int_cst (intQI_type_node, biasval);
11596 if (final_len)
11598 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11599 gcall *call;
11600 if (partial_ifn == IFN_MASK_LEN_LOAD)
11601 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11602 dataref_ptr, ptr,
11603 final_mask, final_len,
11604 bias);
11605 else
11606 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11607 dataref_ptr, ptr,
11608 final_len, bias);
11609 gimple_call_set_nothrow (call, true);
11610 new_stmt = call;
11611 data_ref = NULL_TREE;
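		  /* Illustrative dump form (hypothetical names):
		       vect__5 = .MASK_LEN_LOAD (dataref_ptr, 32B, mask, len, 0);
		     where 32B encodes the pointer alignment and the final
		     operand is the target's partial load/store bias.  */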
11613 /* Need conversion if it's wrapped with VnQI. */
11614 if (vmode != new_vmode)
11616 tree new_vtype = build_vector_type_for_mode (
11617 unsigned_intQI_type_node, new_vmode);
11618 tree var
11619 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11620 gimple_set_lhs (call, var);
11621 vect_finish_stmt_generation (vinfo, stmt_info, call,
11622 gsi);
11623 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11624 new_stmt = gimple_build_assign (vec_dest,
11625 VIEW_CONVERT_EXPR, op);
11628 else if (final_mask)
11630 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11631 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11632 dataref_ptr, ptr,
11633 final_mask);
11634 gimple_call_set_nothrow (call, true);
11635 new_stmt = call;
11636 data_ref = NULL_TREE;
11638 else
11640 tree ltype = vectype;
11641 tree new_vtype = NULL_TREE;
11642 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11643 unsigned int vect_align
11644 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11645 /* Try to use a single smaller load when we are about
11646 to load excess elements compared to the unrolled
11647 scalar loop. */
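		  /* Illustrative example (assumed numbers): if only 3 lanes of
		     the final V8SI correspond to elements inside the group,
		     8 is not a multiple of 3, so below we round 3 up to 4 and
		     load a V4SI piece instead, relying on alignment or gap
		     peeling for the one extra element.  */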
11648 if (known_gt ((vec_num * j + i + 1) * nunits,
11649 (group_size * vf - gap)))
11651 poly_uint64 remain = ((group_size * vf - gap)
11652 - (vec_num * j + i) * nunits);
11653 if (known_ge ((vec_num * j + i + 1) * nunits
11654 - (group_size * vf - gap), nunits))
11655 /* DR will be unused. */
11656 ltype = NULL_TREE;
11657 else if (known_ge (vect_align,
11658 tree_to_poly_uint64
11659 (TYPE_SIZE_UNIT (vectype))))
11660 /* Aligned access to excess elements is OK if
11661 at least one element is accessed in the
11662 scalar loop. */
11664 else if (known_gt (vect_align,
11665 ((nunits - remain)
11666 * vect_get_scalar_dr_size
11667 (first_dr_info))))
11668 /* Aligned access to the gap area when there's
11669 at least one element in it is OK. */
11671 else
11673 /* remain should now be > 0 and < nunits. */
11674 unsigned num;
11675 if (known_ne (remain, 0u)
11676 && constant_multiple_p (nunits, remain, &num))
11678 tree ptype;
11679 new_vtype
11680 = vector_vector_composition_type (vectype,
11681 num,
11682 &ptype);
11683 if (new_vtype)
11684 ltype = ptype;
11686 /* Else use multiple loads or a masked load? */
11687 /* For loop vectorization we now should have
11688 an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
11689 set. */
11690 if (loop_vinfo)
11691 gcc_assert (new_vtype
11692 || LOOP_VINFO_PEELING_FOR_GAPS
11693 (loop_vinfo));
11694 /* But still reduce the access size to the next
11695 required power-of-two so peeling a single
11696 scalar iteration is sufficient. */
11697 unsigned HOST_WIDE_INT cremain;
11698 if (remain.is_constant (&cremain))
11700 unsigned HOST_WIDE_INT cpart_size
11701 = 1 << ceil_log2 (cremain);
11702 if (known_gt (nunits, cpart_size)
11703 && constant_multiple_p (nunits, cpart_size,
11704 &num))
11706 tree ptype;
11707 new_vtype
11708 = vector_vector_composition_type (vectype,
11709 num,
11710 &ptype);
11711 if (new_vtype)
11712 ltype = ptype;
11717 tree offset
11718 = (dataref_offset ? dataref_offset
11719 : build_int_cst (ref_type, 0));
11720 if (!ltype)
11722 else if (ltype != vectype
11723 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11725 poly_uint64 gap_offset
11726 = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
11727 - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
11728 tree gapcst = build_int_cstu (ref_type, gap_offset);
11729 offset = size_binop (PLUS_EXPR, offset, gapcst);
11731 if (ltype)
11733 data_ref
11734 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11735 if (alignment_support_scheme == dr_aligned)
11737 else
11738 TREE_TYPE (data_ref)
11739 = build_aligned_type (TREE_TYPE (data_ref),
11740 align * BITS_PER_UNIT);
11742 if (!ltype)
11743 data_ref = build_constructor (vectype, NULL);
11744 else if (ltype != vectype)
11746 vect_copy_ref_info (data_ref,
11747 DR_REF (first_dr_info->dr));
11748 tree tem = make_ssa_name (ltype);
11749 new_stmt = gimple_build_assign (tem, data_ref);
11750 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11751 gsi);
11752 data_ref = NULL;
11753 vec<constructor_elt, va_gc> *v;
11754 /* We've computed 'num' above, either statically as two
11755 or via constant_multiple_p. */
11756 unsigned num
11757 = (exact_div (tree_to_poly_uint64
11758 (TYPE_SIZE_UNIT (vectype)),
11759 tree_to_poly_uint64
11760 (TYPE_SIZE_UNIT (ltype)))
11761 .to_constant ());
11762 vec_alloc (v, num);
11763 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11765 while (--num)
11766 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11767 build_zero_cst (ltype));
11768 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11770 else
11772 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11773 while (--num)
11774 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11775 build_zero_cst (ltype));
11777 gcc_assert (new_vtype != NULL_TREE);
11778 if (new_vtype == vectype)
11779 new_stmt = gimple_build_assign (
11780 vec_dest, build_constructor (vectype, v));
11781 else
11783 tree new_vname = make_ssa_name (new_vtype);
11784 new_stmt = gimple_build_assign (
11785 new_vname, build_constructor (new_vtype, v));
11786 vect_finish_stmt_generation (vinfo, stmt_info,
11787 new_stmt, gsi);
11788 new_stmt = gimple_build_assign (
11789 vec_dest,
11790 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11794 break;
11796 case dr_explicit_realign:
11798 if (costing_p)
11799 break;
11800 tree ptr, bump;
11802 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11804 if (compute_in_loop)
11805 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11806 &realignment_token,
11807 dr_explicit_realign,
11808 dataref_ptr, NULL);
11810 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11811 ptr = copy_ssa_name (dataref_ptr);
11812 else
11813 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11814 // For explicit realign the target alignment should be
11815 // known at compile time.
11816 unsigned HOST_WIDE_INT align
11817 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11818 new_stmt = gimple_build_assign (
11819 ptr, BIT_AND_EXPR, dataref_ptr,
11820 build_int_cst (TREE_TYPE (dataref_ptr),
11821 -(HOST_WIDE_INT) align));
11822 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11823 data_ref
11824 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11825 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11826 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11827 new_stmt = gimple_build_assign (vec_dest, data_ref);
11828 new_temp = make_ssa_name (vec_dest, new_stmt);
11829 gimple_assign_set_lhs (new_stmt, new_temp);
11830 gimple_move_vops (new_stmt, stmt_info->stmt);
11831 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11832 msq = new_temp;
11834 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11835 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11836 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11837 bump);
11838 new_stmt = gimple_build_assign (
11839 NULL_TREE, BIT_AND_EXPR, ptr,
11840 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11841 if (TREE_CODE (ptr) == SSA_NAME)
11842 ptr = copy_ssa_name (ptr, new_stmt);
11843 else
11844 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11845 gimple_assign_set_lhs (new_stmt, ptr);
11846 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11847 data_ref
11848 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11849 break;
11851 case dr_explicit_realign_optimized:
11853 if (costing_p)
11854 break;
11855 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11856 new_temp = copy_ssa_name (dataref_ptr);
11857 else
11858 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11859 // We should only be doing this if we know the target
11860 // alignment at compile time.
11861 unsigned HOST_WIDE_INT align
11862 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11863 new_stmt = gimple_build_assign (
11864 new_temp, BIT_AND_EXPR, dataref_ptr,
11865 build_int_cst (TREE_TYPE (dataref_ptr),
11866 -(HOST_WIDE_INT) align));
11867 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11868 data_ref = build2 (MEM_REF, vectype, new_temp,
11869 build_int_cst (ref_type, 0));
11870 break;
11872 default:
11873 gcc_unreachable ();
11876 /* One common place to cost the above vect load for different
11877 alignment support schemes. */
11878 if (costing_p)
11880 /* For a grouped VMAT_CONTIGUOUS_PERMUTE load we only
11881 need to take care of the first stmt, whose stmt_info
11882 is first_stmt_info; iterating vec_num times on it
11883 covers the cost of the remaining stmts, consistent
11884 with the transform. The prologue cost for realignment
11885 only needs to be counted once for the whole group. */
11886 bool first_stmt_info_p = first_stmt_info == stmt_info;
11887 bool add_realign_cost = first_stmt_info_p && i == 0;
11888 if (memory_access_type == VMAT_CONTIGUOUS
11889 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11890 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11891 && (!grouped_load || first_stmt_info_p)))
11893 /* Leave realign cases alone to keep them simple. */
11894 if (alignment_support_scheme == dr_explicit_realign_optimized
11895 || alignment_support_scheme == dr_explicit_realign)
11896 vect_get_load_cost (vinfo, stmt_info, 1,
11897 alignment_support_scheme, misalignment,
11898 add_realign_cost, &inside_cost,
11899 &prologue_cost, cost_vec, cost_vec,
11900 true);
11901 else
11902 n_adjacent_loads++;
11905 else
11907 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11908 /* DATA_REF is null if we've already built the statement. */
11909 if (data_ref)
11911 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11912 new_stmt = gimple_build_assign (vec_dest, data_ref);
11914 new_temp = make_ssa_name (vec_dest, new_stmt);
11915 gimple_set_lhs (new_stmt, new_temp);
11916 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11919 /* 3. Handle explicit realignment if necessary/supported.
11920 Create in loop:
11921 vec_dest = realign_load (msq, lsq, realignment_token) */
11922 if (!costing_p
11923 && (alignment_support_scheme == dr_explicit_realign_optimized
11924 || alignment_support_scheme == dr_explicit_realign))
11926 lsq = gimple_assign_lhs (new_stmt);
11927 if (!realignment_token)
11928 realignment_token = dataref_ptr;
11929 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11930 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11931 lsq, realignment_token);
11932 new_temp = make_ssa_name (vec_dest, new_stmt);
11933 gimple_assign_set_lhs (new_stmt, new_temp);
11934 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11936 if (alignment_support_scheme == dr_explicit_realign_optimized)
11938 gcc_assert (phi);
11939 if (i == vec_num - 1 && j == ncopies - 1)
11940 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11941 UNKNOWN_LOCATION);
11942 msq = lsq;
11946 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11948 if (costing_p)
11949 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11950 stmt_info, 0, vect_body);
11951 else
11953 tree perm_mask = perm_mask_for_reverse (vectype);
11954 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11955 perm_mask, stmt_info, gsi);
11956 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11960 /* Collect vector loads and later create their permutation in
11961 vect_transform_grouped_load (). */
11962 if (!costing_p && (grouped_load || slp_perm))
11963 dr_chain.quick_push (new_temp);
11965 /* Store vector loads in the corresponding SLP_NODE. */
11966 if (!costing_p && slp && !slp_perm)
11967 slp_node->push_vec_def (new_stmt);
11969 /* With an SLP permutation we load the gaps as well; without
11970 one we need to skip the gaps after we manage to fully load
11971 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11972 group_elt += nunits;
11973 if (!costing_p
11974 && maybe_ne (group_gap_adj, 0U)
11975 && !slp_perm
11976 && known_eq (group_elt, group_size - group_gap_adj))
11978 poly_wide_int bump_val
11979 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11980 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11981 == -1)
11982 bump_val = -bump_val;
11983 tree bump = wide_int_to_tree (sizetype, bump_val);
11984 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11985 stmt_info, bump);
11986 group_elt = 0;
11989 /* Bump the vector pointer to account for a gap or for excess
11990 elements loaded for a permuted SLP load. */
11991 if (!costing_p
11992 && maybe_ne (group_gap_adj, 0U)
11993 && slp_perm)
11995 poly_wide_int bump_val
11996 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11997 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11998 bump_val = -bump_val;
11999 tree bump = wide_int_to_tree (sizetype, bump_val);
12000 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
12001 stmt_info, bump);
12004 if (slp && !slp_perm)
12005 continue;
12007 if (slp_perm)
12009 unsigned n_perms;
12010 /* For SLP we know we've seen all possible uses of dr_chain so
12011 direct vect_transform_slp_perm_load to DCE the unused parts.
12012 ??? This is a hack to prevent compile-time issues as seen
12013 in PR101120 and friends. */
12014 if (costing_p)
12016 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
12017 true, &n_perms, nullptr);
12018 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
12019 stmt_info, 0, vect_body);
12021 else
12023 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
12024 gsi, vf, false, &n_perms,
12025 nullptr, true);
12026 gcc_assert (ok);
12029 else
12031 if (grouped_load)
12033 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
12034 /* We assume that the cost of a single load-lanes instruction
12035 is equivalent to the cost of DR_GROUP_SIZE separate loads.
12036 If a grouped access is instead being provided by a
12037 load-and-permute operation, include the cost of the
12038 permutes. */
12039 if (costing_p && first_stmt_info == stmt_info)
12041 /* Use even and odd extract operations, or shuffle
12042 operations, for each needed permute. */
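	      /* E.g. (illustrative) for DR_GROUP_SIZE 4 this costs
		 ceil_log2 (4) * 4 == 8 permute statements, matching the
		 even/odd extraction tree built by vect_permute_load_chain.  */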
12043 int group_size = DR_GROUP_SIZE (first_stmt_info);
12044 int nstmts = ceil_log2 (group_size) * group_size;
12045 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
12046 stmt_info, 0, vect_body);
12048 if (dump_enabled_p ())
12049 dump_printf_loc (MSG_NOTE, vect_location,
12050 "vect_model_load_cost:"
12051 "strided group_size = %d .\n",
12052 group_size);
12054 else if (!costing_p)
12056 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
12057 group_size, gsi);
12058 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12061 else if (!costing_p)
12062 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12064 dr_chain.release ();
12066 if (!slp && !costing_p)
12067 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12069 if (costing_p)
12071 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
12072 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
12073 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
12074 if (n_adjacent_loads > 0)
12075 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
12076 alignment_support_scheme, misalignment, false,
12077 &inside_cost, &prologue_cost, cost_vec, cost_vec,
12078 true);
12079 if (dump_enabled_p ())
12080 dump_printf_loc (MSG_NOTE, vect_location,
12081 "vect_model_load_cost: inside_cost = %u, "
12082 "prologue_cost = %u .\n",
12083 inside_cost, prologue_cost);
12086 return true;
12089 /* Function vect_is_simple_cond.
12091 Input:
12092 LOOP - the loop that is being vectorized.
12093 COND - Condition that is checked for simple use.
12095 Output:
12096 *COMP_VECTYPE - the vector type for the comparison.
12097 *DTS - The def types for the arguments of the comparison
12099 Returns whether a COND can be vectorized. Checks whether
12100 condition operands are supportable using vect_is_simple_use. */
12102 static bool
12103 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
12104 slp_tree slp_node, tree *comp_vectype,
12105 enum vect_def_type *dts, tree vectype)
12107 tree lhs, rhs;
12108 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12109 slp_tree slp_op;
12111 /* Mask case. */
12112 if (TREE_CODE (cond) == SSA_NAME
12113 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
12115 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
12116 &slp_op, &dts[0], comp_vectype)
12117 || !*comp_vectype
12118 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
12119 return false;
12120 return true;
12123 if (!COMPARISON_CLASS_P (cond))
12124 return false;
12126 lhs = TREE_OPERAND (cond, 0);
12127 rhs = TREE_OPERAND (cond, 1);
12129 if (TREE_CODE (lhs) == SSA_NAME)
12131 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
12132 &lhs, &slp_op, &dts[0], &vectype1))
12133 return false;
12135 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
12136 || TREE_CODE (lhs) == FIXED_CST)
12137 dts[0] = vect_constant_def;
12138 else
12139 return false;
12141 if (TREE_CODE (rhs) == SSA_NAME)
12143 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
12144 &rhs, &slp_op, &dts[1], &vectype2))
12145 return false;
12147 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
12148 || TREE_CODE (rhs) == FIXED_CST)
12149 dts[1] = vect_constant_def;
12150 else
12151 return false;
12153 if (vectype1 && vectype2
12154 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12155 TYPE_VECTOR_SUBPARTS (vectype2)))
12156 return false;
12158 *comp_vectype = vectype1 ? vectype1 : vectype2;
12159 /* Invariant comparison. */
12160 if (! *comp_vectype)
12162 tree scalar_type = TREE_TYPE (lhs);
12163 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12164 *comp_vectype = truth_type_for (vectype);
12165 else
12167 /* If we can widen the comparison to match vectype do so. */
12168 if (INTEGRAL_TYPE_P (scalar_type)
12169 && !slp_node
12170 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
12171 TYPE_SIZE (TREE_TYPE (vectype))))
12172 scalar_type = build_nonstandard_integer_type
12173 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
12174 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12175 slp_node);
12179 return true;
12182 /* vectorizable_condition.
12184 Check if STMT_INFO is a conditional modify expression that can be vectorized.
12185 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12186 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
12187 at GSI.
12189 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
12191 Return true if STMT_INFO is vectorizable in this way. */
12193 static bool
12194 vectorizable_condition (vec_info *vinfo,
12195 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12196 gimple **vec_stmt,
12197 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12199 tree scalar_dest = NULL_TREE;
12200 tree vec_dest = NULL_TREE;
12201 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
12202 tree then_clause, else_clause;
12203 tree comp_vectype = NULL_TREE;
12204 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
12205 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
12206 tree vec_compare;
12207 tree new_temp;
12208 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12209 enum vect_def_type dts[4]
12210 = {vect_unknown_def_type, vect_unknown_def_type,
12211 vect_unknown_def_type, vect_unknown_def_type};
12212 int ndts = 4;
12213 int ncopies;
12214 int vec_num;
12215 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12216 int i;
12217 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12218 vec<tree> vec_oprnds0 = vNULL;
12219 vec<tree> vec_oprnds1 = vNULL;
12220 vec<tree> vec_oprnds2 = vNULL;
12221 vec<tree> vec_oprnds3 = vNULL;
12222 tree vec_cmp_type;
12223 bool masked = false;
12225 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12226 return false;
12228 /* Is this a vectorizable conditional operation? */
12229 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12230 if (!stmt)
12231 return false;
12233 code = gimple_assign_rhs_code (stmt);
12234 if (code != COND_EXPR)
12235 return false;
12237 stmt_vec_info reduc_info = NULL;
12238 int reduc_index = -1;
12239 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
12240 bool for_reduction
12241 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
12242 if (for_reduction)
12244 if (slp_node && SLP_TREE_LANES (slp_node) > 1)
12245 return false;
12246 reduc_info = info_for_reduction (vinfo, stmt_info);
12247 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
12248 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
12249 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
12250 || reduc_index != -1);
12252 else
12254 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12255 return false;
12258 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12259 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12261 if (slp_node)
12263 ncopies = 1;
12264 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
12266 else
12268 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12269 vec_num = 1;
12272 gcc_assert (ncopies >= 1);
12273 if (for_reduction && ncopies > 1)
12274 return false; /* FORNOW */
12276 cond_expr = gimple_assign_rhs1 (stmt);
12278 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
12279 &comp_vectype, &dts[0], vectype)
12280 || !comp_vectype)
12281 return false;
12283 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12284 slp_tree then_slp_node, else_slp_node;
12285 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
12286 &then_clause, &then_slp_node, &dts[2], &vectype1))
12287 return false;
12288 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
12289 &else_clause, &else_slp_node, &dts[3], &vectype2))
12290 return false;
12292 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12293 return false;
12295 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12296 return false;
12298 masked = !COMPARISON_CLASS_P (cond_expr);
12299 vec_cmp_type = truth_type_for (comp_vectype);
12301 if (vec_cmp_type == NULL_TREE)
12302 return false;
12304 cond_code = TREE_CODE (cond_expr);
12305 if (!masked)
12307 cond_expr0 = TREE_OPERAND (cond_expr, 0);
12308 cond_expr1 = TREE_OPERAND (cond_expr, 1);
12311 /* For conditional reductions, the "then" value needs to be the candidate
12312 value calculated by this iteration while the "else" value needs to be
12313 the result carried over from previous iterations. If the COND_EXPR
12314 is the other way around, we need to swap it. */
12315 bool must_invert_cmp_result = false;
12316 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12318 if (masked)
12319 must_invert_cmp_result = true;
12320 else
12322 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12323 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12324 if (new_code == ERROR_MARK)
12325 must_invert_cmp_result = true;
12326 else
12328 cond_code = new_code;
12329 /* Make sure we don't accidentally use the old condition. */
12330 cond_expr = NULL_TREE;
12333 /* ??? The vectorized operand query below doesn't allow swapping
12334 this way for SLP. */
12335 if (slp_node)
12336 return false;
12337 std::swap (then_clause, else_clause);
12340 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12342 /* Boolean values may have another representation in vectors
12343 and therefore we prefer bit operations over comparison for
12344 them (which also works for scalar masks). We store opcodes
12345 to use in bitop1 and bitop2. Statement is vectorized as
12346 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12347 depending on bitop1 and bitop2 arity. */
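/* For instance (mask-semantics sketch): with mask operands a and b,
a > b is emitted as a & ~b, a >= b as a | ~b, and a != b as a ^ b,
matching the bitop1/bitop2 pairs selected below. */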
12348 switch (cond_code)
12350 case GT_EXPR:
12351 bitop1 = BIT_NOT_EXPR;
12352 bitop2 = BIT_AND_EXPR;
12353 break;
12354 case GE_EXPR:
12355 bitop1 = BIT_NOT_EXPR;
12356 bitop2 = BIT_IOR_EXPR;
12357 break;
12358 case LT_EXPR:
12359 bitop1 = BIT_NOT_EXPR;
12360 bitop2 = BIT_AND_EXPR;
12361 std::swap (cond_expr0, cond_expr1);
12362 break;
12363 case LE_EXPR:
12364 bitop1 = BIT_NOT_EXPR;
12365 bitop2 = BIT_IOR_EXPR;
12366 std::swap (cond_expr0, cond_expr1);
12367 break;
12368 case NE_EXPR:
12369 bitop1 = BIT_XOR_EXPR;
12370 break;
12371 case EQ_EXPR:
12372 bitop1 = BIT_XOR_EXPR;
12373 bitop2 = BIT_NOT_EXPR;
12374 break;
12375 default:
12376 return false;
12378 cond_code = SSA_NAME;
12381 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12382 && reduction_type == EXTRACT_LAST_REDUCTION
12383 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12385 if (dump_enabled_p ())
12386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12387 "reduction comparison operation not supported.\n");
12388 return false;
12391 if (!vec_stmt)
12393 if (bitop1 != NOP_EXPR)
12395 machine_mode mode = TYPE_MODE (comp_vectype);
12396 optab optab;
12398 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12399 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12400 return false;
12402 if (bitop2 != NOP_EXPR)
12404 optab = optab_for_tree_code (bitop2, comp_vectype,
12405 optab_default);
12406 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12407 return false;
12411 vect_cost_for_stmt kind = vector_stmt;
12412 if (reduction_type == EXTRACT_LAST_REDUCTION)
12413 /* Count one reduction-like operation per vector. */
12414 kind = vec_to_scalar;
12415 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12416 && (masked
12417 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12418 cond_code)
12419 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12420 ERROR_MARK))))
12421 return false;
12423 if (slp_node
12424 && (!vect_maybe_update_slp_op_vectype
12425 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12426 || (op_adjust == 1
12427 && !vect_maybe_update_slp_op_vectype
12428 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12429 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12430 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12432 if (dump_enabled_p ())
12433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12434 "incompatible vector types for invariants\n");
12435 return false;
12438 if (loop_vinfo && for_reduction
12439 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12441 if (reduction_type == EXTRACT_LAST_REDUCTION)
12443 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12444 vectype, OPTIMIZE_FOR_SPEED))
12445 vect_record_loop_len (loop_vinfo,
12446 &LOOP_VINFO_LENS (loop_vinfo),
12447 ncopies * vec_num, vectype, 1);
12448 else
12449 vect_record_loop_mask (loop_vinfo,
12450 &LOOP_VINFO_MASKS (loop_vinfo),
12451 ncopies * vec_num, vectype, NULL);
12453 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12454 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12456 if (dump_enabled_p ())
12457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12458 "conditional reduction prevents the use"
12459 " of partial vectors.\n");
12460 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12464 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12465 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12466 cost_vec, kind);
12467 return true;
12470 /* Transform. */
12472 /* Handle def. */
12473 scalar_dest = gimple_assign_lhs (stmt);
12474 if (reduction_type != EXTRACT_LAST_REDUCTION)
12475 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12477 bool swap_cond_operands = false;
12479 /* See whether another part of the vectorized code applies a loop
12480 mask to the condition, or to its inverse. */
12482 vec_loop_masks *masks = NULL;
12483 vec_loop_lens *lens = NULL;
12484 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12486 if (reduction_type == EXTRACT_LAST_REDUCTION)
12487 lens = &LOOP_VINFO_LENS (loop_vinfo);
12489 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12491 if (reduction_type == EXTRACT_LAST_REDUCTION)
12492 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12493 else
12495 scalar_cond_masked_key cond (cond_expr, ncopies);
12496 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12497 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12498 else
12500 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12501 tree_code orig_code = cond.code;
12502 cond.code = invert_tree_comparison (cond.code, honor_nans);
12503 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12505 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12506 cond_code = cond.code;
12507 swap_cond_operands = true;
12509 else
12511 /* Try the inverse of the current mask. We check if the
12512 inverse mask is live and if so we generate a negate of
12513 the current mask such that we still honor NaNs. */
12514 cond.inverted_p = true;
12515 cond.code = orig_code;
12516 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12518 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12519 cond_code = cond.code;
12520 swap_cond_operands = true;
12521 must_invert_cmp_result = true;
12528 /* Handle cond expr. */
12529 if (masked)
12530 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12531 cond_expr, comp_vectype, &vec_oprnds0,
12532 then_clause, vectype, &vec_oprnds2,
12533 reduction_type != EXTRACT_LAST_REDUCTION
12534 ? else_clause : NULL, vectype, &vec_oprnds3);
12535 else
12536 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12537 cond_expr0, comp_vectype, &vec_oprnds0,
12538 cond_expr1, comp_vectype, &vec_oprnds1,
12539 then_clause, vectype, &vec_oprnds2,
12540 reduction_type != EXTRACT_LAST_REDUCTION
12541 ? else_clause : NULL, vectype, &vec_oprnds3);
12543 if (reduction_type == EXTRACT_LAST_REDUCTION)
12544 vec_else_clause = else_clause;
12546 /* Arguments are ready. Create the new vector stmt. */
12547 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12549 vec_then_clause = vec_oprnds2[i];
12550 if (reduction_type != EXTRACT_LAST_REDUCTION)
12551 vec_else_clause = vec_oprnds3[i];
12553 if (swap_cond_operands)
12554 std::swap (vec_then_clause, vec_else_clause);
12556 if (masked)
12557 vec_compare = vec_cond_lhs;
12558 else
12560 vec_cond_rhs = vec_oprnds1[i];
12561 if (bitop1 == NOP_EXPR)
12563 gimple_seq stmts = NULL;
12564 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12565 vec_cond_lhs, vec_cond_rhs);
12566 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12568 else
12570 new_temp = make_ssa_name (vec_cmp_type);
12571 gassign *new_stmt;
12572 if (bitop1 == BIT_NOT_EXPR)
12573 new_stmt = gimple_build_assign (new_temp, bitop1,
12574 vec_cond_rhs);
12575 else
12576 new_stmt
12577 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12578 vec_cond_rhs);
12579 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12580 if (bitop2 == NOP_EXPR)
12581 vec_compare = new_temp;
12582 else if (bitop2 == BIT_NOT_EXPR
12583 && reduction_type != EXTRACT_LAST_REDUCTION)
12585 /* Instead of doing ~x ? y : z do x ? z : y. */
12586 vec_compare = new_temp;
12587 std::swap (vec_then_clause, vec_else_clause);
12589 else
12591 vec_compare = make_ssa_name (vec_cmp_type);
12592 if (bitop2 == BIT_NOT_EXPR)
12593 new_stmt
12594 = gimple_build_assign (vec_compare, bitop2, new_temp);
12595 else
12596 new_stmt
12597 = gimple_build_assign (vec_compare, bitop2,
12598 vec_cond_lhs, new_temp);
12599 vect_finish_stmt_generation (vinfo, stmt_info,
12600 new_stmt, gsi);
12605 /* If we decided to apply a loop mask to the result of the vector
12606 comparison, AND the comparison with the mask now. Later passes
12607 should then be able to reuse the AND results between multiple
12608 vector statements.
12610 For example:
12611 for (int i = 0; i < 100; ++i)
12612 x[i] = y[i] ? z[i] : 10;
12614 results in the following optimized GIMPLE:
12616 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12617 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12618 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12619 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12620 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12621 vect_iftmp.11_47, { 10, ... }>;
12623 instead of using masked and unmasked forms of
12624 vec != { 0, ... } (masked in the MASK_LOAD,
12625 unmasked in the VEC_COND_EXPR). */
12627 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12628 in cases where that's necessary. */
12630 tree len = NULL_TREE, bias = NULL_TREE;
12631 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12633 if (!is_gimple_val (vec_compare))
12635 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12636 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12637 vec_compare);
12638 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12639 vec_compare = vec_compare_name;
12642 if (must_invert_cmp_result)
12644 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12645 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12646 BIT_NOT_EXPR,
12647 vec_compare);
12648 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12649 vec_compare = vec_compare_name;
12652 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12653 vectype, OPTIMIZE_FOR_SPEED))
12655 if (lens)
12657 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12658 vec_num * ncopies, vectype, i, 1);
12659 signed char biasval
12660 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12661 bias = build_int_cst (intQI_type_node, biasval);
12663 else
12665 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12666 bias = build_int_cst (intQI_type_node, 0);
12669 if (masks)
12671 tree loop_mask
12672 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12673 vectype, i);
12674 tree tmp2 = make_ssa_name (vec_cmp_type);
12675 gassign *g
12676 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12677 loop_mask);
12678 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12679 vec_compare = tmp2;
12683 gimple *new_stmt;
12684 if (reduction_type == EXTRACT_LAST_REDUCTION)
12686 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12687 tree lhs = gimple_get_lhs (old_stmt);
12688 if ((unsigned)i != vec_oprnds0.length () - 1)
12689 lhs = copy_ssa_name (lhs);
12690 if (len)
12691 new_stmt = gimple_build_call_internal
12692 (IFN_LEN_FOLD_EXTRACT_LAST, 5, vec_else_clause, vec_compare,
12693 vec_then_clause, len, bias);
12694 else
12695 new_stmt = gimple_build_call_internal
12696 (IFN_FOLD_EXTRACT_LAST, 3, vec_else_clause, vec_compare,
12697 vec_then_clause);
12698 gimple_call_set_lhs (new_stmt, lhs);
12699 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12700 if ((unsigned)i != vec_oprnds0.length () - 1)
12702 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12703 vec_else_clause = lhs;
12705 else if (old_stmt == gsi_stmt (*gsi))
12706 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12707 else
12709 /* In this case we're moving the definition to later in the
12710 block. That doesn't matter because the only uses of the
12711 lhs are in phi statements. */
12712 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12713 gsi_remove (&old_gsi, true);
12714 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12717 else
12719 new_temp = make_ssa_name (vec_dest);
12720 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12721 vec_then_clause, vec_else_clause);
12722 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12724 if (slp_node)
12725 slp_node->push_vec_def (new_stmt);
12726 else
12727 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12730 if (!slp_node)
12731 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12733 vec_oprnds0.release ();
12734 vec_oprnds1.release ();
12735 vec_oprnds2.release ();
12736 vec_oprnds3.release ();
12738 return true;
12741 /* Helper of vectorizable_comparison.
12743 Check if STMT_INFO is a comparison expression with code CODE that can be vectorized.
12744 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12745 comparison, put it in VEC_STMT, and insert it at GSI.
12747 Return true if STMT_INFO is vectorizable in this way. */
12749 static bool
12750 vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12751 stmt_vec_info stmt_info, tree_code code,
12752 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12753 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12755 tree lhs, rhs1, rhs2;
12756 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12757 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12758 tree new_temp;
12759 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12760 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12761 int ndts = 2;
12762 poly_uint64 nunits;
12763 int ncopies;
12764 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12765 int i;
12766 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12767 vec<tree> vec_oprnds0 = vNULL;
12768 vec<tree> vec_oprnds1 = vNULL;
12769 tree mask_type;
12770 tree mask = NULL_TREE;
12772 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12773 return false;
12775 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12776 return false;
12778 mask_type = vectype;
12779 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12781 if (slp_node)
12782 ncopies = 1;
12783 else
12784 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12786 gcc_assert (ncopies >= 1);
12788 if (TREE_CODE_CLASS (code) != tcc_comparison)
12789 return false;
12791 slp_tree slp_rhs1, slp_rhs2;
12792 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12793 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12794 return false;
12796 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12797 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12798 return false;
12800 if (vectype1 && vectype2
12801 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12802 TYPE_VECTOR_SUBPARTS (vectype2)))
12803 return false;
12805 vectype = vectype1 ? vectype1 : vectype2;
12807 /* Invariant comparison. */
12808 if (!vectype)
12810 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node);
12811 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12812 return false;
12814 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12815 return false;
12817 /* Can't compare mask and non-mask types. */
12818 if (vectype1 && vectype2
12819 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12820 return false;
12822 /* Boolean values may have another representation in vectors
12823 and therefore we prefer bit operations over comparison for
12824 them (which also works for scalar masks). We store opcodes
12825 to use in bitop1 and bitop2. Statement is vectorized as
12826 BITOP2 (rhs1 BITOP1 rhs2) or
12827 rhs1 BITOP2 (BITOP1 rhs2)
12828 depending on bitop1 and bitop2 arity. */
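/* For instance (mask-semantics sketch): rhs1 >= rhs2 on mask operands is
emitted as rhs1 | ~rhs2 and rhs1 == rhs2 as ~(rhs1 ^ rhs2), per the
bitop1/bitop2 choices below. */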
12829 bool swap_p = false;
12830 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12832 if (code == GT_EXPR)
12834 bitop1 = BIT_NOT_EXPR;
12835 bitop2 = BIT_AND_EXPR;
12837 else if (code == GE_EXPR)
12839 bitop1 = BIT_NOT_EXPR;
12840 bitop2 = BIT_IOR_EXPR;
12842 else if (code == LT_EXPR)
12844 bitop1 = BIT_NOT_EXPR;
12845 bitop2 = BIT_AND_EXPR;
12846 swap_p = true;
12848 else if (code == LE_EXPR)
12850 bitop1 = BIT_NOT_EXPR;
12851 bitop2 = BIT_IOR_EXPR;
12852 swap_p = true;
12854 else
12856 bitop1 = BIT_XOR_EXPR;
12857 if (code == EQ_EXPR)
12858 bitop2 = BIT_NOT_EXPR;
12862 if (!vec_stmt)
12864 if (bitop1 == NOP_EXPR)
12866 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12867 return false;
12869 else
12871 machine_mode mode = TYPE_MODE (vectype);
12872 optab optab;
12874 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12875 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12876 return false;
12878 if (bitop2 != NOP_EXPR)
12880 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12881 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12882 return false;
12886 /* Put types on constant and invariant SLP children. */
12887 if (slp_node
12888 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12889 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12891 if (dump_enabled_p ())
12892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12893 "incompatible vector types for invariants\n");
12894 return false;
12897 vect_model_simple_cost (vinfo, stmt_info,
12898 ncopies * (1 + (bitop2 != NOP_EXPR)),
12899 dts, ndts, slp_node, cost_vec);
12900 return true;
12903 /* Transform. */
12905 /* Handle def. */
12906 lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));
12907 if (lhs)
12908 mask = vect_create_destination_var (lhs, mask_type);
12910 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12911 rhs1, vectype, &vec_oprnds0,
12912 rhs2, vectype, &vec_oprnds1);
12913 if (swap_p)
12914 std::swap (vec_oprnds0, vec_oprnds1);
12916 /* Arguments are ready. Create the new vector stmt. */
12917 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12919 gimple *new_stmt;
12920 vec_rhs2 = vec_oprnds1[i];
12922 if (lhs)
12923 new_temp = make_ssa_name (mask);
12924 else
12925 new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
12926 if (bitop1 == NOP_EXPR)
12928 new_stmt = gimple_build_assign (new_temp, code,
12929 vec_rhs1, vec_rhs2);
12930 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12932 else
12934 if (bitop1 == BIT_NOT_EXPR)
12935 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12936 else
12937 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12938 vec_rhs2);
12939 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12940 if (bitop2 != NOP_EXPR)
12942 tree res = make_ssa_name (mask);
12943 if (bitop2 == BIT_NOT_EXPR)
12944 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12945 else
12946 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12947 new_temp);
12948 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12951 if (slp_node)
12952 slp_node->push_vec_def (new_stmt);
12953 else
12954 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12957 if (!slp_node)
12958 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12960 vec_oprnds0.release ();
12961 vec_oprnds1.release ();
12963 return true;
12966 /* vectorizable_comparison.
12968 Check if STMT_INFO is a comparison expression that can be vectorized.
12969 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12970 comparison, put it in VEC_STMT, and insert it at GSI.
12972 Return true if STMT_INFO is vectorizable in this way. */
12974 static bool
12975 vectorizable_comparison (vec_info *vinfo,
12976 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12977 gimple **vec_stmt,
12978 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12980 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12982 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12983 return false;
12985 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12986 return false;
12988 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12989 if (!stmt)
12990 return false;
12992 enum tree_code code = gimple_assign_rhs_code (stmt);
12993 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12994 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12995 vec_stmt, slp_node, cost_vec))
12996 return false;
12998 if (!vec_stmt)
12999 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
13001 return true;
13004 /* Check to see if the current early break given in STMT_INFO is valid for
13005 vectorization. */
13007 static bool
13008 vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
13009 gimple_stmt_iterator *gsi, gimple **vec_stmt,
13010 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
13012 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
13013 if (!loop_vinfo
13014 || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
13015 return false;
13017 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
13018 return false;
13020 if (!STMT_VINFO_RELEVANT_P (stmt_info))
13021 return false;
13023 DUMP_VECT_SCOPE ("vectorizable_early_exit");
13025 auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info));
13027 tree vectype = NULL_TREE;
13028 slp_tree slp_op0;
13029 tree op0;
13030 enum vect_def_type dt0;
13031 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0,
13032 &vectype))
13034 if (dump_enabled_p ())
13035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13036 "use not simple.\n");
13037 return false;
13040 if (!vectype)
13041 return false;
13043 machine_mode mode = TYPE_MODE (vectype);
13044 int ncopies;
13046 if (slp_node)
13047 ncopies = 1;
13048 else
13049 ncopies = vect_get_num_copies (loop_vinfo, vectype);
13051 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
13052 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
13053 bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
13054 bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
13056 /* Now build the new conditional. Pattern gimple_conds get dropped during
13057 codegen so we must replace the original insn. */
13058 gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
13059 gcond *cond_stmt = as_a <gcond *>(orig_stmt);
13060 /* When vectorizing we assume that taking the branch edge means we are
13061 exiting the loop. That is not always the case, however: the compiler
13062 rewrites conditions to always be a comparison against 0, and to do so it
13063 sometimes flips the edges. This is fine for scalar code, but for vector
13064 code we then have to flip the test as well, since we still assume that
13065 taking the branch edge means the exit condition was found; i.e. we need
13066 to know whether we are generating a `forall` or an `exist` condition. */
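/* Sketch of the two cases: if taking the branch edge leaves the loop we
need an `exist` test -- OR the partial masks together and take the branch
when the result is not all-zeros; if the edges were flipped so that the
branch edge stays in the loop we need a `forall` test -- AND the masks and
take the branch when the result is all-ones. This is what the defaults
below and the flow_bb_inside_loop_p adjustment implement. */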
13067 auto new_code = NE_EXPR;
13068 auto reduc_optab = ior_optab;
13069 auto reduc_op = BIT_IOR_EXPR;
13070 tree cst = build_zero_cst (vectype);
13071 edge exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 0);
13072 if (exit_true_edge->flags & EDGE_FALSE_VALUE)
13073 exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 1);
13074 gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE);
13075 if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
13076 exit_true_edge->dest))
13078 new_code = EQ_EXPR;
13079 reduc_optab = and_optab;
13080 reduc_op = BIT_AND_EXPR;
13081 cst = build_minus_one_cst (vectype);
13084 /* Analyze only. */
13085 if (!vec_stmt)
13087 if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
13089 if (dump_enabled_p ())
13090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13091 "can't vectorize early exit because the "
13092 "target doesn't support flag setting vector "
13093 "comparisons.\n");
13094 return false;
13097 if (ncopies > 1
13098 && direct_optab_handler (reduc_optab, mode) == CODE_FOR_nothing)
13100 if (dump_enabled_p ())
13101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13102 "can't vectorize early exit because the "
13103 "target does not support boolean vector %s "
13104 "for type %T.\n",
13105 reduc_optab == ior_optab ? "OR" : "AND",
13106 vectype);
13107 return false;
13110 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
13111 vec_stmt, slp_node, cost_vec))
13112 return false;
13114 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
13116 if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
13117 OPTIMIZE_FOR_SPEED))
13118 vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
13119 else
13120 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
13123 return true;
13126 /* Transform. */
13128 tree new_temp = NULL_TREE;
13129 gimple *new_stmt = NULL;
13131 if (dump_enabled_p ())
13132 dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
13134 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
13135 vec_stmt, slp_node, cost_vec))
13136 gcc_unreachable ();
13138 gimple *stmt = STMT_VINFO_STMT (stmt_info);
13139 basic_block cond_bb = gimple_bb (stmt);
13140 gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
13142 auto_vec<tree> stmts;
13144 if (slp_node)
13145 stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node));
13146 else
13148 auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info);
13149 stmts.reserve_exact (vec_stmts.length ());
13150 for (auto stmt : vec_stmts)
13151 stmts.quick_push (gimple_assign_lhs (stmt));
13154 /* Determine if we need to reduce the final value. */
13155 if (stmts.length () > 1)
13157 /* We build the reductions in a way to maintain as much parallelism as
13158 possible. */
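/* For instance, with four partial masks m0..m3 and an IOR reduction the
loop below emits roughly (m3 | m2) | (m1 | m0), a balanced tree rather
than a linear chain of ORs. */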
13159 auto_vec<tree> workset (stmts.length ());
13161 /* Mask the statements as we queue them up. Normally we loop over
13162 vec_num, but since we inspect the exact results of vectorization
13163 we don't need to and instead can just use the stmts themselves. */
13164 if (masked_loop_p)
13165 for (unsigned i = 0; i < stmts.length (); i++)
13167 tree stmt_mask
13168 = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype,
13170 stmt_mask
13171 = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask,
13172 stmts[i], &cond_gsi);
13173 workset.quick_push (stmt_mask);
13175 else if (len_loop_p)
13176 for (unsigned i = 0; i < stmts.length (); i++)
13178 tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
13179 lens, ncopies, vectype,
13180 stmts[i], i, 1);
13182 workset.quick_push (len_mask);
13184 else
13185 workset.splice (stmts);
13187 while (workset.length () > 1)
13189 new_temp = make_temp_ssa_name (vectype, NULL, "vexit_reduc");
13190 tree arg0 = workset.pop ();
13191 tree arg1 = workset.pop ();
13192 new_stmt = gimple_build_assign (new_temp, reduc_op, arg0, arg1);
13193 vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
13194 &cond_gsi);
13195 workset.quick_insert (0, new_temp);
13198 else
13200 new_temp = stmts[0];
13201 if (masked_loop_p)
13203 tree mask
13204 = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, 0);
13205 new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
13206 new_temp, &cond_gsi);
13208 else if (len_loop_p)
13209 new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
13210 ncopies, vectype, new_temp, 0, 1);
13213 gcc_assert (new_temp);
13215 gimple_cond_set_condition (cond_stmt, new_code, new_temp, cst);
13216 update_stmt (orig_stmt);
13218 if (slp_node)
13219 SLP_TREE_VEC_DEFS (slp_node).truncate (0);
13220 else
13221 STMT_VINFO_VEC_STMTS (stmt_info).truncate (0);
13223 if (!slp_node)
13224 *vec_stmt = orig_stmt;
13226 return true;
13229 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
13230 can handle all live statements in the node. Otherwise return true
13231 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
13232 VEC_STMT_P is as for vectorizable_live_operation. */
13234 static bool
13235 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
13236 slp_tree slp_node, slp_instance slp_node_instance,
13237 bool vec_stmt_p,
13238 stmt_vector_for_cost *cost_vec)
13240 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
13241 if (slp_node)
13243 stmt_vec_info slp_stmt_info;
13244 unsigned int i;
13245 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
13247 if (slp_stmt_info
13248 && (STMT_VINFO_LIVE_P (slp_stmt_info)
13249 || (loop_vinfo
13250 && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
13251 && STMT_VINFO_DEF_TYPE (slp_stmt_info)
13252 == vect_induction_def))
13253 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
13254 slp_node_instance, i,
13255 vec_stmt_p, cost_vec))
13256 return false;
13259 else if ((STMT_VINFO_LIVE_P (stmt_info)
13260 || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
13261 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def))
13262 && !vectorizable_live_operation (vinfo, stmt_info,
13263 slp_node, slp_node_instance, -1,
13264 vec_stmt_p, cost_vec))
13265 return false;
13267 return true;
13270 /* Make sure the statement is vectorizable. */
13272 opt_result
13273 vect_analyze_stmt (vec_info *vinfo,
13274 stmt_vec_info stmt_info, bool *need_to_vectorize,
13275 slp_tree node, slp_instance node_instance,
13276 stmt_vector_for_cost *cost_vec)
13278 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
13279 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
13280 bool ok;
13281 gimple_seq pattern_def_seq;
13283 if (dump_enabled_p ())
13284 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
13285 stmt_info->stmt);
13287 if (gimple_has_volatile_ops (stmt_info->stmt))
13288 return opt_result::failure_at (stmt_info->stmt,
13289 "not vectorized:"
13290 " stmt has volatile operands: %G\n",
13291 stmt_info->stmt);
13293 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13294 && node == NULL
13295 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
13297 gimple_stmt_iterator si;
13299 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
13301 stmt_vec_info pattern_def_stmt_info
13302 = vinfo->lookup_stmt (gsi_stmt (si));
13303 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
13304 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
13306 /* Analyze def stmt of STMT if it's a pattern stmt. */
13307 if (dump_enabled_p ())
13308 dump_printf_loc (MSG_NOTE, vect_location,
13309 "==> examining pattern def statement: %G",
13310 pattern_def_stmt_info->stmt);
13312 opt_result res
13313 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
13314 need_to_vectorize, node, node_instance,
13315 cost_vec);
13316 if (!res)
13317 return res;
13322 /* Skip stmts that do not need to be vectorized. In loops this is expected
13323 to include:
13324 - the COND_EXPR which is the loop exit condition
13325 - any LABEL_EXPRs in the loop
13326 - computations that are used only for array indexing or loop control.
13327 In basic blocks we only analyze statements that are a part of some SLP
13328 instance; therefore, all the statements are relevant.
13330 A pattern statement needs to be analyzed instead of the original statement
13331 if the original statement is not relevant. Otherwise, we analyze both
13332 statements. In basic blocks we are called from some SLP instance
13333 traversal; don't analyze pattern stmts there, since the pattern stmts
13334 will already be part of an SLP instance. */
13336 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
13337 if (!STMT_VINFO_RELEVANT_P (stmt_info)
13338 && !STMT_VINFO_LIVE_P (stmt_info))
13340 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13341 && pattern_stmt_info
13342 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
13343 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
13345 /* Analyze PATTERN_STMT instead of the original stmt. */
13346 stmt_info = pattern_stmt_info;
13347 if (dump_enabled_p ())
13348 dump_printf_loc (MSG_NOTE, vect_location,
13349 "==> examining pattern statement: %G",
13350 stmt_info->stmt);
13352 else
13354 if (dump_enabled_p ())
13355 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
13357 if (node)
13358 return opt_result::failure_at (stmt_info->stmt,
13359 "not vectorized:"
13360 " irrelevant stmt as SLP node %p "
13361 "representative.\n",
13362 (void *)node);
13363 return opt_result::success ();
13366 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13367 && node == NULL
13368 && pattern_stmt_info
13369 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
13370 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
13372 /* Analyze PATTERN_STMT too. */
13373 if (dump_enabled_p ())
13374 dump_printf_loc (MSG_NOTE, vect_location,
13375 "==> examining pattern statement: %G",
13376 pattern_stmt_info->stmt);
13378 opt_result res
13379 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
13380 node_instance, cost_vec);
13381 if (!res)
13382 return res;
13385 switch (STMT_VINFO_DEF_TYPE (stmt_info))
13387 case vect_internal_def:
13388 case vect_condition_def:
13389 break;
13391 case vect_reduction_def:
13392 case vect_nested_cycle:
13393 gcc_assert (!bb_vinfo
13394 && (relevance == vect_used_in_outer
13395 || relevance == vect_used_in_outer_by_reduction
13396 || relevance == vect_used_by_reduction
13397 || relevance == vect_unused_in_scope
13398 || relevance == vect_used_only_live));
13399 break;
13401 case vect_double_reduction_def:
13402 gcc_assert (!bb_vinfo && node);
13403 break;
13405 case vect_induction_def:
13406 case vect_first_order_recurrence:
13407 gcc_assert (!bb_vinfo);
13408 break;
13410 case vect_constant_def:
13411 case vect_external_def:
13412 case vect_unknown_def_type:
13413 default:
13414 gcc_unreachable ();
13417 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13418 if (node)
13419 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
13421 if (STMT_VINFO_RELEVANT_P (stmt_info))
13423 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
13424 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
13425 || gimple_code (stmt_info->stmt) == GIMPLE_COND
13426 || (call && gimple_call_lhs (call) == NULL_TREE));
13427 *need_to_vectorize = true;
13430 if (PURE_SLP_STMT (stmt_info) && !node)
13432 if (dump_enabled_p ())
13433 dump_printf_loc (MSG_NOTE, vect_location,
13434 "handled only by SLP analysis\n");
13435 return opt_result::success ();
13438 /* When we arrive here with a non-SLP statement and we are supposed
13439 to use SLP for everything, fail vectorization. */
13440 if (!node && param_vect_force_slp)
13441 return opt_result::failure_at (stmt_info->stmt,
13442 "needs non-SLP handling\n");
13444 ok = true;
13445 if (!bb_vinfo
13446 && (STMT_VINFO_RELEVANT_P (stmt_info)
13447 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
13448 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
13449 -mveclibabi= takes preference over library functions with
13450 the simd attribute. */
13451 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13452 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
13453 cost_vec)
13454 || vectorizable_conversion (vinfo, stmt_info,
13455 NULL, NULL, node, cost_vec)
13456 || vectorizable_operation (vinfo, stmt_info,
13457 NULL, NULL, node, cost_vec)
13458 || vectorizable_assignment (vinfo, stmt_info,
13459 NULL, NULL, node, cost_vec)
13460 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13461 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13462 || vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo),
13463 stmt_info, node, cost_vec)
13464 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13465 node, node_instance, cost_vec)
13466 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
13467 NULL, node, cost_vec)
13468 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13469 || vectorizable_condition (vinfo, stmt_info,
13470 NULL, NULL, node, cost_vec)
13471 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
13472 cost_vec)
13473 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13474 stmt_info, NULL, node)
13475 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13476 stmt_info, NULL, node, cost_vec)
13477 || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
13478 cost_vec));
13479 else
13481 if (bb_vinfo)
13482 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13483 || vectorizable_simd_clone_call (vinfo, stmt_info,
13484 NULL, NULL, node, cost_vec)
13485 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
13486 cost_vec)
13487 || vectorizable_shift (vinfo, stmt_info,
13488 NULL, NULL, node, cost_vec)
13489 || vectorizable_operation (vinfo, stmt_info,
13490 NULL, NULL, node, cost_vec)
13491 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
13492 cost_vec)
13493 || vectorizable_load (vinfo, stmt_info,
13494 NULL, NULL, node, cost_vec)
13495 || vectorizable_store (vinfo, stmt_info,
13496 NULL, NULL, node, cost_vec)
13497 || vectorizable_condition (vinfo, stmt_info,
13498 NULL, NULL, node, cost_vec)
13499 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
13500 cost_vec)
13501 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)
13502 || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
13503 cost_vec));
13507 if (node)
13508 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13510 if (!ok)
13511 return opt_result::failure_at (stmt_info->stmt,
13512 "not vectorized:"
13513 " relevant stmt not supported: %G",
13514 stmt_info->stmt);
13516 /* Stmts that are (also) "live" (i.e. used outside the loop)
13517 need extra handling, except for vectorizable reductions. */
13518 if (!bb_vinfo
13519 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
13520 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
13521 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
13522 stmt_info, node, node_instance,
13523 false, cost_vec))
13524 return opt_result::failure_at (stmt_info->stmt,
13525 "not vectorized:"
13526 " live stmt not supported: %G",
13527 stmt_info->stmt);
13529 return opt_result::success ();
13533 /* Function vect_transform_stmt.
13535 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
13537 bool
13538 vect_transform_stmt (vec_info *vinfo,
13539 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
13540 slp_tree slp_node, slp_instance slp_node_instance)
13542 bool is_store = false;
13543 gimple *vec_stmt = NULL;
13544 bool done;
13546 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
13548 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13549 if (slp_node)
13550 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
13552 switch (STMT_VINFO_TYPE (stmt_info))
13554 case type_demotion_vec_info_type:
13555 case type_promotion_vec_info_type:
13556 case type_conversion_vec_info_type:
13557 done = vectorizable_conversion (vinfo, stmt_info,
13558 gsi, &vec_stmt, slp_node, NULL);
13559 gcc_assert (done);
13560 break;
13562 case induc_vec_info_type:
13563 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
13564 stmt_info, &vec_stmt, slp_node,
13565 NULL);
13566 gcc_assert (done);
13567 break;
13569 case shift_vec_info_type:
13570 done = vectorizable_shift (vinfo, stmt_info,
13571 gsi, &vec_stmt, slp_node, NULL);
13572 gcc_assert (done);
13573 break;
13575 case op_vec_info_type:
13576 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13577 NULL);
13578 gcc_assert (done);
13579 break;
13581 case assignment_vec_info_type:
13582 done = vectorizable_assignment (vinfo, stmt_info,
13583 gsi, &vec_stmt, slp_node, NULL);
13584 gcc_assert (done);
13585 break;
13587 case load_vec_info_type:
13588 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13589 NULL);
13590 gcc_assert (done);
13591 break;
13593 case store_vec_info_type:
13594 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
13595 && !slp_node
13596 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
13597 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
13598 /* In case of interleaving, the whole chain is vectorized when the
13599 last store in the chain is reached. Store stmts before the last
13600 one are skipped, and their vec_stmt_info shouldn't be freed
13601 meanwhile. */
13603 else
13605 done = vectorizable_store (vinfo, stmt_info,
13606 gsi, &vec_stmt, slp_node, NULL);
13607 gcc_assert (done);
13608 is_store = true;
13610 break;
13612 case condition_vec_info_type:
13613 done = vectorizable_condition (vinfo, stmt_info,
13614 gsi, &vec_stmt, slp_node, NULL);
13615 gcc_assert (done);
13616 break;
13618 case comparison_vec_info_type:
13619 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13620 slp_node, NULL);
13621 gcc_assert (done);
13622 break;
13624 case call_vec_info_type:
13625 done = vectorizable_call (vinfo, stmt_info,
13626 gsi, &vec_stmt, slp_node, NULL);
13627 break;
13629 case call_simd_clone_vec_info_type:
13630 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13631 slp_node, NULL);
13632 break;
13634 case reduc_vec_info_type:
13635 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13636 gsi, &vec_stmt, slp_node);
13637 gcc_assert (done);
13638 break;
13640 case cycle_phi_info_type:
13641 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13642 &vec_stmt, slp_node, slp_node_instance);
13643 gcc_assert (done);
13644 break;
13646 case lc_phi_info_type:
13647 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13648 stmt_info, &vec_stmt, slp_node);
13649 gcc_assert (done);
13650 break;
13652 case recurr_info_type:
13653 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13654 stmt_info, &vec_stmt, slp_node, NULL);
13655 gcc_assert (done);
13656 break;
13658 case phi_info_type:
13659 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13660 gcc_assert (done);
13661 break;
13663 case loop_exit_ctrl_vec_info_type:
13664 done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
13665 slp_node, NULL);
13666 gcc_assert (done);
13667 break;
13669 default:
13670 if (!STMT_VINFO_LIVE_P (stmt_info))
13672 if (dump_enabled_p ())
13673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13674 "stmt not supported.\n");
13675 gcc_unreachable ();
13677 done = true;
13680 if (!slp_node && vec_stmt)
13681 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13683 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13685 /* Handle stmts whose DEF is used outside the loop-nest that is
13686 being vectorized. */
13687 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13688 slp_node_instance, true, NULL);
13689 gcc_assert (done);
13692 if (slp_node)
13693 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13695 return is_store;
13699 /* Remove a group of stores (for SLP or interleaving) and free their
13700 stmt_vec_info. */
13702 void
13703 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13705 stmt_vec_info next_stmt_info = first_stmt_info;
13707 while (next_stmt_info)
13709 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13710 next_stmt_info = vect_orig_stmt (next_stmt_info);
13711 /* Free the attached stmt_vec_info and remove the stmt. */
13712 vinfo->remove_stmt (next_stmt_info);
13713 next_stmt_info = tmp;
13717 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13718 elements of type SCALAR_TYPE, or null if the target doesn't support
13719 such a type.
13721 If NUNITS is zero, return a vector type that contains elements of
13722 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13724 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13725 for this vectorization region and want to "autodetect" the best choice.
13726 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13727 and we want the new type to be interoperable with it. PREVAILING_MODE
13728 in this case can be a scalar integer mode or a vector mode; when it
13729 is a vector mode, the function acts like a tree-level version of
13730 related_vector_mode. */
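/* Example (target-dependent sketch): with a 128-bit PREVAILING_MODE such as
V4SImode, asking for 8 elements of a 16-bit integer type typically yields
the corresponding 128-bit vector type (e.g. V8HImode), whereas a VOIDmode
PREVAILING_MODE with NUNITS == 0 lets the target's preferred SIMD mode
decide both the mode and the number of elements. */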
13732 tree
13733 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13734 tree scalar_type, poly_uint64 nunits)
13736 tree orig_scalar_type = scalar_type;
13737 scalar_mode inner_mode;
13738 machine_mode simd_mode;
13739 tree vectype;
13741 if ((!INTEGRAL_TYPE_P (scalar_type)
13742 && !POINTER_TYPE_P (scalar_type)
13743 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13744 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13745 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13746 return NULL_TREE;
13748 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13750 /* Interoperability between modes requires one to be a constant multiple
13751 of the other, so that the number of vectors required for each operation
13752 is a compile-time constant. */
13753 if (prevailing_mode != VOIDmode
13754 && !constant_multiple_p (nunits * nbytes,
13755 GET_MODE_SIZE (prevailing_mode))
13756 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13757 nunits * nbytes))
13758 return NULL_TREE;
13760 /* For vector types of elements whose mode precision doesn't
13761 match their type's precision we use an element type of mode
13762 precision. The vectorization routines will have to make sure
13763 they support the proper result truncation/extension.
13764 We also make sure to build vector types with INTEGER_TYPE
13765 component type only. */
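/* For instance (sketch), a BOOLEAN_TYPE with precision 1 but QImode, or an
ENUMERAL_TYPE, is replaced here by a plain mode-precision INTEGER_TYPE of
the same signedness. */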
13766 if (INTEGRAL_TYPE_P (scalar_type)
13767 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13768 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13769 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13770 TYPE_UNSIGNED (scalar_type));
13772 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13773 When the component mode passes the above test simply use a type
13774 corresponding to that mode. The theory is that any use that
13775 would cause problems with this will disable vectorization anyway. */
13776 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13777 && !INTEGRAL_TYPE_P (scalar_type))
13778 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13780 /* We can't build a vector type of elements with alignment bigger than
13781 their size. */
13782 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13783 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13784 TYPE_UNSIGNED (scalar_type));
13786 /* If we fell back to using the mode, fail if there was
13787 no scalar type for it. */
13788 if (scalar_type == NULL_TREE)
13789 return NULL_TREE;
13791 /* If no prevailing mode was supplied, use the mode the target prefers.
13792 Otherwise lookup a vector mode based on the prevailing mode. */
13793 if (prevailing_mode == VOIDmode)
13795 gcc_assert (known_eq (nunits, 0U));
13796 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13797 if (SCALAR_INT_MODE_P (simd_mode))
13799 /* Traditional behavior is not to take the integer mode
13800 literally, but simply to use it as a way of determining
13801 the vector size. It is up to mode_for_vector to decide
13802 what the TYPE_MODE should be.
13804 Note that nunits == 1 is allowed in order to support single
13805 element vector types. */
13806 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13807 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13808 return NULL_TREE;
13811 else if (SCALAR_INT_MODE_P (prevailing_mode)
13812 || !related_vector_mode (prevailing_mode,
13813 inner_mode, nunits).exists (&simd_mode))
13815 /* Fall back to using mode_for_vector, mostly in the hope of being
13816 able to use an integer mode. */
13817 if (known_eq (nunits, 0U)
13818 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13819 return NULL_TREE;
13821 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13822 return NULL_TREE;
13825 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13827 /* In cases where the mode was chosen by mode_for_vector, check that
13828 the target actually supports the chosen mode, or that it at least
13829 allows the vector mode to be replaced by a like-sized integer. */
13830 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13831 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13832 return NULL_TREE;
13834 /* Re-attach the address-space qualifier if we canonicalized the scalar
13835 type. */
13836 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13837 return build_qualified_type
13838 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13840 return vectype;
13843 /* Function get_vectype_for_scalar_type.
13845 Returns the vector type corresponding to SCALAR_TYPE as supported
13846 by the target. If GROUP_SIZE is nonzero and we're performing BB
13847 vectorization, make sure that the number of elements in the vector
13848 is no bigger than GROUP_SIZE. */
13850 tree
13851 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13852 unsigned int group_size)
13854 /* For BB vectorization, we should always have a group size once we've
13855 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13856 are tentative requests during things like early data reference
13857 analysis and pattern recognition. */
13858 if (is_a <bb_vec_info> (vinfo))
13859 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13860 else
13861 group_size = 0;
13863 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13864 scalar_type);
13865 if (vectype && vinfo->vector_mode == VOIDmode)
13866 vinfo->vector_mode = TYPE_MODE (vectype);
13868 /* Register the natural choice of vector type, before the group size
13869 has been applied. */
13870 if (vectype)
13871 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13873 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13874 try again with an explicit number of elements. */
13875 if (vectype
13876 && group_size
13877 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13879 /* Start with the biggest number of units that fits within
13880 GROUP_SIZE and halve it until we find a valid vector type.
13881 Usually either the first attempt will succeed or all will
13882 fail (in the latter case because GROUP_SIZE is too small
13883 for the target), but it's possible that a target could have
13884 a hole between supported vector types.
13886 If GROUP_SIZE is not a power of 2, this has the effect of
13887 trying the largest power of 2 that fits within the group,
13888 even though the group is not a multiple of that vector size.
13889 The BB vectorizer will then try to carve up the group into
13890 smaller pieces. */
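/* For example, GROUP_SIZE == 6 starts with nunits == 4 (the largest power
of 2 not exceeding 6) and, if no vector type exists for that, retries
with nunits == 2. */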
13891 unsigned int nunits = 1 << floor_log2 (group_size);
13894 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13895 scalar_type, nunits);
13896 nunits /= 2;
13898 while (nunits > 1 && !vectype);
13901 return vectype;
13904 /* Return the vector type corresponding to SCALAR_TYPE as supported
13905 by the target. NODE, if nonnull, is the SLP tree node that will
13906 use the returned vector type. */
13908 tree
13909 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13911 unsigned int group_size = 0;
13912 if (node)
13913 group_size = SLP_TREE_LANES (node);
13914 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13917 /* Function get_mask_type_for_scalar_type.
13919 Returns the mask type corresponding to the result of a comparison of
13920 vectors of the specified SCALAR_TYPE, as supported by the target.
13921 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13922 make sure that the number of elements in the vector is no bigger
13923 than GROUP_SIZE. */
13925 tree
13926 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13927 unsigned int group_size)
13929 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13931 if (!vectype)
13932 return NULL;
13934 return truth_type_for (vectype);
13937 /* Function get_mask_type_for_scalar_type.
13939 Returns the mask type corresponding to the result of a comparison of
13940 vectors of the specified SCALAR_TYPE, as supported by the target.
13941 NODE, if nonnull, is the SLP tree node that will use the returned
13942 vector type. */
13944 tree
13945 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13946 slp_tree node)
13948 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13950 if (!vectype)
13951 return NULL;
13953 return truth_type_for (vectype);
13956 /* Function get_same_sized_vectype
13958 Returns a vector type corresponding to SCALAR_TYPE of size
13959 VECTOR_TYPE if supported by the target. */
13961 tree
13962 get_same_sized_vectype (tree scalar_type, tree vector_type)
13964 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13965 return truth_type_for (vector_type);
13967 poly_uint64 nunits;
13968 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13969 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13970 return NULL_TREE;
13972 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13973 scalar_type, nunits);
13976 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13977 would not change the chosen vector modes. */
13979 bool
13980 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13982 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13983 i != vinfo->used_vector_modes.end (); ++i)
13984 if (!VECTOR_MODE_P (*i)
13985 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13986 return false;
13987 return true;
13990 /* Function vect_is_simple_use.
13992 Input:
13993 VINFO - the vect info of the loop or basic block that is being vectorized.
13994 OPERAND - operand in the loop or bb.
13995 Output:
13996 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13997 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13998 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13999 the definition could be anywhere in the function
14000 DT - the type of definition
14002 Returns whether a stmt with OPERAND can be vectorized.
14003 For loops, supportable operands are constants, loop invariants, and operands
14004 that are defined by the current iteration of the loop. Unsupportable
14005 operands are those that are defined by a previous iteration of the loop (as
14006 is the case in reduction/induction computations).
14007 For basic blocks, supportable operands are constants and bb invariants.
14008 For now, operands defined outside the basic block are not supported. */
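/* For instance (sketch), in `for (i) x[i] = a[i] + c;` the use of the SSA
name holding a[i] (defined in the loop body) is a vect_internal_def, the
use of c defined before the loop is a vect_external_def, and a literal
operand is a vect_constant_def. */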
14010 bool
14011 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
14012 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
14014 if (def_stmt_info_out)
14015 *def_stmt_info_out = NULL;
14016 if (def_stmt_out)
14017 *def_stmt_out = NULL;
14018 *dt = vect_unknown_def_type;
14020 if (dump_enabled_p ())
14022 dump_printf_loc (MSG_NOTE, vect_location,
14023 "vect_is_simple_use: operand ");
14024 if (TREE_CODE (operand) == SSA_NAME
14025 && !SSA_NAME_IS_DEFAULT_DEF (operand))
14026 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
14027 else
14028 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
14031 if (CONSTANT_CLASS_P (operand))
14032 *dt = vect_constant_def;
14033 else if (is_gimple_min_invariant (operand))
14034 *dt = vect_external_def;
14035 else if (TREE_CODE (operand) != SSA_NAME)
14036 *dt = vect_unknown_def_type;
14037 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
14038 *dt = vect_external_def;
14039 else
14041 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
14042 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
14043 if (!stmt_vinfo)
14044 *dt = vect_external_def;
14045 else
14047 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
14048 def_stmt = stmt_vinfo->stmt;
14049 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
14050 if (def_stmt_info_out)
14051 *def_stmt_info_out = stmt_vinfo;
14053 if (def_stmt_out)
14054 *def_stmt_out = def_stmt;
14057 if (dump_enabled_p ())
14059 dump_printf (MSG_NOTE, ", type of def: ");
14060 switch (*dt)
14062 case vect_uninitialized_def:
14063 dump_printf (MSG_NOTE, "uninitialized\n");
14064 break;
14065 case vect_constant_def:
14066 dump_printf (MSG_NOTE, "constant\n");
14067 break;
14068 case vect_external_def:
14069 dump_printf (MSG_NOTE, "external\n");
14070 break;
14071 case vect_internal_def:
14072 dump_printf (MSG_NOTE, "internal\n");
14073 break;
14074 case vect_induction_def:
14075 dump_printf (MSG_NOTE, "induction\n");
14076 break;
14077 case vect_reduction_def:
14078 dump_printf (MSG_NOTE, "reduction\n");
14079 break;
14080 case vect_double_reduction_def:
14081 dump_printf (MSG_NOTE, "double reduction\n");
14082 break;
14083 case vect_nested_cycle:
14084 dump_printf (MSG_NOTE, "nested cycle\n");
14085 break;
14086 case vect_first_order_recurrence:
14087 dump_printf (MSG_NOTE, "first order recurrence\n");
14088 break;
14089 case vect_condition_def:
14090 dump_printf (MSG_NOTE, "control flow\n");
14091 break;
14092 case vect_unknown_def_type:
14093 dump_printf (MSG_NOTE, "unknown\n");
14094 break;
14098 if (*dt == vect_unknown_def_type)
14100 if (dump_enabled_p ())
14101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
14102 "Unsupported pattern.\n");
14103 return false;
14106 return true;
14109 /* Function vect_is_simple_use.
14111 Same as vect_is_simple_use but also determines the vector operand
14112 type of OPERAND and stores it to *VECTYPE. If the definition of
14113 OPERAND is vect_uninitialized_def, vect_constant_def or
14114 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
14115 is responsible for computing the best-suited vector type for the
14116 scalar operand. */
14118 bool
14119 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
14120 tree *vectype, stmt_vec_info *def_stmt_info_out,
14121 gimple **def_stmt_out)
14123 stmt_vec_info def_stmt_info;
14124 gimple *def_stmt;
14125 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
14126 return false;
14128 if (def_stmt_out)
14129 *def_stmt_out = def_stmt;
14130 if (def_stmt_info_out)
14131 *def_stmt_info_out = def_stmt_info;
14133 /* Now get a vector type if the def is internal, otherwise supply
14134 NULL_TREE and leave it up to the caller to figure out a proper
14135 type for the use stmt. */
14136 if (*dt == vect_internal_def
14137 || *dt == vect_induction_def
14138 || *dt == vect_reduction_def
14139 || *dt == vect_double_reduction_def
14140 || *dt == vect_nested_cycle
14141 || *dt == vect_first_order_recurrence)
14143 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
14144 gcc_assert (*vectype != NULL_TREE);
14145 if (dump_enabled_p ())
14146 dump_printf_loc (MSG_NOTE, vect_location,
14147 "vect_is_simple_use: vectype %T\n", *vectype);
14149 else if (*dt == vect_uninitialized_def
14150 || *dt == vect_constant_def
14151 || *dt == vect_external_def)
14152 *vectype = NULL_TREE;
14153 else
14154 gcc_unreachable ();
14156 return true;
14159 /* Function vect_is_simple_use.
14161 Same as vect_is_simple_use but determines the operand by operand
14162 position OPERAND from either STMT or SLP_NODE, filling in *OP
14163 and *SLP_DEF (when SLP_NODE is not NULL). */
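/* E.g. for a plain binary gassign the valid OPERAND values are 0 and 1
   (rhs1 and rhs2); for a COND_EXPR with an embedded comparison, operands
   0 and 1 select the comparison operands and operands 2 and 3 the
   then/else values, matching the handling below.  */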
14165 bool
14166 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
14167 unsigned operand, tree *op, slp_tree *slp_def,
14168 enum vect_def_type *dt,
14169 tree *vectype, stmt_vec_info *def_stmt_info_out)
14171 if (slp_node)
14173 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
14174 *slp_def = child;
14175 *vectype = SLP_TREE_VECTYPE (child);
14176 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
14178 /* ??? VEC_PERM nodes might be intermediate and their lane values
14179 have no representative (nor do we build a VEC_PERM stmt for
14180 the actual operation). Note for two-operator nodes we set
14181 a representative but leave scalar stmts empty as we'd only
14182 have one for a subset of lanes. Ideally no caller would
14183 require *op for internal defs. */
14184 if (SLP_TREE_REPRESENTATIVE (child))
14186 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
14187 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
14189 else
14191 gcc_assert (SLP_TREE_CODE (child) == VEC_PERM_EXPR);
14192 *op = error_mark_node;
14193 *dt = vect_internal_def;
14194 if (def_stmt_info_out)
14195 *def_stmt_info_out = NULL;
14196 return true;
14199 else
14201 if (def_stmt_info_out)
14202 *def_stmt_info_out = NULL;
14203 *op = SLP_TREE_SCALAR_OPS (child)[0];
14204 *dt = SLP_TREE_DEF_TYPE (child);
14205 return true;
14208 else
14210 *slp_def = NULL;
14211 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
14213 if (gimple_assign_rhs_code (ass) == COND_EXPR
14214 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
14216 if (operand < 2)
14217 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
14218 else
14219 *op = gimple_op (ass, operand);
14221 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
14222 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
14223 else
14224 *op = gimple_op (ass, operand + 1);
14226 else if (gcond *cond = dyn_cast <gcond *> (stmt->stmt))
14227 *op = gimple_op (cond, operand);
14228 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
14229 *op = gimple_call_arg (call, operand);
14230 else
14231 gcc_unreachable ();
14232 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
14236 /* If OP is not NULL and is external or constant, update its vector
14237 type with VECTYPE. Returns true if successful or false if not,
14238 for example when conflicting vector types are present. */
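/* A typical analysis-time caller does roughly the following (a sketch
   only, assuming the usual default arguments of vect_is_simple_use):

     slp_tree slp_op;
     tree op, op_vectype;
     enum vect_def_type dt;
     if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
			      &dt, &op_vectype)
	 || !vect_maybe_update_slp_op_vectype (slp_op, vectype))
       return false;

   where a false return from the latter means an external or constant
   operand was already assigned a conflicting vector type.  */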
14240 bool
14241 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
14243 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
14244 return true;
14245 if (SLP_TREE_VECTYPE (op))
14246 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
14247 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
14248 should be handled by patterns.  Allow vect_constant_def for now. */
14249 if (VECTOR_BOOLEAN_TYPE_P (vectype)
14250 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
14251 return false;
14252 SLP_TREE_VECTYPE (op) = vectype;
14253 return true;
14256 /* Function supportable_widening_operation
14258 Check whether an operation represented by the code CODE is a
14259 widening operation that is supported by the target platform in
14260 vector form (i.e., when operating on arguments of type VECTYPE_IN
14261 producing a result of type VECTYPE_OUT).
14263 Widening operations we currently support are NOP (CONVERT), FLOAT,
14264 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
14265 are supported by the target platform either directly (via vector
14266 tree-codes) or via internal functions.
14268 Output:
14269 - CODE1 and CODE2 are codes of vector operations to be used when
14270 vectorizing the operation, if available.
14271 - MULTI_STEP_CVT determines the number of required intermediate steps in
14272 case of multi-step conversion (like char->short->int - in that case
14273 MULTI_STEP_CVT will be 1).
14274 - INTERM_TYPES contains the intermediate type required to perform the
14275 widening operation (short in the above example). */
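/* For illustration, assuming 128-bit vectors and target support for the
   unpack optabs: a char -> int conversion is done as V16QI -> 2x V8HI
   (VEC_UNPACK_LO/HI_EXPR) followed by V8HI -> 2x V4SI, so MULTI_STEP_CVT
   is 1 and INTERM_TYPES contains the V8HI vector type.  */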
14277 bool
14278 supportable_widening_operation (vec_info *vinfo,
14279 code_helper code,
14280 stmt_vec_info stmt_info,
14281 tree vectype_out, tree vectype_in,
14282 code_helper *code1,
14283 code_helper *code2,
14284 int *multi_step_cvt,
14285 vec<tree> *interm_types)
14287 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
14288 class loop *vect_loop = NULL;
14289 machine_mode vec_mode;
14290 enum insn_code icode1, icode2;
14291 optab optab1 = unknown_optab, optab2 = unknown_optab;
14292 tree vectype = vectype_in;
14293 tree wide_vectype = vectype_out;
14294 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
14295 int i;
14296 tree prev_type, intermediate_type;
14297 machine_mode intermediate_mode, prev_mode;
14298 optab optab3, optab4;
14300 *multi_step_cvt = 0;
14301 if (loop_info)
14302 vect_loop = LOOP_VINFO_LOOP (loop_info);
14304 switch (code.safe_as_tree_code ())
14306 case MAX_TREE_CODES:
14307 /* Don't set c1 and c2 if code is not a tree_code. */
14308 break;
14310 case WIDEN_MULT_EXPR:
14311 /* The result of a vectorized widening operation usually requires
14312 two vectors (because the widened results do not fit into one vector).
14313 The generated vector results would normally be expected to be
14314 generated in the same order as in the original scalar computation,
14315 i.e. if 8 results are generated in each vector iteration, they are
14316 to be organized as follows:
14317 vect1: [res1,res2,res3,res4],
14318 vect2: [res5,res6,res7,res8].
14320 However, in the special case that the result of the widening
14321 operation is used in a reduction computation only, the order doesn't
14322 matter (because when vectorizing a reduction we change the order of
14323 the computation). Some targets can take advantage of this and
14324 generate more efficient code. For example, targets like Altivec,
14325 that support widen_mult using a sequence of {mult_even,mult_odd}
14326 generate the following vectors:
14327 vect1: [res1,res3,res5,res7],
14328 vect2: [res2,res4,res6,res8].
14330 When vectorizing outer-loops, we execute the inner-loop sequentially
14331 (each vectorized inner-loop iteration contributes to VF outer-loop
14332 iterations in parallel).  We therefore don't allow changing the
14333 order of the computation in the inner-loop during outer-loop
14334 vectorization. */
14335 /* TODO: Another case in which order doesn't *really* matter is when we
14336 widen and then contract again, e.g. (short)((int)x * y >> 8).
14337 Normally, pack_trunc performs an even/odd permute, whereas the
14338 repack from an even/odd expansion would be an interleave, which
14339 would be significantly simpler for e.g. AVX2. */
14340 /* In any case, in order to avoid duplicating the code below, recurse
14341 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
14342 are properly set up for the caller. If we fail, we'll continue with
14343 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
14344 if (vect_loop
14345 && !nested_in_vect_loop_p (vect_loop, stmt_info)
14346 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
14347 stmt_info, vectype_out,
14348 vectype_in, code1,
14349 code2, multi_step_cvt,
14350 interm_types))
14352 /* Elements in a vector with vect_used_by_reduction property cannot
14353 be reordered if the use chain with this property does not have the
14354 same operation.  One such example is s += a * b, where elements
14355 in a and b cannot be reordered. Here we check if the vector defined
14356 by STMT is only directly used in the reduction statement. */
14357 tree lhs = gimple_assign_lhs (vect_orig_stmt (stmt_info)->stmt);
14358 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
14359 if (use_stmt_info && STMT_VINFO_REDUC_DEF (use_stmt_info))
14360 return true;
14362 c1 = VEC_WIDEN_MULT_LO_EXPR;
14363 c2 = VEC_WIDEN_MULT_HI_EXPR;
14364 break;
14366 case DOT_PROD_EXPR:
14367 c1 = DOT_PROD_EXPR;
14368 c2 = DOT_PROD_EXPR;
14369 break;
14371 case SAD_EXPR:
14372 c1 = SAD_EXPR;
14373 c2 = SAD_EXPR;
14374 break;
14376 case VEC_WIDEN_MULT_EVEN_EXPR:
14377 /* Support the recursion induced just above. */
14378 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
14379 c2 = VEC_WIDEN_MULT_ODD_EXPR;
14380 break;
14382 case WIDEN_LSHIFT_EXPR:
14383 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
14384 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
14385 break;
14387 CASE_CONVERT:
14388 c1 = VEC_UNPACK_LO_EXPR;
14389 c2 = VEC_UNPACK_HI_EXPR;
14390 break;
14392 case FLOAT_EXPR:
14393 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
14394 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
14395 break;
14397 case FIX_TRUNC_EXPR:
14398 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
14399 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
14400 break;
14402 default:
14403 gcc_unreachable ();
14406 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
14407 std::swap (c1, c2);
14409 if (code == FIX_TRUNC_EXPR)
14411 /* The signedness is determined from the output operand.  */
14412 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14413 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14415 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
14416 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14417 && VECTOR_BOOLEAN_TYPE_P (vectype)
14418 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14419 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14421 /* If the input and result modes are the same, a different optab
14422 is needed where we pass in the number of units in vectype. */
14423 optab1 = vec_unpacks_sbool_lo_optab;
14424 optab2 = vec_unpacks_sbool_hi_optab;
14427 vec_mode = TYPE_MODE (vectype);
14428 if (widening_fn_p (code))
14430 /* If this is an internal fn then we must check whether the target
14431 supports either a low-high split or an even-odd split. */
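/* E.g. IFN_VEC_WIDEN_PLUS is first tried as the IFN_VEC_WIDEN_PLUS_LO/HI
   pair and, if that is not supported, as the IFN_VEC_WIDEN_PLUS_EVEN/ODD
   pair below.  */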
14432 internal_fn ifn = as_internal_fn ((combined_fn) code);
14434 internal_fn lo, hi, even, odd;
14435 lookup_hilo_internal_fn (ifn, &lo, &hi);
14436 *code1 = as_combined_fn (lo);
14437 *code2 = as_combined_fn (hi);
14438 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
14439 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
14441 /* If we don't support low-high, then check for even-odd. */
14442 if (!optab1
14443 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14444 || !optab2
14445 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14447 lookup_evenodd_internal_fn (ifn, &even, &odd);
14448 *code1 = as_combined_fn (even);
14449 *code2 = as_combined_fn (odd);
14450 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
14451 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
14454 else if (code.is_tree_code ())
14456 if (code == FIX_TRUNC_EXPR)
14458 /* The signedness is determined from the output operand.  */
14459 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14460 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14462 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
14463 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14464 && VECTOR_BOOLEAN_TYPE_P (vectype)
14465 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14466 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14468 /* If the input and result modes are the same, a different optab
14469 is needed where we pass in the number of units in vectype. */
14470 optab1 = vec_unpacks_sbool_lo_optab;
14471 optab2 = vec_unpacks_sbool_hi_optab;
14473 else
14475 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14476 optab2 = optab_for_tree_code (c2, vectype, optab_default);
14478 *code1 = c1;
14479 *code2 = c2;
14482 if (!optab1 || !optab2)
14483 return false;
14485 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14486 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14487 return false;
14490 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14491 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14493 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14494 return true;
14495 /* For scalar masks we may have different boolean
14496 vector types having the same QImode. Thus we
14497 add an additional check on the number of elements. */
14498 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
14499 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14500 return true;
14503 /* Check if it's a multi-step conversion that can be done using intermediate
14504 types. */
14506 prev_type = vectype;
14507 prev_mode = vec_mode;
14509 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
14510 return false;
14512 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14513 intermediate steps in the promotion sequence.  We try
14514 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
14515 not. */
14516 interm_types->create (MAX_INTERM_CVT_STEPS);
14517 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14519 intermediate_mode = insn_data[icode1].operand[0].mode;
14520 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14521 intermediate_type
14522 = vect_halve_mask_nunits (prev_type, intermediate_mode);
14523 else if (VECTOR_MODE_P (intermediate_mode))
14525 tree intermediate_element_type
14526 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
14527 TYPE_UNSIGNED (prev_type));
14528 intermediate_type
14529 = build_vector_type_for_mode (intermediate_element_type,
14530 intermediate_mode);
14532 else
14533 intermediate_type
14534 = lang_hooks.types.type_for_mode (intermediate_mode,
14535 TYPE_UNSIGNED (prev_type));
14537 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14538 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14539 && intermediate_mode == prev_mode
14540 && SCALAR_INT_MODE_P (prev_mode))
14542 /* If the input and result modes are the same, a different optab
14543 is needed where we pass in the number of units in vectype. */
14544 optab3 = vec_unpacks_sbool_lo_optab;
14545 optab4 = vec_unpacks_sbool_hi_optab;
14547 else
14549 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
14550 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
14553 if (!optab3 || !optab4
14554 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
14555 || insn_data[icode1].operand[0].mode != intermediate_mode
14556 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
14557 || insn_data[icode2].operand[0].mode != intermediate_mode
14558 || ((icode1 = optab_handler (optab3, intermediate_mode))
14559 == CODE_FOR_nothing)
14560 || ((icode2 = optab_handler (optab4, intermediate_mode))
14561 == CODE_FOR_nothing))
14562 break;
14564 interm_types->quick_push (intermediate_type);
14565 (*multi_step_cvt)++;
14567 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14568 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14570 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14571 return true;
14572 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
14573 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14574 return true;
14577 prev_type = intermediate_type;
14578 prev_mode = intermediate_mode;
14581 interm_types->release ();
14582 return false;
14586 /* Function supportable_narrowing_operation
14588 Check whether an operation represented by the code CODE is a
14589 narrowing operation that is supported by the target platform in
14590 vector form (i.e., when operating on arguments of type VECTYPE_IN
14591 and producing a result of type VECTYPE_OUT).
14593 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14594 and FLOAT. This function checks if these operations are supported by
14595 the target platform directly via vector tree-codes.
14597 Output:
14598 - CODE1 is the code of a vector operation to be used when
14599 vectorizing the operation, if available.
14600 - MULTI_STEP_CVT determines the number of required intermediate steps in
14601 case of multi-step conversion (like int->short->char - in that case
14602 MULTI_STEP_CVT will be 1).
14603 - INTERM_TYPES contains the intermediate type required to perform the
14604 narrowing operation (short in the above example). */
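/* For illustration, assuming 128-bit vectors and target support for the
   pack optabs: an int -> char conversion is done as 2x V4SI -> V8HI
   (VEC_PACK_TRUNC_EXPR) followed by 2x V8HI -> V16QI, so MULTI_STEP_CVT
   is 1 and INTERM_TYPES contains the V8HI vector type.  */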
14606 bool
14607 supportable_narrowing_operation (code_helper code,
14608 tree vectype_out, tree vectype_in,
14609 code_helper *code1, int *multi_step_cvt,
14610 vec<tree> *interm_types)
14612 machine_mode vec_mode;
14613 enum insn_code icode1;
14614 optab optab1, interm_optab;
14615 tree vectype = vectype_in;
14616 tree narrow_vectype = vectype_out;
14617 enum tree_code c1;
14618 tree intermediate_type, prev_type;
14619 machine_mode intermediate_mode, prev_mode;
14620 int i;
14621 unsigned HOST_WIDE_INT n_elts;
14622 bool uns;
14624 if (!code.is_tree_code ())
14625 return false;
14627 *multi_step_cvt = 0;
14628 switch ((tree_code) code)
14630 CASE_CONVERT:
14631 c1 = VEC_PACK_TRUNC_EXPR;
14632 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14633 && VECTOR_BOOLEAN_TYPE_P (vectype)
14634 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14635 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
14636 && n_elts < BITS_PER_UNIT)
14637 optab1 = vec_pack_sbool_trunc_optab;
14638 else
14639 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14640 break;
14642 case FIX_TRUNC_EXPR:
14643 c1 = VEC_PACK_FIX_TRUNC_EXPR;
14644 /* The signedness is determined from the output operand. */
14645 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14646 break;
14648 case FLOAT_EXPR:
14649 c1 = VEC_PACK_FLOAT_EXPR;
14650 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14651 break;
14653 default:
14654 gcc_unreachable ();
14657 if (!optab1)
14658 return false;
14660 vec_mode = TYPE_MODE (vectype);
14661 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14662 return false;
14664 *code1 = c1;
14666 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14668 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14669 return true;
14670 /* For scalar masks we may have different boolean
14671 vector types having the same QImode. Thus we
14672 add an additional check on the number of elements. */
14673 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14674 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14675 return true;
14678 if (code == FLOAT_EXPR)
14679 return false;
14681 /* Check if it's a multi-step conversion that can be done using intermediate
14682 types. */
14683 prev_mode = vec_mode;
14684 prev_type = vectype;
14685 if (code == FIX_TRUNC_EXPR)
14686 uns = TYPE_UNSIGNED (vectype_out);
14687 else
14688 uns = TYPE_UNSIGNED (vectype);
14690 /* For a multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
14691 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
14692 more costly than signed. */
14693 if (code == FIX_TRUNC_EXPR && uns)
14695 enum insn_code icode2;
14697 intermediate_type
14698 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14699 interm_optab
14700 = optab_for_tree_code (c1, intermediate_type, optab_default);
14701 if (interm_optab != unknown_optab
14702 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
14703 && insn_data[icode1].operand[0].mode
14704 == insn_data[icode2].operand[0].mode)
14706 uns = false;
14707 optab1 = interm_optab;
14708 icode1 = icode2;
14712 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14713 intermediate steps in the demotion sequence.  We try
14714 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14715 interm_types->create (MAX_INTERM_CVT_STEPS);
14716 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14718 intermediate_mode = insn_data[icode1].operand[0].mode;
14719 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14720 intermediate_type
14721 = vect_double_mask_nunits (prev_type, intermediate_mode);
14722 else
14723 intermediate_type
14724 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14725 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14726 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14727 && SCALAR_INT_MODE_P (prev_mode)
14728 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14729 && n_elts < BITS_PER_UNIT)
14730 interm_optab = vec_pack_sbool_trunc_optab;
14731 else
14732 interm_optab
14733 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14734 optab_default);
14735 if (!interm_optab
14736 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14737 || insn_data[icode1].operand[0].mode != intermediate_mode
14738 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14739 == CODE_FOR_nothing))
14740 break;
14742 interm_types->quick_push (intermediate_type);
14743 (*multi_step_cvt)++;
14745 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14747 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14748 return true;
14749 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14750 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14751 return true;
14754 prev_mode = intermediate_mode;
14755 prev_type = intermediate_type;
14756 optab1 = interm_optab;
14759 interm_types->release ();
14760 return false;
14763 /* Function supportable_indirect_convert_operation
14765 Check whether the operation represented by the code CODE can be performed,
14766 either directly or as a sequence of operations, by the target platform in
14767 vector form (i.e., when operating on arguments of type VECTYPE_IN
14768 producing a result of type VECTYPE_OUT).
14770 Convert operations we currently support directly are FIX_TRUNC and FLOAT.
14771 This function checks if these operations are supported
14772 by the target platform directly (via vector tree-codes).
14774 Output:
14775 - CONVERTS contains the pairs needed to perform the conversion;
14776 each pair's first element is the intermediate (or final) vector type
14777 and its second element is the code of the vector operation to be used
14778 to convert from the previous type to that type. */
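/* For illustration (a sketch only, depending on what the target supports
   directly): a V4SI -> V4DF FLOAT_EXPR that is not supported as a single
   operation may be decomposed into V4SI -> V4DI followed by V4DI -> V4DF,
   in which case CONVERTS ends up holding something like
   { (V4DI, NOP_EXPR), (V4DF, FLOAT_EXPR) }.  */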
14779 bool
14780 supportable_indirect_convert_operation (code_helper code,
14781 tree vectype_out,
14782 tree vectype_in,
14783 vec<std::pair<tree, tree_code> > *converts,
14784 tree op0)
14786 bool found_mode = false;
14787 scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
14788 scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
14789 opt_scalar_mode mode_iter;
14790 tree_code tc1, tc2, code1, code2;
14792 tree cvt_type = NULL_TREE;
14793 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
14795 if (supportable_convert_operation ((tree_code) code,
14796 vectype_out,
14797 vectype_in,
14798 &tc1))
14800 converts->safe_push (std::make_pair (vectype_out, tc1));
14801 return true;
14804 /* For conversions between float and integer types check whether
14805 we can use intermediate signed integer types to support the
14806 conversion. */
14807 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
14808 && (code == FLOAT_EXPR
14809 || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
14811 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
14812 bool float_expr_p = code == FLOAT_EXPR;
14813 unsigned short target_size;
14814 scalar_mode intermediate_mode;
14815 if (demotion)
14817 intermediate_mode = lhs_mode;
14818 target_size = GET_MODE_SIZE (rhs_mode);
14820 else
14822 target_size = GET_MODE_SIZE (lhs_mode);
14823 if (!int_mode_for_size
14824 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
14825 return false;
14827 code1 = float_expr_p ? (tree_code) code : NOP_EXPR;
14828 code2 = float_expr_p ? NOP_EXPR : (tree_code) code;
14829 opt_scalar_mode mode_iter;
14830 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
14832 intermediate_mode = mode_iter.require ();
14834 if (GET_MODE_SIZE (intermediate_mode) > target_size)
14835 break;
14837 scalar_mode cvt_mode;
14838 if (!int_mode_for_size
14839 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
14840 break;
14842 cvt_type = build_nonstandard_integer_type
14843 (GET_MODE_BITSIZE (cvt_mode), 0);
14845 /* Check if the intermediate type can hold OP0's range.
14846 When converting from float to integer this is not necessary
14847 because values that do not fit the (smaller) target type are
14848 unspecified anyway. */
14849 if (demotion && float_expr_p)
14851 wide_int op_min_value, op_max_value;
14852 /* In the vector form OP0 does not carry RANGE_INFO.
14853 If that ever becomes available, this part may need to be
14854 adjusted, e.g. to check the range of each element
14855 in the vector. */
14856 if (TREE_CODE (op0) != SSA_NAME
14857 || !SSA_NAME_RANGE_INFO (op0)
14858 || !vect_get_range_info (op0, &op_min_value,
14859 &op_max_value))
14860 break;
14862 if (cvt_type == NULL_TREE
14863 || (wi::min_precision (op_max_value, SIGNED)
14864 > TYPE_PRECISION (cvt_type))
14865 || (wi::min_precision (op_min_value, SIGNED)
14866 > TYPE_PRECISION (cvt_type)))
14867 continue;
14870 cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE (vectype_in),
14871 cvt_type,
14872 nelts);
14873 /* This should only happen for SLP as long as the loop vectorizer
14874 only supports same-sized vectors. */
14875 if (cvt_type == NULL_TREE
14876 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
14877 || !supportable_convert_operation ((tree_code) code1,
14878 vectype_out,
14879 cvt_type, &tc1)
14880 || !supportable_convert_operation ((tree_code) code2,
14881 cvt_type,
14882 vectype_in, &tc2))
14883 continue;
14885 found_mode = true;
14886 break;
14889 if (found_mode)
14891 converts->safe_push (std::make_pair (cvt_type, tc2));
14892 if (TYPE_MODE (cvt_type) != TYPE_MODE (vectype_out))
14893 converts->safe_push (std::make_pair (vectype_out, tc1));
14894 return true;
14897 return false;
14900 /* Generate and return a vector mask of MASK_TYPE such that
14901 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14902 Add the statements to SEQ. */
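/* For illustration: with START_INDEX = 3, END_INDEX = 7 and an 8-element
   MASK_TYPE the generated mask is { true, true, true, true, false, false,
   false, false }, i.e. element I is active iff START_INDEX + I < END_INDEX.  */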
14904 tree
14905 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14906 tree end_index, const char *name)
14908 tree cmp_type = TREE_TYPE (start_index);
14909 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14910 cmp_type, mask_type,
14911 OPTIMIZE_FOR_SPEED));
14912 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14913 start_index, end_index,
14914 build_zero_cst (mask_type));
14915 tree tmp;
14916 if (name)
14917 tmp = make_temp_ssa_name (mask_type, NULL, name);
14918 else
14919 tmp = make_ssa_name (mask_type);
14920 gimple_call_set_lhs (call, tmp);
14921 gimple_seq_add_stmt (seq, call);
14922 return tmp;
14925 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14926 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
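/* I.e. the complement of vect_gen_while; for the example above
   (START_INDEX = 3, END_INDEX = 7, 8 elements) the result is
   { false, false, false, false, true, true, true, true }.  */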
14928 tree
14929 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14930 tree end_index)
14932 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14933 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14936 /* Try to compute the vector types required to vectorize STMT_INFO,
14937 returning true on success and false if vectorization isn't possible.
14938 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14939 make sure that the number of elements in the vectors is no bigger
14940 than GROUP_SIZE.
14942 On success:
14944 - Set *STMT_VECTYPE_OUT to:
14945 - NULL_TREE if the statement doesn't need to be vectorized;
14946 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14948 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14949 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14950 statement does not help to determine the overall number of units. */
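/* For illustration, assuming 128-bit vectors: for _2 = (int) _1 with a char
   _1, *STMT_VECTYPE_OUT would typically be the V4SI type of the result while
   *NUNITS_VECTYPE_OUT would be V16QI, based on the smallest scalar type
   involved, so the vectorization factor accounts for the chars.  */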
14952 opt_result
14953 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14954 tree *stmt_vectype_out,
14955 tree *nunits_vectype_out,
14956 unsigned int group_size)
14958 gimple *stmt = stmt_info->stmt;
14960 /* For BB vectorization, we should always have a group size once we've
14961 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14962 are tentative requests during things like early data reference
14963 analysis and pattern recognition. */
14964 if (is_a <bb_vec_info> (vinfo))
14965 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14966 else
14967 group_size = 0;
14969 *stmt_vectype_out = NULL_TREE;
14970 *nunits_vectype_out = NULL_TREE;
14972 if (gimple_get_lhs (stmt) == NULL_TREE
14973 /* Allow vector conditionals through here. */
14974 && !is_a <gcond *> (stmt)
14975 /* MASK_STORE and friends have no lhs, but are ok. */
14976 && !(is_gimple_call (stmt)
14977 && gimple_call_internal_p (stmt)
14978 && internal_store_fn_p (gimple_call_internal_fn (stmt))))
14980 if (is_a <gcall *> (stmt))
14982 /* Ignore calls with no lhs. These must be calls to
14983 #pragma omp simd functions, and the vectorization factor
14984 they really need can't be determined until
14985 vectorizable_simd_clone_call. */
14986 if (dump_enabled_p ())
14987 dump_printf_loc (MSG_NOTE, vect_location,
14988 "defer to SIMD clone analysis.\n");
14989 return opt_result::success ();
14992 return opt_result::failure_at (stmt,
14993 "not vectorized: irregular stmt: %G", stmt);
14996 tree vectype;
14997 tree scalar_type = NULL_TREE;
14998 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
15000 vectype = STMT_VINFO_VECTYPE (stmt_info);
15001 if (dump_enabled_p ())
15002 dump_printf_loc (MSG_NOTE, vect_location,
15003 "precomputed vectype: %T\n", vectype);
15005 else if (vect_use_mask_type_p (stmt_info))
15007 unsigned int precision = stmt_info->mask_precision;
15008 scalar_type = build_nonstandard_integer_type (precision, 1);
15009 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
15010 if (!vectype)
15011 return opt_result::failure_at (stmt, "not vectorized: unsupported"
15012 " data-type %T\n", scalar_type);
15013 if (dump_enabled_p ())
15014 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
15016 else
15018 /* If we got here with a gcond it means that the target had no available vector
15019 mode for the scalar type.  We can't vectorize, so abort. */
15020 if (is_a <gcond *> (stmt))
15021 return opt_result::failure_at (stmt,
15022 "not vectorized:"
15023 " unsupported data-type for gcond %T\n",
15024 scalar_type);
15026 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
15027 scalar_type = TREE_TYPE (DR_REF (dr));
15028 else
15029 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
15031 if (dump_enabled_p ())
15033 if (group_size)
15034 dump_printf_loc (MSG_NOTE, vect_location,
15035 "get vectype for scalar type (group size %d):"
15036 " %T\n", group_size, scalar_type);
15037 else
15038 dump_printf_loc (MSG_NOTE, vect_location,
15039 "get vectype for scalar type: %T\n", scalar_type);
15041 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
15042 if (!vectype)
15043 return opt_result::failure_at (stmt,
15044 "not vectorized:"
15045 " unsupported data-type %T\n",
15046 scalar_type);
15048 if (dump_enabled_p ())
15049 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
15052 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
15053 return opt_result::failure_at (stmt,
15054 "not vectorized: vector stmt in loop:%G",
15055 stmt);
15057 *stmt_vectype_out = vectype;
15059 /* Don't try to compute scalar types if the stmt produces a boolean
15060 vector; use the existing vector type instead. */
15061 tree nunits_vectype = vectype;
15062 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
15064 /* The number of units is set according to the smallest scalar
15065 type (or the largest vector size, but we only support one
15066 vector size per vectorization). */
15067 scalar_type = vect_get_smallest_scalar_type (stmt_info,
15068 TREE_TYPE (vectype));
15069 if (!types_compatible_p (scalar_type, TREE_TYPE (vectype)))
15071 if (dump_enabled_p ())
15072 dump_printf_loc (MSG_NOTE, vect_location,
15073 "get vectype for smallest scalar type: %T\n",
15074 scalar_type);
15075 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
15076 group_size);
15077 if (!nunits_vectype)
15078 return opt_result::failure_at
15079 (stmt, "not vectorized: unsupported data-type %T\n",
15080 scalar_type);
15081 if (dump_enabled_p ())
15082 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
15083 nunits_vectype);
15087 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
15088 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
15089 return opt_result::failure_at (stmt,
15090 "Not vectorized: Incompatible number "
15091 "of vector subparts between %T and %T\n",
15092 nunits_vectype, *stmt_vectype_out);
15094 if (dump_enabled_p ())
15096 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
15097 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
15098 dump_printf (MSG_NOTE, "\n");
15101 *nunits_vectype_out = nunits_vectype;
15102 return opt_result::success ();
15105 /* Generate and return statement sequence that sets vector length LEN that is:
15107 min_of_start_and_end = min (START_INDEX, END_INDEX);
15108 left_len = END_INDEX - min_of_start_and_end;
15109 rhs = min (left_len, LEN_LIMIT);
15110 LEN = rhs;
15112 Note: the cost of the code generated by this function is modeled
15113 by vect_estimate_min_profitable_iters, so changes here may need
15114 corresponding changes there. */
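/* For illustration: with START_INDEX = 48, END_INDEX = 53 and LEN_LIMIT = 16
   this computes LEN = MIN (53 - MIN (48, 53), 16) = MIN (5, 16) = 5, i.e.
   only the five remaining elements are processed.  */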
15116 gimple_seq
15117 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
15119 gimple_seq stmts = NULL;
15120 tree len_type = TREE_TYPE (len);
15121 gcc_assert (TREE_TYPE (start_index) == len_type);
15123 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
15124 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
15125 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
15126 gimple* stmt = gimple_build_assign (len, rhs);
15127 gimple_seq_add_stmt (&stmts, stmt);
15129 return stmts;