gcc/tree-vect-stmts.cc
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 static unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind,
95 stmt_vec_info stmt_info, slp_tree node,
96 tree vectype, int misalign,
97 enum vect_cost_model_location where)
99 if ((kind == vector_load || kind == unaligned_load)
100 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
101 kind = vector_gather_load;
102 if ((kind == vector_store || kind == unaligned_store)
103 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104 kind = vector_scatter_store;
106 stmt_info_for_cost si
107 = { count, kind, where, stmt_info, node, vectype, misalign };
108 body_cost_vec->safe_push (si);
110 return (unsigned)
111 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 unsigned
115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 tree vectype, int misalign,
118 enum vect_cost_model_location where)
120 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
121 vectype, misalign, where);
124 unsigned
125 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
126 enum vect_cost_for_stmt kind, slp_tree node,
127 tree vectype, int misalign,
128 enum vect_cost_model_location where)
130 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
131 vectype, misalign, where);
134 unsigned
135 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
136 enum vect_cost_for_stmt kind,
137 enum vect_cost_model_location where)
139 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
140 || kind == scalar_stmt);
141 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
142 NULL_TREE, 0, where);
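/* Illustrative sketch (not part of the GCC sources): how a caller could use
   the record_stmt_cost overloads above to accumulate costs.  The function
   name and the stmt_info/vectype values are hypothetical placeholders
   standing in for the surrounding analysis.  */
#if 0
static void
example_record_costs (stmt_vec_info stmt_info, tree vectype,
		      stmt_vector_for_cost *cost_vec)
{
  /* One copy of a plain vector statement in the loop body, with no
     misalignment penalty.  */
  record_stmt_cost (cost_vec, 1, vector_stmt, stmt_info, vectype,
		    0, vect_body);
  /* A taken branch in the prologue uses the stmt-less overload.  */
  record_stmt_cost (cost_vec, 1, cond_branch_taken, vect_prologue);
}
#endif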
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
147 static tree
148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
150 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 "vect_array");
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT_INFO and the vector is associated
157 with scalar destination SCALAR_DEST. */
159 static tree
160 read_vector_array (vec_info *vinfo,
161 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
162 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
164 tree vect_type, vect, vect_name, array_ref;
165 gimple *new_stmt;
167 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
168 vect_type = TREE_TYPE (TREE_TYPE (array));
169 vect = vect_create_destination_var (scalar_dest, vect_type);
170 array_ref = build4 (ARRAY_REF, vect_type, array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
174 new_stmt = gimple_build_assign (vect, array_ref);
175 vect_name = make_ssa_name (vect, new_stmt);
176 gimple_assign_set_lhs (new_stmt, vect_name);
177 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
179 return vect_name;
182 /* ARRAY is an array of vectors created by create_vector_array.
183 Emit code to store SSA_NAME VECT in index N of the array.
184 The store is part of the vectorization of STMT_INFO. */
186 static void
187 write_vector_array (vec_info *vinfo,
188 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
189 tree vect, tree array, unsigned HOST_WIDE_INT n)
191 tree array_ref;
192 gimple *new_stmt;
194 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
195 build_int_cst (size_type_node, n),
196 NULL_TREE, NULL_TREE);
198 new_stmt = gimple_build_assign (array_ref, vect);
199 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
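/* Illustrative sketch (not part of the GCC sources): the array-of-vectors
   helpers above are typically used together, e.g. when expanding
   load/store-lanes.  All variable names below are hypothetical
   placeholders.  */
#if 0
  /* Build VECTYPE vec_array[2], write two vector defs into it and read
     the first one back as an SSA name.  */
  tree vec_array = create_vector_array (vectype, 2);
  write_vector_array (vinfo, stmt_info, gsi, vec_def0, vec_array, 0);
  write_vector_array (vinfo, stmt_info, gsi, vec_def1, vec_array, 1);
  tree elt0 = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
				 vec_array, 0);
#endif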
202 /* PTR is a pointer to an array of type TYPE. Return a representation
203 of *PTR. The memory reference replaces those in FIRST_DR
204 (and its group). */
206 static tree
207 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
209 tree mem_ref;
211 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
212 /* Arrays have the same alignment as their type. */
213 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
214 return mem_ref;
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218 Emit the clobber before *GSI. */
220 static void
221 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
222 gimple_stmt_iterator *gsi, tree var)
224 tree clobber = build_clobber (TREE_TYPE (var));
225 gimple *new_stmt = gimple_build_assign (var, clobber);
226 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
231 /* Function vect_mark_relevant.
233 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
235 static void
236 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
237 enum vect_relevant relevant, bool live_p)
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "mark relevant %d, live %d: %G", relevant, live_p,
245 stmt_info->stmt);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern; in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
253 /* This is the last stmt in a sequence that was detected as a
254 pattern that can potentially be vectorized. Don't mark the stmt
255 as relevant/live because it's not going to be vectorized.
256 Instead mark the pattern-stmt that replaces it. */
258 if (dump_enabled_p ())
259 dump_printf_loc (MSG_NOTE, vect_location,
260 "last stmt in pattern. don't mark"
261 " relevant/live.\n");
262 stmt_vec_info old_stmt_info = stmt_info;
263 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
265 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
266 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
269 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271 STMT_VINFO_RELEVANT (stmt_info) = relevant;
273 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE, vect_location,
278 "already marked relevant/live.\n");
279 return;
282 worklist->safe_push (stmt_info);
286 /* Function is_simple_and_all_uses_invariant
288 Return true if STMT_INFO is simple and all uses of it are invariant. */
290 bool
291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
292 loop_vec_info loop_vinfo)
294 tree op;
295 ssa_op_iter iter;
297 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
298 if (!stmt)
299 return false;
301 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
303 enum vect_def_type dt = vect_uninitialized_def;
305 if (!vect_is_simple_use (op, loop_vinfo, &dt))
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309 "use not simple.\n");
310 return false;
313 if (dt != vect_external_def && dt != vect_constant_def)
314 return false;
316 return true;
319 /* Function vect_stmt_relevant_p.
321 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322 is "relevant for vectorization".
324 A stmt is considered "relevant for vectorization" if:
325 - it has uses outside the loop.
326 - it has vdefs (it alters memory).
327 - control stmts in the loop (except for the exit condition).
329 CHECKME: what other side effects would the vectorizer allow? */
331 static bool
332 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
333 enum vect_relevant *relevant, bool *live_p)
335 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336 ssa_op_iter op_iter;
337 imm_use_iterator imm_iter;
338 use_operand_p use_p;
339 def_operand_p def_p;
341 *relevant = vect_unused_in_scope;
342 *live_p = false;
344 /* cond stmt other than loop exit cond. */
345 if (is_ctrl_stmt (stmt_info->stmt)
346 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
347 *relevant = vect_used_in_scope;
349 /* changing memory. */
350 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
351 if (gimple_vdef (stmt_info->stmt)
352 && !gimple_clobber_p (stmt_info->stmt))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "vec_stmt_relevant_p: stmt has vdefs.\n");
357 *relevant = vect_used_in_scope;
360 /* uses outside the loop. */
361 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
363 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
365 basic_block bb = gimple_bb (USE_STMT (use_p));
366 if (!flow_bb_inside_loop_p (loop, bb))
368 if (is_gimple_debug (USE_STMT (use_p)))
369 continue;
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: used out of loop.\n");
375 /* We expect all such uses to be in the loop exit phis
376 (because of loop closed form) */
377 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
378 gcc_assert (bb == single_exit (loop)->dest);
380 *live_p = true;
385 if (*live_p && *relevant == vect_unused_in_scope
386 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391 *relevant = vect_used_only_live;
394 return (*live_p || *relevant);
398 /* Function exist_non_indexing_operands_for_use_p
400 USE is one of the uses attached to STMT_INFO. Check if USE is
401 used in STMT_INFO for anything other than indexing an array. */
403 static bool
404 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
406 tree operand;
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info))
412 return true;
414 /* STMT has a data_ref. FORNOW this means that it's of one of
415 the following forms:
416 -1- ARRAY_REF = var
417 -2- var = ARRAY_REF
418 (This should have been verified in analyze_data_refs).
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
422 for array indexing.
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
427 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
428 if (!assign || !gimple_assign_copy_p (assign))
430 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
431 if (call && gimple_call_internal_p (call))
433 internal_fn ifn = gimple_call_internal_fn (call);
434 int mask_index = internal_fn_mask_index (ifn);
435 if (mask_index >= 0
436 && use == gimple_call_arg (call, mask_index))
437 return true;
438 int stored_value_index = internal_fn_stored_value_index (ifn);
439 if (stored_value_index >= 0
440 && use == gimple_call_arg (call, stored_value_index))
441 return true;
442 if (internal_gather_scatter_fn_p (ifn)
443 && use == gimple_call_arg (call, 1))
444 return true;
446 return false;
449 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
450 return false;
451 operand = gimple_assign_rhs1 (assign);
452 if (TREE_CODE (operand) != SSA_NAME)
453 return false;
455 if (operand == use)
456 return true;
458 return false;
463 /* Function process_use.
465 Inputs:
466 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468 that defined USE. This is done by calling mark_relevant and passing it
469 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
471 be performed.
473 Outputs:
474 Generally, LIVE_P and RELEVANT are used to define the liveness and
475 relevance info of the DEF_STMT of this USE:
476 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
478 Exceptions:
479 - case 1: If USE is used only for address computations (e.g. array indexing),
480 which does not need to be directly vectorized, then the liveness/relevance
481 of the respective DEF_STMT is left unchanged.
482 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
483 we skip DEF_STMT because it has already been processed.
484 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485 "relevant" will be modified accordingly.
487 Return true if everything is as expected. Return false otherwise. */
489 static opt_result
490 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
491 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
492 bool force)
494 stmt_vec_info dstmt_vinfo;
495 enum vect_def_type dt;
497 /* case 1: we are only interested in uses that need to be vectorized. Uses
498 that are used for address computation are not considered relevant. */
499 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
500 return opt_result::success ();
502 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
503 return opt_result::failure_at (stmt_vinfo->stmt,
504 "not vectorized:"
505 " unsupported use in stmt.\n");
507 if (!dstmt_vinfo)
508 return opt_result::success ();
510 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
511 basic_block bb = gimple_bb (stmt_vinfo->stmt);
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514 We have to force the stmt live since the epilogue loop needs it to
515 continue computing the reduction. */
516 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520 && bb->loop_father == def_bb->loop_father)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE, vect_location,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
526 return opt_result::success ();
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
531 d = dstmt_vinfo
532 inner-loop:
533 stmt # use (d)
534 outer-loop-tail-bb:
535 ... */
536 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
542 switch (relevant)
544 case vect_unused_in_scope:
545 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 vect_used_in_scope : vect_unused_in_scope;
547 break;
549 case vect_used_in_outer_by_reduction:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_by_reduction;
552 break;
554 case vect_used_in_outer:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_in_scope;
557 break;
559 case vect_used_in_scope:
560 break;
562 default:
563 gcc_unreachable ();
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
570 inner-loop:
571 d = dstmt_vinfo
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 stmt # use (d) */
574 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE, vect_location,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
580 switch (relevant)
582 case vect_unused_in_scope:
583 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585 vect_used_in_outer_by_reduction : vect_unused_in_scope;
586 break;
588 case vect_used_by_reduction:
589 case vect_used_only_live:
590 relevant = vect_used_in_outer_by_reduction;
591 break;
593 case vect_used_in_scope:
594 relevant = vect_used_in_outer;
595 break;
597 default:
598 gcc_unreachable ();
601 /* We are also not interested in uses on loop PHI backedges that are
602 inductions. Otherwise we'll needlessly vectorize the IV increment
603 and cause hybrid SLP for SLP inductions. Unless the PHI is live
604 of course. */
605 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
606 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
607 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
608 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
609 loop_latch_edge (bb->loop_father))
610 == use))
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE, vect_location,
614 "induction value on backedge.\n");
615 return opt_result::success ();
619 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
620 return opt_result::success ();
624 /* Function vect_mark_stmts_to_be_vectorized.
626 Not all stmts in the loop need to be vectorized. For example:
628 for i...
629 for j...
630 1. T0 = i + j
631 2. T1 = a[T0]
633 3. j = j + 1
635 Stmts 1 and 3 do not need to be vectorized, because loop control and
636 addressing of vectorized data-refs are handled differently.
638 This pass detects such stmts. */
640 opt_result
641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
643 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
645 unsigned int nbbs = loop->num_nodes;
646 gimple_stmt_iterator si;
647 unsigned int i;
648 basic_block bb;
649 bool live_p;
650 enum vect_relevant relevant;
652 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
654 auto_vec<stmt_vec_info, 64> worklist;
656 /* 1. Init worklist. */
657 for (i = 0; i < nbbs; i++)
659 bb = bbs[i];
660 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
662 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
665 phi_info->stmt);
667 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
668 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
670 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
672 if (is_gimple_debug (gsi_stmt (si)))
673 continue;
674 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE, vect_location,
677 "init: stmt relevant? %G", stmt_info->stmt);
679 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
680 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
684 /* 2. Process_worklist */
685 while (worklist.length () > 0)
687 use_operand_p use_p;
688 ssa_op_iter iter;
690 stmt_vec_info stmt_vinfo = worklist.pop ();
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location,
693 "worklist: examine stmt: %G", stmt_vinfo->stmt);
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant according to the relevance property
697 of STMT. */
698 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
700 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 propagated as is to the DEF_STMTs of its USEs.
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the relevance to vect_used_by_reduction.
705 This is because we distinguish between two kinds of relevant stmts -
706 those that are used by a reduction computation, and those that are
707 (also) used by a regular computation. This allows us later on to
708 identify stmts that are used solely by a reduction, and therefore the
709 order of the results that they produce does not have to be kept. */
711 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
713 case vect_reduction_def:
714 gcc_assert (relevant != vect_unused_in_scope);
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_in_scope
717 && relevant != vect_used_by_reduction
718 && relevant != vect_used_only_live)
719 return opt_result::failure_at
720 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
721 break;
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
727 return opt_result::failure_at
728 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
729 break;
731 case vect_double_reduction_def:
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_by_reduction
734 && relevant != vect_used_only_live)
735 return opt_result::failure_at
736 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
737 break;
739 default:
740 break;
743 if (is_pattern_stmt_p (stmt_vinfo))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
750 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
751 tree op = gimple_assign_rhs1 (assign);
753 i = 1;
754 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
756 opt_result res
757 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
758 loop_vinfo, relevant, &worklist, false);
759 if (!res)
760 return res;
761 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
762 loop_vinfo, relevant, &worklist, false);
763 if (!res)
764 return res;
765 i = 2;
767 for (; i < gimple_num_ops (assign); i++)
769 op = gimple_op (assign, i);
770 if (TREE_CODE (op) == SSA_NAME)
772 opt_result res
773 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
774 &worklist, false);
775 if (!res)
776 return res;
780 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
782 for (i = 0; i < gimple_call_num_args (call); i++)
784 tree arg = gimple_call_arg (call, i);
785 opt_result res
786 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
787 &worklist, false);
788 if (!res)
789 return res;
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
796 tree op = USE_FROM_PTR (use_p);
797 opt_result res
798 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
799 &worklist, false);
800 if (!res)
801 return res;
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 opt_result res
810 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
811 &worklist, true);
812 if (!res)
814 if (fatal)
815 *fatal = false;
816 return res;
819 } /* while worklist */
821 return opt_result::success ();
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
830 static void
831 vect_model_simple_cost (vec_info *,
832 stmt_vec_info stmt_info, int ncopies,
833 enum vect_def_type *dt,
834 int ndts,
835 slp_tree node,
836 stmt_vector_for_cost *cost_vec,
837 vect_cost_for_stmt kind = vector_stmt)
839 int inside_cost = 0, prologue_cost = 0;
841 gcc_assert (cost_vec != NULL);
843 /* ??? Somehow we need to fix this at the callers. */
844 if (node)
845 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
847 if (!node)
848 /* Cost the "broadcast" of a scalar operand into a vector operand.
849 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
850 cost model. */
851 for (int i = 0; i < ndts; i++)
852 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
853 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
854 stmt_info, 0, vect_prologue);
856 /* Pass the inside-of-loop statements to the target-specific cost model. */
857 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
858 stmt_info, 0, vect_body);
860 if (dump_enabled_p ())
861 dump_printf_loc (MSG_NOTE, vect_location,
862 "vect_model_simple_cost: inside_cost = %d, "
863 "prologue_cost = %d .\n", inside_cost, prologue_cost);
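/* Illustrative sketch (not part of the GCC sources): a vectorizable_*
   analysis routine would typically finish its costing with a call roughly
   like the one below; dt[]/ndts/ncopies/slp_node/cost_vec are placeholders
   for values computed by the caller.  */
#if 0
  vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
			  cost_vec);
#endif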
867 /* Model cost for type demotion and promotion operations. PWR is
868 normally zero for single-step promotions and demotions. It will be
869 one if two-step promotion/demotion is required, and so on. NCOPIES
870 is the number of vector results (and thus number of instructions)
871 for the narrowest end of the operation chain. Each additional
872 step doubles the number of instructions required. If WIDEN_ARITH
873 is true the stmt is doing widening arithmetic. */
875 static void
876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
877 enum vect_def_type *dt,
878 unsigned int ncopies, int pwr,
879 stmt_vector_for_cost *cost_vec,
880 bool widen_arith)
882 int i;
883 int inside_cost = 0, prologue_cost = 0;
885 for (i = 0; i < pwr + 1; i++)
887 inside_cost += record_stmt_cost (cost_vec, ncopies,
888 widen_arith
889 ? vector_stmt : vec_promote_demote,
890 stmt_info, 0, vect_body);
891 ncopies *= 2;
894 /* FORNOW: Assuming maximum 2 args per stmt. */
895 for (i = 0; i < 2; i++)
896 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
898 stmt_info, 0, vect_prologue);
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE, vect_location,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost, prologue_cost);
906 /* Returns true if the current function returns DECL. */
908 static bool
909 cfun_returns (tree decl)
911 edge_iterator ei;
912 edge e;
913 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
915 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
916 if (!ret)
917 continue;
918 if (gimple_return_retval (ret) == decl)
919 return true;
920 /* We often end up with an aggregate copy to the result decl,
921 handle that case as well. First skip intermediate clobbers
922 though. */
923 gimple *def = ret;
926 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
928 while (gimple_clobber_p (def));
929 if (is_a <gassign *> (def)
930 && gimple_assign_lhs (def) == gimple_return_retval (ret)
931 && gimple_assign_rhs1 (def) == decl)
932 return true;
934 return false;
937 /* Function vect_model_store_cost
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
942 static void
943 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
944 vect_memory_access_type memory_access_type,
945 gather_scatter_info *gs_info,
946 dr_alignment_support alignment_support_scheme,
947 int misalignment,
948 vec_load_store_type vls_type, slp_tree slp_node,
949 stmt_vector_for_cost *cost_vec)
951 unsigned int inside_cost = 0, prologue_cost = 0;
952 stmt_vec_info first_stmt_info = stmt_info;
953 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
955 /* ??? Somehow we need to fix this at the callers. */
956 if (slp_node)
957 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
959 if (vls_type == VLS_STORE_INVARIANT)
961 if (!slp_node)
962 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
963 stmt_info, 0, vect_prologue);
966 /* Grouped stores update all elements in the group at once,
967 so we want the DR for the first statement. */
968 if (!slp_node && grouped_access_p)
969 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
971 /* True if we should include any once-per-group costs as well as
972 the cost of the statement itself. For SLP we only get called
973 once per group anyhow. */
974 bool first_stmt_p = (first_stmt_info == stmt_info);
976 /* We assume that the cost of a single store-lanes instruction is
977 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
978 access is instead being provided by a permute-and-store operation,
979 include the cost of the permutes. */
980 if (first_stmt_p
981 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
983 /* Uses high and low interleave or shuffle operations for each
984 needed permute. */
985 int group_size = DR_GROUP_SIZE (first_stmt_info);
986 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
987 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
988 stmt_info, 0, vect_body);
990 if (dump_enabled_p ())
991 dump_printf_loc (MSG_NOTE, vect_location,
992 "vect_model_store_cost: strided group_size = %d .\n",
993 group_size);
996 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
997 /* Costs of the stores. */
998 if (memory_access_type == VMAT_ELEMENTWISE
999 || memory_access_type == VMAT_GATHER_SCATTER)
1001 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1002 if (memory_access_type == VMAT_GATHER_SCATTER
1003 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1004 /* For an emulated scatter, N offset vector element extracts
1005 (we assume the scalar scaling and ptr + offset add is consumed by
1006 the store). */
1007 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1008 vec_to_scalar, stmt_info, 0,
1009 vect_body);
1010 /* N scalar stores plus extracting the elements. */
1011 inside_cost += record_stmt_cost (cost_vec,
1012 ncopies * assumed_nunits,
1013 scalar_store, stmt_info, 0, vect_body);
1015 else
1016 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1017 misalignment, &inside_cost, cost_vec);
1019 if (memory_access_type == VMAT_ELEMENTWISE
1020 || memory_access_type == VMAT_STRIDED_SLP
1021 || (memory_access_type == VMAT_GATHER_SCATTER
1022 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1024 /* N scalar stores plus extracting the elements. */
1025 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1026 inside_cost += record_stmt_cost (cost_vec,
1027 ncopies * assumed_nunits,
1028 vec_to_scalar, stmt_info, 0, vect_body);
1031 /* When vectorizing a store into the function result, assign
1032 a penalty if the function returns in a multi-register location.
1033 In this case we assume we'll end up having to spill the
1034 vector result and do piecewise loads as a conservative estimate. */
1035 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1036 if (base
1037 && (TREE_CODE (base) == RESULT_DECL
1038 || (DECL_P (base) && cfun_returns (base)))
1039 && !aggregate_value_p (base, cfun->decl))
1041 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1042 /* ??? Handle PARALLEL in some way. */
1043 if (REG_P (reg))
1045 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1046 /* Assume that a single reg-reg move is possible and cheap,
1047 do not account for vector to gp register move cost. */
1048 if (nregs > 1)
1050 /* Spill. */
1051 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1052 vector_store,
1053 stmt_info, 0, vect_epilogue);
1054 /* Loads. */
1055 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1056 scalar_load,
1057 stmt_info, 0, vect_epilogue);
1062 if (dump_enabled_p ())
1063 dump_printf_loc (MSG_NOTE, vect_location,
1064 "vect_model_store_cost: inside_cost = %d, "
1065 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1069 /* Calculate cost of DR's memory access. */
1070 void
1071 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1072 dr_alignment_support alignment_support_scheme,
1073 int misalignment,
1074 unsigned int *inside_cost,
1075 stmt_vector_for_cost *body_cost_vec)
1077 switch (alignment_support_scheme)
1079 case dr_aligned:
1081 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1082 vector_store, stmt_info, 0,
1083 vect_body);
1085 if (dump_enabled_p ())
1086 dump_printf_loc (MSG_NOTE, vect_location,
1087 "vect_model_store_cost: aligned.\n");
1088 break;
1091 case dr_unaligned_supported:
1093 /* Here, we assign an additional cost for the unaligned store. */
1094 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1095 unaligned_store, stmt_info,
1096 misalignment, vect_body);
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_NOTE, vect_location,
1099 "vect_model_store_cost: unaligned supported by "
1100 "hardware.\n");
1101 break;
1104 case dr_unaligned_unsupported:
1106 *inside_cost = VECT_MAX_COST;
1108 if (dump_enabled_p ())
1109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1110 "vect_model_store_cost: unsupported access.\n");
1111 break;
1114 default:
1115 gcc_unreachable ();
1120 /* Function vect_model_load_cost
1122 Models cost for loads. In the case of grouped accesses, one access has
1123 the overhead of the grouped access attributed to it. Since unaligned
1124 accesses are supported for loads, we also account for the costs of the
1125 access scheme chosen. */
1127 static void
1128 vect_model_load_cost (vec_info *vinfo,
1129 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1130 vect_memory_access_type memory_access_type,
1131 dr_alignment_support alignment_support_scheme,
1132 int misalignment,
1133 gather_scatter_info *gs_info,
1134 slp_tree slp_node,
1135 stmt_vector_for_cost *cost_vec)
1137 unsigned int inside_cost = 0, prologue_cost = 0;
1138 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1140 gcc_assert (cost_vec);
1142 /* ??? Somehow we need to fix this at the callers. */
1143 if (slp_node)
1144 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1146 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1148 /* If the load is permuted then the alignment is determined by
1149 the first group element, not by the first scalar stmt DR. */
1150 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1151 /* Record the cost for the permutation. */
1152 unsigned n_perms, n_loads;
1153 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1154 vf, true, &n_perms, &n_loads);
1155 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1156 first_stmt_info, 0, vect_body);
1158 /* And adjust the number of loads performed. This handles
1159 redundancies as well as loads that are later dead. */
1160 ncopies = n_loads;
1163 /* Grouped loads read all elements in the group at once,
1164 so we want the DR for the first statement. */
1165 stmt_vec_info first_stmt_info = stmt_info;
1166 if (!slp_node && grouped_access_p)
1167 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1169 /* True if we should include any once-per-group costs as well as
1170 the cost of the statement itself. For SLP we only get called
1171 once per group anyhow. */
1172 bool first_stmt_p = (first_stmt_info == stmt_info);
1174 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1175 ones we actually need. Account for the cost of unused results. */
1176 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1178 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1179 stmt_vec_info next_stmt_info = first_stmt_info;
1182 gaps -= 1;
1183 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1185 while (next_stmt_info);
1186 if (gaps)
1188 if (dump_enabled_p ())
1189 dump_printf_loc (MSG_NOTE, vect_location,
1190 "vect_model_load_cost: %d unused vectors.\n",
1191 gaps);
1192 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1193 alignment_support_scheme, misalignment, false,
1194 &inside_cost, &prologue_cost,
1195 cost_vec, cost_vec, true);
1199 /* We assume that the cost of a single load-lanes instruction is
1200 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1201 access is instead being provided by a load-and-permute operation,
1202 include the cost of the permutes. */
1203 if (first_stmt_p
1204 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1206 /* Uses even and odd extract operations or shuffle operations
1207 for each needed permute. */
1208 int group_size = DR_GROUP_SIZE (first_stmt_info);
1209 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1210 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1211 stmt_info, 0, vect_body);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE, vect_location,
1215 "vect_model_load_cost: strided group_size = %d .\n",
1216 group_size);
1219 /* The loads themselves. */
1220 if (memory_access_type == VMAT_ELEMENTWISE
1221 || memory_access_type == VMAT_GATHER_SCATTER)
1223 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1224 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1225 if (memory_access_type == VMAT_GATHER_SCATTER
1226 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1227 /* For emulated gathers, N offset vector element extracts
1228 (we assume the scalar scaling and ptr + offset add is consumed by
1229 the load). */
1230 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1231 vec_to_scalar, stmt_info, 0,
1232 vect_body);
1233 /* N scalar loads plus gathering them into a vector. */
1234 inside_cost += record_stmt_cost (cost_vec,
1235 ncopies * assumed_nunits,
1236 scalar_load, stmt_info, 0, vect_body);
1238 else if (memory_access_type == VMAT_INVARIANT)
1240 /* Invariant loads will ideally be hoisted and splat to a vector. */
1241 prologue_cost += record_stmt_cost (cost_vec, 1,
1242 scalar_load, stmt_info, 0,
1243 vect_prologue);
1244 prologue_cost += record_stmt_cost (cost_vec, 1,
1245 scalar_to_vec, stmt_info, 0,
1246 vect_prologue);
1248 else
1249 vect_get_load_cost (vinfo, stmt_info, ncopies,
1250 alignment_support_scheme, misalignment, first_stmt_p,
1251 &inside_cost, &prologue_cost,
1252 cost_vec, cost_vec, true);
1253 if (memory_access_type == VMAT_ELEMENTWISE
1254 || memory_access_type == VMAT_STRIDED_SLP
1255 || (memory_access_type == VMAT_GATHER_SCATTER
1256 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1257 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1258 stmt_info, 0, vect_body);
1260 if (dump_enabled_p ())
1261 dump_printf_loc (MSG_NOTE, vect_location,
1262 "vect_model_load_cost: inside_cost = %d, "
1263 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1267 /* Calculate cost of DR's memory access. */
1268 void
1269 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1270 dr_alignment_support alignment_support_scheme,
1271 int misalignment,
1272 bool add_realign_cost, unsigned int *inside_cost,
1273 unsigned int *prologue_cost,
1274 stmt_vector_for_cost *prologue_cost_vec,
1275 stmt_vector_for_cost *body_cost_vec,
1276 bool record_prologue_costs)
1278 switch (alignment_support_scheme)
1280 case dr_aligned:
1282 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1283 stmt_info, 0, vect_body);
1285 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE, vect_location,
1287 "vect_model_load_cost: aligned.\n");
1289 break;
1291 case dr_unaligned_supported:
1293 /* Here, we assign an additional cost for the unaligned load. */
1294 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1295 unaligned_load, stmt_info,
1296 misalignment, vect_body);
1298 if (dump_enabled_p ())
1299 dump_printf_loc (MSG_NOTE, vect_location,
1300 "vect_model_load_cost: unaligned supported by "
1301 "hardware.\n");
1303 break;
1305 case dr_explicit_realign:
1307 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1308 vector_load, stmt_info, 0, vect_body);
1309 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1310 vec_perm, stmt_info, 0, vect_body);
1312 /* FIXME: If the misalignment remains fixed across the iterations of
1313 the containing loop, the following cost should be added to the
1314 prologue costs. */
1315 if (targetm.vectorize.builtin_mask_for_load)
1316 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1317 stmt_info, 0, vect_body);
1319 if (dump_enabled_p ())
1320 dump_printf_loc (MSG_NOTE, vect_location,
1321 "vect_model_load_cost: explicit realign\n");
1323 break;
1325 case dr_explicit_realign_optimized:
1327 if (dump_enabled_p ())
1328 dump_printf_loc (MSG_NOTE, vect_location,
1329 "vect_model_load_cost: unaligned software "
1330 "pipelined.\n");
1332 /* Unaligned software pipeline has a load of an address, an initial
1333 load, and possibly a mask operation to "prime" the loop. However,
1334 if this is an access in a group of loads, which provide grouped
1335 access, then the above cost should only be considered for one
1336 access in the group. Inside the loop, there is a load op
1337 and a realignment op. */
1339 if (add_realign_cost && record_prologue_costs)
1341 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1342 vector_stmt, stmt_info,
1343 0, vect_prologue);
1344 if (targetm.vectorize.builtin_mask_for_load)
1345 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1346 vector_stmt, stmt_info,
1347 0, vect_prologue);
1350 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1351 stmt_info, 0, vect_body);
1352 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1353 stmt_info, 0, vect_body);
1355 if (dump_enabled_p ())
1356 dump_printf_loc (MSG_NOTE, vect_location,
1357 "vect_model_load_cost: explicit realign optimized"
1358 "\n");
1360 break;
1363 case dr_unaligned_unsupported:
1365 *inside_cost = VECT_MAX_COST;
1367 if (dump_enabled_p ())
1368 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1369 "vect_model_load_cost: unsupported access.\n");
1370 break;
1373 default:
1374 gcc_unreachable ();
1378 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1379 the loop preheader for the vectorized stmt STMT_VINFO. */
1381 static void
1382 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1383 gimple_stmt_iterator *gsi)
1385 if (gsi)
1386 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1387 else
1388 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1390 if (dump_enabled_p ())
1391 dump_printf_loc (MSG_NOTE, vect_location,
1392 "created new init_stmt: %G", new_stmt);
1395 /* Function vect_init_vector.
1397 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1398 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1399 vector type a vector with all elements equal to VAL is created first.
1400 Place the initialization at GSI if it is not NULL. Otherwise, place the
1401 initialization at the loop preheader.
1402 Return the DEF of INIT_STMT.
1403 It will be used in the vectorization of STMT_INFO. */
1405 tree
1406 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1407 gimple_stmt_iterator *gsi)
1409 gimple *init_stmt;
1410 tree new_temp;
1412 /* We abuse this function to push something to an SSA name with initial value 'val'. */
1413 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1415 gcc_assert (VECTOR_TYPE_P (type));
1416 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1418 /* A scalar boolean value should be transformed into an
1419 all-zeros or all-ones value before building a vector. */
1420 if (VECTOR_BOOLEAN_TYPE_P (type))
1422 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1423 tree false_val = build_zero_cst (TREE_TYPE (type));
1425 if (CONSTANT_CLASS_P (val))
1426 val = integer_zerop (val) ? false_val : true_val;
1427 else
1429 new_temp = make_ssa_name (TREE_TYPE (type));
1430 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1431 val, true_val, false_val);
1432 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1433 val = new_temp;
1436 else
1438 gimple_seq stmts = NULL;
1439 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1440 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1441 TREE_TYPE (type), val);
1442 else
1443 /* ??? Condition vectorization expects us to do
1444 promotion of invariant/external defs. */
1445 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1446 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1447 !gsi_end_p (gsi2); )
1449 init_stmt = gsi_stmt (gsi2);
1450 gsi_remove (&gsi2, false);
1451 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1455 val = build_vector_from_val (type, val);
1458 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1459 init_stmt = gimple_build_assign (new_temp, val);
1460 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1461 return new_temp;
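/* Illustrative sketch (not part of the GCC sources): splatting a scalar
   constant into a vector def emitted on the loop preheader (GSI == NULL).
   The vinfo/stmt_info/vectype variables are placeholders supplied by the
   caller.  */
#if 0
  tree scalar = build_int_cst (TREE_TYPE (vectype), 3);
  tree splat = vect_init_vector (vinfo, stmt_info, scalar, vectype, NULL);
#endif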
1465 /* Function vect_get_vec_defs_for_operand.
1467 OP is an operand in STMT_VINFO. This function returns a vector of
1468 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1470 In the case that OP is an SSA_NAME which is defined in the loop, then
1471 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1473 In case OP is an invariant or constant, a new stmt that creates a vector def
1474 needs to be introduced. VECTYPE may be used to specify a required type for
1475 vector invariant. */
1477 void
1478 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1479 unsigned ncopies,
1480 tree op, vec<tree> *vec_oprnds, tree vectype)
1482 gimple *def_stmt;
1483 enum vect_def_type dt;
1484 bool is_simple_use;
1485 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1487 if (dump_enabled_p ())
1488 dump_printf_loc (MSG_NOTE, vect_location,
1489 "vect_get_vec_defs_for_operand: %T\n", op);
1491 stmt_vec_info def_stmt_info;
1492 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1493 &def_stmt_info, &def_stmt);
1494 gcc_assert (is_simple_use);
1495 if (def_stmt && dump_enabled_p ())
1496 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1498 vec_oprnds->create (ncopies);
1499 if (dt == vect_constant_def || dt == vect_external_def)
1501 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1502 tree vector_type;
1504 if (vectype)
1505 vector_type = vectype;
1506 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1507 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1508 vector_type = truth_type_for (stmt_vectype);
1509 else
1510 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1512 gcc_assert (vector_type);
1513 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1514 while (ncopies--)
1515 vec_oprnds->quick_push (vop);
1517 else
1519 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1520 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1521 for (unsigned i = 0; i < ncopies; ++i)
1522 vec_oprnds->quick_push (gimple_get_lhs
1523 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1528 /* Get vectorized definitions for OP0 and OP1. */
1530 void
1531 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1532 unsigned ncopies,
1533 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1534 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1535 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1536 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1538 if (slp_node)
1540 if (op0)
1541 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1542 if (op1)
1543 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1544 if (op2)
1545 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1546 if (op3)
1547 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1549 else
1551 if (op0)
1552 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1553 op0, vec_oprnds0, vectype0);
1554 if (op1)
1555 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1556 op1, vec_oprnds1, vectype1);
1557 if (op2)
1558 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1559 op2, vec_oprnds2, vectype2);
1560 if (op3)
1561 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1562 op3, vec_oprnds3, vectype3);
1566 void
1567 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1568 unsigned ncopies,
1569 tree op0, vec<tree> *vec_oprnds0,
1570 tree op1, vec<tree> *vec_oprnds1,
1571 tree op2, vec<tree> *vec_oprnds2,
1572 tree op3, vec<tree> *vec_oprnds3)
1574 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1575 op0, vec_oprnds0, NULL_TREE,
1576 op1, vec_oprnds1, NULL_TREE,
1577 op2, vec_oprnds2, NULL_TREE,
1578 op3, vec_oprnds3, NULL_TREE);
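/* Illustrative sketch (not part of the GCC sources): fetching the
   vectorized defs of a two-operand statement with the short overload
   above; unused operand slots are passed as NULL_TREE/NULL.  The
   op0/op1/ncopies/slp_node variables are placeholders.  */
#if 0
  vec<tree> vec_oprnds0 = vNULL, vec_oprnds1 = vNULL;
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, &vec_oprnds0, op1, &vec_oprnds1,
		     NULL_TREE, NULL, NULL_TREE, NULL);
  /* ... use the defs, then release the vectors.  */
#endif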
1581 /* Helper function called by vect_finish_replace_stmt and
1582 vect_finish_stmt_generation. Set the location of the new
1583 statement and create and return a stmt_vec_info for it. */
1585 static void
1586 vect_finish_stmt_generation_1 (vec_info *,
1587 stmt_vec_info stmt_info, gimple *vec_stmt)
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1592 if (stmt_info)
1594 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1596 /* While EH edges will generally prevent vectorization, stmt might
1597 e.g. be in a must-not-throw region. Ensure newly created stmts
1598 that could throw are part of the same region. */
1599 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1600 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1601 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1603 else
1604 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1607 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1608 which sets the same scalar result as STMT_INFO did. Create and return a
1609 stmt_vec_info for VEC_STMT. */
1611 void
1612 vect_finish_replace_stmt (vec_info *vinfo,
1613 stmt_vec_info stmt_info, gimple *vec_stmt)
1615 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1616 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1618 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1619 gsi_replace (&gsi, vec_stmt, true);
1621 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1624 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1625 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1627 void
1628 vect_finish_stmt_generation (vec_info *vinfo,
1629 stmt_vec_info stmt_info, gimple *vec_stmt,
1630 gimple_stmt_iterator *gsi)
1632 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1634 if (!gsi_end_p (*gsi)
1635 && gimple_has_mem_ops (vec_stmt))
1637 gimple *at_stmt = gsi_stmt (*gsi);
1638 tree vuse = gimple_vuse (at_stmt);
1639 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1641 tree vdef = gimple_vdef (at_stmt);
1642 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1643 gimple_set_modified (vec_stmt, true);
1644 /* If we have an SSA vuse and insert a store, update virtual
1645 SSA form to avoid triggering the renamer. Do so only
1646 if we can easily see all uses - which is what almost always
1647 happens with the way vectorized stmts are inserted. */
1648 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1649 && ((is_gimple_assign (vec_stmt)
1650 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1651 || (is_gimple_call (vec_stmt)
1652 && (!(gimple_call_flags (vec_stmt)
1653 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1654 || (gimple_call_lhs (vec_stmt)
1655 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1657 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1658 gimple_set_vdef (vec_stmt, new_vdef);
1659 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1663 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1664 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
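/* Illustrative sketch (not part of the GCC sources): creating one vector
   statement and inserting it before *GSI via the helper above.
   scalar_dest, vectype, vop0 and vop1 are placeholder values from the
   caller.  */
#if 0
  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
  gassign *new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, vop0, vop1);
  tree new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
#endif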
1667 /* We want to vectorize a call to combined function CFN with function
1668 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1669 as the types of all inputs. Check whether this is possible using
1670 an internal function, returning its code if so or IFN_LAST if not. */
1672 static internal_fn
1673 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1674 tree vectype_out, tree vectype_in)
1676 internal_fn ifn;
1677 if (internal_fn_p (cfn))
1678 ifn = as_internal_fn (cfn);
1679 else
1680 ifn = associated_internal_fn (fndecl);
1681 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1683 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1684 if (info.vectorizable)
1686 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1687 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1688 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1689 OPTIMIZE_FOR_SPEED))
1690 return ifn;
1693 return IFN_LAST;
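/* Illustrative sketch (not part of the GCC sources): asking whether a
   sqrt call can be vectorized via a direct internal function.  The
   fndecl/vectype_out/vectype_in variables are placeholders chosen by the
   caller.  */
#if 0
  internal_fn ifn = vectorizable_internal_function (CFN_SQRT, fndecl,
						    vectype_out, vectype_in);
  if (ifn != IFN_LAST)
    /* ... emit a call to the internal function instead ...  */;
#endif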
1697 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1698 gimple_stmt_iterator *);
1700 /* Check whether a load or store statement in the loop described by
1701 LOOP_VINFO is possible in a loop using partial vectors. This is
1702 testing whether the vectorizer pass has the appropriate support,
1703 as well as whether the target does.
1705 VLS_TYPE says whether the statement is a load or store and VECTYPE
1706 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1707 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1708 says how the load or store is going to be implemented and GROUP_SIZE
1709 is the number of load or store statements in the containing group.
1710 If the access is a gather load or scatter store, GS_INFO describes
1711 its arguments. If the load or store is conditional, SCALAR_MASK is the
1712 condition under which it occurs.
1714 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1715 vectors is not supported, otherwise record the required rgroup control
1716 types. */
1718 static void
1719 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1720 slp_tree slp_node,
1721 vec_load_store_type vls_type,
1722 int group_size,
1723 vect_memory_access_type
1724 memory_access_type,
1725 gather_scatter_info *gs_info,
1726 tree scalar_mask)
1728 /* Invariant loads need no special support. */
1729 if (memory_access_type == VMAT_INVARIANT)
1730 return;
1732 unsigned int nvectors;
1733 if (slp_node)
1734 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1735 else
1736 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1738 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1739 machine_mode vecmode = TYPE_MODE (vectype);
1740 bool is_load = (vls_type == VLS_LOAD);
1741 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1743 if (is_load
1744 ? !vect_load_lanes_supported (vectype, group_size, true)
1745 : !vect_store_lanes_supported (vectype, group_size, true))
1747 if (dump_enabled_p ())
1748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1749 "can't operate on partial vectors because"
1750 " the target doesn't have an appropriate"
1751 " load/store-lanes instruction.\n");
1752 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1753 return;
1755 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1756 scalar_mask);
1757 return;
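/* For gather loads and scatter stores, partial vectors are implemented
via the masked gather/scatter internal functions; check that the target
supports them for this memory type, offset vector type and scale. */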
1760 if (memory_access_type == VMAT_GATHER_SCATTER)
1762 internal_fn ifn = (is_load
1763 ? IFN_MASK_GATHER_LOAD
1764 : IFN_MASK_SCATTER_STORE);
1765 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1766 gs_info->memory_type,
1767 gs_info->offset_vectype,
1768 gs_info->scale))
1770 if (dump_enabled_p ())
1771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1772 "can't operate on partial vectors because"
1773 " the target doesn't have an appropriate"
1774 " gather load or scatter store instruction.\n");
1775 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1776 return;
1778 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1779 scalar_mask);
1780 return;
1783 if (memory_access_type != VMAT_CONTIGUOUS
1784 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1786 /* Element X of the data must come from iteration i * VF + X of the
1787 scalar loop. We need more work to support other mappings. */
1788 if (dump_enabled_p ())
1789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1790 "can't operate on partial vectors because an"
1791 " access isn't contiguous.\n");
1792 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1793 return;
1796 if (!VECTOR_MODE_P (vecmode))
1798 if (dump_enabled_p ())
1799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1800 "can't operate on partial vectors when emulating"
1801 " vector operations.\n");
1802 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1803 return;
1806 /* We might load more scalars than we need for permuting SLP loads.
1807 We checked in get_group_load_store_type that the extra elements
1808 don't leak into a new vector. */
1809 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1811 unsigned int nvectors;
1812 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1813 return nvectors;
1814 gcc_unreachable ();
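/* Illustrative example: with GROUP_SIZE * VF == 12 scalar elements and
8-element vectors, dividing away from zero gives 2 vectors. */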
1817 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1818 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1819 machine_mode mask_mode;
1820 bool using_partial_vectors_p = false;
1821 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1822 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1824 nvectors = group_memory_nvectors (group_size * vf, nunits);
1825 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1826 using_partial_vectors_p = true;
1829 machine_mode vmode;
1830 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1832 nvectors = group_memory_nvectors (group_size * vf, nunits);
1833 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1834 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1835 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1836 using_partial_vectors_p = true;
1839 if (!using_partial_vectors_p)
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1843 "can't operate on partial vectors because the"
1844 " target doesn't have the appropriate partial"
1845 " vectorization load or store.\n");
1846 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1850 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1851 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1852 that needs to be applied to all loads and stores in a vectorized loop.
1853 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1854 otherwise return VEC_MASK & LOOP_MASK.
1856 MASK_TYPE is the type of both masks. If new statements are needed,
1857 insert them before GSI. */
1859 static tree
1860 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1861 tree vec_mask, gimple_stmt_iterator *gsi)
1863 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1864 if (!loop_mask)
1865 return vec_mask;
1867 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1869 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1870 return vec_mask;
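/* Otherwise emit VEC_MASK & LOOP_MASK before GSI and return the
combined mask. */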
1872 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1873 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1874 vec_mask, loop_mask);
1876 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1877 return and_res;
1880 /* Determine whether we can use a gather load or scatter store to vectorize
1881 strided load or store STMT_INFO by truncating the current offset to a
1882 smaller width. We need to be able to construct an offset vector:
1884 { 0, X, X*2, X*3, ... }
1886 without loss of precision, where X is STMT_INFO's DR_STEP.
1888 Return true if this is possible, describing the gather load or scatter
1889 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1891 static bool
1892 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1893 loop_vec_info loop_vinfo, bool masked_p,
1894 gather_scatter_info *gs_info)
1896 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1897 data_reference *dr = dr_info->dr;
1898 tree step = DR_STEP (dr);
1899 if (TREE_CODE (step) != INTEGER_CST)
1901 /* ??? Perhaps we could use range information here? */
1902 if (dump_enabled_p ())
1903 dump_printf_loc (MSG_NOTE, vect_location,
1904 "cannot truncate variable step.\n");
1905 return false;
1908 /* Get the number of bits in an element. */
1909 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1910 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1911 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1913 /* Set COUNT to the upper limit on the number of elements - 1.
1914 Start with the maximum vectorization factor. */
1915 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1917 /* Try lowering COUNT to the number of scalar latch iterations. */
1918 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1919 widest_int max_iters;
1920 if (max_loop_iterations (loop, &max_iters)
1921 && max_iters < count)
1922 count = max_iters.to_shwi ();
1924 /* Try scales of 1 and the element size. */
1925 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1926 wi::overflow_type overflow = wi::OVF_NONE;
1927 for (int i = 0; i < 2; ++i)
1929 int scale = scales[i];
1930 widest_int factor;
1931 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1932 continue;
1934 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1935 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1936 if (overflow)
1937 continue;
1938 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1939 unsigned int min_offset_bits = wi::min_precision (range, sign);
1941 /* Find the narrowest viable offset type. */
1942 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1943 tree offset_type = build_nonstandard_integer_type (offset_bits,
1944 sign == UNSIGNED);
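/* Illustrative example: a minimum precision of 20 bits rounds up to a
32-bit offset type here. */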
1946 /* See whether the target supports the operation with an offset
1947 no narrower than OFFSET_TYPE. */
1948 tree memory_type = TREE_TYPE (DR_REF (dr));
1949 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1950 vectype, memory_type, offset_type, scale,
1951 &gs_info->ifn, &gs_info->offset_vectype)
1952 || gs_info->ifn == IFN_LAST)
1953 continue;
1955 gs_info->decl = NULL_TREE;
1956 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1957 but we don't need to store that here. */
1958 gs_info->base = NULL_TREE;
1959 gs_info->element_type = TREE_TYPE (vectype);
1960 gs_info->offset = fold_convert (offset_type, step);
1961 gs_info->offset_dt = vect_constant_def;
1962 gs_info->scale = scale;
1963 gs_info->memory_type = memory_type;
1964 return true;
1967 if (overflow && dump_enabled_p ())
1968 dump_printf_loc (MSG_NOTE, vect_location,
1969 "truncating gather/scatter offset to %d bits"
1970 " might change its value.\n", element_bits);
1972 return false;
1975 /* Return true if we can use gather/scatter internal functions to
1976 vectorize STMT_INFO, which is a grouped or strided load or store.
1977 MASKED_P is true if load or store is conditional. When returning
1978 true, fill in GS_INFO with the information required to perform the
1979 operation. */
1981 static bool
1982 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1983 loop_vec_info loop_vinfo, bool masked_p,
1984 gather_scatter_info *gs_info)
1986 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1987 || gs_info->ifn == IFN_LAST)
1988 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1989 masked_p, gs_info);
1991 tree old_offset_type = TREE_TYPE (gs_info->offset);
1992 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1994 gcc_assert (TYPE_PRECISION (new_offset_type)
1995 >= TYPE_PRECISION (old_offset_type));
1996 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1998 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_NOTE, vect_location,
2000 "using gather/scatter for strided/grouped access,"
2001 " scale = %d\n", gs_info->scale);
2003 return true;
2006 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2007 elements with a known constant step. Return -1 if that step
2008 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2010 static int
2011 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2013 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2014 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2015 size_zero_node);
2018 /* If the target supports a permute mask that reverses the elements in
2019 a vector of type VECTYPE, return that mask, otherwise return null. */
2021 static tree
2022 perm_mask_for_reverse (tree vectype)
2024 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2026 /* The encoding has a single stepped pattern. */
2027 vec_perm_builder sel (nunits, 1, 3);
2028 for (int i = 0; i < 3; ++i)
2029 sel.quick_push (nunits - 1 - i);
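/* E.g. for nunits == 4 this pushes { 3, 2, 1 }, which the single stepped
pattern extends to the full reversal { 3, 2, 1, 0 }. */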
2031 vec_perm_indices indices (sel, 1, nunits);
2032 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
2033 indices))
2034 return NULL_TREE;
2035 return vect_gen_perm_mask_checked (vectype, indices);
2038 /* A subroutine of get_load_store_type, with a subset of the same
2039 arguments. Handle the case where STMT_INFO is a load or store that
2040 accesses consecutive elements with a negative step. Sets *POFFSET
2041 to the offset to be applied to the DR for the first access. */
2043 static vect_memory_access_type
2044 get_negative_load_store_type (vec_info *vinfo,
2045 stmt_vec_info stmt_info, tree vectype,
2046 vec_load_store_type vls_type,
2047 unsigned int ncopies, poly_int64 *poffset)
2049 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2050 dr_alignment_support alignment_support_scheme;
2052 if (ncopies > 1)
2054 if (dump_enabled_p ())
2055 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2056 "multiple types with negative step.\n");
2057 return VMAT_ELEMENTWISE;
2060 /* For backward-running DRs the first element accessed through VECTYPE
2061 is actually N-1 elements before the address recorded in the DR. */
2062 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2063 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
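/* Illustrative example: for a 4-element vector with 4-byte elements this
makes *POFFSET equal to -12 bytes, i.e. the lowest-addressed element of
the reversed vector. */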
2065 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2066 alignment_support_scheme
2067 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2068 if (alignment_support_scheme != dr_aligned
2069 && alignment_support_scheme != dr_unaligned_supported)
2071 if (dump_enabled_p ())
2072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2073 "negative step but alignment required.\n");
2074 *poffset = 0;
2075 return VMAT_ELEMENTWISE;
2078 if (vls_type == VLS_STORE_INVARIANT)
2080 if (dump_enabled_p ())
2081 dump_printf_loc (MSG_NOTE, vect_location,
2082 "negative step with invariant source;"
2083 " no permute needed.\n");
2084 return VMAT_CONTIGUOUS_DOWN;
2087 if (!perm_mask_for_reverse (vectype))
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2091 "negative step and reversing not supported.\n");
2092 *poffset = 0;
2093 return VMAT_ELEMENTWISE;
2096 return VMAT_CONTIGUOUS_REVERSE;
2099 /* STMT_INFO is either a masked or unconditional store. Return the value
2100 being stored. */
2102 tree
2103 vect_get_store_rhs (stmt_vec_info stmt_info)
2105 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2107 gcc_assert (gimple_assign_single_p (assign));
2108 return gimple_assign_rhs1 (assign);
2110 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2112 internal_fn ifn = gimple_call_internal_fn (call);
2113 int index = internal_fn_stored_value_index (ifn);
2114 gcc_assert (index >= 0);
2115 return gimple_call_arg (call, index);
2117 gcc_unreachable ();
2120 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2122 This function returns a vector type that can be composed from NELTS pieces,
2123 whose type is recorded in PTYPE. VTYPE should be a vector type and has the
2124 same vector size as the returned vector. It first checks whether the target
2125 supports constructing such a vector from pieces-size vector modes; if not,
2126 it checks whether a pieces-size scalar (integer) mode can be used instead.
2127 It returns NULL_TREE if no usable composition is found.
2129 For example, for (vtype=V16QI, nelts=4), we can probably get:
2130 - V16QI with PTYPE V4QI.
2131 - V4SI with PTYPE SI.
2132 - NULL_TREE. */
2134 static tree
2135 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2137 gcc_assert (VECTOR_TYPE_P (vtype));
2138 gcc_assert (known_gt (nelts, 0U));
2140 machine_mode vmode = TYPE_MODE (vtype);
2141 if (!VECTOR_MODE_P (vmode))
2142 return NULL_TREE;
2144 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2145 unsigned int pbsize;
2146 if (constant_multiple_p (vbsize, nelts, &pbsize))
2148 /* First check if vec_init optab supports construction from
2149 vector pieces directly. */
2150 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2151 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2152 machine_mode rmode;
2153 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2154 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2155 != CODE_FOR_nothing))
2157 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2158 return vtype;
2161 /* Otherwise check whether an integer type of the same piece size exists
2162 and whether the vec_init optab supports construction from it directly. */
2163 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2164 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2165 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2166 != CODE_FOR_nothing))
2168 *ptype = build_nonstandard_integer_type (pbsize, 1);
2169 return build_vector_type (*ptype, nelts);
2173 return NULL_TREE;
2176 /* A subroutine of get_load_store_type, with a subset of the same
2177 arguments. Handle the case where STMT_INFO is part of a grouped load
2178 or store.
2180 For stores, the statements in the group are all consecutive
2181 and there is no gap at the end. For loads, the statements in the
2182 group might not be consecutive; there can be gaps between statements
2183 as well as at the end. */
2185 static bool
2186 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2187 tree vectype, slp_tree slp_node,
2188 bool masked_p, vec_load_store_type vls_type,
2189 vect_memory_access_type *memory_access_type,
2190 poly_int64 *poffset,
2191 dr_alignment_support *alignment_support_scheme,
2192 int *misalignment,
2193 gather_scatter_info *gs_info)
2195 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2196 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2197 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2198 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2199 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2200 bool single_element_p = (stmt_info == first_stmt_info
2201 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2202 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2203 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2205 /* True if the vectorized statements would access beyond the last
2206 statement in the group. */
2207 bool overrun_p = false;
2209 /* True if we can cope with such overrun by peeling for gaps, so that
2210 there is at least one final scalar iteration after the vector loop. */
2211 bool can_overrun_p = (!masked_p
2212 && vls_type == VLS_LOAD
2213 && loop_vinfo
2214 && !loop->inner);
2216 /* There can only be a gap at the end of the group if the stride is
2217 known at compile time. */
2218 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2220 /* Stores can't yet have gaps. */
2221 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2223 if (slp_node)
2225 /* For SLP vectorization we directly vectorize a subchain
2226 without permutation. */
2227 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2228 first_dr_info
2229 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2230 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2232 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2233 separated by the stride, until we have a complete vector.
2234 Fall back to scalar accesses if that isn't possible. */
2235 if (multiple_p (nunits, group_size))
2236 *memory_access_type = VMAT_STRIDED_SLP;
2237 else
2238 *memory_access_type = VMAT_ELEMENTWISE;
2240 else
2242 overrun_p = loop_vinfo && gap != 0;
2243 if (overrun_p && vls_type != VLS_LOAD)
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2246 "Grouped store with gaps requires"
2247 " non-consecutive accesses\n");
2248 return false;
2250 /* An overrun is fine if the trailing elements are smaller
2251 than the alignment boundary B. Every vector access will
2252 be a multiple of B and so we are guaranteed to access a
2253 non-gap element in the same B-sized block. */
2254 if (overrun_p
2255 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2256 vectype)
2257 / vect_get_scalar_dr_size (first_dr_info)))
2258 overrun_p = false;
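/* Illustrative example: with a known 16-byte alignment and 4-byte
elements, a trailing gap of up to 3 elements stays within the last
aligned 16-byte block, so the check above clears OVERRUN_P. */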
2260 /* If the gap splits the vector in half and the target
2261 can do half-vector operations avoid the epilogue peeling
2262 by simply loading half of the vector only. Usually
2263 the construction with an upper zero half will be elided. */
2264 dr_alignment_support alss;
2265 int misalign = dr_misalignment (first_dr_info, vectype);
2266 tree half_vtype;
2267 if (overrun_p
2268 && !masked_p
2269 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2270 vectype, misalign)))
2271 == dr_aligned
2272 || alss == dr_unaligned_supported)
2273 && known_eq (nunits, (group_size - gap) * 2)
2274 && known_eq (nunits, group_size)
2275 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2276 != NULL_TREE))
2277 overrun_p = false;
2279 if (overrun_p && !can_overrun_p)
2281 if (dump_enabled_p ())
2282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2283 "Peeling for outer loop is not supported\n");
2284 return false;
2286 int cmp = compare_step_with_zero (vinfo, stmt_info);
2287 if (cmp < 0)
2289 if (single_element_p)
2290 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2291 only correct for single element "interleaving" SLP. */
2292 *memory_access_type = get_negative_load_store_type
2293 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2294 else
2296 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2297 separated by the stride, until we have a complete vector.
2298 Fall back to scalar accesses if that isn't possible. */
2299 if (multiple_p (nunits, group_size))
2300 *memory_access_type = VMAT_STRIDED_SLP;
2301 else
2302 *memory_access_type = VMAT_ELEMENTWISE;
2305 else
2307 gcc_assert (!loop_vinfo || cmp > 0);
2308 *memory_access_type = VMAT_CONTIGUOUS;
2311 /* When we have a contiguous access across loop iterations
2312 but the access in the loop doesn't cover the full vector
2313 we can end up with no gap recorded but still excess
2314 elements accessed, see PR103116. Make sure we peel for
2315 gaps if necessary and sufficient and give up if not. */
2316 if (loop_vinfo
2317 && *memory_access_type == VMAT_CONTIGUOUS
2318 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2319 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2320 nunits))
2322 unsigned HOST_WIDE_INT cnunits, cvf;
2323 if (!can_overrun_p
2324 || !nunits.is_constant (&cnunits)
2325 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2326 /* Peeling for gaps assumes that a single scalar iteration
2327 is enough to make sure the last vector iteration doesn't
2328 access excess elements.
2329 ??? Enhancements include peeling multiple iterations
2330 or using masked loads with a static mask. */
2331 || (group_size * cvf) % cnunits + group_size < cnunits)
2333 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2335 "peeling for gaps insufficient for "
2336 "access\n");
2337 return false;
2339 overrun_p = true;
2343 else
2345 /* We can always handle this case using elementwise accesses,
2346 but see if something more efficient is available. */
2347 *memory_access_type = VMAT_ELEMENTWISE;
2349 /* If there is a gap at the end of the group then these optimizations
2350 would access excess elements in the last iteration. */
2351 bool would_overrun_p = (gap != 0);
2352 /* An overrun is fine if the trailing elements are smaller than the
2353 alignment boundary B. Every vector access will be a multiple of B
2354 and so we are guaranteed to access a non-gap element in the
2355 same B-sized block. */
2356 if (would_overrun_p
2357 && !masked_p
2358 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2359 / vect_get_scalar_dr_size (first_dr_info)))
2360 would_overrun_p = false;
2362 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2363 && (can_overrun_p || !would_overrun_p)
2364 && compare_step_with_zero (vinfo, stmt_info) > 0)
2366 /* First cope with the degenerate case of a single-element
2367 vector. */
2368 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2371 /* Otherwise try using LOAD/STORE_LANES. */
2372 else if (vls_type == VLS_LOAD
2373 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2374 : vect_store_lanes_supported (vectype, group_size,
2375 masked_p))
2377 *memory_access_type = VMAT_LOAD_STORE_LANES;
2378 overrun_p = would_overrun_p;
2381 /* If that fails, try using permuting loads. */
2382 else if (vls_type == VLS_LOAD
2383 ? vect_grouped_load_supported (vectype, single_element_p,
2384 group_size)
2385 : vect_grouped_store_supported (vectype, group_size))
2387 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2388 overrun_p = would_overrun_p;
2392 /* As a last resort, try using a gather load or scatter store.
2394 ??? Although the code can handle all group sizes correctly,
2395 it probably isn't a win to use separate strided accesses based
2396 on nearby locations. Or, even if it's a win over scalar code,
2397 it might not be a win over vectorizing at a lower VF, if that
2398 allows us to use contiguous accesses. */
2399 if (*memory_access_type == VMAT_ELEMENTWISE
2400 && single_element_p
2401 && loop_vinfo
2402 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2403 masked_p, gs_info))
2404 *memory_access_type = VMAT_GATHER_SCATTER;
2407 if (*memory_access_type == VMAT_GATHER_SCATTER
2408 || *memory_access_type == VMAT_ELEMENTWISE)
2410 *alignment_support_scheme = dr_unaligned_supported;
2411 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2413 else
2415 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2416 *alignment_support_scheme
2417 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2418 *misalignment);
2421 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2423 /* STMT is the leader of the group. Check the operands of all the
2424 stmts of the group. */
2425 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2426 while (next_stmt_info)
2428 tree op = vect_get_store_rhs (next_stmt_info);
2429 enum vect_def_type dt;
2430 if (!vect_is_simple_use (op, vinfo, &dt))
2432 if (dump_enabled_p ())
2433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2434 "use not simple.\n");
2435 return false;
2437 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2441 if (overrun_p)
2443 gcc_assert (can_overrun_p);
2444 if (dump_enabled_p ())
2445 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2446 "Data access with gaps requires scalar "
2447 "epilogue loop\n");
2448 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2451 return true;
2454 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2455 if there is a memory access type that the vectorized form can use,
2456 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2457 or scatters, fill in GS_INFO accordingly. In addition
2458 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2459 the target does not support the alignment scheme. *MISALIGNMENT
2460 is set according to the alignment of the access (including
2461 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2463 SLP says whether we're performing SLP rather than loop vectorization.
2464 MASKED_P is true if the statement is conditional on a vectorized mask.
2465 VECTYPE is the vector type that the vectorized statements will use.
2466 NCOPIES is the number of vector statements that will be needed. */
2468 static bool
2469 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2470 tree vectype, slp_tree slp_node,
2471 bool masked_p, vec_load_store_type vls_type,
2472 unsigned int ncopies,
2473 vect_memory_access_type *memory_access_type,
2474 poly_int64 *poffset,
2475 dr_alignment_support *alignment_support_scheme,
2476 int *misalignment,
2477 gather_scatter_info *gs_info)
2479 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2480 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2481 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2482 *poffset = 0;
2483 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2485 *memory_access_type = VMAT_GATHER_SCATTER;
2486 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2487 gcc_unreachable ();
2488 /* When using internal functions, we rely on pattern recognition
2489 to convert the type of the offset to the type that the target
2490 requires, with the result being a call to an internal function.
2491 If that failed for some reason (e.g. because another pattern
2492 took priority), just handle cases in which the offset already
2493 has the right type. */
2494 else if (gs_info->ifn != IFN_LAST
2495 && !is_gimple_call (stmt_info->stmt)
2496 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2497 TREE_TYPE (gs_info->offset_vectype)))
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2501 "%s offset requires a conversion\n",
2502 vls_type == VLS_LOAD ? "gather" : "scatter");
2503 return false;
2505 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2506 &gs_info->offset_dt,
2507 &gs_info->offset_vectype))
2509 if (dump_enabled_p ())
2510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2511 "%s index use not simple.\n",
2512 vls_type == VLS_LOAD ? "gather" : "scatter");
2513 return false;
2515 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2517 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2518 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2519 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2520 (gs_info->offset_vectype),
2521 TYPE_VECTOR_SUBPARTS (vectype)))
2523 if (dump_enabled_p ())
2524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2525 "unsupported vector types for emulated "
2526 "gather.\n");
2527 return false;
2530 /* Gather-scatter accesses perform only component accesses, alignment
2531 is irrelevant for them. */
2532 *alignment_support_scheme = dr_unaligned_supported;
2534 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2536 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2537 masked_p,
2538 vls_type, memory_access_type, poffset,
2539 alignment_support_scheme,
2540 misalignment, gs_info))
2541 return false;
2543 else if (STMT_VINFO_STRIDED_P (stmt_info))
2545 gcc_assert (!slp_node);
2546 if (loop_vinfo
2547 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2548 masked_p, gs_info))
2549 *memory_access_type = VMAT_GATHER_SCATTER;
2550 else
2551 *memory_access_type = VMAT_ELEMENTWISE;
2552 /* Alignment is irrelevant here. */
2553 *alignment_support_scheme = dr_unaligned_supported;
2555 else
2557 int cmp = compare_step_with_zero (vinfo, stmt_info);
2558 if (cmp == 0)
2560 gcc_assert (vls_type == VLS_LOAD);
2561 *memory_access_type = VMAT_INVARIANT;
2562 /* Invariant accesses perform only component accesses, alignment
2563 is irrelevant for them. */
2564 *alignment_support_scheme = dr_unaligned_supported;
2566 else
2568 if (cmp < 0)
2569 *memory_access_type = get_negative_load_store_type
2570 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2571 else
2572 *memory_access_type = VMAT_CONTIGUOUS;
2573 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2574 vectype, *poffset);
2575 *alignment_support_scheme
2576 = vect_supportable_dr_alignment (vinfo,
2577 STMT_VINFO_DR_INFO (stmt_info),
2578 vectype, *misalignment);
2582 if ((*memory_access_type == VMAT_ELEMENTWISE
2583 || *memory_access_type == VMAT_STRIDED_SLP)
2584 && !nunits.is_constant ())
2586 if (dump_enabled_p ())
2587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2588 "Not using elementwise accesses due to variable "
2589 "vectorization factor.\n");
2590 return false;
2593 if (*alignment_support_scheme == dr_unaligned_unsupported)
2595 if (dump_enabled_p ())
2596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2597 "unsupported unaligned access\n");
2598 return false;
2601 /* FIXME: At the moment the cost model seems to underestimate the
2602 cost of using elementwise accesses. This check preserves the
2603 traditional behavior until that can be fixed. */
2604 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2605 if (!first_stmt_info)
2606 first_stmt_info = stmt_info;
2607 if (*memory_access_type == VMAT_ELEMENTWISE
2608 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2609 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2610 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2611 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2613 if (dump_enabled_p ())
2614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2615 "not falling back to elementwise accesses\n");
2616 return false;
2618 return true;
2621 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2622 conditional operation STMT_INFO. When returning true, store the mask
2623 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2624 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2625 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2627 static bool
2628 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2629 slp_tree slp_node, unsigned mask_index,
2630 tree *mask, slp_tree *mask_node,
2631 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2633 enum vect_def_type mask_dt;
2634 tree mask_vectype;
2635 slp_tree mask_node_1;
2636 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2637 mask, &mask_node_1, &mask_dt, &mask_vectype))
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2641 "mask use not simple.\n");
2642 return false;
2645 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2647 if (dump_enabled_p ())
2648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2649 "mask argument is not a boolean.\n");
2650 return false;
2653 /* If the caller is not prepared for adjusting an external/constant
2654 SLP mask vector type fail. */
2655 if (slp_node
2656 && !mask_node
2657 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2659 if (dump_enabled_p ())
2660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2661 "SLP mask argument is not vectorized.\n");
2662 return false;
2665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2666 if (!mask_vectype)
2667 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2669 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2671 if (dump_enabled_p ())
2672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2673 "could not find an appropriate vector mask type.\n");
2674 return false;
2677 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2678 TYPE_VECTOR_SUBPARTS (vectype)))
2680 if (dump_enabled_p ())
2681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2682 "vector mask type %T"
2683 " does not match vector data type %T.\n",
2684 mask_vectype, vectype);
2686 return false;
2689 *mask_dt_out = mask_dt;
2690 *mask_vectype_out = mask_vectype;
2691 if (mask_node)
2692 *mask_node = mask_node_1;
2693 return true;
2696 /* Return true if stored value RHS is suitable for vectorizing store
2697 statement STMT_INFO. When returning true, store the type of the
2698 definition in *RHS_DT_OUT, the type of the vectorized store value in
2699 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2701 static bool
2702 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2703 slp_tree slp_node, tree rhs,
2704 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2705 vec_load_store_type *vls_type_out)
2707 /* In the case this is a store from a constant, make sure
2708 native_encode_expr can handle it. */
2709 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2711 if (dump_enabled_p ())
2712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2713 "cannot encode constant as a byte sequence.\n");
2714 return false;
2717 unsigned op_no = 0;
2718 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2720 if (gimple_call_internal_p (call)
2721 && internal_store_fn_p (gimple_call_internal_fn (call)))
2722 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2725 enum vect_def_type rhs_dt;
2726 tree rhs_vectype;
2727 slp_tree slp_op;
2728 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2729 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2731 if (dump_enabled_p ())
2732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2733 "use not simple.\n");
2734 return false;
2737 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2738 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2740 if (dump_enabled_p ())
2741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2742 "incompatible vector types.\n");
2743 return false;
2746 *rhs_dt_out = rhs_dt;
2747 *rhs_vectype_out = rhs_vectype;
2748 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2749 *vls_type_out = VLS_STORE_INVARIANT;
2750 else
2751 *vls_type_out = VLS_STORE;
2752 return true;
2755 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2756 Note that we support masks with floating-point type, in which case the
2757 floats are interpreted as a bitmask. */
2759 static tree
2760 vect_build_all_ones_mask (vec_info *vinfo,
2761 stmt_vec_info stmt_info, tree masktype)
2763 if (TREE_CODE (masktype) == INTEGER_TYPE)
2764 return build_int_cst (masktype, -1);
2765 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2767 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2768 mask = build_vector_from_val (masktype, mask);
2769 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2771 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2773 REAL_VALUE_TYPE r;
2774 long tmp[6];
2775 for (int j = 0; j < 6; ++j)
2776 tmp[j] = -1;
2777 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2778 tree mask = build_real (TREE_TYPE (masktype), r);
2779 mask = build_vector_from_val (masktype, mask);
2780 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2782 gcc_unreachable ();
2785 /* Build an all-zero merge value of type VECTYPE while vectorizing
2786 STMT_INFO as a gather load. */
2788 static tree
2789 vect_build_zero_merge_argument (vec_info *vinfo,
2790 stmt_vec_info stmt_info, tree vectype)
2792 tree merge;
2793 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2794 merge = build_int_cst (TREE_TYPE (vectype), 0);
2795 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2797 REAL_VALUE_TYPE r;
2798 long tmp[6];
2799 for (int j = 0; j < 6; ++j)
2800 tmp[j] = 0;
2801 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2802 merge = build_real (TREE_TYPE (vectype), r);
2804 else
2805 gcc_unreachable ();
2806 merge = build_vector_from_val (vectype, merge);
2807 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2810 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2811 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2812 the gather load operation. If the load is conditional, MASK is the
2813 unvectorized condition and MASK_DT is its definition type, otherwise
2814 MASK is null. */
2816 static void
2817 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2818 gimple_stmt_iterator *gsi,
2819 gimple **vec_stmt,
2820 gather_scatter_info *gs_info,
2821 tree mask)
2823 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2824 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2825 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2826 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2827 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2828 edge pe = loop_preheader_edge (loop);
2829 enum { NARROW, NONE, WIDEN } modifier;
2830 poly_uint64 gather_off_nunits
2831 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2833 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2834 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2835 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2836 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2837 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2838 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2839 tree scaletype = TREE_VALUE (arglist);
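/* The legacy gather builtin takes five arguments (src, ptr, index, mask,
scale); the types unpacked above correspond to those arguments. */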
2840 tree real_masktype = masktype;
2841 gcc_checking_assert (types_compatible_p (srctype, rettype)
2842 && (!mask
2843 || TREE_CODE (masktype) == INTEGER_TYPE
2844 || types_compatible_p (srctype, masktype)));
2845 if (mask)
2846 masktype = truth_type_for (srctype);
2848 tree mask_halftype = masktype;
2849 tree perm_mask = NULL_TREE;
2850 tree mask_perm_mask = NULL_TREE;
2851 if (known_eq (nunits, gather_off_nunits))
2852 modifier = NONE;
2853 else if (known_eq (nunits * 2, gather_off_nunits))
2855 modifier = WIDEN;
2857 /* Currently widening gathers and scatters are only supported for
2858 fixed-length vectors. */
2859 int count = gather_off_nunits.to_constant ();
2860 vec_perm_builder sel (count, count, 1);
2861 for (int i = 0; i < count; ++i)
2862 sel.quick_push (i | (count / 2));
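/* E.g. with count == 8 this builds the selector { 4, 5, 6, 7, 4, 5, 6, 7 };
the odd-numbered copies below use it to read the high half of the offset
vector. */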
2864 vec_perm_indices indices (sel, 1, count);
2865 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2866 indices);
2868 else if (known_eq (nunits, gather_off_nunits * 2))
2870 modifier = NARROW;
2872 /* Currently narrowing gathers and scatters are only supported for
2873 fixed-length vectors. */
2874 int count = nunits.to_constant ();
2875 vec_perm_builder sel (count, count, 1);
2876 sel.quick_grow (count);
2877 for (int i = 0; i < count; ++i)
2878 sel[i] = i < count / 2 ? i : i + count / 2;
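/* E.g. with count == 8 this builds the selector { 0, 1, 2, 3, 8, 9, 10, 11 },
which concatenates the low halves of two consecutive gather results into
one full-width vector. */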
2879 vec_perm_indices indices (sel, 2, count);
2880 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2882 ncopies *= 2;
2884 if (mask && VECTOR_TYPE_P (real_masktype))
2886 for (int i = 0; i < count; ++i)
2887 sel[i] = i | (count / 2);
2888 indices.new_vector (sel, 2, count);
2889 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2891 else if (mask)
2892 mask_halftype = truth_type_for (gs_info->offset_vectype);
2894 else
2895 gcc_unreachable ();
2897 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2898 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2900 tree ptr = fold_convert (ptrtype, gs_info->base);
2901 if (!is_gimple_min_invariant (ptr))
2903 gimple_seq seq;
2904 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2905 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2906 gcc_assert (!new_bb);
2909 tree scale = build_int_cst (scaletype, gs_info->scale);
2911 tree vec_oprnd0 = NULL_TREE;
2912 tree vec_mask = NULL_TREE;
2913 tree src_op = NULL_TREE;
2914 tree mask_op = NULL_TREE;
2915 tree prev_res = NULL_TREE;
2917 if (!mask)
2919 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2920 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2923 auto_vec<tree> vec_oprnds0;
2924 auto_vec<tree> vec_masks;
2925 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2926 modifier == WIDEN ? ncopies / 2 : ncopies,
2927 gs_info->offset, &vec_oprnds0);
2928 if (mask)
2929 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2930 modifier == NARROW ? ncopies / 2 : ncopies,
2931 mask, &vec_masks, masktype);
2932 for (int j = 0; j < ncopies; ++j)
2934 tree op, var;
2935 if (modifier == WIDEN && (j & 1))
2936 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2937 perm_mask, stmt_info, gsi);
2938 else
2939 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2941 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2943 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2944 TYPE_VECTOR_SUBPARTS (idxtype)));
2945 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2946 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2947 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2948 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2949 op = var;
2952 if (mask)
2954 if (mask_perm_mask && (j & 1))
2955 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2956 mask_perm_mask, stmt_info, gsi);
2957 else
2959 if (modifier == NARROW)
2961 if ((j & 1) == 0)
2962 vec_mask = vec_masks[j / 2];
2964 else
2965 vec_mask = vec_masks[j];
2967 mask_op = vec_mask;
2968 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2970 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2971 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2972 gcc_assert (known_eq (sub1, sub2));
2973 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2974 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2975 gassign *new_stmt
2976 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2977 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2978 mask_op = var;
2981 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2983 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2984 gassign *new_stmt
2985 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2986 : VEC_UNPACK_LO_EXPR,
2987 mask_op);
2988 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2989 mask_op = var;
2991 src_op = mask_op;
2994 tree mask_arg = mask_op;
2995 if (masktype != real_masktype)
2997 tree utype, optype = TREE_TYPE (mask_op);
2998 if (VECTOR_TYPE_P (real_masktype)
2999 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
3000 utype = real_masktype;
3001 else
3002 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3003 var = vect_get_new_ssa_name (utype, vect_scalar_var);
3004 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
3005 gassign *new_stmt
3006 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3007 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3008 mask_arg = var;
3009 if (!useless_type_conversion_p (real_masktype, utype))
3011 gcc_assert (TYPE_PRECISION (utype)
3012 <= TYPE_PRECISION (real_masktype));
3013 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
3014 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3015 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3016 mask_arg = var;
3018 src_op = build_zero_cst (srctype);
3020 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
3021 mask_arg, scale);
3023 if (!useless_type_conversion_p (vectype, rettype))
3025 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3026 TYPE_VECTOR_SUBPARTS (rettype)));
3027 op = vect_get_new_ssa_name (rettype, vect_simple_var);
3028 gimple_call_set_lhs (new_stmt, op);
3029 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3030 var = make_ssa_name (vec_dest);
3031 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3032 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3033 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3035 else
3037 var = make_ssa_name (vec_dest, new_stmt);
3038 gimple_call_set_lhs (new_stmt, var);
3039 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3042 if (modifier == NARROW)
3044 if ((j & 1) == 0)
3046 prev_res = var;
3047 continue;
3049 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3050 stmt_info, gsi);
3051 new_stmt = SSA_NAME_DEF_STMT (var);
3054 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3056 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3059 /* Prepare the base and offset in GS_INFO for vectorization.
3060 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3061 to the vectorized offset argument for the first copy of STMT_INFO.
3062 STMT_INFO is the statement described by GS_INFO and LOOP is the
3063 containing loop. */
3065 static void
3066 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3067 class loop *loop, stmt_vec_info stmt_info,
3068 slp_tree slp_node, gather_scatter_info *gs_info,
3069 tree *dataref_ptr, vec<tree> *vec_offset)
3071 gimple_seq stmts = NULL;
3072 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3073 if (stmts != NULL)
3075 basic_block new_bb;
3076 edge pe = loop_preheader_edge (loop);
3077 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3078 gcc_assert (!new_bb);
3080 if (slp_node)
3081 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3082 else
3084 unsigned ncopies
3085 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3086 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3087 gs_info->offset, vec_offset,
3088 gs_info->offset_vectype);
3092 /* Prepare to implement a grouped or strided load or store using
3093 the gather load or scatter store operation described by GS_INFO.
3094 STMT_INFO is the load or store statement.
3096 Set *DATAREF_BUMP to the amount that should be added to the base
3097 address after each copy of the vectorized statement. Set *VEC_OFFSET
3098 to an invariant offset vector in which element I has the value
3099 I * DR_STEP / SCALE. */
3101 static void
3102 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3103 loop_vec_info loop_vinfo,
3104 gather_scatter_info *gs_info,
3105 tree *dataref_bump, tree *vec_offset)
3107 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3108 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3110 tree bump = size_binop (MULT_EXPR,
3111 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3112 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3113 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3115 /* The offset given in GS_INFO can have pointer type, so use the element
3116 type of the vector instead. */
3117 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3119 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3120 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3121 ssize_int (gs_info->scale));
3122 step = fold_convert (offset_type, step);
3124 /* Create {0, X, X*2, X*3, ...}. */
3125 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3126 build_zero_cst (offset_type), step);
3127 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3130 /* Return the amount that should be added to a vector pointer to move
3131 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3132 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3133 vectorization. */
3135 static tree
3136 vect_get_data_ptr_increment (vec_info *vinfo,
3137 dr_vec_info *dr_info, tree aggr_type,
3138 vect_memory_access_type memory_access_type)
3140 if (memory_access_type == VMAT_INVARIANT)
3141 return size_zero_node;
3143 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3144 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3145 if (tree_int_cst_sgn (step) == -1)
3146 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3147 return iv_step;
3150 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3152 static bool
3153 vectorizable_bswap (vec_info *vinfo,
3154 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3155 gimple **vec_stmt, slp_tree slp_node,
3156 slp_tree *slp_op,
3157 tree vectype_in, stmt_vector_for_cost *cost_vec)
3159 tree op, vectype;
3160 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3161 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3162 unsigned ncopies;
3164 op = gimple_call_arg (stmt, 0);
3165 vectype = STMT_VINFO_VECTYPE (stmt_info);
3166 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3168 /* Multiple types in SLP are handled by creating the appropriate number of
3169 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3170 case of SLP. */
3171 if (slp_node)
3172 ncopies = 1;
3173 else
3174 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3176 gcc_assert (ncopies >= 1);
3178 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3179 if (! char_vectype)
3180 return false;
3182 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3183 unsigned word_bytes;
3184 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3185 return false;
3187 /* The encoding uses one stepped pattern for each byte in the word. */
3188 vec_perm_builder elts (num_bytes, word_bytes, 3);
3189 for (unsigned i = 0; i < 3; ++i)
3190 for (unsigned j = 0; j < word_bytes; ++j)
3191 elts.quick_push ((i + 1) * word_bytes - j - 1);
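/* Illustrative example: for a 4-byte word (bswap32) on a 16-byte vector
this encodes the byte selector { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... },
i.e. a byte reversal within each word. */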
3193 vec_perm_indices indices (elts, 1, num_bytes);
3194 machine_mode vmode = TYPE_MODE (char_vectype);
3195 if (!can_vec_perm_const_p (vmode, vmode, indices))
3196 return false;
3198 if (! vec_stmt)
3200 if (slp_node
3201 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3203 if (dump_enabled_p ())
3204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3205 "incompatible vector types for invariants\n");
3206 return false;
3209 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3210 DUMP_VECT_SCOPE ("vectorizable_bswap");
3211 record_stmt_cost (cost_vec,
3212 1, vector_stmt, stmt_info, 0, vect_prologue);
3213 record_stmt_cost (cost_vec,
3214 slp_node
3215 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3216 vec_perm, stmt_info, 0, vect_body);
3217 return true;
3220 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3222 /* Transform. */
3223 vec<tree> vec_oprnds = vNULL;
3224 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3225 op, &vec_oprnds);
3226 /* Arguments are ready. Create the new vector stmt. */
3227 unsigned i;
3228 tree vop;
3229 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3231 gimple *new_stmt;
3232 tree tem = make_ssa_name (char_vectype);
3233 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3234 char_vectype, vop));
3235 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3236 tree tem2 = make_ssa_name (char_vectype);
3237 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3238 tem, tem, bswap_vconst);
3239 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3240 tem = make_ssa_name (vectype);
3241 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3242 vectype, tem2));
3243 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3244 if (slp_node)
3245 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3246 else
3247 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3250 if (!slp_node)
3251 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3253 vec_oprnds.release ();
3254 return true;
3257 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3258 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3259 in a single step. On success, store the binary pack code in
3260 *CONVERT_CODE. */
3262 static bool
3263 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3264 tree_code *convert_code)
3266 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3267 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3268 return false;
3270 tree_code code;
3271 int multi_step_cvt = 0;
3272 auto_vec <tree, 8> interm_types;
3273 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3274 &code, &multi_step_cvt, &interm_types)
3275 || multi_step_cvt)
3276 return false;
3278 *convert_code = code;
3279 return true;
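/* Illustrative example: narrowing V4SI inputs to a V8HI result in a single
step is typically represented by VEC_PACK_TRUNC_EXPR, which is what
*CONVERT_CODE would hold in that case. */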
3282 /* Function vectorizable_call.
3284 Check if STMT_INFO performs a function call that can be vectorized.
3285 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3286 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3287 Return true if STMT_INFO is vectorizable in this way. */
3289 static bool
3290 vectorizable_call (vec_info *vinfo,
3291 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3292 gimple **vec_stmt, slp_tree slp_node,
3293 stmt_vector_for_cost *cost_vec)
3295 gcall *stmt;
3296 tree vec_dest;
3297 tree scalar_dest;
3298 tree op;
3299 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3300 tree vectype_out, vectype_in;
3301 poly_uint64 nunits_in;
3302 poly_uint64 nunits_out;
3303 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3304 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3305 tree fndecl, new_temp, rhs_type;
3306 enum vect_def_type dt[4]
3307 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3308 vect_unknown_def_type };
3309 tree vectypes[ARRAY_SIZE (dt)] = {};
3310 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3311 int ndts = ARRAY_SIZE (dt);
3312 int ncopies, j;
3313 auto_vec<tree, 8> vargs;
3314 enum { NARROW, NONE, WIDEN } modifier;
3315 size_t i, nargs;
3316 tree lhs;
3318 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3319 return false;
3321 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3322 && ! vec_stmt)
3323 return false;
3325 /* Is STMT_INFO a vectorizable call? */
3326 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3327 if (!stmt)
3328 return false;
3330 if (gimple_call_internal_p (stmt)
3331 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3332 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3333 /* Handled by vectorizable_load and vectorizable_store. */
3334 return false;
3336 if (gimple_call_lhs (stmt) == NULL_TREE
3337 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3338 return false;
3340 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3342 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3344 /* Process function arguments. */
3345 rhs_type = NULL_TREE;
3346 vectype_in = NULL_TREE;
3347 nargs = gimple_call_num_args (stmt);
3349 /* Bail out if the function has more than four arguments; we do not have
3350 interesting builtin functions to vectorize with more than two arguments
3351 except for fma. Calls with no arguments are not handled either. */
3352 if (nargs == 0 || nargs > 4)
3353 return false;
3355 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3356 combined_fn cfn = gimple_call_combined_fn (stmt);
3357 if (cfn == CFN_GOMP_SIMD_LANE)
3359 nargs = 0;
3360 rhs_type = unsigned_type_node;
3363 int mask_opno = -1;
3364 if (internal_fn_p (cfn))
3365 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3367 for (i = 0; i < nargs; i++)
3369 if ((int) i == mask_opno)
3371 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3372 &op, &slp_op[i], &dt[i], &vectypes[i]))
3373 return false;
3374 continue;
3377 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3378 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3380 if (dump_enabled_p ())
3381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3382 "use not simple.\n");
3383 return false;
3386 /* We can only handle calls with arguments of the same type. */
3387 if (rhs_type
3388 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3390 if (dump_enabled_p ())
3391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3392 "argument types differ.\n");
3393 return false;
3395 if (!rhs_type)
3396 rhs_type = TREE_TYPE (op);
3398 if (!vectype_in)
3399 vectype_in = vectypes[i];
3400 else if (vectypes[i]
3401 && !types_compatible_p (vectypes[i], vectype_in))
3403 if (dump_enabled_p ())
3404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3405 "argument vector types differ.\n");
3406 return false;
3409 /* If all arguments are external or constant defs, infer the vector type
3410 from the scalar type. */
3411 if (!vectype_in)
3412 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3413 if (vec_stmt)
3414 gcc_assert (vectype_in);
3415 if (!vectype_in)
3417 if (dump_enabled_p ())
3418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3419 "no vectype for scalar type %T\n", rhs_type);
3421 return false;
3423 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3424 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3425 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3426 by a pack of the two vectors into an SI vector. We would need
3427 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3428 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3430 if (dump_enabled_p ())
3431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3432 "mismatched vector sizes %T and %T\n",
3433 vectype_in, vectype_out);
3434 return false;
3437 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3438 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3440 if (dump_enabled_p ())
3441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3442 "mixed mask and nonmask vector types\n");
3443 return false;
3446 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3448 if (dump_enabled_p ())
3449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3450 "use emulated vector type for call\n");
3451 return false;
3454 /* FORNOW */
3455 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3456 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3457 if (known_eq (nunits_in * 2, nunits_out))
3458 modifier = NARROW;
3459 else if (known_eq (nunits_out, nunits_in))
3460 modifier = NONE;
3461 else if (known_eq (nunits_out * 2, nunits_in))
3462 modifier = WIDEN;
3463 else
3464 return false;
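/* E.g. a V4SF sqrtf call has equal element counts in and out and gets
   modifier NONE; V2DI inputs with a V4SI result (the DI -> SI ctz case
   described above) give NARROW, i.e. two vector calls whose results are
   packed into one; V4SI inputs with a V2DI result give WIDEN.  */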
3466 /* We only handle functions that do not read or clobber memory. */
3467 if (gimple_vuse (stmt))
3469 if (dump_enabled_p ())
3470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3471 "function reads from or writes to memory.\n");
3472 return false;
3475 /* For now, we only vectorize functions if a target specific builtin
3476 is available. TODO -- in some cases, it might be profitable to
3477 insert the calls for pieces of the vector, in order to be able
3478 to vectorize other operations in the loop. */
3479 fndecl = NULL_TREE;
3480 internal_fn ifn = IFN_LAST;
3481 tree callee = gimple_call_fndecl (stmt);
3483 /* First try using an internal function. */
3484 tree_code convert_code = ERROR_MARK;
3485 if (cfn != CFN_LAST
3486 && (modifier == NONE
3487 || (modifier == NARROW
3488 && simple_integer_narrowing (vectype_out, vectype_in,
3489 &convert_code))))
3490 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3491 vectype_in);
3493 /* If that fails, try asking for a target-specific built-in function. */
3494 if (ifn == IFN_LAST)
3496 if (cfn != CFN_LAST)
3497 fndecl = targetm.vectorize.builtin_vectorized_function
3498 (cfn, vectype_out, vectype_in);
3499 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3500 fndecl = targetm.vectorize.builtin_md_vectorized_function
3501 (callee, vectype_out, vectype_in);
3504 if (ifn == IFN_LAST && !fndecl)
3506 if (cfn == CFN_GOMP_SIMD_LANE
3507 && !slp_node
3508 && loop_vinfo
3509 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3510 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3511 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3512 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3514 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3515 { 0, 1, 2, ... vf - 1 } vector. */
3516 gcc_assert (nargs == 0);
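/* E.g. with a vectorization factor of 8 and a V4SI result, copy 0 gets
   the lane vector { 0, 1, 2, 3 } and copy 1 gets { 4, 5, 6, 7 }; see the
   CFN_GOMP_SIMD_LANE handling in the transform code below.  */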
3518 else if (modifier == NONE
3519 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3520 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3521 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3522 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3523 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3524 slp_op, vectype_in, cost_vec);
3525 else
3527 if (dump_enabled_p ())
3528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3529 "function is not vectorizable.\n");
3530 return false;
3534 if (slp_node)
3535 ncopies = 1;
3536 else if (modifier == NARROW && ifn == IFN_LAST)
3537 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3538 else
3539 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3541 /* Sanity check: make sure that at least one copy of the vectorized stmt
3542 needs to be generated. */
3543 gcc_assert (ncopies >= 1);
3545 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3546 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3547 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3548 if (!vec_stmt) /* transformation not required. */
3550 if (slp_node)
3551 for (i = 0; i < nargs; ++i)
3552 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3553 vectypes[i]
3554 ? vectypes[i] : vectype_in))
3556 if (dump_enabled_p ())
3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3558 "incompatible vector types for invariants\n");
3559 return false;
3561 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3562 DUMP_VECT_SCOPE ("vectorizable_call");
3563 vect_model_simple_cost (vinfo, stmt_info,
3564 ncopies, dt, ndts, slp_node, cost_vec);
3565 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3566 record_stmt_cost (cost_vec, ncopies / 2,
3567 vec_promote_demote, stmt_info, 0, vect_body);
3569 if (loop_vinfo
3570 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3571 && (reduc_idx >= 0 || mask_opno >= 0))
3573 if (reduc_idx >= 0
3574 && (cond_fn == IFN_LAST
3575 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3576 OPTIMIZE_FOR_SPEED)))
3578 if (dump_enabled_p ())
3579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3580 "can't use a fully-masked loop because no"
3581 " conditional operation is available.\n");
3582 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3584 else
3586 unsigned int nvectors
3587 = (slp_node
3588 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3589 : ncopies);
3590 tree scalar_mask = NULL_TREE;
3591 if (mask_opno >= 0)
3592 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3593 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3594 vectype_out, scalar_mask);
3597 return true;
3600 /* Transform. */
3602 if (dump_enabled_p ())
3603 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3605 /* Handle def. */
3606 scalar_dest = gimple_call_lhs (stmt);
3607 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3609 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3610 unsigned int vect_nargs = nargs;
3611 if (masked_loop_p && reduc_idx >= 0)
3613 ifn = cond_fn;
3614 vect_nargs += 2;
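/* The conditional variant takes the loop mask as an extra leading
   argument and an else value (here the reduction input) as an extra
   trailing argument, hence the two additional argument slots.  */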
3617 if (modifier == NONE || ifn != IFN_LAST)
3619 tree prev_res = NULL_TREE;
3620 vargs.safe_grow (vect_nargs, true);
3621 auto_vec<vec<tree> > vec_defs (nargs);
3622 for (j = 0; j < ncopies; ++j)
3624 /* Build argument list for the vectorized call. */
3625 if (slp_node)
3627 vec<tree> vec_oprnds0;
3629 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3630 vec_oprnds0 = vec_defs[0];
3632 /* Arguments are ready. Create the new vector stmt. */
3633 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3635 int varg = 0;
3636 if (masked_loop_p && reduc_idx >= 0)
3638 unsigned int vec_num = vec_oprnds0.length ();
3639 /* Always true for SLP. */
3640 gcc_assert (ncopies == 1);
3641 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3642 vectype_out, i);
3644 size_t k;
3645 for (k = 0; k < nargs; k++)
3647 vec<tree> vec_oprndsk = vec_defs[k];
3648 vargs[varg++] = vec_oprndsk[i];
3650 if (masked_loop_p && reduc_idx >= 0)
3651 vargs[varg++] = vargs[reduc_idx + 1];
3652 gimple *new_stmt;
3653 if (modifier == NARROW)
3655 /* We don't define any narrowing conditional functions
3656 at present. */
3657 gcc_assert (mask_opno < 0);
3658 tree half_res = make_ssa_name (vectype_in);
3659 gcall *call
3660 = gimple_build_call_internal_vec (ifn, vargs);
3661 gimple_call_set_lhs (call, half_res);
3662 gimple_call_set_nothrow (call, true);
3663 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3664 if ((i & 1) == 0)
3666 prev_res = half_res;
3667 continue;
3669 new_temp = make_ssa_name (vec_dest);
3670 new_stmt = gimple_build_assign (new_temp, convert_code,
3671 prev_res, half_res);
3672 vect_finish_stmt_generation (vinfo, stmt_info,
3673 new_stmt, gsi);
3675 else
3677 if (mask_opno >= 0 && masked_loop_p)
3679 unsigned int vec_num = vec_oprnds0.length ();
3680 /* Always true for SLP. */
3681 gcc_assert (ncopies == 1);
3682 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3683 vectype_out, i);
3684 vargs[mask_opno] = prepare_vec_mask
3685 (loop_vinfo, TREE_TYPE (mask), mask,
3686 vargs[mask_opno], gsi);
3689 gcall *call;
3690 if (ifn != IFN_LAST)
3691 call = gimple_build_call_internal_vec (ifn, vargs);
3692 else
3693 call = gimple_build_call_vec (fndecl, vargs);
3694 new_temp = make_ssa_name (vec_dest, call);
3695 gimple_call_set_lhs (call, new_temp);
3696 gimple_call_set_nothrow (call, true);
3697 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3698 new_stmt = call;
3700 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3702 continue;
3705 int varg = 0;
3706 if (masked_loop_p && reduc_idx >= 0)
3707 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3708 vectype_out, j);
3709 for (i = 0; i < nargs; i++)
3711 op = gimple_call_arg (stmt, i);
3712 if (j == 0)
3714 vec_defs.quick_push (vNULL);
3715 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3716 op, &vec_defs[i],
3717 vectypes[i]);
3719 vargs[varg++] = vec_defs[i][j];
3721 if (masked_loop_p && reduc_idx >= 0)
3722 vargs[varg++] = vargs[reduc_idx + 1];
3724 if (mask_opno >= 0 && masked_loop_p)
3726 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3727 vectype_out, j);
3728 vargs[mask_opno]
3729 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3730 vargs[mask_opno], gsi);
3733 gimple *new_stmt;
3734 if (cfn == CFN_GOMP_SIMD_LANE)
3736 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3737 tree new_var
3738 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3739 gimple *init_stmt = gimple_build_assign (new_var, cst);
3740 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3741 new_temp = make_ssa_name (vec_dest);
3742 new_stmt = gimple_build_assign (new_temp, new_var);
3743 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3745 else if (modifier == NARROW)
3747 /* We don't define any narrowing conditional functions at
3748 present. */
3749 gcc_assert (mask_opno < 0);
3750 tree half_res = make_ssa_name (vectype_in);
3751 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3752 gimple_call_set_lhs (call, half_res);
3753 gimple_call_set_nothrow (call, true);
3754 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3755 if ((j & 1) == 0)
3757 prev_res = half_res;
3758 continue;
3760 new_temp = make_ssa_name (vec_dest);
3761 new_stmt = gimple_build_assign (new_temp, convert_code,
3762 prev_res, half_res);
3763 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3765 else
3767 gcall *call;
3768 if (ifn != IFN_LAST)
3769 call = gimple_build_call_internal_vec (ifn, vargs);
3770 else
3771 call = gimple_build_call_vec (fndecl, vargs);
3772 new_temp = make_ssa_name (vec_dest, call);
3773 gimple_call_set_lhs (call, new_temp);
3774 gimple_call_set_nothrow (call, true);
3775 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3776 new_stmt = call;
3779 if (j == (modifier == NARROW ? 1 : 0))
3780 *vec_stmt = new_stmt;
3781 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3783 for (i = 0; i < nargs; i++)
3785 vec<tree> vec_oprndsi = vec_defs[i];
3786 vec_oprndsi.release ();
3789 else if (modifier == NARROW)
3791 auto_vec<vec<tree> > vec_defs (nargs);
3792 /* We don't define any narrowing conditional functions at present. */
3793 gcc_assert (mask_opno < 0);
3794 for (j = 0; j < ncopies; ++j)
3796 /* Build argument list for the vectorized call. */
3797 if (j == 0)
3798 vargs.create (nargs * 2);
3799 else
3800 vargs.truncate (0);
3802 if (slp_node)
3804 vec<tree> vec_oprnds0;
3806 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3807 vec_oprnds0 = vec_defs[0];
3809 /* Arguments are ready. Create the new vector stmt. */
3810 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3812 size_t k;
3813 vargs.truncate (0);
3814 for (k = 0; k < nargs; k++)
3816 vec<tree> vec_oprndsk = vec_defs[k];
3817 vargs.quick_push (vec_oprndsk[i]);
3818 vargs.quick_push (vec_oprndsk[i + 1]);
3820 gcall *call;
3821 if (ifn != IFN_LAST)
3822 call = gimple_build_call_internal_vec (ifn, vargs);
3823 else
3824 call = gimple_build_call_vec (fndecl, vargs);
3825 new_temp = make_ssa_name (vec_dest, call);
3826 gimple_call_set_lhs (call, new_temp);
3827 gimple_call_set_nothrow (call, true);
3828 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3829 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3831 continue;
3834 for (i = 0; i < nargs; i++)
3836 op = gimple_call_arg (stmt, i);
3837 if (j == 0)
3839 vec_defs.quick_push (vNULL);
3840 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3841 op, &vec_defs[i], vectypes[i]);
3843 vec_oprnd0 = vec_defs[i][2*j];
3844 vec_oprnd1 = vec_defs[i][2*j+1];
3846 vargs.quick_push (vec_oprnd0);
3847 vargs.quick_push (vec_oprnd1);
3850 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3851 new_temp = make_ssa_name (vec_dest, new_stmt);
3852 gimple_call_set_lhs (new_stmt, new_temp);
3853 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3855 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3858 if (!slp_node)
3859 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3861 for (i = 0; i < nargs; i++)
3863 vec<tree> vec_oprndsi = vec_defs[i];
3864 vec_oprndsi.release ();
3867 else
3868 /* No current target implements this case. */
3869 return false;
3871 vargs.release ();
3873 /* The call in STMT might prevent it from being removed in dce.
3874 We however cannot remove it here, due to the way the ssa name
3875 it defines is mapped to the new definition. So just replace
3876 the rhs of the statement with something harmless. */
3878 if (slp_node)
3879 return true;
3881 stmt_info = vect_orig_stmt (stmt_info);
3882 lhs = gimple_get_lhs (stmt_info->stmt);
3884 gassign *new_stmt
3885 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3886 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3888 return true;
3892 struct simd_call_arg_info
3894 tree vectype;
3895 tree op;
3896 HOST_WIDE_INT linear_step;
3897 enum vect_def_type dt;
3898 unsigned int align;
3899 bool simd_lane_linear;
3902 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3903 is linear within a simd lane (but not within the whole loop), note it in
3904 *ARGINFO. */
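/* For illustration, given
     i_4 = GOMP_SIMD_LANE (simduid_3);
     _5 = (sizetype) i_4;
     _6 = _5 * 8;
     p_7 = &a[0] + _6;
   with a loop-invariant &a[0], OP == p_7 is linear within a simd lane
   with base &a[0] and linear_step 8.  */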
3906 static void
3907 vect_simd_lane_linear (tree op, class loop *loop,
3908 struct simd_call_arg_info *arginfo)
3910 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3912 if (!is_gimple_assign (def_stmt)
3913 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3914 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3915 return;
3917 tree base = gimple_assign_rhs1 (def_stmt);
3918 HOST_WIDE_INT linear_step = 0;
3919 tree v = gimple_assign_rhs2 (def_stmt);
3920 while (TREE_CODE (v) == SSA_NAME)
3922 tree t;
3923 def_stmt = SSA_NAME_DEF_STMT (v);
3924 if (is_gimple_assign (def_stmt))
3925 switch (gimple_assign_rhs_code (def_stmt))
3927 case PLUS_EXPR:
3928 t = gimple_assign_rhs2 (def_stmt);
3929 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3930 return;
3931 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3932 v = gimple_assign_rhs1 (def_stmt);
3933 continue;
3934 case MULT_EXPR:
3935 t = gimple_assign_rhs2 (def_stmt);
3936 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3937 return;
3938 linear_step = tree_to_shwi (t);
3939 v = gimple_assign_rhs1 (def_stmt);
3940 continue;
3941 CASE_CONVERT:
3942 t = gimple_assign_rhs1 (def_stmt);
3943 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3944 || (TYPE_PRECISION (TREE_TYPE (v))
3945 < TYPE_PRECISION (TREE_TYPE (t))))
3946 return;
3947 if (!linear_step)
3948 linear_step = 1;
3949 v = t;
3950 continue;
3951 default:
3952 return;
3954 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3955 && loop->simduid
3956 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3957 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3958 == loop->simduid))
3960 if (!linear_step)
3961 linear_step = 1;
3962 arginfo->linear_step = linear_step;
3963 arginfo->op = base;
3964 arginfo->simd_lane_linear = true;
3965 return;
3970 /* Return the number of elements in vector type VECTYPE, which is associated
3971 with a SIMD clone. At present these vectors always have a constant
3972 length. */
3974 static unsigned HOST_WIDE_INT
3975 simd_clone_subparts (tree vectype)
3977 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3980 /* Function vectorizable_simd_clone_call.
3982 Check if STMT_INFO performs a function call that can be vectorized
3983 by calling a simd clone of the function.
3984 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3985 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3986 Return true if STMT_INFO is vectorizable in this way. */
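/* For example, with "#pragma omp declare simd" on foo, a scalar call
   z_1 = foo (x_2, y_3) in a loop vectorized with VF 4 can be replaced by
   a single call to a simdlen-4 clone of foo taking two vector arguments
   (e.g. a clone named along the lines of _ZGVbN4vv_foo on x86).  */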
3988 static bool
3989 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3990 gimple_stmt_iterator *gsi,
3991 gimple **vec_stmt, slp_tree slp_node,
3992 stmt_vector_for_cost *)
3994 tree vec_dest;
3995 tree scalar_dest;
3996 tree op, type;
3997 tree vec_oprnd0 = NULL_TREE;
3998 tree vectype;
3999 poly_uint64 nunits;
4000 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4001 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4002 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4003 tree fndecl, new_temp;
4004 int ncopies, j;
4005 auto_vec<simd_call_arg_info> arginfo;
4006 vec<tree> vargs = vNULL;
4007 size_t i, nargs;
4008 tree lhs, rtype, ratype;
4009 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4010 int arg_offset = 0;
4012 /* Is STMT a vectorizable call? */
4013 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4014 if (!stmt)
4015 return false;
4017 fndecl = gimple_call_fndecl (stmt);
4018 if (fndecl == NULL_TREE
4019 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4021 fndecl = gimple_call_arg (stmt, 0);
4022 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4023 fndecl = TREE_OPERAND (fndecl, 0);
4024 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4025 arg_offset = 1;
4027 if (fndecl == NULL_TREE)
4028 return false;
4030 struct cgraph_node *node = cgraph_node::get (fndecl);
4031 if (node == NULL || node->simd_clones == NULL)
4032 return false;
4034 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4035 return false;
4037 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4038 && ! vec_stmt)
4039 return false;
4041 if (gimple_call_lhs (stmt)
4042 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4043 return false;
4045 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4047 vectype = STMT_VINFO_VECTYPE (stmt_info);
4049 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4050 return false;
4052 /* FORNOW */
4053 if (slp_node)
4054 return false;
4056 /* Process function arguments. */
4057 nargs = gimple_call_num_args (stmt) - arg_offset;
4059 /* Bail out if the function has zero arguments. */
4060 if (nargs == 0)
4061 return false;
4063 arginfo.reserve (nargs, true);
4065 for (i = 0; i < nargs; i++)
4067 simd_call_arg_info thisarginfo;
4068 affine_iv iv;
4070 thisarginfo.linear_step = 0;
4071 thisarginfo.align = 0;
4072 thisarginfo.op = NULL_TREE;
4073 thisarginfo.simd_lane_linear = false;
4075 op = gimple_call_arg (stmt, i + arg_offset);
4076 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4077 &thisarginfo.vectype)
4078 || thisarginfo.dt == vect_uninitialized_def)
4080 if (dump_enabled_p ())
4081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4082 "use not simple.\n");
4083 return false;
4086 if (thisarginfo.dt == vect_constant_def
4087 || thisarginfo.dt == vect_external_def)
4088 gcc_assert (thisarginfo.vectype == NULL_TREE);
4089 else
4090 gcc_assert (thisarginfo.vectype != NULL_TREE);
4092 /* For linear arguments, the analyze phase should have saved
4093 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4094 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4095 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4097 gcc_assert (vec_stmt);
4098 thisarginfo.linear_step
4099 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4100 thisarginfo.op
4101 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4102 thisarginfo.simd_lane_linear
4103 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4104 == boolean_true_node);
4105 /* If the loop has been peeled for alignment, we need to adjust it. */
4106 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4107 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4108 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4110 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4111 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4112 tree opt = TREE_TYPE (thisarginfo.op);
4113 bias = fold_convert (TREE_TYPE (step), bias);
4114 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4115 thisarginfo.op
4116 = fold_build2 (POINTER_TYPE_P (opt)
4117 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4118 thisarginfo.op, bias);
4121 else if (!vec_stmt
4122 && thisarginfo.dt != vect_constant_def
4123 && thisarginfo.dt != vect_external_def
4124 && loop_vinfo
4125 && TREE_CODE (op) == SSA_NAME
4126 && simple_iv (loop, loop_containing_stmt (stmt), op,
4127 &iv, false)
4128 && tree_fits_shwi_p (iv.step))
4130 thisarginfo.linear_step = tree_to_shwi (iv.step);
4131 thisarginfo.op = iv.base;
4133 else if ((thisarginfo.dt == vect_constant_def
4134 || thisarginfo.dt == vect_external_def)
4135 && POINTER_TYPE_P (TREE_TYPE (op)))
4136 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4137 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4138 linear too. */
4139 if (POINTER_TYPE_P (TREE_TYPE (op))
4140 && !thisarginfo.linear_step
4141 && !vec_stmt
4142 && thisarginfo.dt != vect_constant_def
4143 && thisarginfo.dt != vect_external_def
4144 && loop_vinfo
4145 && !slp_node
4146 && TREE_CODE (op) == SSA_NAME)
4147 vect_simd_lane_linear (op, loop, &thisarginfo);
4149 arginfo.quick_push (thisarginfo);
4152 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4153 if (!vf.is_constant ())
4155 if (dump_enabled_p ())
4156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4157 "not considering SIMD clones; not yet supported"
4158 " for variable-width vectors.\n");
4159 return false;
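/* Choose the "best" clone for this call: roughly, prefer clones that
   cover the vectorization factor with as few calls as possible, that are
   not in-branch (masked) unless necessary, that the target considers
   usable and cheap, and whose argument kinds and alignment requirements
   match how the arguments are actually defined and used.  */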
4162 unsigned int badness = 0;
4163 struct cgraph_node *bestn = NULL;
4164 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4165 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4166 else
4167 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4168 n = n->simdclone->next_clone)
4170 unsigned int this_badness = 0;
4171 unsigned int num_calls;
4172 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4173 || n->simdclone->nargs != nargs)
4174 continue;
4175 if (num_calls != 1)
4176 this_badness += exact_log2 (num_calls) * 4096;
4177 if (n->simdclone->inbranch)
4178 this_badness += 8192;
4179 int target_badness = targetm.simd_clone.usable (n);
4180 if (target_badness < 0)
4181 continue;
4182 this_badness += target_badness * 512;
4183 for (i = 0; i < nargs; i++)
4185 switch (n->simdclone->args[i].arg_type)
4187 case SIMD_CLONE_ARG_TYPE_VECTOR:
4188 if (!useless_type_conversion_p
4189 (n->simdclone->args[i].orig_type,
4190 TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
4191 i = -1;
4192 else if (arginfo[i].dt == vect_constant_def
4193 || arginfo[i].dt == vect_external_def
4194 || arginfo[i].linear_step)
4195 this_badness += 64;
4196 break;
4197 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4198 if (arginfo[i].dt != vect_constant_def
4199 && arginfo[i].dt != vect_external_def)
4200 i = -1;
4201 break;
4202 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4203 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4204 if (arginfo[i].dt == vect_constant_def
4205 || arginfo[i].dt == vect_external_def
4206 || (arginfo[i].linear_step
4207 != n->simdclone->args[i].linear_step))
4208 i = -1;
4209 break;
4210 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4211 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4212 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4213 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4214 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4215 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4216 /* FORNOW */
4217 i = -1;
4218 break;
4219 case SIMD_CLONE_ARG_TYPE_MASK:
4220 break;
4222 if (i == (size_t) -1)
4223 break;
4224 if (n->simdclone->args[i].alignment > arginfo[i].align)
4226 i = -1;
4227 break;
4229 if (arginfo[i].align)
4230 this_badness += (exact_log2 (arginfo[i].align)
4231 - exact_log2 (n->simdclone->args[i].alignment));
4233 if (i == (size_t) -1)
4234 continue;
4235 if (bestn == NULL || this_badness < badness)
4237 bestn = n;
4238 badness = this_badness;
4242 if (bestn == NULL)
4243 return false;
4245 for (i = 0; i < nargs; i++)
4247 if ((arginfo[i].dt == vect_constant_def
4248 || arginfo[i].dt == vect_external_def)
4249 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4251 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
4252 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4253 slp_node);
4254 if (arginfo[i].vectype == NULL
4255 || !constant_multiple_p (bestn->simdclone->simdlen,
4256 simd_clone_subparts (arginfo[i].vectype)))
4257 return false;
4260 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4261 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4263 if (dump_enabled_p ())
4264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4265 "vector mask arguments are not supported.\n");
4266 return false;
4269 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4270 && bestn->simdclone->mask_mode == VOIDmode
4271 && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
4272 != simd_clone_subparts (arginfo[i].vectype)))
4274 /* FORNOW we only have partial support for vector-type masks that
4275 can't hold all of simdlen. */
4276 if (dump_enabled_p ())
4277 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4278 vect_location,
4279 "in-branch vector clones are not yet"
4280 " supported for mismatched vector sizes.\n");
4281 return false;
4283 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4284 && bestn->simdclone->mask_mode != VOIDmode)
4286 /* FORNOW don't support integer-type masks. */
4287 if (dump_enabled_p ())
4288 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4289 vect_location,
4290 "in-branch vector clones are not yet"
4291 " supported for integer mask modes.\n");
4292 return false;
4296 fndecl = bestn->decl;
4297 nunits = bestn->simdclone->simdlen;
4298 ncopies = vector_unroll_factor (vf, nunits);
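/* E.g. with a vectorization factor of 16 and a clone simdlen of 4,
   NCOPIES is 4 and four calls to the clone are emitted per vectorized
   iteration.  */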
4300 /* If the function isn't const, only allow it in simd loops where the user
4301 has asserted that at least nunits consecutive iterations can be
4302 performed using SIMD instructions. */
4303 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4304 && gimple_vuse (stmt))
4305 return false;
4307 /* Sanity check: make sure that at least one copy of the vectorized stmt
4308 needs to be generated. */
4309 gcc_assert (ncopies >= 1);
4311 if (!vec_stmt) /* transformation not required. */
4313 /* When the original call is pure or const but the SIMD ABI dictates
4314 an aggregate return we will have to use a virtual definition and
4315 in a loop eventually even need to add a virtual PHI.  That's
4316 not straightforward, so allow this to be fixed up via renaming. */
4317 if (gimple_call_lhs (stmt)
4318 && !gimple_vdef (stmt)
4319 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4320 vinfo->any_known_not_updated_vssa = true;
4321 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4322 for (i = 0; i < nargs; i++)
4323 if ((bestn->simdclone->args[i].arg_type
4324 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4325 || (bestn->simdclone->args[i].arg_type
4326 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4328 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4329 + 1,
4330 true);
4331 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4332 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4333 ? size_type_node : TREE_TYPE (arginfo[i].op);
4334 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4335 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4336 tree sll = arginfo[i].simd_lane_linear
4337 ? boolean_true_node : boolean_false_node;
4338 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4340 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4341 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4342 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4343 dt, slp_node, cost_vec); */
4344 return true;
4347 /* Transform. */
4349 if (dump_enabled_p ())
4350 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4352 /* Handle def. */
4353 scalar_dest = gimple_call_lhs (stmt);
4354 vec_dest = NULL_TREE;
4355 rtype = NULL_TREE;
4356 ratype = NULL_TREE;
4357 if (scalar_dest)
4359 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4360 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4361 if (TREE_CODE (rtype) == ARRAY_TYPE)
4363 ratype = rtype;
4364 rtype = TREE_TYPE (ratype);
4368 auto_vec<vec<tree> > vec_oprnds;
4369 auto_vec<unsigned> vec_oprnds_i;
4370 vec_oprnds.safe_grow_cleared (nargs, true);
4371 vec_oprnds_i.safe_grow_cleared (nargs, true);
4372 for (j = 0; j < ncopies; ++j)
4374 /* Build argument list for the vectorized call. */
4375 if (j == 0)
4376 vargs.create (nargs);
4377 else
4378 vargs.truncate (0);
4380 for (i = 0; i < nargs; i++)
4382 unsigned int k, l, m, o;
4383 tree atype;
4384 op = gimple_call_arg (stmt, i + arg_offset);
4385 switch (bestn->simdclone->args[i].arg_type)
4387 case SIMD_CLONE_ARG_TYPE_VECTOR:
4388 atype = bestn->simdclone->args[i].vector_type;
4389 o = vector_unroll_factor (nunits,
4390 simd_clone_subparts (atype));
4391 for (m = j * o; m < (j + 1) * o; m++)
4393 if (simd_clone_subparts (atype)
4394 < simd_clone_subparts (arginfo[i].vectype))
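/* The clone's vector parameter has fewer elements than the loop's
   operand vector, e.g. a V8SI loop operand feeding a V4SI clone
   parameter; extract the pieces with BIT_FIELD_REFs below.  */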
4396 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4397 k = (simd_clone_subparts (arginfo[i].vectype)
4398 / simd_clone_subparts (atype));
4399 gcc_assert ((k & (k - 1)) == 0);
4400 if (m == 0)
4402 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4403 ncopies * o / k, op,
4404 &vec_oprnds[i]);
4405 vec_oprnds_i[i] = 0;
4406 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4408 else
4410 vec_oprnd0 = arginfo[i].op;
4411 if ((m & (k - 1)) == 0)
4412 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4414 arginfo[i].op = vec_oprnd0;
4415 vec_oprnd0
4416 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4417 bitsize_int (prec),
4418 bitsize_int ((m & (k - 1)) * prec));
4419 gassign *new_stmt
4420 = gimple_build_assign (make_ssa_name (atype),
4421 vec_oprnd0);
4422 vect_finish_stmt_generation (vinfo, stmt_info,
4423 new_stmt, gsi);
4424 vargs.safe_push (gimple_assign_lhs (new_stmt));
4426 else
4428 k = (simd_clone_subparts (atype)
4429 / simd_clone_subparts (arginfo[i].vectype));
4430 gcc_assert ((k & (k - 1)) == 0);
4431 vec<constructor_elt, va_gc> *ctor_elts;
4432 if (k != 1)
4433 vec_alloc (ctor_elts, k);
4434 else
4435 ctor_elts = NULL;
4436 for (l = 0; l < k; l++)
4438 if (m == 0 && l == 0)
4440 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4441 k * o * ncopies,
4443 &vec_oprnds[i]);
4444 vec_oprnds_i[i] = 0;
4445 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4447 else
4448 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4449 arginfo[i].op = vec_oprnd0;
4450 if (k == 1)
4451 break;
4452 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4453 vec_oprnd0);
4455 if (k == 1)
4456 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4457 atype))
4459 vec_oprnd0
4460 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4461 gassign *new_stmt
4462 = gimple_build_assign (make_ssa_name (atype),
4463 vec_oprnd0);
4464 vect_finish_stmt_generation (vinfo, stmt_info,
4465 new_stmt, gsi);
4466 vargs.safe_push (gimple_assign_lhs (new_stmt));
4468 else
4469 vargs.safe_push (vec_oprnd0);
4470 else
4472 vec_oprnd0 = build_constructor (atype, ctor_elts);
4473 gassign *new_stmt
4474 = gimple_build_assign (make_ssa_name (atype),
4475 vec_oprnd0);
4476 vect_finish_stmt_generation (vinfo, stmt_info,
4477 new_stmt, gsi);
4478 vargs.safe_push (gimple_assign_lhs (new_stmt));
4482 break;
4483 case SIMD_CLONE_ARG_TYPE_MASK:
4484 atype = bestn->simdclone->args[i].vector_type;
4485 if (bestn->simdclone->mask_mode != VOIDmode)
4487 /* FORNOW: this is disabled above. */
4488 gcc_unreachable ();
4490 else
4492 tree elt_type = TREE_TYPE (atype);
4493 tree one = fold_convert (elt_type, integer_one_node);
4494 tree zero = fold_convert (elt_type, integer_zero_node);
4495 o = vector_unroll_factor (nunits,
4496 simd_clone_subparts (atype));
4497 for (m = j * o; m < (j + 1) * o; m++)
4499 if (simd_clone_subparts (atype)
4500 < simd_clone_subparts (arginfo[i].vectype))
4502 /* The mask type has fewer elements than simdlen. */
4504 /* FORNOW */
4505 gcc_unreachable ();
4507 else if (simd_clone_subparts (atype)
4508 == simd_clone_subparts (arginfo[i].vectype))
4510 /* The SIMD clone function has the same number of
4511 elements as the current function. */
4512 if (m == 0)
4514 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4515 o * ncopies,
4517 &vec_oprnds[i]);
4518 vec_oprnds_i[i] = 0;
4520 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4521 vec_oprnd0
4522 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4523 build_vector_from_val (atype, one),
4524 build_vector_from_val (atype, zero));
4525 gassign *new_stmt
4526 = gimple_build_assign (make_ssa_name (atype),
4527 vec_oprnd0);
4528 vect_finish_stmt_generation (vinfo, stmt_info,
4529 new_stmt, gsi);
4530 vargs.safe_push (gimple_assign_lhs (new_stmt));
4532 else
4534 /* The mask type has more elements than simdlen. */
4536 /* FORNOW */
4537 gcc_unreachable ();
4541 break;
4542 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4543 vargs.safe_push (op);
4544 break;
4545 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4546 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
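/* A linear argument advances by a constant step per scalar lane.
   Roughly: for the first copy build a loop-header PHI that starts at
   the base value and is bumped by linear_step * simdlen * ncopies each
   iteration; copy J then passes PHI + linear_step * simdlen * J to the
   clone.  */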
4547 if (j == 0)
4549 gimple_seq stmts;
4550 arginfo[i].op
4551 = force_gimple_operand (unshare_expr (arginfo[i].op),
4552 &stmts, true, NULL_TREE);
4553 if (stmts != NULL)
4555 basic_block new_bb;
4556 edge pe = loop_preheader_edge (loop);
4557 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4558 gcc_assert (!new_bb);
4560 if (arginfo[i].simd_lane_linear)
4562 vargs.safe_push (arginfo[i].op);
4563 break;
4565 tree phi_res = copy_ssa_name (op);
4566 gphi *new_phi = create_phi_node (phi_res, loop->header);
4567 add_phi_arg (new_phi, arginfo[i].op,
4568 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4569 enum tree_code code
4570 = POINTER_TYPE_P (TREE_TYPE (op))
4571 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4572 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4573 ? sizetype : TREE_TYPE (op);
4574 poly_widest_int cst
4575 = wi::mul (bestn->simdclone->args[i].linear_step,
4576 ncopies * nunits);
4577 tree tcst = wide_int_to_tree (type, cst);
4578 tree phi_arg = copy_ssa_name (op);
4579 gassign *new_stmt
4580 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4581 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4582 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4583 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4584 UNKNOWN_LOCATION);
4585 arginfo[i].op = phi_res;
4586 vargs.safe_push (phi_res);
4588 else
4590 enum tree_code code
4591 = POINTER_TYPE_P (TREE_TYPE (op))
4592 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4593 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4594 ? sizetype : TREE_TYPE (op);
4595 poly_widest_int cst
4596 = wi::mul (bestn->simdclone->args[i].linear_step,
4597 j * nunits);
4598 tree tcst = wide_int_to_tree (type, cst);
4599 new_temp = make_ssa_name (TREE_TYPE (op));
4600 gassign *new_stmt
4601 = gimple_build_assign (new_temp, code,
4602 arginfo[i].op, tcst);
4603 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4604 vargs.safe_push (new_temp);
4606 break;
4607 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4608 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4609 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4610 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4611 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4612 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4613 default:
4614 gcc_unreachable ();
4618 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4619 if (vec_dest)
4621 gcc_assert (ratype
4622 || known_eq (simd_clone_subparts (rtype), nunits));
4623 if (ratype)
4624 new_temp = create_tmp_var (ratype);
4625 else if (useless_type_conversion_p (vectype, rtype))
4626 new_temp = make_ssa_name (vec_dest, new_call);
4627 else
4628 new_temp = make_ssa_name (rtype, new_call);
4629 gimple_call_set_lhs (new_call, new_temp);
4631 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4632 gimple *new_stmt = new_call;
4634 if (vec_dest)
4636 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4638 unsigned int k, l;
4639 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4640 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4641 k = vector_unroll_factor (nunits,
4642 simd_clone_subparts (vectype));
4643 gcc_assert ((k & (k - 1)) == 0);
4644 for (l = 0; l < k; l++)
4646 tree t;
4647 if (ratype)
4649 t = build_fold_addr_expr (new_temp);
4650 t = build2 (MEM_REF, vectype, t,
4651 build_int_cst (TREE_TYPE (t), l * bytes));
4653 else
4654 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4655 bitsize_int (prec), bitsize_int (l * prec));
4656 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4657 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4659 if (j == 0 && l == 0)
4660 *vec_stmt = new_stmt;
4661 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4664 if (ratype)
4665 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4666 continue;
4668 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4670 unsigned int k = (simd_clone_subparts (vectype)
4671 / simd_clone_subparts (rtype));
4672 gcc_assert ((k & (k - 1)) == 0);
4673 if ((j & (k - 1)) == 0)
4674 vec_alloc (ret_ctor_elts, k);
4675 if (ratype)
4677 unsigned int m, o;
4678 o = vector_unroll_factor (nunits,
4679 simd_clone_subparts (rtype));
4680 for (m = 0; m < o; m++)
4682 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4683 size_int (m), NULL_TREE, NULL_TREE);
4684 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4685 tem);
4686 vect_finish_stmt_generation (vinfo, stmt_info,
4687 new_stmt, gsi);
4688 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4689 gimple_assign_lhs (new_stmt));
4691 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4693 else
4694 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4695 if ((j & (k - 1)) != k - 1)
4696 continue;
4697 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4698 new_stmt
4699 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4700 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4702 if ((unsigned) j == k - 1)
4703 *vec_stmt = new_stmt;
4704 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4705 continue;
4707 else if (ratype)
4709 tree t = build_fold_addr_expr (new_temp);
4710 t = build2 (MEM_REF, vectype, t,
4711 build_int_cst (TREE_TYPE (t), 0));
4712 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4713 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4714 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4716 else if (!useless_type_conversion_p (vectype, rtype))
4718 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4719 new_stmt
4720 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4721 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4725 if (j == 0)
4726 *vec_stmt = new_stmt;
4727 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4730 for (i = 0; i < nargs; ++i)
4732 vec<tree> oprndsi = vec_oprnds[i];
4733 oprndsi.release ();
4735 vargs.release ();
4737 /* Mark the clone as no longer being a candidate for GC. */
4738 bestn->gc_candidate = false;
4740 /* The call in STMT might prevent it from being removed in dce.
4741 We however cannot remove it here, due to the way the ssa name
4742 it defines is mapped to the new definition. So just replace
4743 the rhs of the statement with something harmless. */
4745 if (slp_node)
4746 return true;
4748 gimple *new_stmt;
4749 if (scalar_dest)
4751 type = TREE_TYPE (scalar_dest);
4752 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4753 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4755 else
4756 new_stmt = gimple_build_nop ();
4757 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4758 unlink_stmt_vdef (stmt);
4760 return true;
4764 /* Function vect_gen_widened_results_half
4766 Create a vector stmt whose code, number of operands, and result
4767 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4768 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4771 STMT_INFO is the original scalar stmt that we are vectorizing. */
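/* For example, promoting V8HI operands to V4SI results uses
   VEC_UNPACK_LO_EXPR for one half and VEC_UNPACK_HI_EXPR for the other;
   this helper emits a single such half statement.  */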
4773 static gimple *
4774 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4775 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4776 tree vec_dest, gimple_stmt_iterator *gsi,
4777 stmt_vec_info stmt_info)
4779 gimple *new_stmt;
4780 tree new_temp;
4782 /* Generate half of the widened result: */
4783 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4784 if (op_type != binary_op)
4785 vec_oprnd1 = NULL;
4786 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4787 new_temp = make_ssa_name (vec_dest, new_stmt);
4788 gimple_assign_set_lhs (new_stmt, new_temp);
4789 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4791 return new_stmt;
4795 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4796 For multi-step conversions store the resulting vectors and call the function
4797 recursively. */
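/* E.g. a V4SI -> V8HI demotion packs pairs of V4SI vectors with
   VEC_PACK_TRUNC_EXPR; a multi-step V4SI -> V16QI demotion first packs
   to V8HI and then recurses to pack the V8HI results into V16QI.  */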
4799 static void
4800 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4801 int multi_step_cvt,
4802 stmt_vec_info stmt_info,
4803 vec<tree> &vec_dsts,
4804 gimple_stmt_iterator *gsi,
4805 slp_tree slp_node, enum tree_code code)
4807 unsigned int i;
4808 tree vop0, vop1, new_tmp, vec_dest;
4810 vec_dest = vec_dsts.pop ();
4812 for (i = 0; i < vec_oprnds->length (); i += 2)
4814 /* Create demotion operation. */
4815 vop0 = (*vec_oprnds)[i];
4816 vop1 = (*vec_oprnds)[i + 1];
4817 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4818 new_tmp = make_ssa_name (vec_dest, new_stmt);
4819 gimple_assign_set_lhs (new_stmt, new_tmp);
4820 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4822 if (multi_step_cvt)
4823 /* Store the resulting vector for next recursive call. */
4824 (*vec_oprnds)[i/2] = new_tmp;
4825 else
4827 /* This is the last step of the conversion sequence. Store the
4828 vectors in SLP_NODE or in vector info of the scalar statement
4829 (or in STMT_VINFO_RELATED_STMT chain). */
4830 if (slp_node)
4831 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4832 else
4833 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4837 /* For multi-step demotion operations we first generate demotion operations
4838 from the source type to the intermediate types, and then combine the
4839 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4840 type. */
4841 if (multi_step_cvt)
4843 /* At each level of recursion we have half of the operands we had at the
4844 previous level. */
4845 vec_oprnds->truncate ((i+1)/2);
4846 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4847 multi_step_cvt - 1,
4848 stmt_info, vec_dsts, gsi,
4849 slp_node, VEC_PACK_TRUNC_EXPR);
4852 vec_dsts.quick_push (vec_dest);
4856 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4857 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4858 STMT_INFO. For multi-step conversions store the resulting vectors and
4859 call the function recursively. */
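/* E.g. a widening multiplication of V8HI operands produces two V4SI
   result vectors per input pair, typically via VEC_WIDEN_MULT_LO_EXPR
   and VEC_WIDEN_MULT_HI_EXPR as CODE1 and CODE2.  */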
4861 static void
4862 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4863 vec<tree> *vec_oprnds0,
4864 vec<tree> *vec_oprnds1,
4865 stmt_vec_info stmt_info, tree vec_dest,
4866 gimple_stmt_iterator *gsi,
4867 enum tree_code code1,
4868 enum tree_code code2, int op_type)
4870 int i;
4871 tree vop0, vop1, new_tmp1, new_tmp2;
4872 gimple *new_stmt1, *new_stmt2;
4873 vec<tree> vec_tmp = vNULL;
4875 vec_tmp.create (vec_oprnds0->length () * 2);
4876 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4878 if (op_type == binary_op)
4879 vop1 = (*vec_oprnds1)[i];
4880 else
4881 vop1 = NULL_TREE;
4883 /* Generate the two halves of promotion operation. */
4884 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4885 op_type, vec_dest, gsi,
4886 stmt_info);
4887 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4888 op_type, vec_dest, gsi,
4889 stmt_info);
4890 if (is_gimple_call (new_stmt1))
4892 new_tmp1 = gimple_call_lhs (new_stmt1);
4893 new_tmp2 = gimple_call_lhs (new_stmt2);
4895 else
4897 new_tmp1 = gimple_assign_lhs (new_stmt1);
4898 new_tmp2 = gimple_assign_lhs (new_stmt2);
4901 /* Store the results for the next step. */
4902 vec_tmp.quick_push (new_tmp1);
4903 vec_tmp.quick_push (new_tmp2);
4906 vec_oprnds0->release ();
4907 *vec_oprnds0 = vec_tmp;
4910 /* Create vectorized promotion stmts for widening stmts using only half the
4911 potential vector size for input. */
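/* E.g. for a V4SI = V4HI + V4HI widening addition the inputs already
   have the output's element count, so each vector input is first
   converted (NOP_EXPR) to the output vector type and the addition is
   then done at full width, with no LO/HI halves needed.  */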
4912 static void
4913 vect_create_half_widening_stmts (vec_info *vinfo,
4914 vec<tree> *vec_oprnds0,
4915 vec<tree> *vec_oprnds1,
4916 stmt_vec_info stmt_info, tree vec_dest,
4917 gimple_stmt_iterator *gsi,
4918 enum tree_code code1,
4919 int op_type)
4921 int i;
4922 tree vop0, vop1;
4923 gimple *new_stmt1;
4924 gimple *new_stmt2;
4925 gimple *new_stmt3;
4926 vec<tree> vec_tmp = vNULL;
4928 vec_tmp.create (vec_oprnds0->length ());
4929 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4931 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4933 gcc_assert (op_type == binary_op);
4934 vop1 = (*vec_oprnds1)[i];
4936 /* Widen the first vector input. */
4937 out_type = TREE_TYPE (vec_dest);
4938 new_tmp1 = make_ssa_name (out_type);
4939 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4940 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4941 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4943 /* Widen the second vector input. */
4944 new_tmp2 = make_ssa_name (out_type);
4945 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4946 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4947 /* Perform the operation with both vector inputs widened. */
4948 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4950 else
4952 /* Perform the operation with the single vector input widened. */
4953 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4956 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4957 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4958 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4960 /* Store the results for the next step. */
4961 vec_tmp.quick_push (new_tmp3);
4964 vec_oprnds0->release ();
4965 *vec_oprnds0 = vec_tmp;
4969 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4970 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4971 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4972 Return true if STMT_INFO is vectorizable in this way. */
4974 static bool
4975 vectorizable_conversion (vec_info *vinfo,
4976 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4977 gimple **vec_stmt, slp_tree slp_node,
4978 stmt_vector_for_cost *cost_vec)
4980 tree vec_dest;
4981 tree scalar_dest;
4982 tree op0, op1 = NULL_TREE;
4983 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4984 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4985 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4986 tree new_temp;
4987 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4988 int ndts = 2;
4989 poly_uint64 nunits_in;
4990 poly_uint64 nunits_out;
4991 tree vectype_out, vectype_in;
4992 int ncopies, i;
4993 tree lhs_type, rhs_type;
4994 enum { NARROW, NONE, WIDEN } modifier;
4995 vec<tree> vec_oprnds0 = vNULL;
4996 vec<tree> vec_oprnds1 = vNULL;
4997 tree vop0;
4998 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4999 int multi_step_cvt = 0;
5000 vec<tree> interm_types = vNULL;
5001 tree intermediate_type, cvt_type = NULL_TREE;
5002 int op_type;
5003 unsigned short fltsz;
5005 /* Is STMT a vectorizable conversion? */
5007 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5008 return false;
5010 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5011 && ! vec_stmt)
5012 return false;
5014 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5015 if (!stmt)
5016 return false;
5018 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5019 return false;
5021 code = gimple_assign_rhs_code (stmt);
5022 if (!CONVERT_EXPR_CODE_P (code)
5023 && code != FIX_TRUNC_EXPR
5024 && code != FLOAT_EXPR
5025 && code != WIDEN_PLUS_EXPR
5026 && code != WIDEN_MINUS_EXPR
5027 && code != WIDEN_MULT_EXPR
5028 && code != WIDEN_LSHIFT_EXPR)
5029 return false;
5031 bool widen_arith = (code == WIDEN_PLUS_EXPR
5032 || code == WIDEN_MINUS_EXPR
5033 || code == WIDEN_MULT_EXPR
5034 || code == WIDEN_LSHIFT_EXPR);
5035 op_type = TREE_CODE_LENGTH (code);
5037 /* Check types of lhs and rhs. */
5038 scalar_dest = gimple_assign_lhs (stmt);
5039 lhs_type = TREE_TYPE (scalar_dest);
5040 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5042 /* Check the operands of the operation. */
5043 slp_tree slp_op0, slp_op1 = NULL;
5044 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5045 0, &op0, &slp_op0, &dt[0], &vectype_in))
5047 if (dump_enabled_p ())
5048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5049 "use not simple.\n");
5050 return false;
5053 rhs_type = TREE_TYPE (op0);
5054 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5055 && !((INTEGRAL_TYPE_P (lhs_type)
5056 && INTEGRAL_TYPE_P (rhs_type))
5057 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5058 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5059 return false;
5061 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5062 && ((INTEGRAL_TYPE_P (lhs_type)
5063 && !type_has_mode_precision_p (lhs_type))
5064 || (INTEGRAL_TYPE_P (rhs_type)
5065 && !type_has_mode_precision_p (rhs_type))))
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "type conversion to/from bit-precision unsupported."
5070 "\n");
5071 return false;
5074 if (op_type == binary_op)
5076 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
5077 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
5079 op1 = gimple_assign_rhs2 (stmt);
5080 tree vectype1_in;
5081 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5082 &op1, &slp_op1, &dt[1], &vectype1_in))
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5086 "use not simple.\n");
5087 return false;
5089 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5090 OP1. */
5091 if (!vectype_in)
5092 vectype_in = vectype1_in;
5095 /* If op0 is an external or constant def, infer the vector type
5096 from the scalar type. */
5097 if (!vectype_in)
5098 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5099 if (vec_stmt)
5100 gcc_assert (vectype_in);
5101 if (!vectype_in)
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5105 "no vectype for scalar type %T\n", rhs_type);
5107 return false;
5110 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5111 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5115 "can't convert between boolean and non "
5116 "boolean vectors %T\n", rhs_type);
5118 return false;
5121 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5122 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5123 if (known_eq (nunits_out, nunits_in))
5124 if (widen_arith)
5125 modifier = WIDEN;
5126 else
5127 modifier = NONE;
5128 else if (multiple_p (nunits_out, nunits_in))
5129 modifier = NARROW;
5130 else
5132 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5133 modifier = WIDEN;
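/* For example, an int <-> float conversion keeping the element count
   (V4SI <-> V4SF) is NONE, a short -> int conversion (V8HI input,
   V4SI output, more input than output elements) is WIDEN, and an
   int -> short conversion (V4SI input, V8HI output) is NARROW.  */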
5136 /* Multiple types in SLP are handled by creating the appropriate number of
5137 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5138 case of SLP. */
5139 if (slp_node)
5140 ncopies = 1;
5141 else if (modifier == NARROW)
5142 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5143 else
5144 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5146 /* Sanity check: make sure that at least one copy of the vectorized stmt
5147 needs to be generated. */
5148 gcc_assert (ncopies >= 1);
5150 bool found_mode = false;
5151 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5152 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5153 opt_scalar_mode rhs_mode_iter;
5155 /* Supportable by target? */
5156 switch (modifier)
5158 case NONE:
5159 if (code != FIX_TRUNC_EXPR
5160 && code != FLOAT_EXPR
5161 && !CONVERT_EXPR_CODE_P (code))
5162 return false;
5163 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5164 break;
5165 /* FALLTHRU */
5166 unsupported:
5167 if (dump_enabled_p ())
5168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5169 "conversion not supported by target.\n");
5170 return false;
5172 case WIDEN:
5173 if (known_eq (nunits_in, nunits_out))
5175 if (!supportable_half_widening_operation (code, vectype_out,
5176 vectype_in, &code1))
5177 goto unsupported;
5178 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5179 break;
5181 if (supportable_widening_operation (vinfo, code, stmt_info,
5182 vectype_out, vectype_in, &code1,
5183 &code2, &multi_step_cvt,
5184 &interm_types))
5186 /* Binary widening operation can only be supported directly by the
5187 architecture. */
5188 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5189 break;
5192 if (code != FLOAT_EXPR
5193 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5194 goto unsupported;
5196 fltsz = GET_MODE_SIZE (lhs_mode);
5197 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5199 rhs_mode = rhs_mode_iter.require ();
5200 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5201 break;
5203 cvt_type
5204 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5205 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5206 if (cvt_type == NULL_TREE)
5207 goto unsupported;
5209 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5211 if (!supportable_convert_operation (code, vectype_out,
5212 cvt_type, &codecvt1))
5213 goto unsupported;
5215 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5216 vectype_out, cvt_type,
5217 &codecvt1, &codecvt2,
5218 &multi_step_cvt,
5219 &interm_types))
5220 continue;
5221 else
5222 gcc_assert (multi_step_cvt == 0);
5224 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5225 cvt_type,
5226 vectype_in, &code1, &code2,
5227 &multi_step_cvt, &interm_types))
5229 found_mode = true;
5230 break;
5234 if (!found_mode)
5235 goto unsupported;
5237 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5238 codecvt2 = ERROR_MARK;
5239 else
5241 multi_step_cvt++;
5242 interm_types.safe_push (cvt_type);
5243 cvt_type = NULL_TREE;
5245 break;
5247 case NARROW:
5248 gcc_assert (op_type == unary_op);
5249 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5250 &code1, &multi_step_cvt,
5251 &interm_types))
5252 break;
5254 if (code != FIX_TRUNC_EXPR
5255 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5256 goto unsupported;
5258 cvt_type
5259 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5260 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5261 if (cvt_type == NULL_TREE)
5262 goto unsupported;
5263 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5264 &codecvt1))
5265 goto unsupported;
5266 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5267 &code1, &multi_step_cvt,
5268 &interm_types))
5269 break;
5270 goto unsupported;
5272 default:
5273 gcc_unreachable ();
5276 if (!vec_stmt) /* transformation not required. */
5278 if (slp_node
5279 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5280 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5282 if (dump_enabled_p ())
5283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5284 "incompatible vector types for invariants\n");
5285 return false;
5287 DUMP_VECT_SCOPE ("vectorizable_conversion");
5288 if (modifier == NONE)
5290 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5291 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5292 cost_vec);
5294 else if (modifier == NARROW)
5296 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5297 /* The final packing step produces one vector result per copy. */
5298 unsigned int nvectors
5299 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5300 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5301 multi_step_cvt, cost_vec,
5302 widen_arith);
5304 else
5306 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5307 /* The initial unpacking step produces two vector results
5308 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5309 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5310 unsigned int nvectors
5311 = (slp_node
5312 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5313 : ncopies * 2);
5314 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5315 multi_step_cvt, cost_vec,
5316 widen_arith);
5318 interm_types.release ();
5319 return true;
5322 /* Transform. */
5323 if (dump_enabled_p ())
5324 dump_printf_loc (MSG_NOTE, vect_location,
5325 "transform conversion. ncopies = %d.\n", ncopies);
5327 if (op_type == binary_op)
5329 if (CONSTANT_CLASS_P (op0))
5330 op0 = fold_convert (TREE_TYPE (op1), op0);
5331 else if (CONSTANT_CLASS_P (op1))
5332 op1 = fold_convert (TREE_TYPE (op0), op1);
5335 /* In case of multi-step conversion, we first generate conversion operations
5336 to the intermediate types, and then from those types to the final one.
5337 We create vector destinations for the intermediate type (TYPES) received
5338 from supportable_*_operation, and store them in the correct order
5339 for future use in vect_create_vectorized_*_stmts (). */
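/* As a purely illustrative (hypothetical) example, a source loop such as

     void narrow (signed char *dst, const int *src, int n)
     {
       for (int i = 0; i < n; i++)
         dst[i] = (signed char) src[i];
     }

   often needs a multi-step NARROW conversion (int -> short -> char), so
   MULTI_STEP_CVT would be 1 and INTERM_TYPES would hold the short vector
   type; the destination variables below are pushed so that index 0 of
   VEC_DSTS is the final vector type and higher indices the intermediates.  */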
5340 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5341 vec_dest = vect_create_destination_var (scalar_dest,
5342 (cvt_type && modifier == WIDEN)
5343 ? cvt_type : vectype_out);
5344 vec_dsts.quick_push (vec_dest);
5346 if (multi_step_cvt)
5348 for (i = interm_types.length () - 1;
5349 interm_types.iterate (i, &intermediate_type); i--)
5351 vec_dest = vect_create_destination_var (scalar_dest,
5352 intermediate_type);
5353 vec_dsts.quick_push (vec_dest);
5357 if (cvt_type)
5358 vec_dest = vect_create_destination_var (scalar_dest,
5359 modifier == WIDEN
5360 ? vectype_out : cvt_type);
5362 int ninputs = 1;
5363 if (!slp_node)
5365 if (modifier == WIDEN)
5367 else if (modifier == NARROW)
5369 if (multi_step_cvt)
5370 ninputs = vect_pow2 (multi_step_cvt);
5371 ninputs *= 2;
5375 switch (modifier)
5377 case NONE:
5378 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5379 op0, &vec_oprnds0);
5380 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5382 /* Arguments are ready, create the new vector stmt. */
5383 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5384 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5385 new_temp = make_ssa_name (vec_dest, new_stmt);
5386 gimple_assign_set_lhs (new_stmt, new_temp);
5387 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5389 if (slp_node)
5390 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5391 else
5392 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5394 break;
5396 case WIDEN:
5397 /* In case the vectorization factor (VF) is bigger than the number
5398 of elements that we can fit in a vectype (nunits), we have to
5399 generate more than one vector stmt - i.e - we need to "unroll"
5400 the vector stmt by a factor VF/nunits. */
5401 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5402 op0, &vec_oprnds0,
5403 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5404 &vec_oprnds1);
5405 if (code == WIDEN_LSHIFT_EXPR)
5407 int oprnds_size = vec_oprnds0.length ();
5408 vec_oprnds1.create (oprnds_size);
5409 for (i = 0; i < oprnds_size; ++i)
5410 vec_oprnds1.quick_push (op1);
5412 /* Arguments are ready. Create the new vector stmts. */
5413 for (i = multi_step_cvt; i >= 0; i--)
5415 tree this_dest = vec_dsts[i];
5416 enum tree_code c1 = code1, c2 = code2;
5417 if (i == 0 && codecvt2 != ERROR_MARK)
5419 c1 = codecvt1;
5420 c2 = codecvt2;
5422 if (known_eq (nunits_out, nunits_in))
5423 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5424 &vec_oprnds1, stmt_info,
5425 this_dest, gsi,
5426 c1, op_type);
5427 else
5428 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5429 &vec_oprnds1, stmt_info,
5430 this_dest, gsi,
5431 c1, c2, op_type);
5434 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5436 gimple *new_stmt;
5437 if (cvt_type)
5439 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5440 new_temp = make_ssa_name (vec_dest);
5441 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5442 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5444 else
5445 new_stmt = SSA_NAME_DEF_STMT (vop0);
5447 if (slp_node)
5448 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5449 else
5450 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5452 break;
5454 case NARROW:
5455 /* In case the vectorization factor (VF) is bigger than the number
5456 of elements that we can fit in a vectype (nunits), we have to
5457 generate more than one vector stmt - i.e - we need to "unroll"
5458 the vector stmt by a factor VF/nunits. */
5459 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5460 op0, &vec_oprnds0);
5461 /* Arguments are ready. Create the new vector stmts. */
5462 if (cvt_type)
5463 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5465 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5466 new_temp = make_ssa_name (vec_dest);
5467 gassign *new_stmt
5468 = gimple_build_assign (new_temp, codecvt1, vop0);
5469 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5470 vec_oprnds0[i] = new_temp;
5473 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5474 multi_step_cvt,
5475 stmt_info, vec_dsts, gsi,
5476 slp_node, code1);
5477 break;
5479 if (!slp_node)
5480 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5482 vec_oprnds0.release ();
5483 vec_oprnds1.release ();
5484 interm_types.release ();
5486 return true;
5489 /* Return true if we can assume from the scalar form of STMT_INFO that
5490 neither the scalar nor the vector forms will generate code. STMT_INFO
5491 is known not to involve a data reference. */
5493 bool
5494 vect_nop_conversion_p (stmt_vec_info stmt_info)
5496 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5497 if (!stmt)
5498 return false;
5500 tree lhs = gimple_assign_lhs (stmt);
5501 tree_code code = gimple_assign_rhs_code (stmt);
5502 tree rhs = gimple_assign_rhs1 (stmt);
5504 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5505 return true;
5507 if (CONVERT_EXPR_CODE_P (code))
5508 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5510 return false;
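/* Purely illustrative (hypothetical helpers, not part of the vectorizer):
   a cast that preserves size and precision is a no-op in the sense tested
   above, while a widening cast is not.

     unsigned int as_unsigned (int x) { return (unsigned int) x; }  // nop
     long long    widen (int x)       { return (long long) x; }     // not a nop

   Likewise a VIEW_CONVERT_EXPR between equally sized types generates no
   code in either the scalar or the vector form.  */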
5513 /* Function vectorizable_assignment.
5515 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5516 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5517 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5518 Return true if STMT_INFO is vectorizable in this way. */
5520 static bool
5521 vectorizable_assignment (vec_info *vinfo,
5522 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5523 gimple **vec_stmt, slp_tree slp_node,
5524 stmt_vector_for_cost *cost_vec)
5526 tree vec_dest;
5527 tree scalar_dest;
5528 tree op;
5529 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5530 tree new_temp;
5531 enum vect_def_type dt[1] = {vect_unknown_def_type};
5532 int ndts = 1;
5533 int ncopies;
5534 int i;
5535 vec<tree> vec_oprnds = vNULL;
5536 tree vop;
5537 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5538 enum tree_code code;
5539 tree vectype_in;
5541 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5542 return false;
5544 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5545 && ! vec_stmt)
5546 return false;
5548 /* Is STMT a vectorizable assignment? */
5549 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5550 if (!stmt)
5551 return false;
5553 scalar_dest = gimple_assign_lhs (stmt);
5554 if (TREE_CODE (scalar_dest) != SSA_NAME)
5555 return false;
5557 if (STMT_VINFO_DATA_REF (stmt_info))
5558 return false;
5560 code = gimple_assign_rhs_code (stmt);
5561 if (!(gimple_assign_single_p (stmt)
5562 || code == PAREN_EXPR
5563 || CONVERT_EXPR_CODE_P (code)))
5564 return false;
5566 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5567 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5569 /* Multiple types in SLP are handled by creating the appropriate number of
5570 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5571 case of SLP. */
5572 if (slp_node)
5573 ncopies = 1;
5574 else
5575 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5577 gcc_assert (ncopies >= 1);
5579 slp_tree slp_op;
5580 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5581 &dt[0], &vectype_in))
5583 if (dump_enabled_p ())
5584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5585 "use not simple.\n");
5586 return false;
5588 if (!vectype_in)
5589 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5591 /* We can handle NOP_EXPR conversions that do not change the number
5592 of elements or the vector size. */
5593 if ((CONVERT_EXPR_CODE_P (code)
5594 || code == VIEW_CONVERT_EXPR)
5595 && (!vectype_in
5596 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5597 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5598 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5599 return false;
5601 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5603 if (dump_enabled_p ())
5604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5605 "can't convert between boolean and non "
5606 "boolean vectors %T\n", TREE_TYPE (op));
5608 return false;
5611 /* We do not handle bit-precision changes. */
5612 if ((CONVERT_EXPR_CODE_P (code)
5613 || code == VIEW_CONVERT_EXPR)
5614 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5615 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5616 || !type_has_mode_precision_p (TREE_TYPE (op)))
5617 /* But a conversion that does not change the bit-pattern is ok. */
5618 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5619 > TYPE_PRECISION (TREE_TYPE (op)))
5620 && TYPE_UNSIGNED (TREE_TYPE (op))))
5622 if (dump_enabled_p ())
5623 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5624 "type conversion to/from bit-precision "
5625 "unsupported.\n");
5626 return false;
5629 if (!vec_stmt) /* transformation not required. */
5631 if (slp_node
5632 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5634 if (dump_enabled_p ())
5635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5636 "incompatible vector types for invariants\n");
5637 return false;
5639 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5640 DUMP_VECT_SCOPE ("vectorizable_assignment");
5641 if (!vect_nop_conversion_p (stmt_info))
5642 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5643 cost_vec);
5644 return true;
5647 /* Transform. */
5648 if (dump_enabled_p ())
5649 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5651 /* Handle def. */
5652 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5654 /* Handle use. */
5655 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5657 /* Arguments are ready. Create the new vector stmt. */
5658 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5660 if (CONVERT_EXPR_CODE_P (code)
5661 || code == VIEW_CONVERT_EXPR)
5662 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5663 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5664 new_temp = make_ssa_name (vec_dest, new_stmt);
5665 gimple_assign_set_lhs (new_stmt, new_temp);
5666 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5667 if (slp_node)
5668 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5669 else
5670 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5672 if (!slp_node)
5673 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5675 vec_oprnds.release ();
5676 return true;
5680 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5681 either as shift by a scalar or by a vector. */
5683 bool
5684 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5687 machine_mode vec_mode;
5688 optab optab;
5689 int icode;
5690 tree vectype;
5692 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5693 if (!vectype)
5694 return false;
5696 optab = optab_for_tree_code (code, vectype, optab_scalar);
5697 if (!optab
5698 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5700 optab = optab_for_tree_code (code, vectype, optab_vector);
5701 if (!optab
5702 || (optab_handler (optab, TYPE_MODE (vectype))
5703 == CODE_FOR_nothing))
5704 return false;
5707 vec_mode = TYPE_MODE (vectype);
5708 icode = (int) optab_handler (optab, vec_mode);
5709 if (icode == CODE_FOR_nothing)
5710 return false;
5712 return true;
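/* For illustration only (hypothetical snippets using GNU vector
   extensions): the two optab queries above distinguish shifting every
   lane by one scalar amount from shifting each lane by its own amount.

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si by_scalar (v4si x)          { return x >> 3; }   // optab_scalar
     v4si by_vector (v4si x, v4si n)  { return x >> n; }   // optab_vector
*/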
5716 /* Function vectorizable_shift.
5718 Check if STMT_INFO performs a shift operation that can be vectorized.
5719 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5720 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5721 Return true if STMT_INFO is vectorizable in this way. */
5723 static bool
5724 vectorizable_shift (vec_info *vinfo,
5725 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5726 gimple **vec_stmt, slp_tree slp_node,
5727 stmt_vector_for_cost *cost_vec)
5729 tree vec_dest;
5730 tree scalar_dest;
5731 tree op0, op1 = NULL;
5732 tree vec_oprnd1 = NULL_TREE;
5733 tree vectype;
5734 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5735 enum tree_code code;
5736 machine_mode vec_mode;
5737 tree new_temp;
5738 optab optab;
5739 int icode;
5740 machine_mode optab_op2_mode;
5741 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5742 int ndts = 2;
5743 poly_uint64 nunits_in;
5744 poly_uint64 nunits_out;
5745 tree vectype_out;
5746 tree op1_vectype;
5747 int ncopies;
5748 int i;
5749 vec<tree> vec_oprnds0 = vNULL;
5750 vec<tree> vec_oprnds1 = vNULL;
5751 tree vop0, vop1;
5752 unsigned int k;
5753 bool scalar_shift_arg = true;
5754 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5755 bool incompatible_op1_vectype_p = false;
5757 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5758 return false;
5760 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5761 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5762 && ! vec_stmt)
5763 return false;
5765 /* Is STMT a vectorizable binary/unary operation? */
5766 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5767 if (!stmt)
5768 return false;
5770 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5771 return false;
5773 code = gimple_assign_rhs_code (stmt);
5775 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5776 || code == RROTATE_EXPR))
5777 return false;
5779 scalar_dest = gimple_assign_lhs (stmt);
5780 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5781 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5783 if (dump_enabled_p ())
5784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5785 "bit-precision shifts not supported.\n");
5786 return false;
5789 slp_tree slp_op0;
5790 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5791 0, &op0, &slp_op0, &dt[0], &vectype))
5793 if (dump_enabled_p ())
5794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5795 "use not simple.\n");
5796 return false;
5798 /* If op0 is an external or constant def, infer the vector type
5799 from the scalar type. */
5800 if (!vectype)
5801 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5802 if (vec_stmt)
5803 gcc_assert (vectype);
5804 if (!vectype)
5806 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5808 "no vectype for scalar type\n");
5809 return false;
5812 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5813 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5814 if (maybe_ne (nunits_out, nunits_in))
5815 return false;
5817 stmt_vec_info op1_def_stmt_info;
5818 slp_tree slp_op1;
5819 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5820 &dt[1], &op1_vectype, &op1_def_stmt_info))
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5824 "use not simple.\n");
5825 return false;
5828 /* Multiple types in SLP are handled by creating the appropriate number of
5829 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5830 case of SLP. */
5831 if (slp_node)
5832 ncopies = 1;
5833 else
5834 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5836 gcc_assert (ncopies >= 1);
5838 /* Determine whether the shift amount is a vector or a scalar. If the
5839 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5841 if ((dt[1] == vect_internal_def
5842 || dt[1] == vect_induction_def
5843 || dt[1] == vect_nested_cycle)
5844 && !slp_node)
5845 scalar_shift_arg = false;
5846 else if (dt[1] == vect_constant_def
5847 || dt[1] == vect_external_def
5848 || dt[1] == vect_internal_def)
5850 /* In SLP, we need to check whether the shift count is the same
5851 in all statements; in loops, if it is a constant or invariant,
5852 it is always a scalar shift. */
5853 if (slp_node)
5855 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5856 stmt_vec_info slpstmt_info;
5858 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5860 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5861 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5862 scalar_shift_arg = false;
5865 /* For internal SLP defs we have to make sure we see scalar stmts
5866 for all vector elements.
5867 ??? For different vectors we could resort to a different
5868 scalar shift operand but code-generation below simply always
5869 takes the first. */
5870 if (dt[1] == vect_internal_def
5871 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5872 stmts.length ()))
5873 scalar_shift_arg = false;
5876 /* If the shift amount is computed by a pattern stmt we cannot
5877 use the scalar amount directly, so give up and use a vector
5878 shift. */
5879 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5880 scalar_shift_arg = false;
5882 else
5884 if (dump_enabled_p ())
5885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5886 "operand mode requires invariant argument.\n");
5887 return false;
5890 /* Vector shifted by vector. */
5891 bool was_scalar_shift_arg = scalar_shift_arg;
5892 if (!scalar_shift_arg)
5894 optab = optab_for_tree_code (code, vectype, optab_vector);
5895 if (dump_enabled_p ())
5896 dump_printf_loc (MSG_NOTE, vect_location,
5897 "vector/vector shift/rotate found.\n");
5899 if (!op1_vectype)
5900 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5901 slp_op1);
5902 incompatible_op1_vectype_p
5903 = (op1_vectype == NULL_TREE
5904 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5905 TYPE_VECTOR_SUBPARTS (vectype))
5906 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5907 if (incompatible_op1_vectype_p
5908 && (!slp_node
5909 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5910 || slp_op1->refcnt != 1))
5912 if (dump_enabled_p ())
5913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5914 "unusable type for last operand in"
5915 " vector/vector shift/rotate.\n");
5916 return false;
5919 /* See if the machine has a vector shifted by scalar insn and if not
5920 then see if it has a vector shifted by vector insn. */
5921 else
5923 optab = optab_for_tree_code (code, vectype, optab_scalar);
5924 if (optab
5925 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5927 if (dump_enabled_p ())
5928 dump_printf_loc (MSG_NOTE, vect_location,
5929 "vector/scalar shift/rotate found.\n");
5931 else
5933 optab = optab_for_tree_code (code, vectype, optab_vector);
5934 if (optab
5935 && (optab_handler (optab, TYPE_MODE (vectype))
5936 != CODE_FOR_nothing))
5938 scalar_shift_arg = false;
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_NOTE, vect_location,
5942 "vector/vector shift/rotate found.\n");
5944 if (!op1_vectype)
5945 op1_vectype = get_vectype_for_scalar_type (vinfo,
5946 TREE_TYPE (op1),
5947 slp_op1);
5949 /* Unlike the other binary operators, shifts/rotates have
5950 the rhs being int, instead of the same type as the lhs,
5951 so make sure the scalar is the right type if we are
5952 dealing with vectors of long long/long/short/char. */
5953 incompatible_op1_vectype_p
5954 = (!op1_vectype
5955 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5956 TREE_TYPE (op1)));
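/* As a hypothetical example of the mismatch handled here, in

     void shl (long long *a, int n, int s)
     {
       for (int i = 0; i < n; i++)
         a[i] <<= s;
     }

   the shift count S is an int while the shifted elements are long long;
   if the vector/vector form ends up being used, the invariant count must
   first be converted to the element type before a count vector is built
   from it.  */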
5957 if (incompatible_op1_vectype_p
5958 && dt[1] == vect_internal_def)
5960 if (dump_enabled_p ())
5961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5962 "unusable type for last operand in"
5963 " vector/vector shift/rotate.\n");
5964 return false;
5970 /* Supportable by target? */
5971 if (!optab)
5973 if (dump_enabled_p ())
5974 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5975 "no optab.\n");
5976 return false;
5978 vec_mode = TYPE_MODE (vectype);
5979 icode = (int) optab_handler (optab, vec_mode);
5980 if (icode == CODE_FOR_nothing)
5982 if (dump_enabled_p ())
5983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5984 "op not supported by target.\n");
5985 return false;
5987 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5988 if (vect_emulated_vector_p (vectype))
5989 return false;
5991 if (!vec_stmt) /* transformation not required. */
5993 if (slp_node
5994 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5995 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5996 && (!incompatible_op1_vectype_p
5997 || dt[1] == vect_constant_def)
5998 && !vect_maybe_update_slp_op_vectype
5999 (slp_op1,
6000 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6002 if (dump_enabled_p ())
6003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6004 "incompatible vector types for invariants\n");
6005 return false;
6007 /* Now adjust the constant shift amount in place. */
6008 if (slp_node
6009 && incompatible_op1_vectype_p
6010 && dt[1] == vect_constant_def)
6012 for (unsigned i = 0;
6013 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6015 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6016 = fold_convert (TREE_TYPE (vectype),
6017 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6018 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6019 == INTEGER_CST));
6022 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6023 DUMP_VECT_SCOPE ("vectorizable_shift");
6024 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6025 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6026 return true;
6029 /* Transform. */
6031 if (dump_enabled_p ())
6032 dump_printf_loc (MSG_NOTE, vect_location,
6033 "transform binary/unary operation.\n");
6035 if (incompatible_op1_vectype_p && !slp_node)
6037 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6038 op1 = fold_convert (TREE_TYPE (vectype), op1);
6039 if (dt[1] != vect_constant_def)
6040 op1 = vect_init_vector (vinfo, stmt_info, op1,
6041 TREE_TYPE (vectype), NULL);
6044 /* Handle def. */
6045 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6047 if (scalar_shift_arg && dt[1] != vect_internal_def)
6049 /* Vector shl and shr insn patterns can be defined with scalar
6050 operand 2 (shift operand). In this case, use constant or loop
6051 invariant op1 directly, without extending it to vector mode
6052 first. */
6053 optab_op2_mode = insn_data[icode].operand[2].mode;
6054 if (!VECTOR_MODE_P (optab_op2_mode))
6056 if (dump_enabled_p ())
6057 dump_printf_loc (MSG_NOTE, vect_location,
6058 "operand 1 using scalar mode.\n");
6059 vec_oprnd1 = op1;
6060 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6061 vec_oprnds1.quick_push (vec_oprnd1);
6062 /* Store vec_oprnd1 for every vector stmt to be created.
6063 We check during the analysis that all the shift arguments
6064 are the same.
6065 TODO: Allow different constants for different vector
6066 stmts generated for an SLP instance. */
6067 for (k = 0;
6068 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6069 vec_oprnds1.quick_push (vec_oprnd1);
6072 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6074 if (was_scalar_shift_arg)
6076 /* If the argument was the same in all lanes, create
6077 the correctly typed vector shift amount directly. */
6078 op1 = fold_convert (TREE_TYPE (vectype), op1);
6079 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6080 !loop_vinfo ? gsi : NULL);
6081 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6082 !loop_vinfo ? gsi : NULL);
6083 vec_oprnds1.create (slp_node->vec_stmts_size);
6084 for (k = 0; k < slp_node->vec_stmts_size; k++)
6085 vec_oprnds1.quick_push (vec_oprnd1);
6087 else if (dt[1] == vect_constant_def)
6088 /* The constant shift amount has been adjusted in place. */
6090 else
6091 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6094 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6095 (a special case for certain kinds of vector shifts); otherwise,
6096 operand 1 should be of a vector type (the usual case). */
6097 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6098 op0, &vec_oprnds0,
6099 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6101 /* Arguments are ready. Create the new vector stmt. */
6102 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6104 /* For internal defs where we need to use a scalar shift arg,
6105 extract the first lane. */
6106 if (scalar_shift_arg && dt[1] == vect_internal_def)
6108 vop1 = vec_oprnds1[0];
6109 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6110 gassign *new_stmt
6111 = gimple_build_assign (new_temp,
6112 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6113 vop1,
6114 TYPE_SIZE (TREE_TYPE (new_temp)),
6115 bitsize_zero_node));
6116 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6117 vop1 = new_temp;
6119 else
6120 vop1 = vec_oprnds1[i];
6121 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6122 new_temp = make_ssa_name (vec_dest, new_stmt);
6123 gimple_assign_set_lhs (new_stmt, new_temp);
6124 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6125 if (slp_node)
6126 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6127 else
6128 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6131 if (!slp_node)
6132 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6134 vec_oprnds0.release ();
6135 vec_oprnds1.release ();
6137 return true;
6140 /* Function vectorizable_operation.
6142 Check if STMT_INFO performs a binary, unary or ternary operation that can
6143 be vectorized.
6144 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6145 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6146 Return true if STMT_INFO is vectorizable in this way. */
6148 static bool
6149 vectorizable_operation (vec_info *vinfo,
6150 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6151 gimple **vec_stmt, slp_tree slp_node,
6152 stmt_vector_for_cost *cost_vec)
6154 tree vec_dest;
6155 tree scalar_dest;
6156 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6157 tree vectype;
6158 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6159 enum tree_code code, orig_code;
6160 machine_mode vec_mode;
6161 tree new_temp;
6162 int op_type;
6163 optab optab;
6164 bool target_support_p;
6165 enum vect_def_type dt[3]
6166 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6167 int ndts = 3;
6168 poly_uint64 nunits_in;
6169 poly_uint64 nunits_out;
6170 tree vectype_out;
6171 int ncopies, vec_num;
6172 int i;
6173 vec<tree> vec_oprnds0 = vNULL;
6174 vec<tree> vec_oprnds1 = vNULL;
6175 vec<tree> vec_oprnds2 = vNULL;
6176 tree vop0, vop1, vop2;
6177 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6179 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6180 return false;
6182 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6183 && ! vec_stmt)
6184 return false;
6186 /* Is STMT a vectorizable binary/unary operation? */
6187 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6188 if (!stmt)
6189 return false;
6191 /* Loads and stores are handled in vectorizable_{load,store}. */
6192 if (STMT_VINFO_DATA_REF (stmt_info))
6193 return false;
6195 orig_code = code = gimple_assign_rhs_code (stmt);
6197 /* Shifts are handled in vectorizable_shift. */
6198 if (code == LSHIFT_EXPR
6199 || code == RSHIFT_EXPR
6200 || code == LROTATE_EXPR
6201 || code == RROTATE_EXPR)
6202 return false;
6204 /* Comparisons are handled in vectorizable_comparison. */
6205 if (TREE_CODE_CLASS (code) == tcc_comparison)
6206 return false;
6208 /* Conditions are handled in vectorizable_condition. */
6209 if (code == COND_EXPR)
6210 return false;
6212 /* For pointer addition and subtraction, we should use the normal
6213 plus and minus for the vector operation. */
6214 if (code == POINTER_PLUS_EXPR)
6215 code = PLUS_EXPR;
6216 if (code == POINTER_DIFF_EXPR)
6217 code = MINUS_EXPR;
6219 /* Support only unary, binary and ternary operations. */
6220 op_type = TREE_CODE_LENGTH (code);
6221 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6223 if (dump_enabled_p ())
6224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6225 "num. args = %d (not unary/binary/ternary op).\n",
6226 op_type);
6227 return false;
6230 scalar_dest = gimple_assign_lhs (stmt);
6231 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6233 /* Most operations cannot handle bit-precision types without extra
6234 truncations. */
6235 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6236 if (!mask_op_p
6237 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6238 /* Exceptions are bitwise binary operations. */
6239 && code != BIT_IOR_EXPR
6240 && code != BIT_XOR_EXPR
6241 && code != BIT_AND_EXPR)
6243 if (dump_enabled_p ())
6244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6245 "bit-precision arithmetic not supported.\n");
6246 return false;
6249 slp_tree slp_op0;
6250 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6251 0, &op0, &slp_op0, &dt[0], &vectype))
6253 if (dump_enabled_p ())
6254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6255 "use not simple.\n");
6256 return false;
6258 bool is_invariant = (dt[0] == vect_external_def
6259 || dt[0] == vect_constant_def);
6260 /* If op0 is an external or constant def, infer the vector type
6261 from the scalar type. */
6262 if (!vectype)
6264 /* For a boolean type we cannot determine the vectype from an
6265 invariant value (we don't know whether it is a vector
6266 of booleans or a vector of integers). Use the output
6267 vectype, because operations on booleans don't change
6268 the type. */
6269 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6271 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6273 if (dump_enabled_p ())
6274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6275 "not supported operation on bool value.\n");
6276 return false;
6278 vectype = vectype_out;
6280 else
6281 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6282 slp_node);
6284 if (vec_stmt)
6285 gcc_assert (vectype);
6286 if (!vectype)
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6290 "no vectype for scalar type %T\n",
6291 TREE_TYPE (op0));
6293 return false;
6296 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6297 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6298 if (maybe_ne (nunits_out, nunits_in))
6299 return false;
6301 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6302 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6303 if (op_type == binary_op || op_type == ternary_op)
6305 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6306 1, &op1, &slp_op1, &dt[1], &vectype2))
6308 if (dump_enabled_p ())
6309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6310 "use not simple.\n");
6311 return false;
6313 is_invariant &= (dt[1] == vect_external_def
6314 || dt[1] == vect_constant_def);
6315 if (vectype2
6316 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6317 return false;
6319 if (op_type == ternary_op)
6321 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6322 2, &op2, &slp_op2, &dt[2], &vectype3))
6324 if (dump_enabled_p ())
6325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6326 "use not simple.\n");
6327 return false;
6329 is_invariant &= (dt[2] == vect_external_def
6330 || dt[2] == vect_constant_def);
6331 if (vectype3
6332 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6333 return false;
6336 /* Multiple types in SLP are handled by creating the appropriate number of
6337 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6338 case of SLP. */
6339 if (slp_node)
6341 ncopies = 1;
6342 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6344 else
6346 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6347 vec_num = 1;
6350 gcc_assert (ncopies >= 1);
6352 /* Reject attempts to combine mask types with nonmask types, e.g. if
6353 we have an AND between a (nonmask) boolean loaded from memory and
6354 a (mask) boolean result of a comparison.
6356 TODO: We could easily fix these cases up using pattern statements. */
6357 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6358 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6359 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6361 if (dump_enabled_p ())
6362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6363 "mixed mask and nonmask vector types\n");
6364 return false;
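/* A hypothetical scalar source for the case rejected above:

     void f (int *a, _Bool *b, _Bool *out, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = b[i] & (a[i] > 0);
     }

   B[I] is a (nonmask) boolean loaded from memory while the comparison
   result is a (mask) boolean, so without a pattern statement the AND
   would mix the two representations.  */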
6367 /* Supportable by target? */
6369 vec_mode = TYPE_MODE (vectype);
6370 if (code == MULT_HIGHPART_EXPR)
6371 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6372 else
6374 optab = optab_for_tree_code (code, vectype, optab_default);
6375 if (!optab)
6377 if (dump_enabled_p ())
6378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6379 "no optab.\n");
6380 return false;
6382 target_support_p = (optab_handler (optab, vec_mode)
6383 != CODE_FOR_nothing);
6386 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6387 if (!target_support_p || using_emulated_vectors_p)
6389 if (dump_enabled_p ())
6390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6391 "op not supported by target.\n");
6392 /* When vec_mode is not a vector mode and we have verified that the
6393 ops we do not need to lower (like AND) are natively supported, let
6394 those through even when the mode isn't word_mode. For ops we do
6395 have to lower, the lowering code assumes we are dealing with
6396 word_mode. */
6397 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6398 || !target_support_p)
6399 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6400 /* Check only during analysis. */
6401 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6403 if (dump_enabled_p ())
6404 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6405 return false;
6407 if (dump_enabled_p ())
6408 dump_printf_loc (MSG_NOTE, vect_location,
6409 "proceeding using word mode.\n");
6410 using_emulated_vectors_p = true;
6413 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6414 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6415 internal_fn cond_fn = get_conditional_internal_fn (code);
6417 /* If operating on inactive elements could generate spurious traps,
6418 we need to restrict the operation to active lanes. Note that this
6419 specifically doesn't apply to unhoisted invariants, since they
6420 operate on the same value for every lane.
6422 Similarly, if this operation is part of a reduction, a fully-masked
6423 loop should only change the active lanes of the reduction chain,
6424 keeping the inactive lanes as-is. */
6425 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6426 || reduc_idx >= 0);
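/* For instance (hypothetical example), in

     void divall (int *a, const int *b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = a[i] / b[i];
     }

   the division may trap on inactive lanes, so with partial vectors the
   statement is emitted as a conditional internal function (e.g. .COND_DIV)
   under the loop mask rather than as an unconditional vector division.  */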
6428 if (!vec_stmt) /* transformation not required. */
6430 if (loop_vinfo
6431 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6432 && mask_out_inactive)
6434 if (cond_fn == IFN_LAST
6435 || !direct_internal_fn_supported_p (cond_fn, vectype,
6436 OPTIMIZE_FOR_SPEED))
6438 if (dump_enabled_p ())
6439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6440 "can't use a fully-masked loop because no"
6441 " conditional operation is available.\n");
6442 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6444 else
6445 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6446 vectype, NULL);
6449 /* Put types on constant and invariant SLP children. */
6450 if (slp_node
6451 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6452 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6453 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6455 if (dump_enabled_p ())
6456 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6457 "incompatible vector types for invariants\n");
6458 return false;
6461 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6462 DUMP_VECT_SCOPE ("vectorizable_operation");
6463 vect_model_simple_cost (vinfo, stmt_info,
6464 ncopies, dt, ndts, slp_node, cost_vec);
6465 if (using_emulated_vectors_p)
6467 /* The above vect_model_simple_cost call handles constants
6468 in the prologue and (mis-)costs one of the stmts as
6469 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6470 for the actual lowering that will be applied. */
6471 unsigned n
6472 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6473 switch (code)
6475 case PLUS_EXPR:
6476 n *= 5;
6477 break;
6478 case MINUS_EXPR:
6479 n *= 6;
6480 break;
6481 case NEGATE_EXPR:
6482 n *= 4;
6483 break;
6484 default:;
6486 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6488 return true;
6491 /* Transform. */
6493 if (dump_enabled_p ())
6494 dump_printf_loc (MSG_NOTE, vect_location,
6495 "transform binary/unary operation.\n");
6497 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6499 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6500 vectors with unsigned elements, but the result is signed. So, we
6501 need to compute the MINUS_EXPR into a vectype temporary and
6502 VIEW_CONVERT_EXPR it into the final vectype_out result. */
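/* A hypothetical scalar source for this case:

     void pdiffs (ptrdiff_t *d, char **p, char **q, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = p[i] - q[i];
     }

   The pointer operands are vectorized as unsigned pointer-sized elements,
   the MINUS_EXPR is computed in that unsigned vectype, and the result is
   then view-converted to the signed vectype_out.  */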
6503 tree vec_cvt_dest = NULL_TREE;
6504 if (orig_code == POINTER_DIFF_EXPR)
6506 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6507 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6509 /* Handle def. */
6510 else
6511 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6513 /* In case the vectorization factor (VF) is bigger than the number
6514 of elements that we can fit in a vectype (nunits), we have to generate
6515 more than one vector stmt - i.e - we need to "unroll" the
6516 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6517 from one copy of the vector stmt to the next, in the field
6518 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6519 stages to find the correct vector defs to be used when vectorizing
6520 stmts that use the defs of the current stmt. The example below
6521 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6522 we need to create 4 vectorized stmts):
6524 before vectorization:
6525 RELATED_STMT VEC_STMT
6526 S1: x = memref - -
6527 S2: z = x + 1 - -
6529 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6530 there):
6531 RELATED_STMT VEC_STMT
6532 VS1_0: vx0 = memref0 VS1_1 -
6533 VS1_1: vx1 = memref1 VS1_2 -
6534 VS1_2: vx2 = memref2 VS1_3 -
6535 VS1_3: vx3 = memref3 - -
6536 S1: x = load - VS1_0
6537 S2: z = x + 1 - -
6539 step2: vectorize stmt S2 (done here):
6540 To vectorize stmt S2 we first need to find the relevant vector
6541 def for the first operand 'x'. This is, as usual, obtained from
6542 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6543 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6544 relevant vector def 'vx0'. Having found 'vx0' we can generate
6545 the vector stmt VS2_0, and as usual, record it in the
6546 STMT_VINFO_VEC_STMT of stmt S2.
6547 When creating the second copy (VS2_1), we obtain the relevant vector
6548 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6549 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6550 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6551 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6552 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6553 chain of stmts and pointers:
6554 RELATED_STMT VEC_STMT
6555 VS1_0: vx0 = memref0 VS1_1 -
6556 VS1_1: vx1 = memref1 VS1_2 -
6557 VS1_2: vx2 = memref2 VS1_3 -
6558 VS1_3: vx3 = memref3 - -
6559 S1: x = load - VS1_0
6560 VS2_0: vz0 = vx0 + v1 VS2_1 -
6561 VS2_1: vz1 = vx1 + v1 VS2_2 -
6562 VS2_2: vz2 = vx2 + v1 VS2_3 -
6563 VS2_3: vz3 = vx3 + v1 - -
6564 S2: z = x + 1 - VS2_0 */
6566 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6567 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6568 /* Arguments are ready. Create the new vector stmt. */
6569 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6571 gimple *new_stmt = NULL;
6572 vop1 = ((op_type == binary_op || op_type == ternary_op)
6573 ? vec_oprnds1[i] : NULL_TREE);
6574 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6575 if (using_emulated_vectors_p
6576 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6578 /* Lower the operation. This follows vector lowering. */
6579 unsigned int width = vector_element_bits (vectype);
6580 tree inner_type = TREE_TYPE (vectype);
6581 tree word_type
6582 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6583 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6584 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6585 tree high_bits
6586 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6587 tree wvop0 = make_ssa_name (word_type);
6588 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6589 build1 (VIEW_CONVERT_EXPR,
6590 word_type, vop0));
6591 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6592 tree result_low, signs;
6593 if (code == PLUS_EXPR || code == MINUS_EXPR)
6595 tree wvop1 = make_ssa_name (word_type);
6596 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6597 build1 (VIEW_CONVERT_EXPR,
6598 word_type, vop1));
6599 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6600 signs = make_ssa_name (word_type);
6601 new_stmt = gimple_build_assign (signs,
6602 BIT_XOR_EXPR, wvop0, wvop1);
6603 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6604 tree b_low = make_ssa_name (word_type);
6605 new_stmt = gimple_build_assign (b_low,
6606 BIT_AND_EXPR, wvop1, low_bits);
6607 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6608 tree a_low = make_ssa_name (word_type);
6609 if (code == PLUS_EXPR)
6610 new_stmt = gimple_build_assign (a_low,
6611 BIT_AND_EXPR, wvop0, low_bits);
6612 else
6613 new_stmt = gimple_build_assign (a_low,
6614 BIT_IOR_EXPR, wvop0, high_bits);
6615 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6616 if (code == MINUS_EXPR)
6618 new_stmt = gimple_build_assign (NULL_TREE,
6619 BIT_NOT_EXPR, signs);
6620 signs = make_ssa_name (word_type);
6621 gimple_assign_set_lhs (new_stmt, signs);
6622 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6624 new_stmt = gimple_build_assign (NULL_TREE,
6625 BIT_AND_EXPR, signs, high_bits);
6626 signs = make_ssa_name (word_type);
6627 gimple_assign_set_lhs (new_stmt, signs);
6628 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6629 result_low = make_ssa_name (word_type);
6630 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
6631 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6633 else
6635 tree a_low = make_ssa_name (word_type);
6636 new_stmt = gimple_build_assign (a_low,
6637 BIT_AND_EXPR, wvop0, low_bits);
6638 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6639 signs = make_ssa_name (word_type);
6640 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
6641 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6642 new_stmt = gimple_build_assign (NULL_TREE,
6643 BIT_AND_EXPR, signs, high_bits);
6644 signs = make_ssa_name (word_type);
6645 gimple_assign_set_lhs (new_stmt, signs);
6646 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6647 result_low = make_ssa_name (word_type);
6648 new_stmt = gimple_build_assign (result_low,
6649 MINUS_EXPR, high_bits, a_low);
6650 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6652 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
6653 signs);
6654 result_low = make_ssa_name (word_type);
6655 gimple_assign_set_lhs (new_stmt, result_low);
6656 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6657 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
6658 build1 (VIEW_CONVERT_EXPR,
6659 vectype, result_low));
6660 new_temp = make_ssa_name (vectype);
6661 gimple_assign_set_lhs (new_stmt, new_temp);
6662 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
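/* The PLUS_EXPR case above is the classic SWAR trick: clear the top bit
   of every element, add in one word-sized operation (so carries cannot
   cross element boundaries), then patch the top bits back in with an XOR.
   A hypothetical stand-alone version for four 16-bit lanes packed in a
   uint64_t (assuming <stdint.h>):

     uint64_t add_packed_u16 (uint64_t a, uint64_t b)
     {
       const uint64_t low_bits  = 0x7fff7fff7fff7fffULL;   // max >> 1 per lane
       const uint64_t high_bits = 0x8000800080008000ULL;   // top bit per lane
       uint64_t signs = (a ^ b) & high_bits;
       uint64_t sum_low = (a & low_bits) + (b & low_bits); // carries stay in-lane
       return sum_low ^ signs;                             // fix up the top bits
     }

   For example, lanes {0x0001,0xffff,0x0010,0x8000} + {0x0001,0x0001,0x000f,0x8000}
   yield {0x0002,0x0000,0x001f,0x0000}, i.e. element-wise modular addition.  */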
6664 else if (masked_loop_p && mask_out_inactive)
6666 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6667 vectype, i);
6668 auto_vec<tree> vops (5);
6669 vops.quick_push (mask);
6670 vops.quick_push (vop0);
6671 if (vop1)
6672 vops.quick_push (vop1);
6673 if (vop2)
6674 vops.quick_push (vop2);
6675 if (reduc_idx >= 0)
6677 /* Perform the operation on active elements only and take
6678 inactive elements from the reduction chain input. */
6679 gcc_assert (!vop2);
6680 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
6682 else
6684 auto else_value = targetm.preferred_else_value
6685 (cond_fn, vectype, vops.length () - 1, &vops[1]);
6686 vops.quick_push (else_value);
6688 gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
6689 new_temp = make_ssa_name (vec_dest, call);
6690 gimple_call_set_lhs (call, new_temp);
6691 gimple_call_set_nothrow (call, true);
6692 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6693 new_stmt = call;
6695 else
6697 tree mask = NULL_TREE;
6698 /* When combining two masks, check whether either of them has elsewhere
6699 been combined with a loop mask; if so, we can mark the new combined
6700 mask as not needing to be combined with a loop mask again. */
6701 if (masked_loop_p
6702 && code == BIT_AND_EXPR
6703 && VECTOR_BOOLEAN_TYPE_P (vectype))
6705 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6706 ncopies}))
6708 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6709 vectype, i);
6711 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6712 vop0, gsi);
6715 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6716 ncopies }))
6718 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6719 vectype, i);
6721 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6722 vop1, gsi);
6726 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6727 new_temp = make_ssa_name (vec_dest, new_stmt);
6728 gimple_assign_set_lhs (new_stmt, new_temp);
6729 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6730 if (using_emulated_vectors_p)
6731 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6733 /* Enter the combined value into the vector cond hash so we don't
6734 AND it with a loop mask again. */
6735 if (mask)
6736 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6739 if (vec_cvt_dest)
6741 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6742 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6743 new_temp);
6744 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6745 gimple_assign_set_lhs (new_stmt, new_temp);
6746 vect_finish_stmt_generation (vinfo, stmt_info,
6747 new_stmt, gsi);
6750 if (slp_node)
6751 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6752 else
6753 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6756 if (!slp_node)
6757 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6759 vec_oprnds0.release ();
6760 vec_oprnds1.release ();
6761 vec_oprnds2.release ();
6763 return true;
6766 /* A helper function to ensure data reference DR_INFO's base alignment. */
6768 static void
6769 ensure_base_align (dr_vec_info *dr_info)
6771 /* Alignment is only analyzed for the first element of a DR group;
6772 use that to determine the base alignment we need to enforce. */
6773 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6774 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6776 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6778 if (dr_info->base_misaligned)
6780 tree base_decl = dr_info->base_decl;
6782 // We should only be able to increase the alignment of a base object if
6783 // we know what its new alignment should be at compile time.
6784 unsigned HOST_WIDE_INT align_base_to =
6785 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6787 if (decl_in_symtab_p (base_decl))
6788 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6789 else if (DECL_ALIGN (base_decl) < align_base_to)
6791 SET_DECL_ALIGN (base_decl, align_base_to);
6792 DECL_USER_ALIGN (base_decl) = 1;
6794 dr_info->base_misaligned = false;
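/* Purely as an illustration (hypothetical declaration): the effect of the
   bump above on a file-scope base object is roughly what one would get
   from declaring it with the target's preferred vector alignment, e.g.,
   assuming DR_TARGET_ALIGNMENT is 32 bytes,

     static double a[1024] __attribute__ ((aligned (32)));

   which is essentially what SET_DECL_ALIGN plus DECL_USER_ALIGN amount
   to here.  */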
6799 /* Function get_group_alias_ptr_type.
6801 Return the alias type for the group starting at FIRST_STMT_INFO. */
6803 static tree
6804 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6806 struct data_reference *first_dr, *next_dr;
6808 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6809 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6810 while (next_stmt_info)
6812 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6813 if (get_alias_set (DR_REF (first_dr))
6814 != get_alias_set (DR_REF (next_dr)))
6816 if (dump_enabled_p ())
6817 dump_printf_loc (MSG_NOTE, vect_location,
6818 "conflicting alias set types.\n");
6819 return ptr_type_node;
6821 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6823 return reference_alias_ptr_type (DR_REF (first_dr));
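/* As a hypothetical example, an interleaved store group such as

     struct s { int i; float f; };

     void init (struct s *p, int n)
     {
       for (int j = 0; j < n; j++)
         {
           p[j].i = 0;
           p[j].f = 0.0f;
         }
     }

   mixes int and float alias sets in one group, so the group's alias
   pointer type falls back to ptr_type_node (alias set 0).  */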
6827 /* Function scan_operand_equal_p.
6829 Helper function for check_scan_store. Compare two references
6830 with .GOMP_SIMD_LANE bases. */
6832 static bool
6833 scan_operand_equal_p (tree ref1, tree ref2)
6835 tree ref[2] = { ref1, ref2 };
6836 poly_int64 bitsize[2], bitpos[2];
6837 tree offset[2], base[2];
6838 for (int i = 0; i < 2; ++i)
6840 machine_mode mode;
6841 int unsignedp, reversep, volatilep = 0;
6842 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6843 &offset[i], &mode, &unsignedp,
6844 &reversep, &volatilep);
6845 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6846 return false;
6847 if (TREE_CODE (base[i]) == MEM_REF
6848 && offset[i] == NULL_TREE
6849 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6851 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6852 if (is_gimple_assign (def_stmt)
6853 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6854 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6855 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6857 if (maybe_ne (mem_ref_offset (base[i]), 0))
6858 return false;
6859 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6860 offset[i] = gimple_assign_rhs2 (def_stmt);
6865 if (!operand_equal_p (base[0], base[1], 0))
6866 return false;
6867 if (maybe_ne (bitsize[0], bitsize[1]))
6868 return false;
6869 if (offset[0] != offset[1])
6871 if (!offset[0] || !offset[1])
6872 return false;
6873 if (!operand_equal_p (offset[0], offset[1], 0))
6875 tree step[2];
6876 for (int i = 0; i < 2; ++i)
6878 step[i] = integer_one_node;
6879 if (TREE_CODE (offset[i]) == SSA_NAME)
6881 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6882 if (is_gimple_assign (def_stmt)
6883 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6884 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6885 == INTEGER_CST))
6887 step[i] = gimple_assign_rhs2 (def_stmt);
6888 offset[i] = gimple_assign_rhs1 (def_stmt);
6891 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6893 step[i] = TREE_OPERAND (offset[i], 1);
6894 offset[i] = TREE_OPERAND (offset[i], 0);
6896 tree rhs1 = NULL_TREE;
6897 if (TREE_CODE (offset[i]) == SSA_NAME)
6899 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6900 if (gimple_assign_cast_p (def_stmt))
6901 rhs1 = gimple_assign_rhs1 (def_stmt);
6903 else if (CONVERT_EXPR_P (offset[i]))
6904 rhs1 = TREE_OPERAND (offset[i], 0);
6905 if (rhs1
6906 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6907 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6908 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6909 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6910 offset[i] = rhs1;
6912 if (!operand_equal_p (offset[0], offset[1], 0)
6913 || !operand_equal_p (step[0], step[1], 0))
6914 return false;
6917 return true;
6921 enum scan_store_kind {
6922 /* Normal permutation. */
6923 scan_store_kind_perm,
6925 /* Whole vector left shift permutation with zero init. */
6926 scan_store_kind_lshift_zero,
6928 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6929 scan_store_kind_lshift_cond
6932 /* Function scan_store_can_perm_p.
6934 Verify if we can perform the needed permutations or whole vector shifts.
6935 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6936 USE_WHOLE_VECTOR, if nonnull, records which kind of operation to use
6937 at each step. */
6939 static int
6940 scan_store_can_perm_p (tree vectype, tree init,
6941 vec<enum scan_store_kind> *use_whole_vector = NULL)
6943 enum machine_mode vec_mode = TYPE_MODE (vectype);
6944 unsigned HOST_WIDE_INT nunits;
6945 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6946 return -1;
6947 int units_log2 = exact_log2 (nunits);
6948 if (units_log2 <= 0)
6949 return -1;
6951 int i;
6952 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6953 for (i = 0; i <= units_log2; ++i)
6955 unsigned HOST_WIDE_INT j, k;
6956 enum scan_store_kind kind = scan_store_kind_perm;
6957 vec_perm_builder sel (nunits, nunits, 1);
6958 sel.quick_grow (nunits);
6959 if (i == units_log2)
6961 for (j = 0; j < nunits; ++j)
6962 sel[j] = nunits - 1;
6964 else
6966 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6967 sel[j] = j;
6968 for (k = 0; j < nunits; ++j, ++k)
6969 sel[j] = nunits + k;
6971 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6972 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
6974 if (i == units_log2)
6975 return -1;
6977 if (whole_vector_shift_kind == scan_store_kind_perm)
6979 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6980 return -1;
6981 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6982 /* Whole vector shifts shift in zeros, so if init is an all-zeros
6983 constant, there is no need to do anything further. */
6984 if ((TREE_CODE (init) != INTEGER_CST
6985 && TREE_CODE (init) != REAL_CST)
6986 || !initializer_zerop (init))
6988 tree masktype = truth_type_for (vectype);
6989 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6990 return -1;
6991 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6994 kind = whole_vector_shift_kind;
6996 if (use_whole_vector)
6998 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6999 use_whole_vector->safe_grow_cleared (i, true);
7000 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7001 use_whole_vector->safe_push (kind);
7005 return units_log2;
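/* The permutations validated above implement the usual shift-and-add
   (Hillis-Steele) inclusive scan: at step I the vector is shifted by 2**I
   lanes (with the init value or zeros shifted in) and added, and the final
   permutation broadcasts the last lane.  A hypothetical scalar equivalent
   for nunits == 8:

     void inclusive_scan8 (int v[8])
     {
       for (int shift = 1; shift < 8; shift <<= 1)   // units_log2 == 3 steps
         for (int j = 7; j >= shift; --j)            // lanes below SHIFT keep v[j]
           v[j] += v[j - shift];
     }
*/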
7009 /* Function check_scan_store.
7011 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7013 static bool
7014 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7015 enum vect_def_type rhs_dt, bool slp, tree mask,
7016 vect_memory_access_type memory_access_type)
7018 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7019 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7020 tree ref_type;
7022 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7023 if (slp
7024 || mask
7025 || memory_access_type != VMAT_CONTIGUOUS
7026 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7027 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7028 || loop_vinfo == NULL
7029 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7030 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7031 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7032 || !integer_zerop (DR_INIT (dr_info->dr))
7033 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7034 || !alias_sets_conflict_p (get_alias_set (vectype),
7035 get_alias_set (TREE_TYPE (ref_type))))
7037 if (dump_enabled_p ())
7038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7039 "unsupported OpenMP scan store.\n");
7040 return false;
7043 /* We need to pattern match code built by OpenMP lowering and simplified
7044 by subsequent optimizations into something we can handle.
7045 #pragma omp simd reduction(inscan,+:r)
7046 for (...)
7048 r += something ();
7049 #pragma omp scan inclusive (r)
7050 use (r);
7052 shall have body with:
7053 // Initialization for input phase, store the reduction initializer:
7054 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7055 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7056 D.2042[_21] = 0;
7057 // Actual input phase:
7059 r.0_5 = D.2042[_20];
7060 _6 = _4 + r.0_5;
7061 D.2042[_20] = _6;
7062 // Initialization for scan phase:
7063 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7064 _26 = D.2043[_25];
7065 _27 = D.2042[_25];
7066 _28 = _26 + _27;
7067 D.2043[_25] = _28;
7068 D.2042[_25] = _28;
7069 // Actual scan phase:
7071 r.1_8 = D.2042[_20];
7073 The "omp simd array" variable D.2042 holds the privatized copy used
7074 inside of the loop and D.2043 is another one that holds copies of
7075 the current original list item. The separate GOMP_SIMD_LANE ifn
7076 kinds are there in order to allow optimizing the initializer store
7077 and combiner sequence, e.g. if it is originally some C++-ish
7078 user-defined reduction, while still allowing the vectorizer to
7079 pattern recognize it and turn it into the appropriate vectorized scan.
7081 For exclusive scan, this is slightly different:
7082 #pragma omp simd reduction(inscan,+:r)
7083 for (...)
7085 use (r);
7086 #pragma omp scan exclusive (r)
7087 r += something ();
7089 shall have body with:
7090 // Initialization for input phase, store the reduction initializer:
7091 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7092 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7093 D.2042[_21] = 0;
7094 // Actual input phase:
7096 r.0_5 = D.2042[_20];
7097 _6 = _4 + r.0_5;
7098 D.2042[_20] = _6;
7099 // Initialization for scan phase:
7100 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7101 _26 = D.2043[_25];
7102 D.2044[_25] = _26;
7103 _27 = D.2042[_25];
7104 _28 = _26 + _27;
7105 D.2043[_25] = _28;
7106 // Actual scan phase:
7108 r.1_8 = D.2044[_20];
7109 ... */
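/* A small worked example to make the difference between the two shapes
   above concrete (the values are hypothetical): if something () yields
   1, 2, 3, 4 over four iterations and r starts at 0, the inclusive
   variant's use (r) observes the value after the update, i.e. 1, 3, 6,
   10, while the exclusive variant's use (r) observes the value before
   the update, i.e. 0, 1, 3, 6.  That is why the exclusive form needs
   the extra D.2044 array: it keeps the pre-update value for the use
   while D.2042/D.2043 carry the running sum.  */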
7111 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7113 /* Match the D.2042[_21] = 0; store above. Just require that
7114 it is a constant or external definition store. */
7115 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7117 fail_init:
7118 if (dump_enabled_p ())
7119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7120 "unsupported OpenMP scan initializer store.\n");
7121 return false;
7124 if (! loop_vinfo->scan_map)
7125 loop_vinfo->scan_map = new hash_map<tree, tree>;
7126 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7127 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7128 if (cached)
7129 goto fail_init;
7130 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7132 /* These stores can be vectorized normally. */
7133 return true;
7136 if (rhs_dt != vect_internal_def)
7138 fail:
7139 if (dump_enabled_p ())
7140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7141 "unsupported OpenMP scan combiner pattern.\n");
7142 return false;
7145 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7146 tree rhs = gimple_assign_rhs1 (stmt);
7147 if (TREE_CODE (rhs) != SSA_NAME)
7148 goto fail;
7150 gimple *other_store_stmt = NULL;
7151 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7152 bool inscan_var_store
7153 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7155 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7157 if (!inscan_var_store)
7159 use_operand_p use_p;
7160 imm_use_iterator iter;
7161 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7163 gimple *use_stmt = USE_STMT (use_p);
7164 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7165 continue;
7166 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7167 || !is_gimple_assign (use_stmt)
7168 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7169 || other_store_stmt
7170 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7171 goto fail;
7172 other_store_stmt = use_stmt;
7174 if (other_store_stmt == NULL)
7175 goto fail;
7176 rhs = gimple_assign_lhs (other_store_stmt);
7177 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7178 goto fail;
7181 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7183 use_operand_p use_p;
7184 imm_use_iterator iter;
7185 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7187 gimple *use_stmt = USE_STMT (use_p);
7188 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7189 continue;
7190 if (other_store_stmt)
7191 goto fail;
7192 other_store_stmt = use_stmt;
7195 else
7196 goto fail;
7198 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7199 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7200 || !is_gimple_assign (def_stmt)
7201 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7202 goto fail;
7204 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7205 /* For pointer addition, we should use the normal plus for the vector
7206 operation. */
7207 switch (code)
7209 case POINTER_PLUS_EXPR:
7210 code = PLUS_EXPR;
7211 break;
7212 case MULT_HIGHPART_EXPR:
7213 goto fail;
7214 default:
7215 break;
7217 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7218 goto fail;
7220 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7221 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7222 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7223 goto fail;
7225 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7226 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7227 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7228 || !gimple_assign_load_p (load1_stmt)
7229 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7230 || !gimple_assign_load_p (load2_stmt))
7231 goto fail;
7233 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7234 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7235 if (load1_stmt_info == NULL
7236 || load2_stmt_info == NULL
7237 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7238 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7239 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7240 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7241 goto fail;
7243 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7245 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7246 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7247 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7248 goto fail;
7249 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7250 tree lrhs;
7251 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7252 lrhs = rhs1;
7253 else
7254 lrhs = rhs2;
7255 use_operand_p use_p;
7256 imm_use_iterator iter;
7257 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7259 gimple *use_stmt = USE_STMT (use_p);
7260 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7261 continue;
7262 if (other_store_stmt)
7263 goto fail;
7264 other_store_stmt = use_stmt;
7268 if (other_store_stmt == NULL)
7269 goto fail;
7270 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7271 || !gimple_store_p (other_store_stmt))
7272 goto fail;
7274 stmt_vec_info other_store_stmt_info
7275 = loop_vinfo->lookup_stmt (other_store_stmt);
7276 if (other_store_stmt_info == NULL
7277 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7278 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7279 goto fail;
7281 gimple *stmt1 = stmt;
7282 gimple *stmt2 = other_store_stmt;
7283 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7284 std::swap (stmt1, stmt2);
7285 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7286 gimple_assign_rhs1 (load2_stmt)))
7288 std::swap (rhs1, rhs2);
7289 std::swap (load1_stmt, load2_stmt);
7290 std::swap (load1_stmt_info, load2_stmt_info);
7292 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7293 gimple_assign_rhs1 (load1_stmt)))
7294 goto fail;
7296 tree var3 = NULL_TREE;
7297 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7298 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7299 gimple_assign_rhs1 (load2_stmt)))
7300 goto fail;
7301 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7303 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7304 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7305 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7306 goto fail;
7307 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7308 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7309 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7310 || lookup_attribute ("omp simd inscan exclusive",
7311 DECL_ATTRIBUTES (var3)))
7312 goto fail;
7315 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7316 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7317 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7318 goto fail;
7320 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7321 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7322 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7323 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7324 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7325 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7326 goto fail;
7328 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7329 std::swap (var1, var2);
7331 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7333 if (!lookup_attribute ("omp simd inscan exclusive",
7334 DECL_ATTRIBUTES (var1)))
7335 goto fail;
7336 var1 = var3;
7339 if (loop_vinfo->scan_map == NULL)
7340 goto fail;
7341 tree *init = loop_vinfo->scan_map->get (var1);
7342 if (init == NULL)
7343 goto fail;
7345 /* The IL is as expected, now check if we can actually vectorize it.
7346 Inclusive scan:
7347 _26 = D.2043[_25];
7348 _27 = D.2042[_25];
7349 _28 = _26 + _27;
7350 D.2043[_25] = _28;
7351 D.2042[_25] = _28;
7352 should be vectorized as (where _40 is the vectorized rhs
7353 from the D.2042[_21] = 0; store):
7354 _30 = MEM <vector(8) int> [(int *)&D.2043];
7355 _31 = MEM <vector(8) int> [(int *)&D.2042];
7356 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7357 _33 = _31 + _32;
7358 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7359 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7360 _35 = _33 + _34;
7361 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7362 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7363 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7364 _37 = _35 + _36;
7365 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7366 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7367 _38 = _30 + _37;
7368 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7369 MEM <vector(8) int> [(int *)&D.2043] = _39;
7370 MEM <vector(8) int> [(int *)&D.2042] = _38;
7371 Exclusive scan:
7372 _26 = D.2043[_25];
7373 D.2044[_25] = _26;
7374 _27 = D.2042[_25];
7375 _28 = _26 + _27;
7376 D.2043[_25] = _28;
7377 should be vectorized as (where _40 is the vectorized rhs
7378 from the D.2042[_21] = 0; store):
7379 _30 = MEM <vector(8) int> [(int *)&D.2043];
7380 _31 = MEM <vector(8) int> [(int *)&D.2042];
7381 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7382 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7383 _34 = _32 + _33;
7384 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7385 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7386 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7387 _36 = _34 + _35;
7388 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7389 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7390 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7391 _38 = _36 + _37;
7392 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7393 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7394 _39 = _30 + _38;
7395 _50 = _31 + _39;
7396 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7397 MEM <vector(8) int> [(int *)&D.2044] = _39;
7398 MEM <vector(8) int> [(int *)&D.2042] = _51; */
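/* A numeric illustration of the inclusive sequence above with
   hypothetical lane values and a zero initializer (so _40 is all
   zeros): if _31 is { 1, 2, 3, 4, 5, 6, 7, 8 } and _30 is zero, then
     _32 = { 0, 1, 2, 3, 4, 5, 6, 7 }      (shift up by 1 lane)
     _33 = { 1, 3, 5, 7, 9, 11, 13, 15 }
     _34 = { 0, 0, 1, 3, 5, 7, 9, 11 }     (shift up by 2 lanes)
     _35 = { 1, 3, 6, 10, 14, 18, 22, 26 }
     _36 = { 0, 0, 0, 0, 1, 3, 6, 10 }     (shift up by 4 lanes)
     _37 = { 1, 3, 6, 10, 15, 21, 28, 36 }
   so _37 holds the inclusive prefix sums of _31, _38 adds the carry
   from the previous chunk (zero here) and _39 broadcasts the last
   lane, 36, as the carry for the next chunk.  */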
7399 enum machine_mode vec_mode = TYPE_MODE (vectype);
7400 optab optab = optab_for_tree_code (code, vectype, optab_default);
7401 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7402 goto fail;
7404 int units_log2 = scan_store_can_perm_p (vectype, *init);
7405 if (units_log2 == -1)
7406 goto fail;
7408 return true;
7412 /* Function vectorizable_scan_store.
7414 Helper of vectorizable_store; arguments are as for vectorizable_store.
7415 Handle only the transformation, checking is done in check_scan_store. */
7417 static bool
7418 vectorizable_scan_store (vec_info *vinfo,
7419 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7420 gimple **vec_stmt, int ncopies)
7422 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7423 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7424 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7425 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7427 if (dump_enabled_p ())
7428 dump_printf_loc (MSG_NOTE, vect_location,
7429 "transform scan store. ncopies = %d\n", ncopies);
7431 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7432 tree rhs = gimple_assign_rhs1 (stmt);
7433 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7435 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7436 bool inscan_var_store
7437 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7439 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7441 use_operand_p use_p;
7442 imm_use_iterator iter;
7443 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7445 gimple *use_stmt = USE_STMT (use_p);
7446 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7447 continue;
7448 rhs = gimple_assign_lhs (use_stmt);
7449 break;
7453 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7454 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7455 if (code == POINTER_PLUS_EXPR)
7456 code = PLUS_EXPR;
7457 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7458 && commutative_tree_code (code));
7459 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7460 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7461 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7462 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7463 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7464 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7465 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7466 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7467 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7468 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7469 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7471 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7473 std::swap (rhs1, rhs2);
7474 std::swap (var1, var2);
7475 std::swap (load1_dr_info, load2_dr_info);
7478 tree *init = loop_vinfo->scan_map->get (var1);
7479 gcc_assert (init);
7481 unsigned HOST_WIDE_INT nunits;
7482 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7483 gcc_unreachable ();
7484 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7485 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7486 gcc_assert (units_log2 > 0);
7487 auto_vec<tree, 16> perms;
7488 perms.quick_grow (units_log2 + 1);
7489 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7490 for (int i = 0; i <= units_log2; ++i)
7492 unsigned HOST_WIDE_INT j, k;
7493 vec_perm_builder sel (nunits, nunits, 1);
7494 sel.quick_grow (nunits);
7495 if (i == units_log2)
7496 for (j = 0; j < nunits; ++j)
7497 sel[j] = nunits - 1;
7498 else
7500 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7501 sel[j] = j;
7502 for (k = 0; j < nunits; ++j, ++k)
7503 sel[j] = nunits + k;
7505 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7506 if (!use_whole_vector.is_empty ()
7507 && use_whole_vector[i] != scan_store_kind_perm)
7509 if (zero_vec == NULL_TREE)
7510 zero_vec = build_zero_cst (vectype);
7511 if (masktype == NULL_TREE
7512 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7513 masktype = truth_type_for (vectype);
7514 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7516 else
7517 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7520 tree vec_oprnd1 = NULL_TREE;
7521 tree vec_oprnd2 = NULL_TREE;
7522 tree vec_oprnd3 = NULL_TREE;
7523 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7524 tree dataref_offset = build_int_cst (ref_type, 0);
7525 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7526 vectype, VMAT_CONTIGUOUS);
7527 tree ldataref_ptr = NULL_TREE;
7528 tree orig = NULL_TREE;
7529 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7530 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7531 auto_vec<tree> vec_oprnds1;
7532 auto_vec<tree> vec_oprnds2;
7533 auto_vec<tree> vec_oprnds3;
7534 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7535 *init, &vec_oprnds1,
7536 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7537 rhs2, &vec_oprnds3);
7538 for (int j = 0; j < ncopies; j++)
7540 vec_oprnd1 = vec_oprnds1[j];
7541 if (ldataref_ptr == NULL)
7542 vec_oprnd2 = vec_oprnds2[j];
7543 vec_oprnd3 = vec_oprnds3[j];
7544 if (j == 0)
7545 orig = vec_oprnd3;
7546 else if (!inscan_var_store)
7547 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7549 if (ldataref_ptr)
7551 vec_oprnd2 = make_ssa_name (vectype);
7552 tree data_ref = fold_build2 (MEM_REF, vectype,
7553 unshare_expr (ldataref_ptr),
7554 dataref_offset);
7555 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7556 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7557 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7558 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7559 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7562 tree v = vec_oprnd2;
7563 for (int i = 0; i < units_log2; ++i)
7565 tree new_temp = make_ssa_name (vectype);
7566 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7567 (zero_vec
7568 && (use_whole_vector[i]
7569 != scan_store_kind_perm))
7570 ? zero_vec : vec_oprnd1, v,
7571 perms[i]);
7572 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7573 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7574 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7576 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7578 /* The whole-vector shift shifted in zeros, but if *init
7579 is not initializer_zerop, we need to replace those elements
7580 with elements from vec_oprnd1. */
7581 tree_vector_builder vb (masktype, nunits, 1);
7582 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7583 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7584 ? boolean_false_node : boolean_true_node);
7586 tree new_temp2 = make_ssa_name (vectype);
7587 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7588 new_temp, vec_oprnd1);
7589 vect_finish_stmt_generation (vinfo, stmt_info,
7590 g, gsi);
7591 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7592 new_temp = new_temp2;
7595 /* For exclusive scan, perform the perms[i] permutation once
7596 more. */
7597 if (i == 0
7598 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7599 && v == vec_oprnd2)
7601 v = new_temp;
7602 --i;
7603 continue;
7606 tree new_temp2 = make_ssa_name (vectype);
7607 g = gimple_build_assign (new_temp2, code, v, new_temp);
7608 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7609 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7611 v = new_temp2;
7614 tree new_temp = make_ssa_name (vectype);
7615 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7616 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7617 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7619 tree last_perm_arg = new_temp;
7620 /* For exclusive scan, new_temp computed above is the exclusive scan
7621 prefix sum. Turn it into inclusive prefix sum for the broadcast
7622 of the last element into orig. */
7623 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7625 last_perm_arg = make_ssa_name (vectype);
7626 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7627 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7628 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7631 orig = make_ssa_name (vectype);
7632 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7633 last_perm_arg, perms[units_log2]);
7634 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7635 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7637 if (!inscan_var_store)
7639 tree data_ref = fold_build2 (MEM_REF, vectype,
7640 unshare_expr (dataref_ptr),
7641 dataref_offset);
7642 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7643 g = gimple_build_assign (data_ref, new_temp);
7644 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7645 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7649 if (inscan_var_store)
7650 for (int j = 0; j < ncopies; j++)
7652 if (j != 0)
7653 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7655 tree data_ref = fold_build2 (MEM_REF, vectype,
7656 unshare_expr (dataref_ptr),
7657 dataref_offset);
7658 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7659 gimple *g = gimple_build_assign (data_ref, orig);
7660 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7661 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7663 return true;
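/* A minimal standalone sketch (plain C, hypothetical names, none of
   GCC's internals, guarded by #if 0 so it stays inert) of the per-chunk
   computation emitted above for an inclusive scan: units_log2 rounds of
   "move lanes up, filling with the reduction's neutral value, then
   add", followed by adding the carry from the previous chunk; the
   broadcast of the last lane is modelled here as a scalar carry.  */
#if 0
#include <stdio.h>

#define NUNITS 8

static void
sketch_scan_chunk (int v[NUNITS], int neutral, int *carry)
{
  int shifted[NUNITS];
  for (unsigned step = 0; (1U << step) < NUNITS; ++step)
    {
      unsigned amount = 1U << step;
      for (unsigned j = 0; j < NUNITS; ++j)
        shifted[j] = j < amount ? neutral : v[j - amount];
      for (unsigned j = 0; j < NUNITS; ++j)
        v[j] += shifted[j];             /* Partial prefix sums.  */
    }
  for (unsigned j = 0; j < NUNITS; ++j)
    v[j] += *carry;                     /* Add the carry-in (_30 above).  */
  *carry = v[NUNITS - 1];               /* Broadcast of the last lane.  */
}

int
main (void)
{
  int v[NUNITS] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  int carry = 0;
  sketch_scan_chunk (v, 0, &carry);
  for (unsigned j = 0; j < NUNITS; ++j)
    printf ("%d ", v[j]);               /* 1 3 6 10 15 21 28 36 */
  printf ("carry %d\n", carry);         /* carry 36 */
  return 0;
}
#endif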
7667 /* Function vectorizable_store.
7669 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7670 that can be vectorized.
7671 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7672 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7673 Return true if STMT_INFO is vectorizable in this way. */
7675 static bool
7676 vectorizable_store (vec_info *vinfo,
7677 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7678 gimple **vec_stmt, slp_tree slp_node,
7679 stmt_vector_for_cost *cost_vec)
7681 tree data_ref;
7682 tree op;
7683 tree vec_oprnd = NULL_TREE;
7684 tree elem_type;
7685 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7686 class loop *loop = NULL;
7687 machine_mode vec_mode;
7688 tree dummy;
7689 enum vect_def_type rhs_dt = vect_unknown_def_type;
7690 enum vect_def_type mask_dt = vect_unknown_def_type;
7691 tree dataref_ptr = NULL_TREE;
7692 tree dataref_offset = NULL_TREE;
7693 gimple *ptr_incr = NULL;
7694 int ncopies;
7695 int j;
7696 stmt_vec_info first_stmt_info;
7697 bool grouped_store;
7698 unsigned int group_size, i;
7699 vec<tree> oprnds = vNULL;
7700 vec<tree> result_chain = vNULL;
7701 vec<tree> vec_oprnds = vNULL;
7702 bool slp = (slp_node != NULL);
7703 unsigned int vec_num;
7704 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7705 tree aggr_type;
7706 gather_scatter_info gs_info;
7707 poly_uint64 vf;
7708 vec_load_store_type vls_type;
7709 tree ref_type;
7711 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7712 return false;
7714 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7715 && ! vec_stmt)
7716 return false;
7718 /* Is vectorizable store? */
7720 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7721 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7723 tree scalar_dest = gimple_assign_lhs (assign);
7724 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7725 && is_pattern_stmt_p (stmt_info))
7726 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7727 if (TREE_CODE (scalar_dest) != ARRAY_REF
7728 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7729 && TREE_CODE (scalar_dest) != INDIRECT_REF
7730 && TREE_CODE (scalar_dest) != COMPONENT_REF
7731 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7732 && TREE_CODE (scalar_dest) != REALPART_EXPR
7733 && TREE_CODE (scalar_dest) != MEM_REF)
7734 return false;
7736 else
7738 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7739 if (!call || !gimple_call_internal_p (call))
7740 return false;
7742 internal_fn ifn = gimple_call_internal_fn (call);
7743 if (!internal_store_fn_p (ifn))
7744 return false;
7746 if (slp_node != NULL)
7748 if (dump_enabled_p ())
7749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7750 "SLP of masked stores not supported.\n");
7751 return false;
7754 int mask_index = internal_fn_mask_index (ifn);
7755 if (mask_index >= 0
7756 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7757 &mask, NULL, &mask_dt, &mask_vectype))
7758 return false;
7761 op = vect_get_store_rhs (stmt_info);
7763 /* Cannot have hybrid store SLP -- that would mean storing to the
7764 same location twice. */
7765 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7767 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7768 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7770 if (loop_vinfo)
7772 loop = LOOP_VINFO_LOOP (loop_vinfo);
7773 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7775 else
7776 vf = 1;
7778 /* Multiple types in SLP are handled by creating the appropriate number of
7779 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7780 case of SLP. */
7781 if (slp)
7782 ncopies = 1;
7783 else
7784 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7786 gcc_assert (ncopies >= 1);
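/* For example, in the non-SLP case with a vectorization factor of 8 and
   a 4-lane vectype, vect_get_num_copies returns 8 / 4 = 2 and each
   vector store built below is emitted twice.  */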
7788 /* FORNOW. This restriction should be relaxed. */
7789 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7791 if (dump_enabled_p ())
7792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7793 "multiple types in nested loop.\n");
7794 return false;
7797 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7798 op, &rhs_dt, &rhs_vectype, &vls_type))
7799 return false;
7801 elem_type = TREE_TYPE (vectype);
7802 vec_mode = TYPE_MODE (vectype);
7804 if (!STMT_VINFO_DATA_REF (stmt_info))
7805 return false;
7807 vect_memory_access_type memory_access_type;
7808 enum dr_alignment_support alignment_support_scheme;
7809 int misalignment;
7810 poly_int64 poffset;
7811 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7812 ncopies, &memory_access_type, &poffset,
7813 &alignment_support_scheme, &misalignment, &gs_info))
7814 return false;
7816 if (mask)
7818 if (memory_access_type == VMAT_CONTIGUOUS)
7820 if (!VECTOR_MODE_P (vec_mode)
7821 || !can_vec_mask_load_store_p (vec_mode,
7822 TYPE_MODE (mask_vectype), false))
7823 return false;
7825 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7826 && (memory_access_type != VMAT_GATHER_SCATTER
7827 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7829 if (dump_enabled_p ())
7830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7831 "unsupported access type for masked store.\n");
7832 return false;
7834 else if (memory_access_type == VMAT_GATHER_SCATTER
7835 && gs_info.ifn == IFN_LAST
7836 && !gs_info.decl)
7838 if (dump_enabled_p ())
7839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7840 "unsupported masked emulated scatter.\n");
7841 return false;
7844 else
7846 /* FORNOW. In some cases we can vectorize even if the data type is not
7847 supported (e.g. array initialization with 0). */
7848 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7849 return false;
7852 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7853 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7854 && memory_access_type != VMAT_GATHER_SCATTER
7855 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7856 if (grouped_store)
7858 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7859 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7860 group_size = DR_GROUP_SIZE (first_stmt_info);
7862 else
7864 first_stmt_info = stmt_info;
7865 first_dr_info = dr_info;
7866 group_size = vec_num = 1;
7869 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7871 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7872 memory_access_type))
7873 return false;
7876 if (!vec_stmt) /* transformation not required. */
7878 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7880 if (loop_vinfo
7881 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7882 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7883 vls_type, group_size,
7884 memory_access_type, &gs_info,
7885 mask);
7887 if (slp_node
7888 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7889 vectype))
7891 if (dump_enabled_p ())
7892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7893 "incompatible vector types for invariants\n");
7894 return false;
7897 if (dump_enabled_p ()
7898 && memory_access_type != VMAT_ELEMENTWISE
7899 && memory_access_type != VMAT_GATHER_SCATTER
7900 && alignment_support_scheme != dr_aligned)
7901 dump_printf_loc (MSG_NOTE, vect_location,
7902 "Vectorizing an unaligned access.\n");
7904 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7905 vect_model_store_cost (vinfo, stmt_info, ncopies,
7906 memory_access_type, &gs_info,
7907 alignment_support_scheme,
7908 misalignment, vls_type, slp_node, cost_vec);
7909 return true;
7911 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7913 /* Transform. */
7915 ensure_base_align (dr_info);
7917 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7919 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7920 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7921 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7922 tree ptr, var, scale, vec_mask;
7923 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7924 tree mask_halfvectype = mask_vectype;
7925 edge pe = loop_preheader_edge (loop);
7926 gimple_seq seq;
7927 basic_block new_bb;
7928 enum { NARROW, NONE, WIDEN } modifier;
7929 poly_uint64 scatter_off_nunits
7930 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7932 if (known_eq (nunits, scatter_off_nunits))
7933 modifier = NONE;
7934 else if (known_eq (nunits * 2, scatter_off_nunits))
7936 modifier = WIDEN;
7938 /* Currently gathers and scatters are only supported for
7939 fixed-length vectors. */
7940 unsigned int count = scatter_off_nunits.to_constant ();
7941 vec_perm_builder sel (count, count, 1);
7942 for (i = 0; i < (unsigned int) count; ++i)
7943 sel.quick_push (i | (count / 2));
7945 vec_perm_indices indices (sel, 1, count);
7946 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7947 indices);
7948 gcc_assert (perm_mask != NULL_TREE);
7950 else if (known_eq (nunits, scatter_off_nunits * 2))
7952 modifier = NARROW;
7954 /* Currently gathers and scatters are only supported for
7955 fixed-length vectors. */
7956 unsigned int count = nunits.to_constant ();
7957 vec_perm_builder sel (count, count, 1);
7958 for (i = 0; i < (unsigned int) count; ++i)
7959 sel.quick_push (i | (count / 2));
7961 vec_perm_indices indices (sel, 2, count);
7962 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7963 gcc_assert (perm_mask != NULL_TREE);
7964 ncopies *= 2;
7966 if (mask)
7967 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7969 else
7970 gcc_unreachable ();
7972 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7973 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7974 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7975 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7976 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7977 scaletype = TREE_VALUE (arglist);
7979 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7980 && TREE_CODE (rettype) == VOID_TYPE);
7982 ptr = fold_convert (ptrtype, gs_info.base);
7983 if (!is_gimple_min_invariant (ptr))
7985 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7986 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7987 gcc_assert (!new_bb);
7990 if (mask == NULL_TREE)
7992 mask_arg = build_int_cst (masktype, -1);
7993 mask_arg = vect_init_vector (vinfo, stmt_info,
7994 mask_arg, masktype, NULL);
7997 scale = build_int_cst (scaletype, gs_info.scale);
7999 auto_vec<tree> vec_oprnds0;
8000 auto_vec<tree> vec_oprnds1;
8001 auto_vec<tree> vec_masks;
8002 if (mask)
8004 tree mask_vectype = truth_type_for (vectype);
8005 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8006 modifier == NARROW
8007 ? ncopies / 2 : ncopies,
8008 mask, &vec_masks, mask_vectype);
8010 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8011 modifier == WIDEN
8012 ? ncopies / 2 : ncopies,
8013 gs_info.offset, &vec_oprnds0);
8014 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8015 modifier == NARROW
8016 ? ncopies / 2 : ncopies,
8017 op, &vec_oprnds1);
8018 for (j = 0; j < ncopies; ++j)
8020 if (modifier == WIDEN)
8022 if (j & 1)
8023 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
8024 perm_mask, stmt_info, gsi);
8025 else
8026 op = vec_oprnd0 = vec_oprnds0[j / 2];
8027 src = vec_oprnd1 = vec_oprnds1[j];
8028 if (mask)
8029 mask_op = vec_mask = vec_masks[j];
8031 else if (modifier == NARROW)
8033 if (j & 1)
8034 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
8035 perm_mask, stmt_info, gsi);
8036 else
8037 src = vec_oprnd1 = vec_oprnds1[j / 2];
8038 op = vec_oprnd0 = vec_oprnds0[j];
8039 if (mask)
8040 mask_op = vec_mask = vec_masks[j / 2];
8042 else
8044 op = vec_oprnd0 = vec_oprnds0[j];
8045 src = vec_oprnd1 = vec_oprnds1[j];
8046 if (mask)
8047 mask_op = vec_mask = vec_masks[j];
8050 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
8052 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
8053 TYPE_VECTOR_SUBPARTS (srctype)));
8054 var = vect_get_new_ssa_name (srctype, vect_simple_var);
8055 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
8056 gassign *new_stmt
8057 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
8058 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8059 src = var;
8062 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
8064 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
8065 TYPE_VECTOR_SUBPARTS (idxtype)));
8066 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
8067 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
8068 gassign *new_stmt
8069 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
8070 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8071 op = var;
8074 if (mask)
8076 tree utype;
8077 mask_arg = mask_op;
8078 if (modifier == NARROW)
8080 var = vect_get_new_ssa_name (mask_halfvectype,
8081 vect_simple_var);
8082 gassign *new_stmt
8083 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
8084 : VEC_UNPACK_LO_EXPR,
8085 mask_op);
8086 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8087 mask_arg = var;
8089 tree optype = TREE_TYPE (mask_arg);
8090 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
8091 utype = masktype;
8092 else
8093 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
8094 var = vect_get_new_ssa_name (utype, vect_scalar_var);
8095 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
8096 gassign *new_stmt
8097 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
8098 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8099 mask_arg = var;
8100 if (!useless_type_conversion_p (masktype, utype))
8102 gcc_assert (TYPE_PRECISION (utype)
8103 <= TYPE_PRECISION (masktype));
8104 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
8105 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
8106 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8107 mask_arg = var;
8111 gcall *new_stmt
8112 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
8113 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8115 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8117 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8118 return true;
8120 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8121 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8123 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8124 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
8126 if (grouped_store)
8128 /* FORNOW */
8129 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8131 /* We vectorize all the stmts of the interleaving group when we
8132 reach the last stmt in the group. */
8133 if (DR_GROUP_STORE_COUNT (first_stmt_info)
8134 < DR_GROUP_SIZE (first_stmt_info)
8135 && !slp)
8137 *vec_stmt = NULL;
8138 return true;
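/* E.g. for a (hypothetical) two-element interleaving group
     a[2*i] = x; a[2*i+1] = y;
   the statement for a[2*i] only bumps DR_GROUP_STORE_COUNT and returns
   here with *vec_stmt == NULL; the vector stores for the whole group
   are generated when the a[2*i+1] statement is reached.  */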
8141 if (slp)
8143 grouped_store = false;
8144 /* VEC_NUM is the number of vect stmts to be created for this
8145 group. */
8146 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8147 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8148 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8149 == first_stmt_info);
8150 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8151 op = vect_get_store_rhs (first_stmt_info);
8153 else
8154 /* VEC_NUM is the number of vect stmts to be created for this
8155 group. */
8156 vec_num = group_size;
8158 ref_type = get_group_alias_ptr_type (first_stmt_info);
8160 else
8161 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8163 if (dump_enabled_p ())
8164 dump_printf_loc (MSG_NOTE, vect_location,
8165 "transform store. ncopies = %d\n", ncopies);
8167 if (memory_access_type == VMAT_ELEMENTWISE
8168 || memory_access_type == VMAT_STRIDED_SLP)
8170 gimple_stmt_iterator incr_gsi;
8171 bool insert_after;
8172 gimple *incr;
8173 tree offvar;
8174 tree ivstep;
8175 tree running_off;
8176 tree stride_base, stride_step, alias_off;
8177 tree vec_oprnd;
8178 tree dr_offset;
8179 unsigned int g;
8180 /* Checked by get_load_store_type. */
8181 unsigned int const_nunits = nunits.to_constant ();
8183 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8184 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8186 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8187 stride_base
8188 = fold_build_pointer_plus
8189 (DR_BASE_ADDRESS (first_dr_info->dr),
8190 size_binop (PLUS_EXPR,
8191 convert_to_ptrofftype (dr_offset),
8192 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8193 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8195 /* For a store with loop-invariant (but other than power-of-2)
8196 stride (i.e. not a grouped access) like so:
8198 for (i = 0; i < n; i += stride)
8199 array[i] = ...;
8201 we generate a new induction variable and new stores from
8202 the components of the (vectorized) rhs:
8204 for (j = 0; ; j += VF*stride)
8205 vectemp = ...;
8206 tmp1 = vectemp[0];
8207 array[j] = tmp1;
8208 tmp2 = vectemp[1];
8209 array[j + stride] = tmp2;
8210 ... */
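/* A concrete (hypothetical) instance of the above: for
     for (i = 0; i < n; i += 3)
       a[i] = f (i);
   and a 4-lane vectype, the induction variable advances by 4*3
   elements per vector iteration and four scalar stores are emitted per
   copy, to a[j], a[j+3], a[j+6] and a[j+9], each fed by a
   BIT_FIELD_REF extract of one lane of the vectorized rhs.  */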
8213 unsigned nstores = const_nunits;
8214 unsigned lnel = 1;
8215 tree ltype = elem_type;
8216 tree lvectype = vectype;
8217 if (slp)
8219 if (group_size < const_nunits
8220 && const_nunits % group_size == 0)
8222 nstores = const_nunits / group_size;
8223 lnel = group_size;
8224 ltype = build_vector_type (elem_type, group_size);
8225 lvectype = vectype;
8227 /* First check if vec_extract optab doesn't support extraction
8228 of vector elts directly. */
8229 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8230 machine_mode vmode;
8231 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8232 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8233 group_size).exists (&vmode)
8234 || (convert_optab_handler (vec_extract_optab,
8235 TYPE_MODE (vectype), vmode)
8236 == CODE_FOR_nothing))
8238 /* Try to avoid emitting an extract of vector elements
8239 by performing the extracts using an integer type of the
8240 same size, extracting from a vector of those and then
8241 re-interpreting it as the original vector type if
8242 supported. */
8243 unsigned lsize
8244 = group_size * GET_MODE_BITSIZE (elmode);
8245 unsigned int lnunits = const_nunits / group_size;
8246 /* If we can't construct such a vector fall back to
8247 element extracts from the original vector type and
8248 element size stores. */
8249 if (int_mode_for_size (lsize, 0).exists (&elmode)
8250 && VECTOR_MODE_P (TYPE_MODE (vectype))
8251 && related_vector_mode (TYPE_MODE (vectype), elmode,
8252 lnunits).exists (&vmode)
8253 && (convert_optab_handler (vec_extract_optab,
8254 vmode, elmode)
8255 != CODE_FOR_nothing))
8257 nstores = lnunits;
8258 lnel = group_size;
8259 ltype = build_nonstandard_integer_type (lsize, 1);
8260 lvectype = build_vector_type (ltype, nstores);
8262 /* Else fall back to vector extraction anyway.
8263 Fewer stores are more important than avoiding spilling
8264 of the vector we extract from. Compared to the
8265 construction case in vectorizable_load no store-forwarding
8266 issue exists here for reasonable archs. */
8269 else if (group_size >= const_nunits
8270 && group_size % const_nunits == 0)
8272 nstores = 1;
8273 lnel = const_nunits;
8274 ltype = vectype;
8275 lvectype = vectype;
8277 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8278 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
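/* As a hypothetical example of the punning above: when storing groups
   of four chars out of a V16QI vector on a target without a usable
   V4QI vec_extract, the vector is reinterpreted as V4SI and 32-bit
   integers are extracted and stored instead, so each scalar store
   still covers a whole group.  */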
8281 ivstep = stride_step;
8282 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8283 build_int_cst (TREE_TYPE (ivstep), vf));
8285 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8287 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8288 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8289 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
8290 loop, &incr_gsi, insert_after,
8291 &offvar, NULL);
8292 incr = gsi_stmt (incr_gsi);
8294 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8296 alias_off = build_int_cst (ref_type, 0);
8297 stmt_vec_info next_stmt_info = first_stmt_info;
8298 for (g = 0; g < group_size; g++)
8300 running_off = offvar;
8301 if (g)
8303 tree size = TYPE_SIZE_UNIT (ltype);
8304 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8305 size);
8306 tree newoff = copy_ssa_name (running_off, NULL);
8307 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8308 running_off, pos);
8309 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8310 running_off = newoff;
8312 if (!slp)
8313 op = vect_get_store_rhs (next_stmt_info);
8314 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8315 op, &vec_oprnds);
8316 unsigned int group_el = 0;
8317 unsigned HOST_WIDE_INT
8318 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8319 for (j = 0; j < ncopies; j++)
8321 vec_oprnd = vec_oprnds[j];
8322 /* Pun the vector to extract from if necessary. */
8323 if (lvectype != vectype)
8325 tree tem = make_ssa_name (lvectype);
8326 gimple *pun
8327 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8328 lvectype, vec_oprnd));
8329 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8330 vec_oprnd = tem;
8332 for (i = 0; i < nstores; i++)
8334 tree newref, newoff;
8335 gimple *incr, *assign;
8336 tree size = TYPE_SIZE (ltype);
8337 /* Extract the i'th component. */
8338 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8339 bitsize_int (i), size);
8340 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8341 size, pos);
8343 elem = force_gimple_operand_gsi (gsi, elem, true,
8344 NULL_TREE, true,
8345 GSI_SAME_STMT);
8347 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8348 group_el * elsz);
8349 newref = build2 (MEM_REF, ltype,
8350 running_off, this_off);
8351 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8353 /* And store it to *running_off. */
8354 assign = gimple_build_assign (newref, elem);
8355 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8357 group_el += lnel;
8358 if (! slp
8359 || group_el == group_size)
8361 newoff = copy_ssa_name (running_off, NULL);
8362 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8363 running_off, stride_step);
8364 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8366 running_off = newoff;
8367 group_el = 0;
8369 if (g == group_size - 1
8370 && !slp)
8372 if (j == 0 && i == 0)
8373 *vec_stmt = assign;
8374 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8378 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8379 vec_oprnds.release ();
8380 if (slp)
8381 break;
8384 return true;
8387 auto_vec<tree> dr_chain (group_size);
8388 oprnds.create (group_size);
8390 gcc_assert (alignment_support_scheme);
8391 vec_loop_masks *loop_masks
8392 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8393 ? &LOOP_VINFO_MASKS (loop_vinfo)
8394 : NULL);
8395 vec_loop_lens *loop_lens
8396 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8397 ? &LOOP_VINFO_LENS (loop_vinfo)
8398 : NULL);
8400 /* Shouldn't go with length-based approach if fully masked. */
8401 gcc_assert (!loop_lens || !loop_masks);
8403 /* Targets with store-lane instructions must not require explicit
8404 realignment. vect_supportable_dr_alignment always returns either
8405 dr_aligned or dr_unaligned_supported for masked operations. */
8406 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8407 && !mask
8408 && !loop_masks)
8409 || alignment_support_scheme == dr_aligned
8410 || alignment_support_scheme == dr_unaligned_supported);
8412 tree offset = NULL_TREE;
8413 if (!known_eq (poffset, 0))
8414 offset = size_int (poffset);
8416 tree bump;
8417 tree vec_offset = NULL_TREE;
8418 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8420 aggr_type = NULL_TREE;
8421 bump = NULL_TREE;
8423 else if (memory_access_type == VMAT_GATHER_SCATTER)
8425 aggr_type = elem_type;
8426 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8427 &bump, &vec_offset);
8429 else
8431 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8432 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8433 else
8434 aggr_type = vectype;
8435 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8436 memory_access_type);
8439 if (mask)
8440 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8442 /* In case the vectorization factor (VF) is bigger than the number
8443 of elements that we can fit in a vectype (nunits), we have to generate
8444 more than one vector stmt, i.e. we need to "unroll" the
8445 vector stmt by a factor VF/nunits. */
8447 /* In case of interleaving (non-unit grouped access):
8449 S1: &base + 2 = x2
8450 S2: &base = x0
8451 S3: &base + 1 = x1
8452 S4: &base + 3 = x3
8454 We create vectorized stores starting from base address (the access of the
8455 first stmt in the chain (S2 in the above example), when the last store stmt
8456 of the chain (S4) is reached:
8458 VS1: &base = vx2
8459 VS2: &base + vec_size*1 = vx0
8460 VS3: &base + vec_size*2 = vx1
8461 VS4: &base + vec_size*3 = vx3
8463 Then permutation statements are generated:
8465 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8466 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8469 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8470 (the order of the data-refs in the output of vect_permute_store_chain
8471 corresponds to the order of scalar stmts in the interleaving chain - see
8472 the documentation of vect_permute_store_chain()).
8474 In case of both multiple types and interleaving, above vector stores and
8475 permutation stmts are created for every copy. The result vector stmts are
8476 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8477 STMT_VINFO_RELATED_STMT for the next copies.
8480 auto_vec<tree> vec_masks;
8481 tree vec_mask = NULL;
8482 auto_vec<tree> vec_offsets;
8483 auto_vec<vec<tree> > gvec_oprnds;
8484 gvec_oprnds.safe_grow_cleared (group_size, true);
8485 for (j = 0; j < ncopies; j++)
8487 gimple *new_stmt;
8488 if (j == 0)
8490 if (slp)
8492 /* Get vectorized arguments for SLP_NODE. */
8493 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8494 op, &vec_oprnds);
8495 vec_oprnd = vec_oprnds[0];
8497 else
8499 /* For interleaved stores we collect vectorized defs for all the
8500 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8501 used as an input to vect_permute_store_chain().
8503 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8504 and OPRNDS are of size 1. */
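/* E.g. for a (hypothetical) two-statement group with ncopies == 2,
   gvec_oprnds[0] and gvec_oprnds[1] each receive two vector defs;
   DR_CHAIN and OPRNDS are seeded with copy 0 here and refreshed with
   copy J in the branch below handling j != 0.  */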
8505 stmt_vec_info next_stmt_info = first_stmt_info;
8506 for (i = 0; i < group_size; i++)
8508 /* Since gaps are not supported for interleaved stores,
8509 DR_GROUP_SIZE is the exact number of stmts in the chain.
8510 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there is
8511 no interleaving, DR_GROUP_SIZE is 1,
8512 and only one iteration of the loop will be executed. */
8513 op = vect_get_store_rhs (next_stmt_info);
8514 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8515 ncopies, op, &gvec_oprnds[i]);
8516 vec_oprnd = gvec_oprnds[i][0];
8517 dr_chain.quick_push (gvec_oprnds[i][0]);
8518 oprnds.quick_push (gvec_oprnds[i][0]);
8519 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8521 if (mask)
8523 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8524 mask, &vec_masks, mask_vectype);
8525 vec_mask = vec_masks[0];
8529 /* We should have caught mismatched types earlier. */
8530 gcc_assert (useless_type_conversion_p (vectype,
8531 TREE_TYPE (vec_oprnd)));
8532 bool simd_lane_access_p
8533 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8534 if (simd_lane_access_p
8535 && !loop_masks
8536 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8537 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8538 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8539 && integer_zerop (DR_INIT (first_dr_info->dr))
8540 && alias_sets_conflict_p (get_alias_set (aggr_type),
8541 get_alias_set (TREE_TYPE (ref_type))))
8543 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8544 dataref_offset = build_int_cst (ref_type, 0);
8546 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8547 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8548 slp_node, &gs_info, &dataref_ptr,
8549 &vec_offsets);
8550 else
8551 dataref_ptr
8552 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8553 simd_lane_access_p ? loop : NULL,
8554 offset, &dummy, gsi, &ptr_incr,
8555 simd_lane_access_p, bump);
8557 else
8559 /* For interleaved stores we created vectorized defs for all the
8560 defs stored in OPRNDS in the previous iteration (previous copy).
8561 DR_CHAIN is then used as an input to vect_permute_store_chain().
8562 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8563 OPRNDS are of size 1. */
8564 for (i = 0; i < group_size; i++)
8566 vec_oprnd = gvec_oprnds[i][j];
8567 dr_chain[i] = gvec_oprnds[i][j];
8568 oprnds[i] = gvec_oprnds[i][j];
8570 if (mask)
8571 vec_mask = vec_masks[j];
8572 if (dataref_offset)
8573 dataref_offset
8574 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8575 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8576 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8577 stmt_info, bump);
8580 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8582 tree vec_array;
8584 /* Get an array into which we can store the individual vectors. */
8585 vec_array = create_vector_array (vectype, vec_num);
8587 /* Invalidate the current contents of VEC_ARRAY. This should
8588 become an RTL clobber too, which prevents the vector registers
8589 from being upward-exposed. */
8590 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8592 /* Store the individual vectors into the array. */
8593 for (i = 0; i < vec_num; i++)
8595 vec_oprnd = dr_chain[i];
8596 write_vector_array (vinfo, stmt_info,
8597 gsi, vec_oprnd, vec_array, i);
8600 tree final_mask = NULL;
8601 if (loop_masks)
8602 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8603 vectype, j);
8604 if (vec_mask)
8605 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8606 final_mask, vec_mask, gsi);
8608 gcall *call;
8609 if (final_mask)
8611 /* Emit:
8612 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8613 VEC_ARRAY). */
8614 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8615 tree alias_ptr = build_int_cst (ref_type, align);
8616 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8617 dataref_ptr, alias_ptr,
8618 final_mask, vec_array);
8620 else
8622 /* Emit:
8623 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8624 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8625 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8626 vec_array);
8627 gimple_call_set_lhs (call, data_ref);
8629 gimple_call_set_nothrow (call, true);
8630 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8631 new_stmt = call;
8633 /* Record that VEC_ARRAY is now dead. */
8634 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8636 else
8638 new_stmt = NULL;
8639 if (grouped_store)
8641 if (j == 0)
8642 result_chain.create (group_size);
8643 /* Permute. */
8644 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8645 gsi, &result_chain);
8648 stmt_vec_info next_stmt_info = first_stmt_info;
8649 for (i = 0; i < vec_num; i++)
8651 unsigned misalign;
8652 unsigned HOST_WIDE_INT align;
8654 tree final_mask = NULL_TREE;
8655 if (loop_masks)
8656 final_mask = vect_get_loop_mask (gsi, loop_masks,
8657 vec_num * ncopies,
8658 vectype, vec_num * j + i);
8659 if (vec_mask)
8660 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8661 final_mask, vec_mask, gsi);
8663 if (memory_access_type == VMAT_GATHER_SCATTER
8664 && gs_info.ifn != IFN_LAST)
8666 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8667 vec_offset = vec_offsets[vec_num * j + i];
8668 tree scale = size_int (gs_info.scale);
8669 gcall *call;
8670 if (final_mask)
8671 call = gimple_build_call_internal
8672 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8673 scale, vec_oprnd, final_mask);
8674 else
8675 call = gimple_build_call_internal
8676 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8677 scale, vec_oprnd);
8678 gimple_call_set_nothrow (call, true);
8679 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8680 new_stmt = call;
8681 break;
8683 else if (memory_access_type == VMAT_GATHER_SCATTER)
8685 /* Emulated scatter. */
8686 gcc_assert (!final_mask);
8687 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
8688 unsigned HOST_WIDE_INT const_offset_nunits
8689 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
8690 .to_constant ();
8691 vec<constructor_elt, va_gc> *ctor_elts;
8692 vec_alloc (ctor_elts, const_nunits);
8693 gimple_seq stmts = NULL;
8694 tree elt_type = TREE_TYPE (vectype);
8695 unsigned HOST_WIDE_INT elt_size
8696 = tree_to_uhwi (TYPE_SIZE (elt_type));
8697 /* We support offset vectors with more elements
8698 than the data vector for now. */
8699 unsigned HOST_WIDE_INT factor
8700 = const_offset_nunits / const_nunits;
8701 vec_offset = vec_offsets[j / factor];
8702 unsigned elt_offset = (j % factor) * const_nunits;
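/* E.g. (hypothetically) a 4-lane data vectype with an 8-lane offset
   vectype gives FACTOR == 2: copies 0 and 1 both read vec_offsets[0],
   using lanes 0-3 resp. 4-7 of it, copies 2 and 3 read vec_offsets[1],
   and so on.  */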
8703 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
8704 tree scale = size_int (gs_info.scale);
8705 align = get_object_alignment (DR_REF (first_dr_info->dr));
8706 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
8707 for (unsigned k = 0; k < const_nunits; ++k)
8709 /* Compute the offsetted pointer. */
8710 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
8711 bitsize_int (k + elt_offset));
8712 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
8713 idx_type, vec_offset,
8714 TYPE_SIZE (idx_type), boff);
8715 idx = gimple_convert (&stmts, sizetype, idx);
8716 idx = gimple_build (&stmts, MULT_EXPR,
8717 sizetype, idx, scale);
8718 tree ptr = gimple_build (&stmts, PLUS_EXPR,
8719 TREE_TYPE (dataref_ptr),
8720 dataref_ptr, idx);
8721 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
8722 /* Extract the element to be stored. */
8723 tree elt = gimple_build (&stmts, BIT_FIELD_REF,
8724 TREE_TYPE (vectype), vec_oprnd,
8725 TYPE_SIZE (elt_type),
8726 bitsize_int (k * elt_size));
8727 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8728 stmts = NULL;
8729 tree ref = build2 (MEM_REF, ltype, ptr,
8730 build_int_cst (ref_type, 0));
8731 new_stmt = gimple_build_assign (ref, elt);
8732 vect_finish_stmt_generation (vinfo, stmt_info,
8733 new_stmt, gsi);
8735 break;
8738 if (i > 0)
8739 /* Bump the vector pointer. */
8740 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8741 gsi, stmt_info, bump);
8743 if (slp)
8744 vec_oprnd = vec_oprnds[i];
8745 else if (grouped_store)
8746 /* For grouped stores vectorized defs are interleaved in
8747 vect_permute_store_chain(). */
8748 vec_oprnd = result_chain[i];
8750 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8751 if (alignment_support_scheme == dr_aligned)
8752 misalign = 0;
8753 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8755 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8756 misalign = 0;
8758 else
8759 misalign = misalignment;
8760 if (dataref_offset == NULL_TREE
8761 && TREE_CODE (dataref_ptr) == SSA_NAME)
8762 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8763 misalign);
8764 align = least_bit_hwi (misalign | align);
8766 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8768 tree perm_mask = perm_mask_for_reverse (vectype);
8769 tree perm_dest = vect_create_destination_var
8770 (vect_get_store_rhs (stmt_info), vectype);
8771 tree new_temp = make_ssa_name (perm_dest);
8773 /* Generate the permute statement. */
8774 gimple *perm_stmt
8775 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8776 vec_oprnd, perm_mask);
8777 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8779 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8780 vec_oprnd = new_temp;
8783 /* Arguments are ready. Create the new vector stmt. */
8784 if (final_mask)
8786 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8787 gcall *call
8788 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8789 dataref_ptr, ptr,
8790 final_mask, vec_oprnd);
8791 gimple_call_set_nothrow (call, true);
8792 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8793 new_stmt = call;
8795 else if (loop_lens)
8797 tree final_len
8798 = vect_get_loop_len (loop_vinfo, loop_lens,
8799 vec_num * ncopies, vec_num * j + i);
8800 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8801 machine_mode vmode = TYPE_MODE (vectype);
8802 opt_machine_mode new_ovmode
8803 = get_len_load_store_mode (vmode, false);
8804 machine_mode new_vmode = new_ovmode.require ();
8805 /* Need conversion if it's wrapped with VnQI. */
8806 if (vmode != new_vmode)
8808 tree new_vtype
8809 = build_vector_type_for_mode (unsigned_intQI_type_node,
8810 new_vmode);
8811 tree var
8812 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8813 vec_oprnd
8814 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8815 gassign *new_stmt
8816 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8817 vec_oprnd);
8818 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8819 gsi);
8820 vec_oprnd = var;
8823 signed char biasval =
8824 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8826 tree bias = build_int_cst (intQI_type_node, biasval);
8827 gcall *call
8828 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8829 ptr, final_len, vec_oprnd,
8830 bias);
8831 gimple_call_set_nothrow (call, true);
8832 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8833 new_stmt = call;
8835 else
8837 data_ref = fold_build2 (MEM_REF, vectype,
8838 dataref_ptr,
8839 dataref_offset
8840 ? dataref_offset
8841 : build_int_cst (ref_type, 0));
8842 if (alignment_support_scheme == dr_aligned)
8844 else
8845 TREE_TYPE (data_ref)
8846 = build_aligned_type (TREE_TYPE (data_ref),
8847 align * BITS_PER_UNIT);
8848 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8849 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8850 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8853 if (slp)
8854 continue;
8856 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8857 if (!next_stmt_info)
8858 break;
8861 if (!slp)
8863 if (j == 0)
8864 *vec_stmt = new_stmt;
8865 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8869 for (i = 0; i < group_size; ++i)
8871 vec<tree> oprndsi = gvec_oprnds[i];
8872 oprndsi.release ();
8874 oprnds.release ();
8875 result_chain.release ();
8876 vec_oprnds.release ();
8878 return true;
8881 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8882 VECTOR_CST mask. No checks are made that the target platform supports the
8883 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8884 vect_gen_perm_mask_checked. */
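 /* As an illustration only (a sketch of a typical caller, not code taken
    from this file): assuming VECTYPE is a 4-lane vector type and the
    target supports the permutation, a reversal mask can be built along
    the lines of perm_mask_for_reverse:

      vec_perm_builder sel (4, 4, 1);
      for (int i = 0; i < 4; ++i)
        sel.quick_push (3 - i);
      vec_perm_indices indices (sel, 1, 4);
      tree mask = vect_gen_perm_mask_checked (vectype, indices);

    which yields the VECTOR_CST { 3, 2, 1, 0 }.  */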
8886 tree
8887 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8889 tree mask_type;
8891 poly_uint64 nunits = sel.length ();
8892 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8894 mask_type = build_vector_type (ssizetype, nunits);
8895 return vec_perm_indices_to_tree (mask_type, sel);
8898 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8899 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8901 tree
8902 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8904 machine_mode vmode = TYPE_MODE (vectype);
8905 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
8906 return vect_gen_perm_mask_any (vectype, sel);
8909 /* Given vector variables X and Y that were generated for the scalar
8910 STMT_INFO, generate instructions to permute the vector elements of X and Y
8911 using the permutation mask MASK_VEC, insert them at *GSI and return the
8912 permuted vector variable. */
8914 static tree
8915 permute_vec_elements (vec_info *vinfo,
8916 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8917 gimple_stmt_iterator *gsi)
8919 tree vectype = TREE_TYPE (x);
8920 tree perm_dest, data_ref;
8921 gimple *perm_stmt;
8923 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8924 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8925 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8926 else
8927 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8928 data_ref = make_ssa_name (perm_dest);
8930 /* Generate the permute statement. */
8931 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8932 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8934 return data_ref;
8937 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8938 inserting them on the loop's preheader edge. Returns true if we
8939 were successful in doing so (and thus STMT_INFO can then be moved),
8940 otherwise returns false. */
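 /* For example (illustrative, not taken from the surrounding code): for an
    invariant load
      a_1 = *q_2;
    whose address
      q_2 = p_3 + 4;
    is computed inside the loop, the definition of q_2 is moved to the
    preheader edge so that the load itself can then be hoisted by
    vectorizable_load.  p_3 must itself be defined outside the loop; we
    deliberately do not recurse.  */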
8942 static bool
8943 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8945 ssa_op_iter i;
8946 tree op;
8947 bool any = false;
8949 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8951 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8952 if (!gimple_nop_p (def_stmt)
8953 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8955 /* Make sure we don't need to recurse. While we could do
8956 so in simple cases, when there are more complex use webs
8957 we don't have an easy way to preserve stmt order to fulfil
8958 dependencies within them. */
8959 tree op2;
8960 ssa_op_iter i2;
8961 if (gimple_code (def_stmt) == GIMPLE_PHI)
8962 return false;
8963 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8965 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8966 if (!gimple_nop_p (def_stmt2)
8967 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8968 return false;
8970 any = true;
8974 if (!any)
8975 return true;
8977 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8979 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8980 if (!gimple_nop_p (def_stmt)
8981 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8983 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8984 gsi_remove (&gsi, false);
8985 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8989 return true;
8992 /* vectorizable_load.
8994 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8995 that can be vectorized.
8996 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8997 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8998 Return true if STMT_INFO is vectorizable in this way. */
9000 static bool
9001 vectorizable_load (vec_info *vinfo,
9002 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9003 gimple **vec_stmt, slp_tree slp_node,
9004 stmt_vector_for_cost *cost_vec)
9006 tree scalar_dest;
9007 tree vec_dest = NULL;
9008 tree data_ref = NULL;
9009 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9010 class loop *loop = NULL;
9011 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9012 bool nested_in_vect_loop = false;
9013 tree elem_type;
9014 tree new_temp;
9015 machine_mode mode;
9016 tree dummy;
9017 tree dataref_ptr = NULL_TREE;
9018 tree dataref_offset = NULL_TREE;
9019 gimple *ptr_incr = NULL;
9020 int ncopies;
9021 int i, j;
9022 unsigned int group_size;
9023 poly_uint64 group_gap_adj;
9024 tree msq = NULL_TREE, lsq;
9025 tree realignment_token = NULL_TREE;
9026 gphi *phi = NULL;
9027 vec<tree> dr_chain = vNULL;
9028 bool grouped_load = false;
9029 stmt_vec_info first_stmt_info;
9030 stmt_vec_info first_stmt_info_for_drptr = NULL;
9031 bool compute_in_loop = false;
9032 class loop *at_loop;
9033 int vec_num;
9034 bool slp = (slp_node != NULL);
9035 bool slp_perm = false;
9036 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9037 poly_uint64 vf;
9038 tree aggr_type;
9039 gather_scatter_info gs_info;
9040 tree ref_type;
9041 enum vect_def_type mask_dt = vect_unknown_def_type;
9043 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9044 return false;
9046 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9047 && ! vec_stmt)
9048 return false;
9050 if (!STMT_VINFO_DATA_REF (stmt_info))
9051 return false;
9053 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9054 int mask_index = -1;
9055 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9057 scalar_dest = gimple_assign_lhs (assign);
9058 if (TREE_CODE (scalar_dest) != SSA_NAME)
9059 return false;
9061 tree_code code = gimple_assign_rhs_code (assign);
9062 if (code != ARRAY_REF
9063 && code != BIT_FIELD_REF
9064 && code != INDIRECT_REF
9065 && code != COMPONENT_REF
9066 && code != IMAGPART_EXPR
9067 && code != REALPART_EXPR
9068 && code != MEM_REF
9069 && TREE_CODE_CLASS (code) != tcc_declaration)
9070 return false;
9072 else
9074 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9075 if (!call || !gimple_call_internal_p (call))
9076 return false;
9078 internal_fn ifn = gimple_call_internal_fn (call);
9079 if (!internal_load_fn_p (ifn))
9080 return false;
9082 scalar_dest = gimple_call_lhs (call);
9083 if (!scalar_dest)
9084 return false;
9086 mask_index = internal_fn_mask_index (ifn);
9087 /* ??? For SLP the mask operand is always last. */
9088 if (mask_index >= 0 && slp_node)
9089 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
9090 if (mask_index >= 0
9091 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9092 &mask, NULL, &mask_dt, &mask_vectype))
9093 return false;
9096 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9097 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9099 if (loop_vinfo)
9101 loop = LOOP_VINFO_LOOP (loop_vinfo);
9102 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9103 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9105 else
9106 vf = 1;
9108 /* Multiple types in SLP are handled by creating the appropriate number of
9109 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9110 case of SLP. */
9111 if (slp)
9112 ncopies = 1;
9113 else
9114 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9116 gcc_assert (ncopies >= 1);
9118 /* FORNOW. This restriction should be relaxed. */
9119 if (nested_in_vect_loop && ncopies > 1)
9121 if (dump_enabled_p ())
9122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9123 "multiple types in nested loop.\n");
9124 return false;
9127 /* Invalidate assumptions made by dependence analysis when vectorization
9128 on the unrolled body effectively re-orders stmts. */
9129 if (ncopies > 1
9130 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9131 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9132 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9134 if (dump_enabled_p ())
9135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9136 "cannot perform implicit CSE when unrolling "
9137 "with negative dependence distance\n");
9138 return false;
9141 elem_type = TREE_TYPE (vectype);
9142 mode = TYPE_MODE (vectype);
9144 /* FORNOW. In some cases we can vectorize even if the data type is not
9145 supported (e.g. data copies). */
9146 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9148 if (dump_enabled_p ())
9149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9150 "Aligned load, but unsupported type.\n");
9151 return false;
9154 /* Check if the load is a part of an interleaving chain. */
9155 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9157 grouped_load = true;
9158 /* FORNOW */
9159 gcc_assert (!nested_in_vect_loop);
9160 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9162 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9163 group_size = DR_GROUP_SIZE (first_stmt_info);
9165 /* Refuse non-SLP vectorization of SLP-only groups. */
9166 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9168 if (dump_enabled_p ())
9169 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9170 "cannot vectorize load in non-SLP mode.\n");
9171 return false;
9174 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9176 slp_perm = true;
9178 if (!loop_vinfo)
9180 /* In BB vectorization we may not actually use a loaded vector
9181 accessing elements in excess of DR_GROUP_SIZE. */
9182 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9183 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9184 unsigned HOST_WIDE_INT nunits;
9185 unsigned j, k, maxk = 0;
9186 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9187 if (k > maxk)
9188 maxk = k;
9189 tree vectype = SLP_TREE_VECTYPE (slp_node);
9190 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9191 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9193 if (dump_enabled_p ())
9194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9195 "BB vectorization with gaps at the end of "
9196 "a load is not supported\n");
9197 return false;
9201 auto_vec<tree> tem;
9202 unsigned n_perms;
9203 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9204 true, &n_perms))
9206 if (dump_enabled_p ())
9207 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9208 vect_location,
9209 "unsupported load permutation\n");
9210 return false;
9214 /* Invalidate assumptions made by dependence analysis when vectorization
9215 on the unrolled body effectively re-orders stmts. */
9216 if (!PURE_SLP_STMT (stmt_info)
9217 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9218 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9219 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9221 if (dump_enabled_p ())
9222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9223 "cannot perform implicit CSE when performing "
9224 "group loads with negative dependence distance\n");
9225 return false;
9228 else
9229 group_size = 1;
9231 vect_memory_access_type memory_access_type;
9232 enum dr_alignment_support alignment_support_scheme;
9233 int misalignment;
9234 poly_int64 poffset;
9235 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9236 ncopies, &memory_access_type, &poffset,
9237 &alignment_support_scheme, &misalignment, &gs_info))
9238 return false;
9240 if (mask)
9242 if (memory_access_type == VMAT_CONTIGUOUS)
9244 machine_mode vec_mode = TYPE_MODE (vectype);
9245 if (!VECTOR_MODE_P (vec_mode)
9246 || !can_vec_mask_load_store_p (vec_mode,
9247 TYPE_MODE (mask_vectype), true))
9248 return false;
9250 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9251 && memory_access_type != VMAT_GATHER_SCATTER)
9253 if (dump_enabled_p ())
9254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9255 "unsupported access type for masked load.\n");
9256 return false;
9258 else if (memory_access_type == VMAT_GATHER_SCATTER
9259 && gs_info.ifn == IFN_LAST
9260 && !gs_info.decl)
9262 if (dump_enabled_p ())
9263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9264 "unsupported masked emulated gather.\n");
9265 return false;
9269 if (!vec_stmt) /* transformation not required. */
9271 if (slp_node
9272 && mask
9273 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9274 mask_vectype))
9276 if (dump_enabled_p ())
9277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9278 "incompatible vector types for invariants\n");
9279 return false;
9282 if (!slp)
9283 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9285 if (loop_vinfo
9286 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9287 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9288 VLS_LOAD, group_size,
9289 memory_access_type, &gs_info,
9290 mask);
9292 if (dump_enabled_p ()
9293 && memory_access_type != VMAT_ELEMENTWISE
9294 && memory_access_type != VMAT_GATHER_SCATTER
9295 && alignment_support_scheme != dr_aligned)
9296 dump_printf_loc (MSG_NOTE, vect_location,
9297 "Vectorizing an unaligned access.\n");
9299 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9300 vinfo->any_known_not_updated_vssa = true;
9302 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9303 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
9304 alignment_support_scheme, misalignment,
9305 &gs_info, slp_node, cost_vec);
9306 return true;
9309 if (!slp)
9310 gcc_assert (memory_access_type
9311 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9313 if (dump_enabled_p ())
9314 dump_printf_loc (MSG_NOTE, vect_location,
9315 "transform load. ncopies = %d\n", ncopies);
9317 /* Transform. */
9319 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9320 ensure_base_align (dr_info);
9322 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9324 vect_build_gather_load_calls (vinfo,
9325 stmt_info, gsi, vec_stmt, &gs_info, mask);
9326 return true;
9329 if (memory_access_type == VMAT_INVARIANT)
9331 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9332 /* If we have versioned for aliasing or the loop doesn't
9333 have any data dependencies that would preclude this,
9334 then we are sure this is a loop invariant load and
9335 thus we can insert it on the preheader edge. */
9336 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9337 && !nested_in_vect_loop
9338 && hoist_defs_of_uses (stmt_info, loop));
9339 if (hoist_p)
9341 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9342 if (dump_enabled_p ())
9343 dump_printf_loc (MSG_NOTE, vect_location,
9344 "hoisting out of the vectorized loop: %G",
9345 (gimple *) stmt);
9346 scalar_dest = copy_ssa_name (scalar_dest);
9347 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9348 edge pe = loop_preheader_edge (loop);
9349 gphi *vphi = get_virtual_phi (loop->header);
9350 tree vuse;
9351 if (vphi)
9352 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9353 else
9354 vuse = gimple_vuse (gsi_stmt (*gsi));
9355 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9356 gimple_set_vuse (new_stmt, vuse);
9357 gsi_insert_on_edge_immediate (pe, new_stmt);
9359 /* These copies are all equivalent, but currently the representation
9360 requires a separate STMT_VINFO_VEC_STMT for each one. */
9361 gimple_stmt_iterator gsi2 = *gsi;
9362 gsi_next (&gsi2);
9363 for (j = 0; j < ncopies; j++)
9365 if (hoist_p)
9366 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9367 vectype, NULL);
9368 else
9369 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9370 vectype, &gsi2);
9371 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9372 if (slp)
9373 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9374 else
9376 if (j == 0)
9377 *vec_stmt = new_stmt;
9378 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9381 return true;
9384 if (memory_access_type == VMAT_ELEMENTWISE
9385 || memory_access_type == VMAT_STRIDED_SLP)
9387 gimple_stmt_iterator incr_gsi;
9388 bool insert_after;
9389 tree offvar;
9390 tree ivstep;
9391 tree running_off;
9392 vec<constructor_elt, va_gc> *v = NULL;
9393 tree stride_base, stride_step, alias_off;
9394 /* Checked by get_load_store_type. */
9395 unsigned int const_nunits = nunits.to_constant ();
9396 unsigned HOST_WIDE_INT cst_offset = 0;
9397 tree dr_offset;
9399 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9400 gcc_assert (!nested_in_vect_loop);
9402 if (grouped_load)
9404 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9405 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9407 else
9409 first_stmt_info = stmt_info;
9410 first_dr_info = dr_info;
9412 if (slp && grouped_load)
9414 group_size = DR_GROUP_SIZE (first_stmt_info);
9415 ref_type = get_group_alias_ptr_type (first_stmt_info);
9417 else
9419 if (grouped_load)
9420 cst_offset
9421 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9422 * vect_get_place_in_interleaving_chain (stmt_info,
9423 first_stmt_info));
9424 group_size = 1;
9425 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9428 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9429 stride_base
9430 = fold_build_pointer_plus
9431 (DR_BASE_ADDRESS (first_dr_info->dr),
9432 size_binop (PLUS_EXPR,
9433 convert_to_ptrofftype (dr_offset),
9434 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9435 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9437 /* For a load with a loop-invariant (but non-power-of-2) stride
9438 (i.e. not a grouped access) like so:
9440 for (i = 0; i < n; i += stride)
9441 ... = array[i];
9443 we generate a new induction variable and new accesses to
9444 form a new vector (or vectors, depending on ncopies):
9446 for (j = 0; ; j += VF*stride)
9447 tmp1 = array[j];
9448 tmp2 = array[j + stride];
9450 vectemp = {tmp1, tmp2, ...}  */
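 /* As a concrete example (illustrative only): for a V4SI vectype, VF == 4
    and a constant stride of 3 ints, STRIDE_STEP is 12 bytes and IVSTEP is
    4 * 12 == 48 bytes; each copy emits four scalar loads, with RUNNING_OFF
    bumped by STRIDE_STEP between them, while the induction variable itself
    advances by IVSTEP per vectorized iteration.  The scalar results are
    then combined into one V4SI via a CONSTRUCTOR.  */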
9453 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9454 build_int_cst (TREE_TYPE (stride_step), vf));
9456 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9458 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9459 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9460 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
9461 loop, &incr_gsi, insert_after,
9462 &offvar, NULL);
9464 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9466 running_off = offvar;
9467 alias_off = build_int_cst (ref_type, 0);
9468 int nloads = const_nunits;
9469 int lnel = 1;
9470 tree ltype = TREE_TYPE (vectype);
9471 tree lvectype = vectype;
9472 auto_vec<tree> dr_chain;
9473 if (memory_access_type == VMAT_STRIDED_SLP)
9475 if (group_size < const_nunits)
9477 /* First check if vec_init optab supports construction from vector
9478 elts directly. Otherwise avoid emitting a constructor of
9479 vector elements by performing the loads using an integer type
9480 of the same size, constructing a vector of those and then
9481 re-interpreting it as the original vector type. This avoids a
9482 huge runtime penalty due to the general inability to perform
9483 store forwarding from smaller stores to a larger load. */
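 /* For example (illustrative): with a V4SI vectype and GROUP_SIZE == 2, if
    the target cannot build a V4SI directly from two V2SI halves,
    vector_vector_composition_type falls back to PTYPE == DImode pieces and
    VTYPE == V2DI, so each two-element group is loaded as one DImode scalar
    and the resulting V2DI is later VIEW_CONVERTed back to V4SI.  */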
9484 tree ptype;
9485 tree vtype
9486 = vector_vector_composition_type (vectype,
9487 const_nunits / group_size,
9488 &ptype);
9489 if (vtype != NULL_TREE)
9491 nloads = const_nunits / group_size;
9492 lnel = group_size;
9493 lvectype = vtype;
9494 ltype = ptype;
9497 else
9499 nloads = 1;
9500 lnel = const_nunits;
9501 ltype = vectype;
9503 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9505 /* Load vector(1) scalar_type if the vectype is a single-element vector. */
9506 else if (nloads == 1)
9507 ltype = vectype;
9509 if (slp)
9511 /* For SLP permutation support we need to load the whole group,
9512 not only the number of vector stmts the permutation result
9513 fits in. */
9514 if (slp_perm)
9516 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9517 variable VF. */
9518 unsigned int const_vf = vf.to_constant ();
9519 ncopies = CEIL (group_size * const_vf, const_nunits);
9520 dr_chain.create (ncopies);
9522 else
9523 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9525 unsigned int group_el = 0;
9526 unsigned HOST_WIDE_INT
9527 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9528 unsigned int n_groups = 0;
9529 for (j = 0; j < ncopies; j++)
9531 if (nloads > 1)
9532 vec_alloc (v, nloads);
9533 gimple *new_stmt = NULL;
9534 for (i = 0; i < nloads; i++)
9536 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9537 group_el * elsz + cst_offset);
9538 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9539 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9540 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9541 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9542 if (nloads > 1)
9543 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9544 gimple_assign_lhs (new_stmt));
9546 group_el += lnel;
9547 if (! slp
9548 || group_el == group_size)
9550 n_groups++;
9551 /* When doing SLP make sure not to load elements from
9552 the next vector iteration; those will not be accessed,
9553 so just use the last element again. See PR107451. */
9554 if (!slp || known_lt (n_groups, vf))
9556 tree newoff = copy_ssa_name (running_off);
9557 gimple *incr
9558 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9559 running_off, stride_step);
9560 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9561 running_off = newoff;
9563 group_el = 0;
9566 if (nloads > 1)
9568 tree vec_inv = build_constructor (lvectype, v);
9569 new_temp = vect_init_vector (vinfo, stmt_info,
9570 vec_inv, lvectype, gsi);
9571 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9572 if (lvectype != vectype)
9574 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9575 VIEW_CONVERT_EXPR,
9576 build1 (VIEW_CONVERT_EXPR,
9577 vectype, new_temp));
9578 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9582 if (slp)
9584 if (slp_perm)
9585 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9586 else
9587 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9589 else
9591 if (j == 0)
9592 *vec_stmt = new_stmt;
9593 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9596 if (slp_perm)
9598 unsigned n_perms;
9599 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9600 false, &n_perms);
9602 return true;
9605 if (memory_access_type == VMAT_GATHER_SCATTER
9606 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9607 grouped_load = false;
9609 if (grouped_load)
9611 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9612 group_size = DR_GROUP_SIZE (first_stmt_info);
9613 /* For SLP vectorization we directly vectorize a subchain
9614 without permutation. */
9615 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9616 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9617 /* For BB vectorization always use the first stmt to base
9618 the data ref pointer on. */
9619 if (bb_vinfo)
9620 first_stmt_info_for_drptr
9621 = vect_find_first_scalar_stmt_in_slp (slp_node);
9623 /* Check if the chain of loads is already vectorized. */
9624 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9625 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9626 ??? But we can only do so if there is exactly one
9627 as we have no way to get at the rest. Leave the CSE
9628 opportunity alone.
9629 ??? With the group load eventually participating
9630 in multiple different permutations (having multiple
9631 slp nodes which refer to the same group) the CSE
9632 would even produce wrong code. See PR56270. */
9633 && !slp)
9635 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9636 return true;
9638 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9639 group_gap_adj = 0;
9641 /* VEC_NUM is the number of vect stmts to be created for this group. */
9642 if (slp)
9644 grouped_load = false;
9645 /* If an SLP permutation is from N elements to N elements,
9646 and if one vector holds a whole number of N, we can load
9647 the inputs to the permutation in the same way as an
9648 unpermuted sequence. In other cases we need to load the
9649 whole group, not only the number of vector stmts the
9650 permutation result fits in. */
9651 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9652 if (slp_perm
9653 && (group_size != scalar_lanes
9654 || !multiple_p (nunits, group_size)))
9656 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9657 variable VF; see vect_transform_slp_perm_load. */
9658 unsigned int const_vf = vf.to_constant ();
9659 unsigned int const_nunits = nunits.to_constant ();
9660 vec_num = CEIL (group_size * const_vf, const_nunits);
9661 group_gap_adj = vf * group_size - nunits * vec_num;
9663 else
9665 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9666 group_gap_adj
9667 = group_size - scalar_lanes;
9670 else
9671 vec_num = group_size;
9673 ref_type = get_group_alias_ptr_type (first_stmt_info);
9675 else
9677 first_stmt_info = stmt_info;
9678 first_dr_info = dr_info;
9679 group_size = vec_num = 1;
9680 group_gap_adj = 0;
9681 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9682 if (slp)
9683 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9686 gcc_assert (alignment_support_scheme);
9687 vec_loop_masks *loop_masks
9688 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9689 ? &LOOP_VINFO_MASKS (loop_vinfo)
9690 : NULL);
9691 vec_loop_lens *loop_lens
9692 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9693 ? &LOOP_VINFO_LENS (loop_vinfo)
9694 : NULL);
9696 /* Shouldn't go with length-based approach if fully masked. */
9697 gcc_assert (!loop_lens || !loop_masks);
9699 /* Targets with store-lane instructions must not require explicit
9700 realignment. vect_supportable_dr_alignment always returns either
9701 dr_aligned or dr_unaligned_supported for masked operations. */
9702 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9703 && !mask
9704 && !loop_masks)
9705 || alignment_support_scheme == dr_aligned
9706 || alignment_support_scheme == dr_unaligned_supported);
9708 /* In case the vectorization factor (VF) is bigger than the number
9709 of elements that we can fit in a vectype (nunits), we have to generate
9710 more than one vector stmt - i.e., we need to "unroll" the
9711 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9712 from one copy of the vector stmt to the next, in the field
9713 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9714 stages to find the correct vector defs to be used when vectorizing
9715 stmts that use the defs of the current stmt. The example below
9716 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9717 need to create 4 vectorized stmts):
9719 before vectorization:
9720 RELATED_STMT VEC_STMT
9721 S1: x = memref - -
9722 S2: z = x + 1 - -
9724 step 1: vectorize stmt S1:
9725 We first create the vector stmt VS1_0, and, as usual, record a
9726 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9727 Next, we create the vector stmt VS1_1, and record a pointer to
9728 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9729 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9730 stmts and pointers:
9731 RELATED_STMT VEC_STMT
9732 VS1_0: vx0 = memref0 VS1_1 -
9733 VS1_1: vx1 = memref1 VS1_2 -
9734 VS1_2: vx2 = memref2 VS1_3 -
9735 VS1_3: vx3 = memref3 - -
9736 S1: x = load - VS1_0
9737 S2: z = x + 1 - -
9740 /* In case of interleaving (non-unit grouped access):
9742 S1: x2 = &base + 2
9743 S2: x0 = &base
9744 S3: x1 = &base + 1
9745 S4: x3 = &base + 3
9747 Vectorized loads are created in the order of memory accesses
9748 starting from the access of the first stmt of the chain:
9750 VS1: vx0 = &base
9751 VS2: vx1 = &base + vec_size*1
9752 VS3: vx3 = &base + vec_size*2
9753 VS4: vx4 = &base + vec_size*3
9755 Then permutation statements are generated:
9757 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9758 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9761 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9762 (the order of the data-refs in the output of vect_permute_load_chain
9763 corresponds to the order of scalar stmts in the interleaving chain - see
9764 the documentation of vect_permute_load_chain()).
9765 The generation of permutation stmts and recording them in
9766 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9768 In case of both multiple types and interleaving, the vector loads and
9769 permutation stmts above are created for every copy. The result vector
9770 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9771 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9773 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9774 on a target that supports unaligned accesses (dr_unaligned_supported)
9775 we generate the following code:
9776 p = initial_addr;
9777 indx = 0;
9778 loop {
9779 p = p + indx * vectype_size;
9780 vec_dest = *(p);
9781 indx = indx + 1;
9784 Otherwise, the data reference is potentially unaligned on a target that
9785 does not support unaligned accesses (dr_explicit_realign_optimized) -
9786 then generate the following code, in which the data in each iteration is
9787 obtained by two vector loads, one from the previous iteration, and one
9788 from the current iteration:
9789 p1 = initial_addr;
9790 msq_init = *(floor(p1))
9791 p2 = initial_addr + VS - 1;
9792 realignment_token = call target_builtin;
9793 indx = 0;
9794 loop {
9795 p2 = p2 + indx * vectype_size
9796 lsq = *(floor(p2))
9797 vec_dest = realign_load (msq, lsq, realignment_token)
9798 indx = indx + 1;
9799 msq = lsq;
9800 } */
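 /* To make the realignment scheme concrete (illustrative only): with V4SI
    vectors and an address misaligned by one element, floor (p1) loads
    {a0, a1, a2, a3} and the first floor (p2) loads {a4, a5, a6, a7};
    realign_load then selects {a1, a2, a3, a4} from the concatenation of
    msq and lsq as directed by realignment_token, and lsq is carried over
    as the next iteration's msq so that each subsequent vector needs only
    one new load.  */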
9802 /* If the misalignment remains the same throughout the execution of the
9803 loop, we can create the init_addr and permutation mask at the loop
9804 preheader. Otherwise, it needs to be created inside the loop.
9805 This can only occur when vectorizing memory accesses in the inner-loop
9806 nested within an outer-loop that is being vectorized. */
9808 if (nested_in_vect_loop
9809 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9810 GET_MODE_SIZE (TYPE_MODE (vectype))))
9812 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9813 compute_in_loop = true;
9816 bool diff_first_stmt_info
9817 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9819 tree offset = NULL_TREE;
9820 if ((alignment_support_scheme == dr_explicit_realign_optimized
9821 || alignment_support_scheme == dr_explicit_realign)
9822 && !compute_in_loop)
9824 /* If we have a different first_stmt_info, we can't set up realignment
9825 here, since we can't guarantee that first_stmt_info's DR has been
9826 initialized yet; use first_stmt_info_for_drptr's DR by bumping the
9827 distance from first_stmt_info's DR instead, as below. */
9828 if (!diff_first_stmt_info)
9829 msq = vect_setup_realignment (vinfo,
9830 first_stmt_info, gsi, &realignment_token,
9831 alignment_support_scheme, NULL_TREE,
9832 &at_loop);
9833 if (alignment_support_scheme == dr_explicit_realign_optimized)
9835 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9836 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9837 size_one_node);
9838 gcc_assert (!first_stmt_info_for_drptr);
9841 else
9842 at_loop = loop;
9844 if (!known_eq (poffset, 0))
9845 offset = (offset
9846 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9847 : size_int (poffset));
9849 tree bump;
9850 tree vec_offset = NULL_TREE;
9851 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9853 aggr_type = NULL_TREE;
9854 bump = NULL_TREE;
9856 else if (memory_access_type == VMAT_GATHER_SCATTER)
9858 aggr_type = elem_type;
9859 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9860 &bump, &vec_offset);
9862 else
9864 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9865 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9866 else
9867 aggr_type = vectype;
9868 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9869 memory_access_type);
9872 auto_vec<tree> vec_offsets;
9873 auto_vec<tree> vec_masks;
9874 if (mask)
9876 if (slp_node)
9877 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9878 &vec_masks);
9879 else
9880 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9881 &vec_masks, mask_vectype);
9883 tree vec_mask = NULL_TREE;
9884 poly_uint64 group_elt = 0;
9885 for (j = 0; j < ncopies; j++)
9887 /* 1. Create the vector or array pointer update chain. */
9888 if (j == 0)
9890 bool simd_lane_access_p
9891 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9892 if (simd_lane_access_p
9893 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9894 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9895 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9896 && integer_zerop (DR_INIT (first_dr_info->dr))
9897 && alias_sets_conflict_p (get_alias_set (aggr_type),
9898 get_alias_set (TREE_TYPE (ref_type)))
9899 && (alignment_support_scheme == dr_aligned
9900 || alignment_support_scheme == dr_unaligned_supported))
9902 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9903 dataref_offset = build_int_cst (ref_type, 0);
9905 else if (diff_first_stmt_info)
9907 dataref_ptr
9908 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9909 aggr_type, at_loop, offset, &dummy,
9910 gsi, &ptr_incr, simd_lane_access_p,
9911 bump);
9912 /* Adjust the pointer by the difference to first_stmt. */
9913 data_reference_p ptrdr
9914 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9915 tree diff
9916 = fold_convert (sizetype,
9917 size_binop (MINUS_EXPR,
9918 DR_INIT (first_dr_info->dr),
9919 DR_INIT (ptrdr)));
9920 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9921 stmt_info, diff);
9922 if (alignment_support_scheme == dr_explicit_realign)
9924 msq = vect_setup_realignment (vinfo,
9925 first_stmt_info_for_drptr, gsi,
9926 &realignment_token,
9927 alignment_support_scheme,
9928 dataref_ptr, &at_loop);
9929 gcc_assert (!compute_in_loop);
9932 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9934 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9935 slp_node, &gs_info, &dataref_ptr,
9936 &vec_offsets);
9938 else
9939 dataref_ptr
9940 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9941 at_loop,
9942 offset, &dummy, gsi, &ptr_incr,
9943 simd_lane_access_p, bump);
9944 if (mask)
9945 vec_mask = vec_masks[0];
9947 else
9949 if (dataref_offset)
9950 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9951 bump);
9952 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9953 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9954 stmt_info, bump);
9955 if (mask)
9956 vec_mask = vec_masks[j];
9959 if (grouped_load || slp_perm)
9960 dr_chain.create (vec_num);
9962 gimple *new_stmt = NULL;
9963 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9965 tree vec_array;
9967 vec_array = create_vector_array (vectype, vec_num);
9969 tree final_mask = NULL_TREE;
9970 if (loop_masks)
9971 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9972 vectype, j);
9973 if (vec_mask)
9974 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9975 final_mask, vec_mask, gsi);
9977 gcall *call;
9978 if (final_mask)
9980 /* Emit:
9981 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9982 VEC_MASK). */
9983 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9984 tree alias_ptr = build_int_cst (ref_type, align);
9985 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9986 dataref_ptr, alias_ptr,
9987 final_mask);
9989 else
9991 /* Emit:
9992 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9993 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9994 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9996 gimple_call_set_lhs (call, vec_array);
9997 gimple_call_set_nothrow (call, true);
9998 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9999 new_stmt = call;
10001 /* Extract each vector into an SSA_NAME. */
10002 for (i = 0; i < vec_num; i++)
10004 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10005 vec_array, i);
10006 dr_chain.quick_push (new_temp);
10009 /* Record the mapping between SSA_NAMEs and statements. */
10010 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10012 /* Record that VEC_ARRAY is now dead. */
10013 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10015 else
10017 for (i = 0; i < vec_num; i++)
10019 tree final_mask = NULL_TREE;
10020 if (loop_masks
10021 && memory_access_type != VMAT_INVARIANT)
10022 final_mask = vect_get_loop_mask (gsi, loop_masks,
10023 vec_num * ncopies,
10024 vectype, vec_num * j + i);
10025 if (vec_mask)
10026 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10027 final_mask, vec_mask, gsi);
10029 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10030 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10031 gsi, stmt_info, bump);
10033 /* 2. Create the vector-load in the loop. */
10034 switch (alignment_support_scheme)
10036 case dr_aligned:
10037 case dr_unaligned_supported:
10039 unsigned int misalign;
10040 unsigned HOST_WIDE_INT align;
10042 if (memory_access_type == VMAT_GATHER_SCATTER
10043 && gs_info.ifn != IFN_LAST)
10045 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10046 vec_offset = vec_offsets[vec_num * j + i];
10047 tree zero = build_zero_cst (vectype);
10048 tree scale = size_int (gs_info.scale);
10049 gcall *call;
10050 if (final_mask)
10051 call = gimple_build_call_internal
10052 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
10053 vec_offset, scale, zero, final_mask);
10054 else
10055 call = gimple_build_call_internal
10056 (IFN_GATHER_LOAD, 4, dataref_ptr,
10057 vec_offset, scale, zero);
10058 gimple_call_set_nothrow (call, true);
10059 new_stmt = call;
10060 data_ref = NULL_TREE;
10061 break;
10063 else if (memory_access_type == VMAT_GATHER_SCATTER)
10065 /* Emulated gather-scatter. */
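 /* There is no target gather instruction available here, so the gather is
    open-coded - each offset lane is extracted from VEC_OFFSET with a
    BIT_FIELD_REF, converted and scaled, added to DATAREF_PTR, and used for
    a scalar load; the scalar results are collected into a CONSTRUCTOR of
    VECTYPE.  */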
10066 gcc_assert (!final_mask);
10067 unsigned HOST_WIDE_INT const_nunits
10068 = nunits.to_constant ();
10069 unsigned HOST_WIDE_INT const_offset_nunits
10070 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
10071 .to_constant ();
10072 vec<constructor_elt, va_gc> *ctor_elts;
10073 vec_alloc (ctor_elts, const_nunits);
10074 gimple_seq stmts = NULL;
10075 /* We support offset vectors with more elements
10076 than the data vector for now. */
10077 unsigned HOST_WIDE_INT factor
10078 = const_offset_nunits / const_nunits;
10079 vec_offset = vec_offsets[j / factor];
10080 unsigned elt_offset = (j % factor) * const_nunits;
10081 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
10082 tree scale = size_int (gs_info.scale);
10083 align
10084 = get_object_alignment (DR_REF (first_dr_info->dr));
10085 tree ltype = build_aligned_type (TREE_TYPE (vectype),
10086 align);
10087 for (unsigned k = 0; k < const_nunits; ++k)
10089 tree boff = size_binop (MULT_EXPR,
10090 TYPE_SIZE (idx_type),
10091 bitsize_int
10092 (k + elt_offset));
10093 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
10094 idx_type, vec_offset,
10095 TYPE_SIZE (idx_type),
10096 boff);
10097 idx = gimple_convert (&stmts, sizetype, idx);
10098 idx = gimple_build (&stmts, MULT_EXPR,
10099 sizetype, idx, scale);
10100 tree ptr = gimple_build (&stmts, PLUS_EXPR,
10101 TREE_TYPE (dataref_ptr),
10102 dataref_ptr, idx);
10103 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
10104 tree elt = make_ssa_name (TREE_TYPE (vectype));
10105 tree ref = build2 (MEM_REF, ltype, ptr,
10106 build_int_cst (ref_type, 0));
10107 new_stmt = gimple_build_assign (elt, ref);
10108 gimple_set_vuse (new_stmt,
10109 gimple_vuse (gsi_stmt (*gsi)));
10110 gimple_seq_add_stmt (&stmts, new_stmt);
10111 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
10113 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
10114 new_stmt = gimple_build_assign (NULL_TREE,
10115 build_constructor
10116 (vectype, ctor_elts));
10117 data_ref = NULL_TREE;
10118 break;
10121 align =
10122 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
10123 if (alignment_support_scheme == dr_aligned)
10124 misalign = 0;
10125 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
10127 align = dr_alignment
10128 (vect_dr_behavior (vinfo, first_dr_info));
10129 misalign = 0;
10131 else
10132 misalign = misalignment;
10133 if (dataref_offset == NULL_TREE
10134 && TREE_CODE (dataref_ptr) == SSA_NAME)
10135 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
10136 align, misalign);
10137 align = least_bit_hwi (misalign | align);
10139 if (final_mask)
10141 tree ptr = build_int_cst (ref_type,
10142 align * BITS_PER_UNIT);
10143 gcall *call
10144 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
10145 dataref_ptr, ptr,
10146 final_mask);
10147 gimple_call_set_nothrow (call, true);
10148 new_stmt = call;
10149 data_ref = NULL_TREE;
10151 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
10153 tree final_len
10154 = vect_get_loop_len (loop_vinfo, loop_lens,
10155 vec_num * ncopies,
10156 vec_num * j + i);
10157 tree ptr = build_int_cst (ref_type,
10158 align * BITS_PER_UNIT);
10160 machine_mode vmode = TYPE_MODE (vectype);
10161 opt_machine_mode new_ovmode
10162 = get_len_load_store_mode (vmode, true);
10163 machine_mode new_vmode = new_ovmode.require ();
10164 tree qi_type = unsigned_intQI_type_node;
10166 signed char biasval =
10167 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
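 /* The partial load/store bias is a target-provided constant (0 or -1)
    that IFN_LEN_LOAD applies to its length operand; it comes from
    LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS.  */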
10169 tree bias = build_int_cst (intQI_type_node, biasval);
10171 gcall *call
10172 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
10173 dataref_ptr, ptr,
10174 final_len, bias);
10175 gimple_call_set_nothrow (call, true);
10176 new_stmt = call;
10177 data_ref = NULL_TREE;
10179 /* Need conversion if it's wrapped with VnQI. */
10180 if (vmode != new_vmode)
10182 tree new_vtype
10183 = build_vector_type_for_mode (qi_type, new_vmode);
10184 tree var = vect_get_new_ssa_name (new_vtype,
10185 vect_simple_var);
10186 gimple_set_lhs (call, var);
10187 vect_finish_stmt_generation (vinfo, stmt_info, call,
10188 gsi);
10189 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
10190 new_stmt
10191 = gimple_build_assign (vec_dest,
10192 VIEW_CONVERT_EXPR, op);
10195 else
10197 tree ltype = vectype;
10198 tree new_vtype = NULL_TREE;
10199 unsigned HOST_WIDE_INT gap
10200 = DR_GROUP_GAP (first_stmt_info);
10201 unsigned int vect_align
10202 = vect_known_alignment_in_bytes (first_dr_info,
10203 vectype);
10204 unsigned int scalar_dr_size
10205 = vect_get_scalar_dr_size (first_dr_info);
10206 /* If there's no peeling for gaps but we have a gap
10207 with slp loads then load the lower half of the
10208 vector only. See get_group_load_store_type for
10209 when we apply this optimization. */
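 /* For example (illustrative): with a V4SI vectype, GROUP_SIZE == 4 and a
    trailing gap of 2, only the low half is loaded as LTYPE (a two-element
    or DImode piece from vector_vector_composition_type), the missing half
    of the constructor is filled with zeros, and the result is
    VIEW_CONVERTed to V4SI when necessary.  */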
10210 if (slp
10211 && loop_vinfo
10212 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
10213 && gap != 0
10214 && known_eq (nunits, (group_size - gap) * 2)
10215 && known_eq (nunits, group_size)
10216 && gap >= (vect_align / scalar_dr_size))
10218 tree half_vtype;
10219 new_vtype
10220 = vector_vector_composition_type (vectype, 2,
10221 &half_vtype);
10222 if (new_vtype != NULL_TREE)
10223 ltype = half_vtype;
10225 tree offset
10226 = (dataref_offset ? dataref_offset
10227 : build_int_cst (ref_type, 0));
10228 if (ltype != vectype
10229 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10231 unsigned HOST_WIDE_INT gap_offset
10232 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
10233 tree gapcst = build_int_cst (ref_type, gap_offset);
10234 offset = size_binop (PLUS_EXPR, offset, gapcst);
10236 data_ref
10237 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
10238 if (alignment_support_scheme == dr_aligned)
10240 else
10241 TREE_TYPE (data_ref)
10242 = build_aligned_type (TREE_TYPE (data_ref),
10243 align * BITS_PER_UNIT);
10244 if (ltype != vectype)
10246 vect_copy_ref_info (data_ref,
10247 DR_REF (first_dr_info->dr));
10248 tree tem = make_ssa_name (ltype);
10249 new_stmt = gimple_build_assign (tem, data_ref);
10250 vect_finish_stmt_generation (vinfo, stmt_info,
10251 new_stmt, gsi);
10252 data_ref = NULL;
10253 vec<constructor_elt, va_gc> *v;
10254 vec_alloc (v, 2);
10255 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10257 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10258 build_zero_cst (ltype));
10259 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10261 else
10263 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10264 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10265 build_zero_cst (ltype));
10267 gcc_assert (new_vtype != NULL_TREE);
10268 if (new_vtype == vectype)
10269 new_stmt = gimple_build_assign (
10270 vec_dest, build_constructor (vectype, v));
10271 else
10273 tree new_vname = make_ssa_name (new_vtype);
10274 new_stmt = gimple_build_assign (
10275 new_vname, build_constructor (new_vtype, v));
10276 vect_finish_stmt_generation (vinfo, stmt_info,
10277 new_stmt, gsi);
10278 new_stmt = gimple_build_assign (
10279 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
10280 new_vname));
10284 break;
10286 case dr_explicit_realign:
10288 tree ptr, bump;
10290 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10292 if (compute_in_loop)
10293 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10294 &realignment_token,
10295 dr_explicit_realign,
10296 dataref_ptr, NULL);
10298 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10299 ptr = copy_ssa_name (dataref_ptr);
10300 else
10301 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
10302 // For explicit realign the target alignment should be
10303 // known at compile time.
10304 unsigned HOST_WIDE_INT align =
10305 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
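 /* Masking the pointer with -ALIGN rounds it down to the previous
    ALIGN-byte boundary, i.e. this computes the floor (...) value used in
    the realignment scheme described above.  */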
10306 new_stmt = gimple_build_assign
10307 (ptr, BIT_AND_EXPR, dataref_ptr,
10308 build_int_cst
10309 (TREE_TYPE (dataref_ptr),
10310 -(HOST_WIDE_INT) align));
10311 vect_finish_stmt_generation (vinfo, stmt_info,
10312 new_stmt, gsi);
10313 data_ref
10314 = build2 (MEM_REF, vectype, ptr,
10315 build_int_cst (ref_type, 0));
10316 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10317 vec_dest = vect_create_destination_var (scalar_dest,
10318 vectype);
10319 new_stmt = gimple_build_assign (vec_dest, data_ref);
10320 new_temp = make_ssa_name (vec_dest, new_stmt);
10321 gimple_assign_set_lhs (new_stmt, new_temp);
10322 gimple_move_vops (new_stmt, stmt_info->stmt);
10323 vect_finish_stmt_generation (vinfo, stmt_info,
10324 new_stmt, gsi);
10325 msq = new_temp;
10327 bump = size_binop (MULT_EXPR, vs,
10328 TYPE_SIZE_UNIT (elem_type));
10329 bump = size_binop (MINUS_EXPR, bump, size_one_node);
10330 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10331 stmt_info, bump);
10332 new_stmt = gimple_build_assign
10333 (NULL_TREE, BIT_AND_EXPR, ptr,
10334 build_int_cst
10335 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10336 if (TREE_CODE (ptr) == SSA_NAME)
10337 ptr = copy_ssa_name (ptr, new_stmt);
10338 else
10339 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
10340 gimple_assign_set_lhs (new_stmt, ptr);
10341 vect_finish_stmt_generation (vinfo, stmt_info,
10342 new_stmt, gsi);
10343 data_ref
10344 = build2 (MEM_REF, vectype, ptr,
10345 build_int_cst (ref_type, 0));
10346 break;
10348 case dr_explicit_realign_optimized:
10350 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10351 new_temp = copy_ssa_name (dataref_ptr);
10352 else
10353 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10354 // We should only be doing this if we know the target
10355 // alignment at compile time.
10356 unsigned HOST_WIDE_INT align =
10357 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10358 new_stmt = gimple_build_assign
10359 (new_temp, BIT_AND_EXPR, dataref_ptr,
10360 build_int_cst (TREE_TYPE (dataref_ptr),
10361 -(HOST_WIDE_INT) align));
10362 vect_finish_stmt_generation (vinfo, stmt_info,
10363 new_stmt, gsi);
10364 data_ref
10365 = build2 (MEM_REF, vectype, new_temp,
10366 build_int_cst (ref_type, 0));
10367 break;
10369 default:
10370 gcc_unreachable ();
10372 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10373 /* DATA_REF is null if we've already built the statement. */
10374 if (data_ref)
10376 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10377 new_stmt = gimple_build_assign (vec_dest, data_ref);
10379 new_temp = make_ssa_name (vec_dest, new_stmt);
10380 gimple_set_lhs (new_stmt, new_temp);
10381 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10383 /* 3. Handle explicit realignment if necessary/supported.
10384 Create in loop:
10385 vec_dest = realign_load (msq, lsq, realignment_token) */
10386 if (alignment_support_scheme == dr_explicit_realign_optimized
10387 || alignment_support_scheme == dr_explicit_realign)
10389 lsq = gimple_assign_lhs (new_stmt);
10390 if (!realignment_token)
10391 realignment_token = dataref_ptr;
10392 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10393 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10394 msq, lsq, realignment_token);
10395 new_temp = make_ssa_name (vec_dest, new_stmt);
10396 gimple_assign_set_lhs (new_stmt, new_temp);
10397 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10399 if (alignment_support_scheme == dr_explicit_realign_optimized)
10401 gcc_assert (phi);
10402 if (i == vec_num - 1 && j == ncopies - 1)
10403 add_phi_arg (phi, lsq,
10404 loop_latch_edge (containing_loop),
10405 UNKNOWN_LOCATION);
10406 msq = lsq;
10410 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10412 tree perm_mask = perm_mask_for_reverse (vectype);
10413 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10414 perm_mask, stmt_info, gsi);
10415 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10418 /* Collect vector loads and later create their permutation in
10419 vect_transform_grouped_load (). */
10420 if (grouped_load || slp_perm)
10421 dr_chain.quick_push (new_temp);
10423 /* Store vector loads in the corresponding SLP_NODE. */
10424 if (slp && !slp_perm)
10425 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10427 /* With an SLP permutation we load the gaps as well; without
10428 one we need to skip the gaps after we manage to fully load
10429 all elements. group_gap_adj is DR_GROUP_SIZE here. */
10430 group_elt += nunits;
10431 if (maybe_ne (group_gap_adj, 0U)
10432 && !slp_perm
10433 && known_eq (group_elt, group_size - group_gap_adj))
10435 poly_wide_int bump_val
10436 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10437 * group_gap_adj);
10438 if (tree_int_cst_sgn
10439 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10440 bump_val = -bump_val;
10441 tree bump = wide_int_to_tree (sizetype, bump_val);
10442 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10443 gsi, stmt_info, bump);
10444 group_elt = 0;
10447 /* Bump the vector pointer to account for a gap or for excess
10448 elements loaded for a permuted SLP load. */
10449 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10451 poly_wide_int bump_val
10452 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10453 * group_gap_adj);
10454 if (tree_int_cst_sgn
10455 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10456 bump_val = -bump_val;
10457 tree bump = wide_int_to_tree (sizetype, bump_val);
10458 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10459 stmt_info, bump);
10463 if (slp && !slp_perm)
10464 continue;
10466 if (slp_perm)
10468 unsigned n_perms;
10469 /* For SLP we know we've seen all possible uses of dr_chain so
10470 direct vect_transform_slp_perm_load to DCE the unused parts.
10471 ??? This is a hack to prevent compile-time issues as seen
10472 in PR101120 and friends. */
10473 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10474 gsi, vf, false, &n_perms,
10475 nullptr, true);
10476 gcc_assert (ok);
10478 else
10480 if (grouped_load)
10482 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10483 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10484 group_size, gsi);
10485 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10487 else
10489 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10492 dr_chain.release ();
10494 if (!slp)
10495 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10497 return true;
10500 /* Function vect_is_simple_cond.
10502 Input:
10503 LOOP - the loop that is being vectorized.
10504 COND - Condition that is checked for simple use.
10506 Output:
10507 *COMP_VECTYPE - the vector type for the comparison.
10508 *DTS - The def types for the arguments of the comparison.
10510 Returns whether COND can be vectorized. Checks whether the
10511 condition operands are supportable using vect_is_simple_use. */
10513 static bool
10514 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10515 slp_tree slp_node, tree *comp_vectype,
10516 enum vect_def_type *dts, tree vectype)
10518 tree lhs, rhs;
10519 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10520 slp_tree slp_op;
10522 /* Mask case. */
10523 if (TREE_CODE (cond) == SSA_NAME
10524 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10526 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10527 &slp_op, &dts[0], comp_vectype)
10528 || !*comp_vectype
10529 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10530 return false;
10531 return true;
10534 if (!COMPARISON_CLASS_P (cond))
10535 return false;
10537 lhs = TREE_OPERAND (cond, 0);
10538 rhs = TREE_OPERAND (cond, 1);
10540 if (TREE_CODE (lhs) == SSA_NAME)
10542 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10543 &lhs, &slp_op, &dts[0], &vectype1))
10544 return false;
10546 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10547 || TREE_CODE (lhs) == FIXED_CST)
10548 dts[0] = vect_constant_def;
10549 else
10550 return false;
10552 if (TREE_CODE (rhs) == SSA_NAME)
10554 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10555 &rhs, &slp_op, &dts[1], &vectype2))
10556 return false;
10558 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10559 || TREE_CODE (rhs) == FIXED_CST)
10560 dts[1] = vect_constant_def;
10561 else
10562 return false;
10564 if (vectype1 && vectype2
10565 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10566 TYPE_VECTOR_SUBPARTS (vectype2)))
10567 return false;
10569 *comp_vectype = vectype1 ? vectype1 : vectype2;
10570 /* Invariant comparison. */
10571 if (! *comp_vectype)
10573 tree scalar_type = TREE_TYPE (lhs);
10574 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10575 *comp_vectype = truth_type_for (vectype);
10576 else
10578 /* If we can widen the comparison to match vectype do so. */
10579 if (INTEGRAL_TYPE_P (scalar_type)
10580 && !slp_node
10581 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10582 TYPE_SIZE (TREE_TYPE (vectype))))
10583 scalar_type = build_nonstandard_integer_type
10584 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10585 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10586 slp_node);
10590 return true;
10593 /* vectorizable_condition.
10595 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10596 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10597 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10598 at GSI.
10600 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10602 Return true if STMT_INFO is vectorizable in this way. */
10604 static bool
10605 vectorizable_condition (vec_info *vinfo,
10606 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10607 gimple **vec_stmt,
10608 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10610 tree scalar_dest = NULL_TREE;
10611 tree vec_dest = NULL_TREE;
10612 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10613 tree then_clause, else_clause;
10614 tree comp_vectype = NULL_TREE;
10615 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10616 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10617 tree vec_compare;
10618 tree new_temp;
10619 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10620 enum vect_def_type dts[4]
10621 = {vect_unknown_def_type, vect_unknown_def_type,
10622 vect_unknown_def_type, vect_unknown_def_type};
10623 int ndts = 4;
10624 int ncopies;
10625 int vec_num;
10626 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10627 int i;
10628 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10629 vec<tree> vec_oprnds0 = vNULL;
10630 vec<tree> vec_oprnds1 = vNULL;
10631 vec<tree> vec_oprnds2 = vNULL;
10632 vec<tree> vec_oprnds3 = vNULL;
10633 tree vec_cmp_type;
10634 bool masked = false;
10636 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10637 return false;
10639 /* Is this a vectorizable conditional operation? */
10640 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10641 if (!stmt)
10642 return false;
10644 code = gimple_assign_rhs_code (stmt);
10645 if (code != COND_EXPR)
10646 return false;
10648 stmt_vec_info reduc_info = NULL;
10649 int reduc_index = -1;
10650 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10651 bool for_reduction
10652 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10653 if (for_reduction)
10655 if (slp_node)
10656 return false;
10657 reduc_info = info_for_reduction (vinfo, stmt_info);
10658 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10659 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10660 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10661 || reduc_index != -1);
10663 else
10665 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10666 return false;
10669 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10670 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10672 if (slp_node)
10674 ncopies = 1;
10675 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10677 else
10679 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10680 vec_num = 1;
10683 gcc_assert (ncopies >= 1);
10684 if (for_reduction && ncopies > 1)
10685 return false; /* FORNOW */
10687 cond_expr = gimple_assign_rhs1 (stmt);
10689 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10690 &comp_vectype, &dts[0], vectype)
10691 || !comp_vectype)
10692 return false;
10694 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10695 slp_tree then_slp_node, else_slp_node;
10696 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10697 &then_clause, &then_slp_node, &dts[2], &vectype1))
10698 return false;
10699 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10700 &else_clause, &else_slp_node, &dts[3], &vectype2))
10701 return false;
10703 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10704 return false;
10706 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10707 return false;
10709 masked = !COMPARISON_CLASS_P (cond_expr);
10710 vec_cmp_type = truth_type_for (comp_vectype);
10712 if (vec_cmp_type == NULL_TREE)
10713 return false;
10715 cond_code = TREE_CODE (cond_expr);
10716 if (!masked)
10718 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10719 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10722 /* For conditional reductions, the "then" value needs to be the candidate
10723 value calculated by this iteration while the "else" value needs to be
10724 the result carried over from previous iterations. If the COND_EXPR
10725 is the other way around, we need to swap it. */
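/* Sketch (illustrative only):

	last_1 = cond_2 ? val_3 : last_4;	candidate in the "then" slot,
						nothing to do

	last_1 = cond_2 ? last_4 : val_3;	carried-over result in the
						"then" slot

   The second form is the "other way around" case: the clauses are swapped
   and the comparison (or mask) is inverted so the semantics are kept.  */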
10726 bool must_invert_cmp_result = false;
10727 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10729 if (masked)
10730 must_invert_cmp_result = true;
10731 else
10733 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10734 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10735 if (new_code == ERROR_MARK)
10736 must_invert_cmp_result = true;
10737 else
10739 cond_code = new_code;
10740 /* Make sure we don't accidentally use the old condition. */
10741 cond_expr = NULL_TREE;
10744 std::swap (then_clause, else_clause);
10747 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10749 /* Boolean values may have another representation in vectors
10750 and therefore we prefer bit operations over comparison for
10751 them (which also works for scalar masks). We store opcodes
10752 to use in bitop1 and bitop2. Statement is vectorized as
10753 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10754 depending on bitop1 and bitop2 arity. */
10755 switch (cond_code)
10757 case GT_EXPR:
10758 bitop1 = BIT_NOT_EXPR;
10759 bitop2 = BIT_AND_EXPR;
10760 break;
10761 case GE_EXPR:
10762 bitop1 = BIT_NOT_EXPR;
10763 bitop2 = BIT_IOR_EXPR;
10764 break;
10765 case LT_EXPR:
10766 bitop1 = BIT_NOT_EXPR;
10767 bitop2 = BIT_AND_EXPR;
10768 std::swap (cond_expr0, cond_expr1);
10769 break;
10770 case LE_EXPR:
10771 bitop1 = BIT_NOT_EXPR;
10772 bitop2 = BIT_IOR_EXPR;
10773 std::swap (cond_expr0, cond_expr1);
10774 break;
10775 case NE_EXPR:
10776 bitop1 = BIT_XOR_EXPR;
10777 break;
10778 case EQ_EXPR:
10779 bitop1 = BIT_XOR_EXPR;
10780 bitop2 = BIT_NOT_EXPR;
10781 break;
10782 default:
10783 return false;
10785 cond_code = SSA_NAME;
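/* Worked example of the mapping above (illustrative): for boolean operands
   a and b, GT_EXPR "a > b" holds exactly when a is true and b is false, so
   it is emitted as

	tmp_1 = ~b;		bitop1 = BIT_NOT_EXPR applied to rhs2
	cmp_2 = a & tmp_1;	bitop2 = BIT_AND_EXPR with rhs1

   while EQ_EXPR becomes ~(a ^ b), i.e. BIT_XOR_EXPR followed by
   BIT_NOT_EXPR.  */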
10788 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10789 && reduction_type == EXTRACT_LAST_REDUCTION
10790 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10792 if (dump_enabled_p ())
10793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10794 "reduction comparison operation not supported.\n");
10795 return false;
10798 if (!vec_stmt)
10800 if (bitop1 != NOP_EXPR)
10802 machine_mode mode = TYPE_MODE (comp_vectype);
10803 optab optab;
10805 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10806 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10807 return false;
10809 if (bitop2 != NOP_EXPR)
10811 optab = optab_for_tree_code (bitop2, comp_vectype,
10812 optab_default);
10813 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10814 return false;
10818 vect_cost_for_stmt kind = vector_stmt;
10819 if (reduction_type == EXTRACT_LAST_REDUCTION)
10820 /* Count one reduction-like operation per vector. */
10821 kind = vec_to_scalar;
10822 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10823 return false;
10825 if (slp_node
10826 && (!vect_maybe_update_slp_op_vectype
10827 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10828 || (op_adjust == 1
10829 && !vect_maybe_update_slp_op_vectype
10830 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10831 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10832 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10834 if (dump_enabled_p ())
10835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10836 "incompatible vector types for invariants\n");
10837 return false;
10840 if (loop_vinfo && for_reduction
10841 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10843 if (reduction_type == EXTRACT_LAST_REDUCTION)
10844 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10845 ncopies * vec_num, vectype, NULL);
10846 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10847 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10849 if (dump_enabled_p ())
10850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10851 "conditional reduction prevents the use"
10852 " of partial vectors.\n");
10853 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10857 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10858 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10859 cost_vec, kind);
10860 return true;
10863 /* Transform. */
10865 /* Handle def. */
10866 scalar_dest = gimple_assign_lhs (stmt);
10867 if (reduction_type != EXTRACT_LAST_REDUCTION)
10868 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10870 bool swap_cond_operands = false;
10872 /* See whether another part of the vectorized code applies a loop
10873 mask to the condition, or to its inverse. */
10875 vec_loop_masks *masks = NULL;
10876 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10878 if (reduction_type == EXTRACT_LAST_REDUCTION)
10879 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10880 else
10882 scalar_cond_masked_key cond (cond_expr, ncopies);
10883 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10884 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10885 else
10887 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10888 tree_code orig_code = cond.code;
10889 cond.code = invert_tree_comparison (cond.code, honor_nans);
10890 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10892 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10893 cond_code = cond.code;
10894 swap_cond_operands = true;
10896 else
10898 /* Try the inverse of the current mask. We check if the
10899 inverse mask is live and if so we generate a negate of
10900 the current mask such that we still honor NaNs. */
10901 cond.inverted_p = true;
10902 cond.code = orig_code;
10903 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10905 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10906 cond_code = cond.code;
10907 swap_cond_operands = true;
10908 must_invert_cmp_result = true;
10915 /* Handle cond expr. */
10916 if (masked)
10917 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10918 cond_expr, &vec_oprnds0, comp_vectype,
10919 then_clause, &vec_oprnds2, vectype,
10920 reduction_type != EXTRACT_LAST_REDUCTION
10921 ? else_clause : NULL, &vec_oprnds3, vectype);
10922 else
10923 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10924 cond_expr0, &vec_oprnds0, comp_vectype,
10925 cond_expr1, &vec_oprnds1, comp_vectype,
10926 then_clause, &vec_oprnds2, vectype,
10927 reduction_type != EXTRACT_LAST_REDUCTION
10928 ? else_clause : NULL, &vec_oprnds3, vectype);
10930 /* Arguments are ready. Create the new vector stmt. */
10931 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10933 vec_then_clause = vec_oprnds2[i];
10934 if (reduction_type != EXTRACT_LAST_REDUCTION)
10935 vec_else_clause = vec_oprnds3[i];
10937 if (swap_cond_operands)
10938 std::swap (vec_then_clause, vec_else_clause);
10940 if (masked)
10941 vec_compare = vec_cond_lhs;
10942 else
10944 vec_cond_rhs = vec_oprnds1[i];
10945 if (bitop1 == NOP_EXPR)
10947 gimple_seq stmts = NULL;
10948 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10949 vec_cond_lhs, vec_cond_rhs);
10950 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10952 else
10954 new_temp = make_ssa_name (vec_cmp_type);
10955 gassign *new_stmt;
10956 if (bitop1 == BIT_NOT_EXPR)
10957 new_stmt = gimple_build_assign (new_temp, bitop1,
10958 vec_cond_rhs);
10959 else
10960 new_stmt
10961 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10962 vec_cond_rhs);
10963 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10964 if (bitop2 == NOP_EXPR)
10965 vec_compare = new_temp;
10966 else if (bitop2 == BIT_NOT_EXPR
10967 && reduction_type != EXTRACT_LAST_REDUCTION)
10969 /* Instead of doing ~x ? y : z do x ? z : y. */
10970 vec_compare = new_temp;
10971 std::swap (vec_then_clause, vec_else_clause);
10973 else
10975 vec_compare = make_ssa_name (vec_cmp_type);
10976 if (bitop2 == BIT_NOT_EXPR)
10977 new_stmt
10978 = gimple_build_assign (vec_compare, bitop2, new_temp);
10979 else
10980 new_stmt
10981 = gimple_build_assign (vec_compare, bitop2,
10982 vec_cond_lhs, new_temp);
10983 vect_finish_stmt_generation (vinfo, stmt_info,
10984 new_stmt, gsi);
10989 /* If we decided to apply a loop mask to the result of the vector
10990 comparison, AND the comparison with the mask now. Later passes
10991 should then be able to reuse the AND results between multiple
10992 vector statements.
10994 For example:
10995 for (int i = 0; i < 100; ++i)
10996 x[i] = y[i] ? z[i] : 10;
10998 results in the following optimized GIMPLE:
11000 mask__35.8_43 = vect__4.7_41 != { 0, ... };
11001 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
11002 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
11003 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
11004 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
11005 vect_iftmp.11_47, { 10, ... }>;
11007 instead of using masked and unmasked forms of
11008 vec != { 0, ... } (masked in the MASK_LOAD,
11009 unmasked in the VEC_COND_EXPR). */
11011 /* Force vec_compare to be an SSA_NAME rather than a comparison,
11012 in cases where that's necessary. */
11014 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
11016 if (!is_gimple_val (vec_compare))
11018 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11019 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11020 vec_compare);
11021 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11022 vec_compare = vec_compare_name;
11025 if (must_invert_cmp_result)
11027 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11028 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11029 BIT_NOT_EXPR,
11030 vec_compare);
11031 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11032 vec_compare = vec_compare_name;
11035 if (masks)
11037 tree loop_mask
11038 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
11039 vectype, i);
11040 tree tmp2 = make_ssa_name (vec_cmp_type);
11041 gassign *g
11042 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
11043 loop_mask);
11044 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
11045 vec_compare = tmp2;
11049 gimple *new_stmt;
11050 if (reduction_type == EXTRACT_LAST_REDUCTION)
11052 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
11053 tree lhs = gimple_get_lhs (old_stmt);
11054 new_stmt = gimple_build_call_internal
11055 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
11056 vec_then_clause);
11057 gimple_call_set_lhs (new_stmt, lhs);
11058 SSA_NAME_DEF_STMT (lhs) = new_stmt;
11059 if (old_stmt == gsi_stmt (*gsi))
11060 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
11061 else
11063 /* In this case we're moving the definition to later in the
11064 block. That doesn't matter because the only uses of the
11065 lhs are in phi statements. */
11066 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
11067 gsi_remove (&old_gsi, true);
11068 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11071 else
11073 new_temp = make_ssa_name (vec_dest);
11074 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
11075 vec_then_clause, vec_else_clause);
11076 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11078 if (slp_node)
11079 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11080 else
11081 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11084 if (!slp_node)
11085 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11087 vec_oprnds0.release ();
11088 vec_oprnds1.release ();
11089 vec_oprnds2.release ();
11090 vec_oprnds3.release ();
11092 return true;
11095 /* vectorizable_comparison.
11097 Check if STMT_INFO is a comparison expression that can be vectorized.
11098 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11099 comparison, put it in VEC_STMT, and insert it at GSI.
11101 Return true if STMT_INFO is vectorizable in this way. */
11103 static bool
11104 vectorizable_comparison (vec_info *vinfo,
11105 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11106 gimple **vec_stmt,
11107 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11109 tree lhs, rhs1, rhs2;
11110 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11111 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11112 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
11113 tree new_temp;
11114 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11115 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
11116 int ndts = 2;
11117 poly_uint64 nunits;
11118 int ncopies;
11119 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11120 int i;
11121 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11122 vec<tree> vec_oprnds0 = vNULL;
11123 vec<tree> vec_oprnds1 = vNULL;
11124 tree mask_type;
11125 tree mask;
11127 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11128 return false;
11130 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
11131 return false;
11133 mask_type = vectype;
11134 nunits = TYPE_VECTOR_SUBPARTS (vectype);
11136 if (slp_node)
11137 ncopies = 1;
11138 else
11139 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11141 gcc_assert (ncopies >= 1);
11142 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11143 return false;
11145 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11146 if (!stmt)
11147 return false;
11149 code = gimple_assign_rhs_code (stmt);
11151 if (TREE_CODE_CLASS (code) != tcc_comparison)
11152 return false;
11154 slp_tree slp_rhs1, slp_rhs2;
11155 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11156 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
11157 return false;
11159 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11160 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
11161 return false;
11163 if (vectype1 && vectype2
11164 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11165 TYPE_VECTOR_SUBPARTS (vectype2)))
11166 return false;
11168 vectype = vectype1 ? vectype1 : vectype2;
11170 /* Invariant comparison. */
11171 if (!vectype)
11173 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11174 vectype = mask_type;
11175 else
11176 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
11177 slp_node);
11178 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
11179 return false;
11181 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
11182 return false;
11184 /* Can't compare mask and non-mask types. */
11185 if (vectype1 && vectype2
11186 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
11187 return false;
11189 /* Boolean values may have another representation in vectors
11190 and therefore we prefer bit operations over comparison for
11191 them (which also works for scalar masks). We store opcodes
11192 to use in bitop1 and bitop2. Statement is vectorized as
11193 BITOP2 (rhs1 BITOP1 rhs2) or
11194 rhs1 BITOP2 (BITOP1 rhs2)
11195 depending on bitop1 and bitop2 arity. */
11196 bool swap_p = false;
11197 if (VECTOR_BOOLEAN_TYPE_P (vectype))
11199 if (code == GT_EXPR)
11201 bitop1 = BIT_NOT_EXPR;
11202 bitop2 = BIT_AND_EXPR;
11204 else if (code == GE_EXPR)
11206 bitop1 = BIT_NOT_EXPR;
11207 bitop2 = BIT_IOR_EXPR;
11209 else if (code == LT_EXPR)
11211 bitop1 = BIT_NOT_EXPR;
11212 bitop2 = BIT_AND_EXPR;
11213 swap_p = true;
11215 else if (code == LE_EXPR)
11217 bitop1 = BIT_NOT_EXPR;
11218 bitop2 = BIT_IOR_EXPR;
11219 swap_p = true;
11221 else
11223 bitop1 = BIT_XOR_EXPR;
11224 if (code == EQ_EXPR)
11225 bitop2 = BIT_NOT_EXPR;
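/* For instance (illustrative): with boolean operands a and b, LT_EXPR
   "a < b" holds exactly when b is true and a is false, so after the
   operand swap it is emitted as

	tmp_1 = ~a;		bitop1 = BIT_NOT_EXPR
	res_2 = b & tmp_1;	bitop2 = BIT_AND_EXPR

   whereas NE_EXPR is simply a ^ b (bitop1 = BIT_XOR_EXPR, no bitop2).  */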
11229 if (!vec_stmt)
11231 if (bitop1 == NOP_EXPR)
11233 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
11234 return false;
11236 else
11238 machine_mode mode = TYPE_MODE (vectype);
11239 optab optab;
11241 optab = optab_for_tree_code (bitop1, vectype, optab_default);
11242 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11243 return false;
11245 if (bitop2 != NOP_EXPR)
11247 optab = optab_for_tree_code (bitop2, vectype, optab_default);
11248 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11249 return false;
11253 /* Put types on constant and invariant SLP children. */
11254 if (slp_node
11255 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
11256 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
11258 if (dump_enabled_p ())
11259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11260 "incompatible vector types for invariants\n");
11261 return false;
11264 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
11265 vect_model_simple_cost (vinfo, stmt_info,
11266 ncopies * (1 + (bitop2 != NOP_EXPR)),
11267 dts, ndts, slp_node, cost_vec);
11268 return true;
11271 /* Transform. */
11273 /* Handle def. */
11274 lhs = gimple_assign_lhs (stmt);
11275 mask = vect_create_destination_var (lhs, mask_type);
11277 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11278 rhs1, &vec_oprnds0, vectype,
11279 rhs2, &vec_oprnds1, vectype);
11280 if (swap_p)
11281 std::swap (vec_oprnds0, vec_oprnds1);
11283 /* Arguments are ready. Create the new vector stmt. */
11284 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
11286 gimple *new_stmt;
11287 vec_rhs2 = vec_oprnds1[i];
11289 new_temp = make_ssa_name (mask);
11290 if (bitop1 == NOP_EXPR)
11292 new_stmt = gimple_build_assign (new_temp, code,
11293 vec_rhs1, vec_rhs2);
11294 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11296 else
11298 if (bitop1 == BIT_NOT_EXPR)
11299 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
11300 else
11301 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
11302 vec_rhs2);
11303 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11304 if (bitop2 != NOP_EXPR)
11306 tree res = make_ssa_name (mask);
11307 if (bitop2 == BIT_NOT_EXPR)
11308 new_stmt = gimple_build_assign (res, bitop2, new_temp);
11309 else
11310 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11311 new_temp);
11312 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11315 if (slp_node)
11316 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11317 else
11318 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11321 if (!slp_node)
11322 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11324 vec_oprnds0.release ();
11325 vec_oprnds1.release ();
11327 return true;
11330 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11331 can handle all live statements in the node. Otherwise return true
11332 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11333 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
11335 static bool
11336 can_vectorize_live_stmts (vec_info *vinfo,
11337 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11338 slp_tree slp_node, slp_instance slp_node_instance,
11339 bool vec_stmt_p,
11340 stmt_vector_for_cost *cost_vec)
11342 if (slp_node)
11344 stmt_vec_info slp_stmt_info;
11345 unsigned int i;
11346 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11348 if (STMT_VINFO_LIVE_P (slp_stmt_info)
11349 && !vectorizable_live_operation (vinfo,
11350 slp_stmt_info, gsi, slp_node,
11351 slp_node_instance, i,
11352 vec_stmt_p, cost_vec))
11353 return false;
11356 else if (STMT_VINFO_LIVE_P (stmt_info)
11357 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11358 slp_node, slp_node_instance, -1,
11359 vec_stmt_p, cost_vec))
11360 return false;
11362 return true;
11365 /* Make sure the statement is vectorizable. */
11367 opt_result
11368 vect_analyze_stmt (vec_info *vinfo,
11369 stmt_vec_info stmt_info, bool *need_to_vectorize,
11370 slp_tree node, slp_instance node_instance,
11371 stmt_vector_for_cost *cost_vec)
11373 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11374 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11375 bool ok;
11376 gimple_seq pattern_def_seq;
11378 if (dump_enabled_p ())
11379 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11380 stmt_info->stmt);
11382 if (gimple_has_volatile_ops (stmt_info->stmt))
11383 return opt_result::failure_at (stmt_info->stmt,
11384 "not vectorized:"
11385 " stmt has volatile operands: %G\n",
11386 stmt_info->stmt);
11388 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11389 && node == NULL
11390 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11392 gimple_stmt_iterator si;
11394 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11396 stmt_vec_info pattern_def_stmt_info
11397 = vinfo->lookup_stmt (gsi_stmt (si));
11398 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11399 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11401 /* Analyze def stmt of STMT if it's a pattern stmt. */
11402 if (dump_enabled_p ())
11403 dump_printf_loc (MSG_NOTE, vect_location,
11404 "==> examining pattern def statement: %G",
11405 pattern_def_stmt_info->stmt);
11407 opt_result res
11408 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
11409 need_to_vectorize, node, node_instance,
11410 cost_vec);
11411 if (!res)
11412 return res;
11417 /* Skip stmts that do not need to be vectorized. In loops this is expected
11418 to include:
11419 - the COND_EXPR which is the loop exit condition
11420 - any LABEL_EXPRs in the loop
11421 - computations that are used only for array indexing or loop control.
11422 In basic blocks we only analyze statements that are a part of some SLP
11423 instance, therefore, all the statements are relevant.
11425 A pattern statement needs to be analyzed instead of the original statement
11426 if the original statement is not relevant. Otherwise, we analyze both
11427 statements. In basic blocks we are called from some SLP instance
11428 traversal, so don't analyze pattern stmts instead of the original;
11429 the pattern stmts will already be part of an SLP instance. */
11431 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11432 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11433 && !STMT_VINFO_LIVE_P (stmt_info))
11435 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11436 && pattern_stmt_info
11437 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11438 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11440 /* Analyze PATTERN_STMT instead of the original stmt. */
11441 stmt_info = pattern_stmt_info;
11442 if (dump_enabled_p ())
11443 dump_printf_loc (MSG_NOTE, vect_location,
11444 "==> examining pattern statement: %G",
11445 stmt_info->stmt);
11447 else
11449 if (dump_enabled_p ())
11450 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11452 return opt_result::success ();
11455 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11456 && node == NULL
11457 && pattern_stmt_info
11458 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11459 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11461 /* Analyze PATTERN_STMT too. */
11462 if (dump_enabled_p ())
11463 dump_printf_loc (MSG_NOTE, vect_location,
11464 "==> examining pattern statement: %G",
11465 pattern_stmt_info->stmt);
11467 opt_result res
11468 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11469 node_instance, cost_vec);
11470 if (!res)
11471 return res;
11474 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11476 case vect_internal_def:
11477 break;
11479 case vect_reduction_def:
11480 case vect_nested_cycle:
11481 gcc_assert (!bb_vinfo
11482 && (relevance == vect_used_in_outer
11483 || relevance == vect_used_in_outer_by_reduction
11484 || relevance == vect_used_by_reduction
11485 || relevance == vect_unused_in_scope
11486 || relevance == vect_used_only_live));
11487 break;
11489 case vect_induction_def:
11490 case vect_first_order_recurrence:
11491 gcc_assert (!bb_vinfo);
11492 break;
11494 case vect_constant_def:
11495 case vect_external_def:
11496 case vect_unknown_def_type:
11497 default:
11498 gcc_unreachable ();
11501 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11502 if (node)
11503 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11505 if (STMT_VINFO_RELEVANT_P (stmt_info))
11507 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11508 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11509 || (call && gimple_call_lhs (call) == NULL_TREE));
11510 *need_to_vectorize = true;
11513 if (PURE_SLP_STMT (stmt_info) && !node)
11515 if (dump_enabled_p ())
11516 dump_printf_loc (MSG_NOTE, vect_location,
11517 "handled only by SLP analysis\n");
11518 return opt_result::success ();
11521 ok = true;
11522 if (!bb_vinfo
11523 && (STMT_VINFO_RELEVANT_P (stmt_info)
11524 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11525 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11526 -mveclibabi= takes preference over library functions with
11527 the simd attribute. */
11528 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11529 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11530 cost_vec)
11531 || vectorizable_conversion (vinfo, stmt_info,
11532 NULL, NULL, node, cost_vec)
11533 || vectorizable_operation (vinfo, stmt_info,
11534 NULL, NULL, node, cost_vec)
11535 || vectorizable_assignment (vinfo, stmt_info,
11536 NULL, NULL, node, cost_vec)
11537 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11538 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11539 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11540 node, node_instance, cost_vec)
11541 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11542 NULL, node, cost_vec)
11543 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11544 || vectorizable_condition (vinfo, stmt_info,
11545 NULL, NULL, node, cost_vec)
11546 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11547 cost_vec)
11548 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11549 stmt_info, NULL, node)
11550 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11551 stmt_info, NULL, node, cost_vec));
11552 else
11554 if (bb_vinfo)
11555 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11556 || vectorizable_simd_clone_call (vinfo, stmt_info,
11557 NULL, NULL, node, cost_vec)
11558 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11559 cost_vec)
11560 || vectorizable_shift (vinfo, stmt_info,
11561 NULL, NULL, node, cost_vec)
11562 || vectorizable_operation (vinfo, stmt_info,
11563 NULL, NULL, node, cost_vec)
11564 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11565 cost_vec)
11566 || vectorizable_load (vinfo, stmt_info,
11567 NULL, NULL, node, cost_vec)
11568 || vectorizable_store (vinfo, stmt_info,
11569 NULL, NULL, node, cost_vec)
11570 || vectorizable_condition (vinfo, stmt_info,
11571 NULL, NULL, node, cost_vec)
11572 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11573 cost_vec)
11574 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11577 if (node)
11578 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11580 if (!ok)
11581 return opt_result::failure_at (stmt_info->stmt,
11582 "not vectorized:"
11583 " relevant stmt not supported: %G",
11584 stmt_info->stmt);
11586 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
11587 need extra handling, except for vectorizable reductions. */
11588 if (!bb_vinfo
11589 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11590 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11591 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11592 stmt_info, NULL, node, node_instance,
11593 false, cost_vec))
11594 return opt_result::failure_at (stmt_info->stmt,
11595 "not vectorized:"
11596 " live stmt not supported: %G",
11597 stmt_info->stmt);
11599 return opt_result::success ();
11603 /* Function vect_transform_stmt.
11605 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11607 bool
11608 vect_transform_stmt (vec_info *vinfo,
11609 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11610 slp_tree slp_node, slp_instance slp_node_instance)
11612 bool is_store = false;
11613 gimple *vec_stmt = NULL;
11614 bool done;
11616 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11618 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11619 if (slp_node)
11620 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11622 switch (STMT_VINFO_TYPE (stmt_info))
11624 case type_demotion_vec_info_type:
11625 case type_promotion_vec_info_type:
11626 case type_conversion_vec_info_type:
11627 done = vectorizable_conversion (vinfo, stmt_info,
11628 gsi, &vec_stmt, slp_node, NULL);
11629 gcc_assert (done);
11630 break;
11632 case induc_vec_info_type:
11633 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11634 stmt_info, &vec_stmt, slp_node,
11635 NULL);
11636 gcc_assert (done);
11637 break;
11639 case shift_vec_info_type:
11640 done = vectorizable_shift (vinfo, stmt_info,
11641 gsi, &vec_stmt, slp_node, NULL);
11642 gcc_assert (done);
11643 break;
11645 case op_vec_info_type:
11646 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11647 NULL);
11648 gcc_assert (done);
11649 break;
11651 case assignment_vec_info_type:
11652 done = vectorizable_assignment (vinfo, stmt_info,
11653 gsi, &vec_stmt, slp_node, NULL);
11654 gcc_assert (done);
11655 break;
11657 case load_vec_info_type:
11658 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11659 NULL);
11660 gcc_assert (done);
11661 break;
11663 case store_vec_info_type:
11664 done = vectorizable_store (vinfo, stmt_info,
11665 gsi, &vec_stmt, slp_node, NULL);
11666 gcc_assert (done);
11667 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11669 /* In case of interleaving, the whole chain is vectorized when the
11670 last store in the chain is reached. Store stmts before the last
11671 one are skipped, and their vec_stmt_info shouldn't be freed
11672 in the meantime. */
11673 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11674 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11675 is_store = true;
11677 else
11678 is_store = true;
11679 break;
11681 case condition_vec_info_type:
11682 done = vectorizable_condition (vinfo, stmt_info,
11683 gsi, &vec_stmt, slp_node, NULL);
11684 gcc_assert (done);
11685 break;
11687 case comparison_vec_info_type:
11688 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11689 slp_node, NULL);
11690 gcc_assert (done);
11691 break;
11693 case call_vec_info_type:
11694 done = vectorizable_call (vinfo, stmt_info,
11695 gsi, &vec_stmt, slp_node, NULL);
11696 break;
11698 case call_simd_clone_vec_info_type:
11699 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11700 slp_node, NULL);
11701 break;
11703 case reduc_vec_info_type:
11704 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11705 gsi, &vec_stmt, slp_node);
11706 gcc_assert (done);
11707 break;
11709 case cycle_phi_info_type:
11710 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11711 &vec_stmt, slp_node, slp_node_instance);
11712 gcc_assert (done);
11713 break;
11715 case lc_phi_info_type:
11716 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11717 stmt_info, &vec_stmt, slp_node);
11718 gcc_assert (done);
11719 break;
11721 case recurr_info_type:
11722 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11723 stmt_info, &vec_stmt, slp_node, NULL);
11724 gcc_assert (done);
11725 break;
11727 case phi_info_type:
11728 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11729 gcc_assert (done);
11730 break;
11732 default:
11733 if (!STMT_VINFO_LIVE_P (stmt_info))
11735 if (dump_enabled_p ())
11736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11737 "stmt not supported.\n");
11738 gcc_unreachable ();
11740 done = true;
11743 if (!slp_node && vec_stmt)
11744 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11746 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11748 /* Handle stmts whose DEF is used outside the loop-nest that is
11749 being vectorized. */
11750 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11751 slp_node_instance, true, NULL);
11752 gcc_assert (done);
11755 if (slp_node)
11756 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11758 return is_store;
11762 /* Remove a group of stores (for SLP or interleaving), free their
11763 stmt_vec_info. */
11765 void
11766 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11768 stmt_vec_info next_stmt_info = first_stmt_info;
11770 while (next_stmt_info)
11772 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11773 next_stmt_info = vect_orig_stmt (next_stmt_info);
11774 /* Free the attached stmt_vec_info and remove the stmt. */
11775 vinfo->remove_stmt (next_stmt_info);
11776 next_stmt_info = tmp;
11780 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11781 elements of type SCALAR_TYPE, or null if the target doesn't support
11782 such a type.
11784 If NUNITS is zero, return a vector type that contains elements of
11785 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11787 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11788 for this vectorization region and want to "autodetect" the best choice.
11789 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11790 and we want the new type to be interoperable with it. PREVAILING_MODE
11791 in this case can be a scalar integer mode or a vector mode; when it
11792 is a vector mode, the function acts like a tree-level version of
11793 related_vector_mode. */
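/* Hedged usage sketches (the results are target dependent; int_type and
   short_type stand for the corresponding scalar trees):

	get_related_vectype_for_scalar_type (VOIDmode, int_type, 0)
	  -> the target's preferred int vector, e.g. V4SI on a 128-bit
	     SIMD target;

	get_related_vectype_for_scalar_type (V4SImode, short_type, 0)
	  -> a short vector interoperable with V4SImode, e.g. V8HI,
	     or NULL_TREE if the target has no such mode.  */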
11795 tree
11796 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11797 tree scalar_type, poly_uint64 nunits)
11799 tree orig_scalar_type = scalar_type;
11800 scalar_mode inner_mode;
11801 machine_mode simd_mode;
11802 tree vectype;
11804 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11805 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11806 return NULL_TREE;
11808 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11810 /* Interoperability between modes requires one to be a constant multiple
11811 of the other, so that the number of vectors required for each operation
11812 is a compile-time constant. */
11813 if (prevailing_mode != VOIDmode
11814 && !constant_multiple_p (nunits * nbytes,
11815 GET_MODE_SIZE (prevailing_mode))
11816 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
11817 nunits * nbytes))
11818 return NULL_TREE;
11820 /* For vector types of elements whose mode precision doesn't
11821 match their type's precision we use an element type of mode
11822 precision. The vectorization routines will have to make sure
11823 they support the proper result truncation/extension.
11824 We also make sure to build vector types with INTEGER_TYPE
11825 component type only. */
11826 if (INTEGRAL_TYPE_P (scalar_type)
11827 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11828 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11829 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11830 TYPE_UNSIGNED (scalar_type));
11832 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11833 When the component mode passes the above test simply use a type
11834 corresponding to that mode. The theory is that any use that
11835 would cause problems with this will disable vectorization anyway. */
11836 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11837 && !INTEGRAL_TYPE_P (scalar_type))
11838 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11840 /* We can't build a vector type of elements with alignment bigger than
11841 their size. */
11842 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11843 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11844 TYPE_UNSIGNED (scalar_type));
11846 /* If we fell back to using the mode, fail if there was
11847 no scalar type for it. */
11848 if (scalar_type == NULL_TREE)
11849 return NULL_TREE;
11851 /* If no prevailing mode was supplied, use the mode the target prefers.
11852 Otherwise lookup a vector mode based on the prevailing mode. */
11853 if (prevailing_mode == VOIDmode)
11855 gcc_assert (known_eq (nunits, 0U));
11856 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11857 if (SCALAR_INT_MODE_P (simd_mode))
11859 /* Traditional behavior is not to take the integer mode
11860 literally, but simply to use it as a way of determining
11861 the vector size. It is up to mode_for_vector to decide
11862 what the TYPE_MODE should be.
11864 Note that nunits == 1 is allowed in order to support single
11865 element vector types. */
11866 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11867 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11868 return NULL_TREE;
11871 else if (SCALAR_INT_MODE_P (prevailing_mode)
11872 || !related_vector_mode (prevailing_mode,
11873 inner_mode, nunits).exists (&simd_mode))
11875 /* Fall back to using mode_for_vector, mostly in the hope of being
11876 able to use an integer mode. */
11877 if (known_eq (nunits, 0U)
11878 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11879 return NULL_TREE;
11881 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11882 return NULL_TREE;
11885 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11887 /* In cases where the mode was chosen by mode_for_vector, check that
11888 the target actually supports the chosen mode, or that it at least
11889 allows the vector mode to be replaced by a like-sized integer. */
11890 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11891 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11892 return NULL_TREE;
11894 /* Re-attach the address-space qualifier if we canonicalized the scalar
11895 type. */
11896 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11897 return build_qualified_type
11898 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11900 return vectype;
11903 /* Function get_vectype_for_scalar_type.
11905 Returns the vector type corresponding to SCALAR_TYPE as supported
11906 by the target. If GROUP_SIZE is nonzero and we're performing BB
11907 vectorization, make sure that the number of elements in the vector
11908 is no bigger than GROUP_SIZE. */
11910 tree
11911 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11912 unsigned int group_size)
11914 /* For BB vectorization, we should always have a group size once we've
11915 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11916 are tentative requests during things like early data reference
11917 analysis and pattern recognition. */
11918 if (is_a <bb_vec_info> (vinfo))
11919 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11920 else
11921 group_size = 0;
11923 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11924 scalar_type);
11925 if (vectype && vinfo->vector_mode == VOIDmode)
11926 vinfo->vector_mode = TYPE_MODE (vectype);
11928 /* Register the natural choice of vector type, before the group size
11929 has been applied. */
11930 if (vectype)
11931 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11933 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11934 try again with an explicit number of elements. */
11935 if (vectype
11936 && group_size
11937 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11939 /* Start with the biggest number of units that fits within
11940 GROUP_SIZE and halve it until we find a valid vector type.
11941 Usually either the first attempt will succeed or all will
11942 fail (in the latter case because GROUP_SIZE is too small
11943 for the target), but it's possible that a target could have
11944 a hole between supported vector types.
11946 If GROUP_SIZE is not a power of 2, this has the effect of
11947 trying the largest power of 2 that fits within the group,
11948 even though the group is not a multiple of that vector size.
11949 The BB vectorizer will then try to carve up the group into
11950 smaller pieces. */
11951 unsigned int nunits = 1 << floor_log2 (group_size);
11954 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11955 scalar_type, nunits);
11956 nunits /= 2;
11958 while (nunits > 1 && !vectype);
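/* Illustrative walk-through: on a target whose natural choice for the
   element type has 8 lanes, a GROUP_SIZE of 6 makes the loop above try
   nunits == 4 first (1 << floor_log2 (6)) and, if no 4-lane vector type
   exists, halve to nunits == 2 before giving up.  */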
11961 return vectype;
11964 /* Return the vector type corresponding to SCALAR_TYPE as supported
11965 by the target. NODE, if nonnull, is the SLP tree node that will
11966 use the returned vector type. */
11968 tree
11969 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11971 unsigned int group_size = 0;
11972 if (node)
11973 group_size = SLP_TREE_LANES (node);
11974 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11977 /* Function get_mask_type_for_scalar_type.
11979 Returns the mask type corresponding to a result of comparison
11980 of vectors of specified SCALAR_TYPE as supported by target.
11981 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11982 make sure that the number of elements in the vector is no bigger
11983 than GROUP_SIZE. */
11985 tree
11986 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11987 unsigned int group_size)
11989 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11991 if (!vectype)
11992 return NULL;
11994 return truth_type_for (vectype);
11997 /* Function get_same_sized_vectype
11999 Returns a vector type corresponding to SCALAR_TYPE of size
12000 VECTOR_TYPE if supported by the target. */
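/* E.g. (hedged, target dependent): on a typical 128-bit SIMD target,
   calling this with SCALAR_TYPE short and VECTOR_TYPE a 4 x float vector
   yields an 8 x short vector type (V8HI mode), eight 2-byte elements
   filling the same 16 bytes as four floats.  */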
12002 tree
12003 get_same_sized_vectype (tree scalar_type, tree vector_type)
12005 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12006 return truth_type_for (vector_type);
12008 poly_uint64 nunits;
12009 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
12010 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
12011 return NULL_TREE;
12013 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
12014 scalar_type, nunits);
12017 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
12018 would not change the chosen vector modes. */
12020 bool
12021 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
12023 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
12024 i != vinfo->used_vector_modes.end (); ++i)
12025 if (!VECTOR_MODE_P (*i)
12026 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
12027 return false;
12028 return true;
12031 /* Function vect_is_simple_use.
12033 Input:
12034 VINFO - the vect info of the loop or basic block that is being vectorized.
12035 OPERAND - operand in the loop or bb.
12036 Output:
12037 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
12038 case OPERAND is an SSA_NAME that is defined in the vectorizable region
12039 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
12040 the definition could be anywhere in the function
12041 DT - the type of definition
12043 Returns whether a stmt with OPERAND can be vectorized.
12044 For loops, supportable operands are constants, loop invariants, and operands
12045 that are defined by the current iteration of the loop. Unsupportable
12046 operands are those that are defined by a previous iteration of the loop (as
12047 is the case in reduction/induction computations).
12048 For basic blocks, supportable operands are constants and bb invariants.
12049 For now, operands defined outside the basic block are not supported. */
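/* Minimal illustrative use (OP standing for some operand tree):

	enum vect_def_type dt;
	stmt_vec_info def_info;
	if (!vect_is_simple_use (op, vinfo, &dt, &def_info, NULL))
	  return false;
	if (dt == vect_constant_def || dt == vect_external_def)
	  ... the operand is invariant in the vectorized region ...

   which mirrors how callers in this file typically guard an operand before
   deciding how to materialize its vector value.  */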
12051 bool
12052 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12053 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
12055 if (def_stmt_info_out)
12056 *def_stmt_info_out = NULL;
12057 if (def_stmt_out)
12058 *def_stmt_out = NULL;
12059 *dt = vect_unknown_def_type;
12061 if (dump_enabled_p ())
12063 dump_printf_loc (MSG_NOTE, vect_location,
12064 "vect_is_simple_use: operand ");
12065 if (TREE_CODE (operand) == SSA_NAME
12066 && !SSA_NAME_IS_DEFAULT_DEF (operand))
12067 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
12068 else
12069 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
12072 if (CONSTANT_CLASS_P (operand))
12073 *dt = vect_constant_def;
12074 else if (is_gimple_min_invariant (operand))
12075 *dt = vect_external_def;
12076 else if (TREE_CODE (operand) != SSA_NAME)
12077 *dt = vect_unknown_def_type;
12078 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
12079 *dt = vect_external_def;
12080 else
12082 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
12083 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
12084 if (!stmt_vinfo)
12085 *dt = vect_external_def;
12086 else
12088 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
12089 def_stmt = stmt_vinfo->stmt;
12090 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
12091 if (def_stmt_info_out)
12092 *def_stmt_info_out = stmt_vinfo;
12094 if (def_stmt_out)
12095 *def_stmt_out = def_stmt;
12098 if (dump_enabled_p ())
12100 dump_printf (MSG_NOTE, ", type of def: ");
12101 switch (*dt)
12103 case vect_uninitialized_def:
12104 dump_printf (MSG_NOTE, "uninitialized\n");
12105 break;
12106 case vect_constant_def:
12107 dump_printf (MSG_NOTE, "constant\n");
12108 break;
12109 case vect_external_def:
12110 dump_printf (MSG_NOTE, "external\n");
12111 break;
12112 case vect_internal_def:
12113 dump_printf (MSG_NOTE, "internal\n");
12114 break;
12115 case vect_induction_def:
12116 dump_printf (MSG_NOTE, "induction\n");
12117 break;
12118 case vect_reduction_def:
12119 dump_printf (MSG_NOTE, "reduction\n");
12120 break;
12121 case vect_double_reduction_def:
12122 dump_printf (MSG_NOTE, "double reduction\n");
12123 break;
12124 case vect_nested_cycle:
12125 dump_printf (MSG_NOTE, "nested cycle\n");
12126 break;
12127 case vect_first_order_recurrence:
12128 dump_printf (MSG_NOTE, "first order recurrence\n");
12129 break;
12130 case vect_unknown_def_type:
12131 dump_printf (MSG_NOTE, "unknown\n");
12132 break;
12136 if (*dt == vect_unknown_def_type)
12138 if (dump_enabled_p ())
12139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12140 "Unsupported pattern.\n");
12141 return false;
12144 return true;
12147 /* Function vect_is_simple_use.
12149 Same as vect_is_simple_use but also determines the vector operand
12150 type of OPERAND and stores it to *VECTYPE. If the definition of
12151 OPERAND is vect_uninitialized_def, vect_constant_def or
12152 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
12153 is responsible to compute the best suited vector type for the
12154 scalar operand. */
12156 bool
12157 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12158 tree *vectype, stmt_vec_info *def_stmt_info_out,
12159 gimple **def_stmt_out)
12161 stmt_vec_info def_stmt_info;
12162 gimple *def_stmt;
12163 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
12164 return false;
12166 if (def_stmt_out)
12167 *def_stmt_out = def_stmt;
12168 if (def_stmt_info_out)
12169 *def_stmt_info_out = def_stmt_info;
12171 /* Now get a vector type if the def is internal, otherwise supply
12172 NULL_TREE and leave it up to the caller to figure out a proper
12173 type for the use stmt. */
12174 if (*dt == vect_internal_def
12175 || *dt == vect_induction_def
12176 || *dt == vect_reduction_def
12177 || *dt == vect_double_reduction_def
12178 || *dt == vect_nested_cycle
12179 || *dt == vect_first_order_recurrence)
12181 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
12182 gcc_assert (*vectype != NULL_TREE);
12183 if (dump_enabled_p ())
12184 dump_printf_loc (MSG_NOTE, vect_location,
12185 "vect_is_simple_use: vectype %T\n", *vectype);
12187 else if (*dt == vect_uninitialized_def
12188 || *dt == vect_constant_def
12189 || *dt == vect_external_def)
12190 *vectype = NULL_TREE;
12191 else
12192 gcc_unreachable ();
12194 return true;
12197 /* Function vect_is_simple_use.
12199 Same as vect_is_simple_use but determines the operand by operand
12200 position OPERAND from either STMT or SLP_NODE, filling in *OP
12201 and *SLP_DEF (when SLP_NODE is not NULL). */
12203 bool
12204 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
12205 unsigned operand, tree *op, slp_tree *slp_def,
12206 enum vect_def_type *dt,
12207 tree *vectype, stmt_vec_info *def_stmt_info_out)
12209 if (slp_node)
12211 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
12212 *slp_def = child;
12213 *vectype = SLP_TREE_VECTYPE (child);
12214 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
12216 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
12217 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
12219 else
12221 if (def_stmt_info_out)
12222 *def_stmt_info_out = NULL;
12223 *op = SLP_TREE_SCALAR_OPS (child)[0];
12224 *dt = SLP_TREE_DEF_TYPE (child);
12225 return true;
12228 else
12230 *slp_def = NULL;
12231 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
12233 if (gimple_assign_rhs_code (ass) == COND_EXPR
12234 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
12236 if (operand < 2)
12237 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
12238 else
12239 *op = gimple_op (ass, operand);
12241 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
12242 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
12243 else
12244 *op = gimple_op (ass, operand + 1);
12246 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
12247 *op = gimple_call_arg (call, operand);
12248 else
12249 gcc_unreachable ();
12250 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
12254 /* If OP is not NULL and is external or constant update its vector
12255 type with VECTYPE. Returns true if successful or false if not,
12256 for example when conflicting vector types are present. */
12258 bool
12259 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
12261 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
12262 return true;
12263 if (SLP_TREE_VECTYPE (op))
12264 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
12265 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
12266 should be handled by patterns. Allow vect_constant_def for now. */
12267 if (VECTOR_BOOLEAN_TYPE_P (vectype)
12268 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
12269 return false;
12270 SLP_TREE_VECTYPE (op) = vectype;
12271 return true;
12274 /* Function supportable_widening_operation
12276 Check whether an operation represented by the code CODE is a
12277 widening operation that is supported by the target platform in
12278 vector form (i.e., when operating on arguments of type VECTYPE_IN
12279 producing a result of type VECTYPE_OUT).
12281 Widening operations we currently support are NOP (CONVERT), FLOAT,
12282 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
12283 are supported by the target platform either directly (via vector
12284 tree-codes), or via target builtins.
12286 Output:
12287 - CODE1 and CODE2 are codes of vector operations to be used when
12288 vectorizing the operation, if available.
12289 - MULTI_STEP_CVT determines the number of required intermediate steps in
12290 case of multi-step conversion (like char->short->int - in that case
12291 MULTI_STEP_CVT will be 1).
12292 - INTERM_TYPES contains the intermediate type required to perform the
12293 widening operation (short in the above example). */
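/* A hedged illustration of the multi-step case mentioned above: converting
   a vector of chars to vectors of ints (CODE == NOP_EXPR) on a target with
   unpack patterns would typically come back as

	*code1 = VEC_UNPACK_LO_EXPR, *code2 = VEC_UNPACK_HI_EXPR,
	*multi_step_cvt = 1, *interm_types = { vector of shorts }

   i.e. one intermediate widening to short before the final widening to
   int; the exact codes depend on which optabs the target provides.  */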
12295 bool
12296 supportable_widening_operation (vec_info *vinfo,
12297 enum tree_code code, stmt_vec_info stmt_info,
12298 tree vectype_out, tree vectype_in,
12299 enum tree_code *code1, enum tree_code *code2,
12300 int *multi_step_cvt,
12301 vec<tree> *interm_types)
12303 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
12304 class loop *vect_loop = NULL;
12305 machine_mode vec_mode;
12306 enum insn_code icode1, icode2;
12307 optab optab1, optab2;
12308 tree vectype = vectype_in;
12309 tree wide_vectype = vectype_out;
12310 enum tree_code c1, c2;
12311 int i;
12312 tree prev_type, intermediate_type;
12313 machine_mode intermediate_mode, prev_mode;
12314 optab optab3, optab4;
12316 *multi_step_cvt = 0;
12317 if (loop_info)
12318 vect_loop = LOOP_VINFO_LOOP (loop_info);
12320 switch (code)
12322 case WIDEN_MULT_EXPR:
12323 /* The result of a vectorized widening operation usually requires
12324 two vectors (because the widened results do not fit into one vector).
12325 The generated vector results would normally be expected to be
12326 generated in the same order as in the original scalar computation,
12327 i.e. if 8 results are generated in each vector iteration, they are
12328 to be organized as follows:
12329 vect1: [res1,res2,res3,res4],
12330 vect2: [res5,res6,res7,res8].
12332 However, in the special case that the result of the widening
12333 operation is used in a reduction computation only, the order doesn't
12334 matter (because when vectorizing a reduction we change the order of
12335 the computation). Some targets can take advantage of this and
12336 generate more efficient code. For example, targets like Altivec,
12337 that support widen_mult using a sequence of {mult_even,mult_odd}
12338 generate the following vectors:
12339 vect1: [res1,res3,res5,res7],
12340 vect2: [res2,res4,res6,res8].
12342 When vectorizing outer-loops, we execute the inner-loop sequentially
12343 (each vectorized inner-loop iteration contributes to VF outer-loop
12344 iterations in parallel). We therefore don't allow changing the
12345 order of the computation in the inner-loop during outer-loop
12346 vectorization. */
12347 /* TODO: Another case in which order doesn't *really* matter is when we
12348 widen and then contract again, e.g. (short)((int)x * y >> 8).
12349 Normally, pack_trunc performs an even/odd permute, whereas the
12350 repack from an even/odd expansion would be an interleave, which
12351 would be significantly simpler for e.g. AVX2. */
12352 /* In any case, in order to avoid duplicating the code below, recurse
12353 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
12354 are properly set up for the caller. If we fail, we'll continue with
12355 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
12356 if (vect_loop
12357 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12358 && !nested_in_vect_loop_p (vect_loop, stmt_info)
12359 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12360 stmt_info, vectype_out,
12361 vectype_in, code1, code2,
12362 multi_step_cvt, interm_types))
12364 /* Elements in a vector with vect_used_by_reduction property cannot
12365 be reordered if the use chain with this property does not have the
12366 same operation. One such example is s += a * b, where elements
12367 in a and b cannot be reordered. Here we check if the vector defined
12368 by STMT is only directly used in the reduction statement. */
12369 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12370 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12371 if (use_stmt_info
12372 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12373 return true;
12375 c1 = VEC_WIDEN_MULT_LO_EXPR;
12376 c2 = VEC_WIDEN_MULT_HI_EXPR;
12377 break;
12379 case DOT_PROD_EXPR:
12380 c1 = DOT_PROD_EXPR;
12381 c2 = DOT_PROD_EXPR;
12382 break;
12384 case SAD_EXPR:
12385 c1 = SAD_EXPR;
12386 c2 = SAD_EXPR;
12387 break;
12389 case VEC_WIDEN_MULT_EVEN_EXPR:
12390 /* Support the recursion induced just above. */
12391 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12392 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12393 break;
12395 case WIDEN_LSHIFT_EXPR:
12396 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12397 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12398 break;
12400 case WIDEN_PLUS_EXPR:
12401 c1 = VEC_WIDEN_PLUS_LO_EXPR;
12402 c2 = VEC_WIDEN_PLUS_HI_EXPR;
12403 break;
12405 case WIDEN_MINUS_EXPR:
12406 c1 = VEC_WIDEN_MINUS_LO_EXPR;
12407 c2 = VEC_WIDEN_MINUS_HI_EXPR;
12408 break;
12410 CASE_CONVERT:
12411 c1 = VEC_UNPACK_LO_EXPR;
12412 c2 = VEC_UNPACK_HI_EXPR;
12413 break;
12415 case FLOAT_EXPR:
12416 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12417 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12418 break;
12420 case FIX_TRUNC_EXPR:
12421 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12422 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12423 break;
12425 default:
12426 gcc_unreachable ();
12429 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12430 std::swap (c1, c2);
12432 if (code == FIX_TRUNC_EXPR)
12434 /* The signedness is determined from the output operand. */
12435 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12436 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12438 else if (CONVERT_EXPR_CODE_P (code)
12439 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12440 && VECTOR_BOOLEAN_TYPE_P (vectype)
12441 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12442 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12444 /* If the input and result modes are the same, a different optab
12445 is needed where we pass in the number of units in vectype. */
12446 optab1 = vec_unpacks_sbool_lo_optab;
12447 optab2 = vec_unpacks_sbool_hi_optab;
12449 else
12451 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12452 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12455 if (!optab1 || !optab2)
12456 return false;
12458 vec_mode = TYPE_MODE (vectype);
12459 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12460 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12461 return false;
12463 *code1 = c1;
12464 *code2 = c2;
12466 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12467 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12469 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12470 return true;
12471 /* For scalar masks we may have different boolean
12472 vector types having the same QImode. Thus we
12473 add an additional check for the number of elements. */
12474 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12475 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12476 return true;
12479 /* Check if it's a multi-step conversion that can be done using intermediate
12480 types. */
12482 prev_type = vectype;
12483 prev_mode = vec_mode;
12485 if (!CONVERT_EXPR_CODE_P (code))
12486 return false;
12488 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12489 intermediate steps in the promotion sequence. We try
12490 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12491 not. */
12492 interm_types->create (MAX_INTERM_CVT_STEPS);
12493 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12495 intermediate_mode = insn_data[icode1].operand[0].mode;
12496 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12497 intermediate_type
12498 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12499 else if (VECTOR_MODE_P (intermediate_mode))
12501 tree intermediate_element_type
12502 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
12503 TYPE_UNSIGNED (prev_type));
12504 intermediate_type
12505 = build_vector_type_for_mode (intermediate_element_type,
12506 intermediate_mode);
12508 else
12509 intermediate_type
12510 = lang_hooks.types.type_for_mode (intermediate_mode,
12511 TYPE_UNSIGNED (prev_type));
12513 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12514 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12515 && intermediate_mode == prev_mode
12516 && SCALAR_INT_MODE_P (prev_mode))
12518 /* If the input and result modes are the same, a different optab
12519 is needed where we pass in the number of units in vectype. */
12520 optab3 = vec_unpacks_sbool_lo_optab;
12521 optab4 = vec_unpacks_sbool_hi_optab;
12523 else
12525 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12526 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12529 if (!optab3 || !optab4
12530 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12531 || insn_data[icode1].operand[0].mode != intermediate_mode
12532 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12533 || insn_data[icode2].operand[0].mode != intermediate_mode
12534 || ((icode1 = optab_handler (optab3, intermediate_mode))
12535 == CODE_FOR_nothing)
12536 || ((icode2 = optab_handler (optab4, intermediate_mode))
12537 == CODE_FOR_nothing))
12538 break;
12540 interm_types->quick_push (intermediate_type);
12541 (*multi_step_cvt)++;
12543 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12544 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12546 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12547 return true;
12548 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12549 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12550 return true;
12553 prev_type = intermediate_type;
12554 prev_mode = intermediate_mode;
12557 interm_types->release ();
12558 return false;
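
/* Illustrative sketch (editorial addition): asking whether a widening
   conversion, e.g. vector char -> vector int, is supported, possibly via
   an intermediate vector short type. All names are hypothetical. */

static bool
example_check_widening (vec_info *vinfo, stmt_vec_info stmt_info,
			tree wide_vectype, tree narrow_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  auto_vec<tree> interm_types;
  if (!supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
				       wide_vectype, narrow_vectype,
				       &code1, &code2, &multi_step_cvt,
				       &interm_types))
    return false;
  /* For char->short->int MULTI_STEP_CVT is 1 and INTERM_TYPES holds
     the intermediate vector short type.  */
  return true;
}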
12562 /* Function supportable_narrowing_operation
12564 Check whether an operation represented by the code CODE is a
12565 narrowing operation that is supported by the target platform in
12566 vector form (i.e., when operating on arguments of type VECTYPE_IN
12567 and producing a result of type VECTYPE_OUT).
12569 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12570 and FLOAT. This function checks if these operations are supported by
12571 the target platform directly via vector tree-codes.
12573 Output:
12574 - CODE1 is the code of a vector operation to be used when
12575 vectorizing the operation, if available.
12576 - MULTI_STEP_CVT determines the number of required intermediate steps in
12577 case of multi-step conversion (like int->short->char - in that case
12578 MULTI_STEP_CVT will be 1).
12579 - INTERM_TYPES contains the intermediate type required to perform the
12580 narrowing operation (short in the above example). */
12582 bool
12583 supportable_narrowing_operation (enum tree_code code,
12584 tree vectype_out, tree vectype_in,
12585 enum tree_code *code1, int *multi_step_cvt,
12586 vec<tree> *interm_types)
12588 machine_mode vec_mode;
12589 enum insn_code icode1;
12590 optab optab1, interm_optab;
12591 tree vectype = vectype_in;
12592 tree narrow_vectype = vectype_out;
12593 enum tree_code c1;
12594 tree intermediate_type, prev_type;
12595 machine_mode intermediate_mode, prev_mode;
12596 int i;
12597 unsigned HOST_WIDE_INT n_elts;
12598 bool uns;
12600 *multi_step_cvt = 0;
12601 switch (code)
12603 CASE_CONVERT:
12604 c1 = VEC_PACK_TRUNC_EXPR;
12605 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12606 && VECTOR_BOOLEAN_TYPE_P (vectype)
12607 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12608 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12609 && n_elts < BITS_PER_UNIT)
12610 optab1 = vec_pack_sbool_trunc_optab;
12611 else
12612 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12613 break;
12615 case FIX_TRUNC_EXPR:
12616 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12617 /* The signedness is determined from the output operand. */
12618 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12619 break;
12621 case FLOAT_EXPR:
12622 c1 = VEC_PACK_FLOAT_EXPR;
12623 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12624 break;
12626 default:
12627 gcc_unreachable ();
12630 if (!optab1)
12631 return false;
12633 vec_mode = TYPE_MODE (vectype);
12634 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12635 return false;
12637 *code1 = c1;
12639 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12641 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12642 return true;
12643 /* For scalar masks we may have different boolean
12644 vector types having the same QImode. Thus we
12645 add an additional check for the number of elements. */
12646 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12647 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12648 return true;
12651 if (code == FLOAT_EXPR)
12652 return false;
12654 /* Check if it's a multi-step conversion that can be done using intermediate
12655 types. */
12656 prev_mode = vec_mode;
12657 prev_type = vectype;
12658 if (code == FIX_TRUNC_EXPR)
12659 uns = TYPE_UNSIGNED (vectype_out);
12660 else
12661 uns = TYPE_UNSIGNED (vectype);
12663 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12664 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12665 costly than signed. */
12666 if (code == FIX_TRUNC_EXPR && uns)
12668 enum insn_code icode2;
12670 intermediate_type
12671 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12672 interm_optab
12673 = optab_for_tree_code (c1, intermediate_type, optab_default);
12674 if (interm_optab != unknown_optab
12675 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12676 && insn_data[icode1].operand[0].mode
12677 == insn_data[icode2].operand[0].mode)
12679 uns = false;
12680 optab1 = interm_optab;
12681 icode1 = icode2;
12685 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12686 intermediate steps in the narrowing sequence. We try
12687 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12688 interm_types->create (MAX_INTERM_CVT_STEPS);
12689 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12691 intermediate_mode = insn_data[icode1].operand[0].mode;
12692 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12693 intermediate_type
12694 = vect_double_mask_nunits (prev_type, intermediate_mode);
12695 else
12696 intermediate_type
12697 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12698 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12699 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12700 && SCALAR_INT_MODE_P (prev_mode)
12701 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12702 && n_elts < BITS_PER_UNIT)
12703 interm_optab = vec_pack_sbool_trunc_optab;
12704 else
12705 interm_optab
12706 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12707 optab_default);
12708 if (!interm_optab
12709 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12710 || insn_data[icode1].operand[0].mode != intermediate_mode
12711 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12712 == CODE_FOR_nothing))
12713 break;
12715 interm_types->quick_push (intermediate_type);
12716 (*multi_step_cvt)++;
12718 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12720 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12721 return true;
12722 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12723 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12724 return true;
12727 prev_mode = intermediate_mode;
12728 prev_type = intermediate_type;
12729 optab1 = interm_optab;
12732 interm_types->release ();
12733 return false;
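
/* Illustrative sketch (editorial addition): the matching query for a
   narrowing conversion, e.g. vector int -> vector char, possibly via an
   intermediate vector short type. All names are hypothetical. */

static bool
example_check_narrowing (tree narrow_vectype, tree wide_vectype)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  auto_vec<tree> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, narrow_vectype,
					wide_vectype, &code1,
					&multi_step_cvt, &interm_types))
    return false;
  /* For int->short->char MULTI_STEP_CVT is 1 and INTERM_TYPES holds
     the intermediate vector short type.  */
  return true;
}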
12736 /* Generate and return a vector mask of MASK_TYPE such that
12737 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12738 Add the statements to SEQ. */
12740 tree
12741 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12742 tree end_index, const char *name)
12744 tree cmp_type = TREE_TYPE (start_index);
12745 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12746 cmp_type, mask_type,
12747 OPTIMIZE_FOR_SPEED));
12748 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12749 start_index, end_index,
12750 build_zero_cst (mask_type));
12751 tree tmp;
12752 if (name)
12753 tmp = make_temp_ssa_name (mask_type, NULL, name);
12754 else
12755 tmp = make_ssa_name (mask_type);
12756 gimple_call_set_lhs (call, tmp);
12757 gimple_seq_add_stmt (seq, call);
12758 return tmp;
12761 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12762 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12764 tree
12765 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12766 tree end_index)
12768 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12769 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
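
/* Illustrative sketch (editorial addition): building a loop mask whose
   element I is active while START + I < END, assuming the target supports
   IFN_WHILE_ULT for these types (vect_gen_while asserts this). The
   complement would be obtained from vect_gen_while_not instead. All names
   are hypothetical; the generated statements end up in SEQ. */

static tree
example_build_mask (gimple_seq *seq, tree mask_type, tree start, tree end)
{
  /* mask[I] is true iff START + I < END; statements are appended to SEQ.  */
  tree mask = vect_gen_while (seq, mask_type, start, end, "loop_mask");
  return mask;
}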
12772 /* Try to compute the vector types required to vectorize STMT_INFO,
12773 returning true on success and false if vectorization isn't possible.
12774 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12775 make sure that the number of elements in the vectors is no bigger
12776 than GROUP_SIZE.
12778 On success:
12780 - Set *STMT_VECTYPE_OUT to:
12781 - NULL_TREE if the statement doesn't need to be vectorized;
12782 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12784 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12785 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12786 statement does not help to determine the overall number of units. */
12788 opt_result
12789 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12790 tree *stmt_vectype_out,
12791 tree *nunits_vectype_out,
12792 unsigned int group_size)
12794 gimple *stmt = stmt_info->stmt;
12796 /* For BB vectorization, we should always have a group size once we've
12797 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12798 are tentative requests during things like early data reference
12799 analysis and pattern recognition. */
12800 if (is_a <bb_vec_info> (vinfo))
12801 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12802 else
12803 group_size = 0;
12805 *stmt_vectype_out = NULL_TREE;
12806 *nunits_vectype_out = NULL_TREE;
12808 if (gimple_get_lhs (stmt) == NULL_TREE
12809 /* MASK_STORE has no lhs, but is ok. */
12810 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12812 if (is_a <gcall *> (stmt))
12814 /* Ignore calls with no lhs. These must be calls to
12815 #pragma omp simd functions, and what vectorization factor
12816 they really need can't be determined until
12817 vectorizable_simd_clone_call. */
12818 if (dump_enabled_p ())
12819 dump_printf_loc (MSG_NOTE, vect_location,
12820 "defer to SIMD clone analysis.\n");
12821 return opt_result::success ();
12824 return opt_result::failure_at (stmt,
12825 "not vectorized: irregular stmt.%G", stmt);
12828 tree vectype;
12829 tree scalar_type = NULL_TREE;
12830 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12832 vectype = STMT_VINFO_VECTYPE (stmt_info);
12833 if (dump_enabled_p ())
12834 dump_printf_loc (MSG_NOTE, vect_location,
12835 "precomputed vectype: %T\n", vectype);
12837 else if (vect_use_mask_type_p (stmt_info))
12839 unsigned int precision = stmt_info->mask_precision;
12840 scalar_type = build_nonstandard_integer_type (precision, 1);
12841 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12842 if (!vectype)
12843 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12844 " data-type %T\n", scalar_type);
12845 if (dump_enabled_p ())
12846 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12848 else
12850 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12851 scalar_type = TREE_TYPE (DR_REF (dr));
12852 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12853 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12854 else
12855 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12857 if (dump_enabled_p ())
12859 if (group_size)
12860 dump_printf_loc (MSG_NOTE, vect_location,
12861 "get vectype for scalar type (group size %d):"
12862 " %T\n", group_size, scalar_type);
12863 else
12864 dump_printf_loc (MSG_NOTE, vect_location,
12865 "get vectype for scalar type: %T\n", scalar_type);
12867 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12868 if (!vectype)
12869 return opt_result::failure_at (stmt,
12870 "not vectorized:"
12871 " unsupported data-type %T\n",
12872 scalar_type);
12874 if (dump_enabled_p ())
12875 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12878 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12879 return opt_result::failure_at (stmt,
12880 "not vectorized: vector stmt in loop:%G",
12881 stmt);
12883 *stmt_vectype_out = vectype;
12885 /* Don't try to compute scalar types if the stmt produces a boolean
12886 vector; use the existing vector type instead. */
12887 tree nunits_vectype = vectype;
12888 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12890 /* The number of units is set according to the smallest scalar
12891 type (or the largest vector size, but we only support one
12892 vector size per vectorization). */
12893 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12894 TREE_TYPE (vectype));
12895 if (scalar_type != TREE_TYPE (vectype))
12897 if (dump_enabled_p ())
12898 dump_printf_loc (MSG_NOTE, vect_location,
12899 "get vectype for smallest scalar type: %T\n",
12900 scalar_type);
12901 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12902 group_size);
12903 if (!nunits_vectype)
12904 return opt_result::failure_at
12905 (stmt, "not vectorized: unsupported data-type %T\n",
12906 scalar_type);
12907 if (dump_enabled_p ())
12908 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12909 nunits_vectype);
12913 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12914 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12915 return opt_result::failure_at (stmt,
12916 "Not vectorized: Incompatible number "
12917 "of vector subparts between %T and %T\n",
12918 nunits_vectype, *stmt_vectype_out);
12920 if (dump_enabled_p ())
12922 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12923 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12924 dump_printf (MSG_NOTE, "\n");
12927 *nunits_vectype_out = nunits_vectype;
12928 return opt_result::success ();
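
/* Illustrative sketch (editorial addition): querying the vector types of a
   statement during analysis. The helper name is hypothetical. */

static opt_result
example_get_vectypes (vec_info *vinfo, stmt_vec_info stmt_info)
{
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
				      &nunits_vectype, /*group_size=*/0);
  if (!res)
    /* Propagate the failure; it carries the reason for dumping.  */
    return res;
  /* STMT_VECTYPE may be NULL_TREE if the statement itself needs no
     vectorization; NUNITS_VECTYPE, when set, bounds the number of units.  */
  return opt_result::success ();
}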
12931 /* Generate and return a statement sequence that sets the vector length LEN, that is:
12933 min_of_start_and_end = min (START_INDEX, END_INDEX);
12934 left_len = END_INDEX - min_of_start_and_end;
12935 rhs = min (left_len, LEN_LIMIT);
12936 LEN = rhs;
12938 Note: the cost of the code generated by this function is modeled
12939 by vect_estimate_min_profitable_iters, so changes here may need
12940 corresponding changes there. */
12942 gimple_seq
12943 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12945 gimple_seq stmts = NULL;
12946 tree len_type = TREE_TYPE (len);
12947 gcc_assert (TREE_TYPE (start_index) == len_type);
12949 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12950 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12951 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12952 gimple* stmt = gimple_build_assign (len, rhs);
12953 gimple_seq_add_stmt (&stmts, stmt);
12955 return stmts;
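
/* Illustrative sketch (editorial addition): emitting the length computation
   produced by vect_gen_len ahead of a hypothetical insertion point GSI.
   IV, NITERS and LEN are assumed to share the same type, as vect_gen_len
   requires. */

static void
example_emit_len (gimple_stmt_iterator *gsi, tree len, tree iv,
		  tree niters, tree len_limit)
{
  /* LEN = MIN (NITERS - MIN (IV, NITERS), LEN_LIMIT).  */
  gimple_seq stmts = vect_gen_len (len, iv, niters, len_limit);
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
}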