1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #define INCLUDE_MEMORY
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "gimple.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "cgraph.h"
36 #include "dumpfile.h"
37 #include "alias.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "tree-eh.h"
41 #include "gimplify.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop-manip.h"
46 #include "cfgloop.h"
47 #include "explow.h"
48 #include "tree-ssa-loop.h"
49 #include "tree-scalar-evolution.h"
50 #include "tree-vectorizer.h"
51 #include "builtins.h"
52 #include "internal-fn.h"
53 #include "tree-vector-builder.h"
54 #include "vec-perm-indices.h"
55 #include "gimple-range.h"
56 #include "tree-ssa-loop-niter.h"
57 #include "gimple-fold.h"
58 #include "regs.h"
59 #include "attribs.h"
60 #include "optabs-libfuncs.h"
62 /* For lang_hooks.types.type_for_mode. */
63 #include "langhooks.h"
65 /* Return the vectorized type for the given statement. */
67 tree
68 stmt_vectype (class _stmt_vec_info *stmt_info)
70 return STMT_VINFO_VECTYPE (stmt_info);
73 /* Return TRUE iff the given statement is in an inner loop relative to
74 the loop being vectorized. */
75 bool
76 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
78 gimple *stmt = STMT_VINFO_STMT (stmt_info);
79 basic_block bb = gimple_bb (stmt);
80 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
81 class loop* loop;
83 if (!loop_vinfo)
84 return false;
86 loop = LOOP_VINFO_LOOP (loop_vinfo);
88 return (bb->loop_father == loop->inner);
91 /* Record the cost of a statement, either by directly informing the
92 target model or by saving it in a vector for later processing.
93 Return a preliminary estimate of the statement's cost. */
95 static unsigned
96 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
97 enum vect_cost_for_stmt kind,
98 stmt_vec_info stmt_info, slp_tree node,
99 tree vectype, int misalign,
100 enum vect_cost_model_location where)
102 if ((kind == vector_load || kind == unaligned_load)
103 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104 kind = vector_gather_load;
105 if ((kind == vector_store || kind == unaligned_store)
106 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
107 kind = vector_scatter_store;
109 stmt_info_for_cost si
110 = { count, kind, where, stmt_info, node, vectype, misalign };
111 body_cost_vec->safe_push (si);
113 return (unsigned)
114 (builtin_vectorization_cost (kind, vectype, misalign) * count);
117 unsigned
118 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
119 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
120 tree vectype, int misalign,
121 enum vect_cost_model_location where)
123 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
124 vectype, misalign, where);
127 unsigned
128 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
129 enum vect_cost_for_stmt kind, slp_tree node,
130 tree vectype, int misalign,
131 enum vect_cost_model_location where)
133 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
134 vectype, misalign, where);
137 unsigned
138 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
139 enum vect_cost_for_stmt kind,
140 enum vect_cost_model_location where)
142 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
143 || kind == scalar_stmt);
144 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
145 NULL_TREE, 0, where);
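/* For example (an illustrative sketch, not a call site taken from this
   file), a caller costing a single vector addition in the loop body
   would do:

     unsigned cost
       = record_stmt_cost (cost_vec, 1, vector_stmt, stmt_info,
                           vectype, 0, vect_body);

   The returned value is only a preliminary estimate; the final cost is
   determined when the accumulated COST_VEC is handed to the target's
   cost model.  */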
148 /* Return a variable of type ELEM_TYPE[NELEMS]. */
150 static tree
151 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
153 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
154 "vect_array");
157 /* ARRAY is an array of vectors created by create_vector_array.
158 Return an SSA_NAME for the vector in index N. The reference
159 is part of the vectorization of STMT_INFO and the vector is associated
160 with scalar destination SCALAR_DEST. */
162 static tree
163 read_vector_array (vec_info *vinfo,
164 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
165 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
167 tree vect_type, vect, vect_name, array_ref;
168 gimple *new_stmt;
170 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
171 vect_type = TREE_TYPE (TREE_TYPE (array));
172 vect = vect_create_destination_var (scalar_dest, vect_type);
173 array_ref = build4 (ARRAY_REF, vect_type, array,
174 build_int_cst (size_type_node, n),
175 NULL_TREE, NULL_TREE);
177 new_stmt = gimple_build_assign (vect, array_ref);
178 vect_name = make_ssa_name (vect, new_stmt);
179 gimple_assign_set_lhs (new_stmt, vect_name);
180 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
182 return vect_name;
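/* For instance, with a V4SI vector array and N == 2, read_vector_array
   emits GIMPLE along the lines of (SSA names are illustrative only):

     vect_x.7_12 = vect_array[2];

   and returns the SSA name vect_x.7_12.  */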
185 /* ARRAY is an array of vectors created by create_vector_array.
186 Emit code to store SSA_NAME VECT in index N of the array.
187 The store is part of the vectorization of STMT_INFO. */
189 static void
190 write_vector_array (vec_info *vinfo,
191 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
192 tree vect, tree array, unsigned HOST_WIDE_INT n)
194 tree array_ref;
195 gimple *new_stmt;
197 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
198 build_int_cst (size_type_node, n),
199 NULL_TREE, NULL_TREE);
201 new_stmt = gimple_build_assign (array_ref, vect);
202 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
205 /* PTR is a pointer to an array of type TYPE. Return a representation
206 of *PTR. The memory reference replaces those in FIRST_DR
207 (and its group). */
209 static tree
210 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
212 tree mem_ref;
214 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
215 /* Arrays have the same alignment as their type. */
216 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
217 return mem_ref;
220 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
221 Emit the clobber before *GSI. */
223 static void
224 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
225 gimple_stmt_iterator *gsi, tree var)
227 tree clobber = build_clobber (TREE_TYPE (var));
228 gimple *new_stmt = gimple_build_assign (var, clobber);
229 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
232 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
234 /* Function vect_mark_relevant.
236 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
238 static void
239 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
240 enum vect_relevant relevant, bool live_p)
242 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
243 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
245 if (dump_enabled_p ())
246 dump_printf_loc (MSG_NOTE, vect_location,
247 "mark relevant %d, live %d: %G", relevant, live_p,
248 stmt_info->stmt);
250 /* If this stmt is an original stmt in a pattern, we might need to mark its
251 related pattern stmt instead of the original stmt. However, such stmts
252 may have their own uses that are not in any pattern, in such cases the
253 stmt itself should be marked. */
254 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
256 /* This is the last stmt in a sequence that was detected as a
257 pattern that can potentially be vectorized. Don't mark the stmt
258 as relevant/live because it's not going to be vectorized.
259 Instead mark the pattern-stmt that replaces it. */
261 if (dump_enabled_p ())
262 dump_printf_loc (MSG_NOTE, vect_location,
263 "last stmt in pattern. don't mark"
264 " relevant/live.\n");
266 stmt_vec_info old_stmt_info = stmt_info;
267 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
268 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
269 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
270 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
272 if (live_p && relevant == vect_unused_in_scope)
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_NOTE, vect_location,
276 "vec_stmt_relevant_p: forcing live pattern stmt "
277 "relevant.\n");
278 relevant = vect_used_only_live;
281 if (dump_enabled_p ())
282 dump_printf_loc (MSG_NOTE, vect_location,
283 "mark relevant %d, live %d: %G", relevant, live_p,
284 stmt_info->stmt);
287 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
288 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
289 STMT_VINFO_RELEVANT (stmt_info) = relevant;
291 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
292 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
294 if (dump_enabled_p ())
295 dump_printf_loc (MSG_NOTE, vect_location,
296 "already marked relevant/live.\n");
297 return;
300 worklist->safe_push (stmt_info);
304 /* Function is_simple_and_all_uses_invariant
306 Return true if STMT_INFO is simple and all uses of it are invariant. */
308 bool
309 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
310 loop_vec_info loop_vinfo)
312 tree op;
313 ssa_op_iter iter;
315 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
316 if (!stmt)
317 return false;
319 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
321 enum vect_def_type dt = vect_uninitialized_def;
323 if (!vect_is_simple_use (op, loop_vinfo, &dt))
325 if (dump_enabled_p ())
326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
327 "use not simple.\n");
328 return false;
331 if (dt != vect_external_def && dt != vect_constant_def)
332 return false;
334 return true;
337 /* Function vect_stmt_relevant_p.
339 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
340 is "relevant for vectorization".
342 A stmt is considered "relevant for vectorization" if:
343 - it has uses outside the loop.
344 - it has vdefs (it alters memory).
345 - control stmts in the loop (except for the exit condition).
346 - it is an induction and we have multiple exits.
348 CHECKME: what other side effects would the vectorizer allow? */
350 static bool
351 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
352 enum vect_relevant *relevant, bool *live_p)
354 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
355 ssa_op_iter op_iter;
356 imm_use_iterator imm_iter;
357 use_operand_p use_p;
358 def_operand_p def_p;
360 *relevant = vect_unused_in_scope;
361 *live_p = false;
363 /* cond stmt other than loop exit cond. */
364 gimple *stmt = STMT_VINFO_STMT (stmt_info);
365 if (is_ctrl_stmt (stmt)
366 && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
367 && (!loop->inner || gimple_bb (stmt)->loop_father == loop))
368 *relevant = vect_used_in_scope;
370 /* changing memory. */
371 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
372 if (gimple_vdef (stmt_info->stmt)
373 && !gimple_clobber_p (stmt_info->stmt))
375 if (dump_enabled_p ())
376 dump_printf_loc (MSG_NOTE, vect_location,
377 "vec_stmt_relevant_p: stmt has vdefs.\n");
378 *relevant = vect_used_in_scope;
381 /* uses outside the loop. */
382 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
384 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
386 basic_block bb = gimple_bb (USE_STMT (use_p));
387 if (!flow_bb_inside_loop_p (loop, bb))
389 if (is_gimple_debug (USE_STMT (use_p)))
390 continue;
392 if (dump_enabled_p ())
393 dump_printf_loc (MSG_NOTE, vect_location,
394 "vec_stmt_relevant_p: used out of loop.\n");
396 /* We expect all such uses to be in the loop exit phis
397 (because of loop closed form) */
398 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
400 *live_p = true;
405 /* Check if it is an induction with multiple exits. In this case there will
406 be a use later, after peeling, which is needed for the alternate exit. */
407 if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
408 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
410 if (dump_enabled_p ())
411 dump_printf_loc (MSG_NOTE, vect_location,
412 "vec_stmt_relevant_p: induction forced for "
413 "early break.\n");
414 LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info);
415 *live_p = true;
419 if (*live_p && *relevant == vect_unused_in_scope
420 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
422 if (dump_enabled_p ())
423 dump_printf_loc (MSG_NOTE, vect_location,
424 "vec_stmt_relevant_p: stmt live but not relevant.\n");
425 *relevant = vect_used_only_live;
428 return (*live_p || *relevant);
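/* As a concrete illustration (not from a real testcase):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   <-- has a vdef, so relevant in the loop
         s = b[i];          <-- s used after the loop, so live
       }

   The store becomes vect_used_in_scope, while the statement feeding the
   use outside the loop is only forced to vect_used_only_live (unless it
   is simple and all of its uses are invariant).  */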
432 /* Function exist_non_indexing_operands_for_use_p
434 USE is one of the uses attached to STMT_INFO. Check if USE is
435 used in STMT_INFO for anything other than indexing an array. */
437 static bool
438 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
440 tree operand;
442 /* USE corresponds to some operand in STMT. If there is no data
443 reference in STMT, then any operand that corresponds to USE
444 is not indexing an array. */
445 if (!STMT_VINFO_DATA_REF (stmt_info))
446 return true;
448 /* STMT has a data_ref. FORNOW this means that it is one of
449 the following forms:
450 -1- ARRAY_REF = var
451 -2- var = ARRAY_REF
452 (This should have been verified in analyze_data_refs).
454 'var' in the second case corresponds to a def, not a use,
455 so USE cannot correspond to any operands that are not used
456 for array indexing.
458 Therefore, all we need to check is if STMT falls into the
459 first case, and whether var corresponds to USE. */
461 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
462 if (!assign || !gimple_assign_copy_p (assign))
464 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
465 if (call && gimple_call_internal_p (call))
467 internal_fn ifn = gimple_call_internal_fn (call);
468 int mask_index = internal_fn_mask_index (ifn);
469 if (mask_index >= 0
470 && use == gimple_call_arg (call, mask_index))
471 return true;
472 int stored_value_index = internal_fn_stored_value_index (ifn);
473 if (stored_value_index >= 0
474 && use == gimple_call_arg (call, stored_value_index))
475 return true;
476 if (internal_gather_scatter_fn_p (ifn)
477 && use == gimple_call_arg (call, 1))
478 return true;
480 return false;
483 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
484 return false;
485 operand = gimple_assign_rhs1 (assign);
486 if (TREE_CODE (operand) != SSA_NAME)
487 return false;
489 if (operand == use)
490 return true;
492 return false;
497 /* Function process_use.
499 Inputs:
500 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
501 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
502 that defined USE. This is done by calling mark_relevant and passing it
503 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
504 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
505 be performed.
507 Outputs:
508 Generally, LIVE_P and RELEVANT are used to define the liveness and
509 relevance info of the DEF_STMT of this USE:
510 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
511 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
512 Exceptions:
513 - case 1: If USE is used only for address computations (e.g. array indexing),
514 which does not need to be directly vectorized, then the liveness/relevance
515 of the respective DEF_STMT is left unchanged.
516 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
517 we skip DEF_STMT because it has already been processed.
518 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
519 "relevant" will be modified accordingly.
521 Return true if everything is as expected. Return false otherwise. */
523 static opt_result
524 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
525 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
526 bool force)
528 stmt_vec_info dstmt_vinfo;
529 enum vect_def_type dt;
531 /* case 1: we are only interested in uses that need to be vectorized. Uses
532 that are used for address computation are not considered relevant. */
533 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
534 return opt_result::success ();
536 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
537 return opt_result::failure_at (stmt_vinfo->stmt,
538 "not vectorized:"
539 " unsupported use in stmt.\n");
541 if (!dstmt_vinfo)
542 return opt_result::success ();
544 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
545 basic_block bb = gimple_bb (stmt_vinfo->stmt);
547 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
548 We have to force the stmt live since the epilogue loop needs it to
549 continue computing the reduction. */
550 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
551 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
552 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
553 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
554 && bb->loop_father == def_bb->loop_father)
556 if (dump_enabled_p ())
557 dump_printf_loc (MSG_NOTE, vect_location,
558 "reduc-stmt defining reduc-phi in the same nest.\n");
559 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
560 return opt_result::success ();
563 /* case 3a: outer-loop stmt defining an inner-loop stmt:
564 outer-loop-header-bb:
565 d = dstmt_vinfo
566 inner-loop:
567 stmt # use (d)
568 outer-loop-tail-bb:
569 ... */
570 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
572 if (dump_enabled_p ())
573 dump_printf_loc (MSG_NOTE, vect_location,
574 "outer-loop def-stmt defining inner-loop stmt.\n");
576 switch (relevant)
578 case vect_unused_in_scope:
579 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
580 vect_used_in_scope : vect_unused_in_scope;
581 break;
583 case vect_used_in_outer_by_reduction:
584 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
585 relevant = vect_used_by_reduction;
586 break;
588 case vect_used_in_outer:
589 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
590 relevant = vect_used_in_scope;
591 break;
593 case vect_used_in_scope:
594 break;
596 default:
597 gcc_unreachable ();
601 /* case 3b: inner-loop stmt defining an outer-loop stmt:
602 outer-loop-header-bb:
604 inner-loop:
605 d = dstmt_vinfo
606 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
607 stmt # use (d) */
608 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
610 if (dump_enabled_p ())
611 dump_printf_loc (MSG_NOTE, vect_location,
612 "inner-loop def-stmt defining outer-loop stmt.\n");
614 switch (relevant)
616 case vect_unused_in_scope:
617 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
618 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
619 vect_used_in_outer_by_reduction : vect_unused_in_scope;
620 break;
622 case vect_used_by_reduction:
623 case vect_used_only_live:
624 relevant = vect_used_in_outer_by_reduction;
625 break;
627 case vect_used_in_scope:
628 relevant = vect_used_in_outer;
629 break;
631 default:
632 gcc_unreachable ();
635 /* We are also not interested in uses on loop PHI backedges that are
636 inductions. Otherwise we'll needlessly vectorize the IV increment
637 and cause hybrid SLP for SLP inductions. Unless the PHI is live
638 of course. */
639 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
640 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
641 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
642 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
643 loop_latch_edge (bb->loop_father))
644 == use))
646 if (dump_enabled_p ())
647 dump_printf_loc (MSG_NOTE, vect_location,
648 "induction value on backedge.\n");
649 return opt_result::success ();
653 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
654 return opt_result::success ();
658 /* Function vect_mark_stmts_to_be_vectorized.
660 Not all stmts in the loop need to be vectorized. For example:
662 for i...
663 for j...
664 1. T0 = i + j
665 2. T1 = a[T0]
667 3. j = j + 1
669 Stmts 1 and 3 do not need to be vectorized, because loop control and
670 addressing of vectorized data-refs are handled differently.
672 This pass detects such stmts. */
674 opt_result
675 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
677 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
678 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
679 unsigned int nbbs = loop->num_nodes;
680 gimple_stmt_iterator si;
681 unsigned int i;
682 basic_block bb;
683 bool live_p;
684 enum vect_relevant relevant;
686 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
688 auto_vec<stmt_vec_info, 64> worklist;
690 /* 1. Init worklist. */
691 for (i = 0; i < nbbs; i++)
693 bb = bbs[i];
694 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
696 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
697 if (dump_enabled_p ())
698 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
699 phi_info->stmt);
701 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
702 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
704 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
706 if (is_gimple_debug (gsi_stmt (si)))
707 continue;
708 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
709 if (dump_enabled_p ())
710 dump_printf_loc (MSG_NOTE, vect_location,
711 "init: stmt relevant? %G", stmt_info->stmt);
713 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
714 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
718 /* 2. Process_worklist */
719 while (worklist.length () > 0)
721 use_operand_p use_p;
722 ssa_op_iter iter;
724 stmt_vec_info stmt_vinfo = worklist.pop ();
725 if (dump_enabled_p ())
726 dump_printf_loc (MSG_NOTE, vect_location,
727 "worklist: examine stmt: %G", stmt_vinfo->stmt);
729 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
730 (DEF_STMT) as relevant/irrelevant according to the relevance property
731 of STMT. */
732 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
734 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
735 propagated as is to the DEF_STMTs of its USEs.
737 One exception is when STMT has been identified as defining a reduction
738 variable; in this case we set the relevance to vect_used_by_reduction.
739 This is because we distinguish between two kinds of relevant stmts -
740 those that are used by a reduction computation, and those that are
741 (also) used by a regular computation. This allows us later on to
742 identify stmts that are used solely by a reduction, and therefore the
743 order of the results that they produce does not have to be kept. */
745 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
747 case vect_reduction_def:
748 gcc_assert (relevant != vect_unused_in_scope);
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_in_scope
751 && relevant != vect_used_by_reduction
752 && relevant != vect_used_only_live)
753 return opt_result::failure_at
754 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
755 break;
757 case vect_nested_cycle:
758 if (relevant != vect_unused_in_scope
759 && relevant != vect_used_in_outer_by_reduction
760 && relevant != vect_used_in_outer)
761 return opt_result::failure_at
762 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
763 break;
765 case vect_double_reduction_def:
766 if (relevant != vect_unused_in_scope
767 && relevant != vect_used_by_reduction
768 && relevant != vect_used_only_live)
769 return opt_result::failure_at
770 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
771 break;
773 default:
774 break;
777 if (is_pattern_stmt_p (stmt_vinfo))
779 /* Pattern statements are not inserted into the code, so
780 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
781 have to scan the RHS or function arguments instead. */
782 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
784 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
785 tree op = gimple_assign_rhs1 (assign);
787 i = 1;
788 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
790 opt_result res
791 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
792 loop_vinfo, relevant, &worklist, false);
793 if (!res)
794 return res;
795 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
796 loop_vinfo, relevant, &worklist, false);
797 if (!res)
798 return res;
799 i = 2;
801 for (; i < gimple_num_ops (assign); i++)
803 op = gimple_op (assign, i);
804 if (TREE_CODE (op) == SSA_NAME)
806 opt_result res
807 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
808 &worklist, false);
809 if (!res)
810 return res;
814 else if (gcond *cond = dyn_cast <gcond *> (stmt_vinfo->stmt))
816 tree_code rhs_code = gimple_cond_code (cond);
817 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
818 opt_result res
819 = process_use (stmt_vinfo, gimple_cond_lhs (cond),
820 loop_vinfo, relevant, &worklist, false);
821 if (!res)
822 return res;
823 res = process_use (stmt_vinfo, gimple_cond_rhs (cond),
824 loop_vinfo, relevant, &worklist, false);
825 if (!res)
826 return res;
828 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
830 for (i = 0; i < gimple_call_num_args (call); i++)
832 tree arg = gimple_call_arg (call, i);
833 opt_result res
834 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
835 &worklist, false);
836 if (!res)
837 return res;
840 else
841 gcc_unreachable ();
843 else
844 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
846 tree op = USE_FROM_PTR (use_p);
847 opt_result res
848 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
849 &worklist, false);
850 if (!res)
851 return res;
854 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
856 gather_scatter_info gs_info;
857 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
858 gcc_unreachable ();
859 opt_result res
860 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
861 &worklist, true);
862 if (!res)
864 if (fatal)
865 *fatal = false;
866 return res;
869 } /* while worklist */
871 return opt_result::success ();
874 /* Function vect_model_simple_cost.
876 Models cost for simple operations, i.e. those that only emit ncopies of a
877 single op. Right now, this does not account for multiple insns that could
878 be generated for the single vector op. We will handle that shortly. */
880 static void
881 vect_model_simple_cost (vec_info *,
882 stmt_vec_info stmt_info, int ncopies,
883 enum vect_def_type *dt,
884 int ndts,
885 slp_tree node,
886 stmt_vector_for_cost *cost_vec,
887 vect_cost_for_stmt kind = vector_stmt)
889 int inside_cost = 0, prologue_cost = 0;
891 gcc_assert (cost_vec != NULL);
893 /* ??? Somehow we need to fix this at the callers. */
894 if (node)
895 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
897 if (!node)
898 /* Cost the "broadcast" of a scalar operand in to a vector operand.
899 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
900 cost model. */
901 for (int i = 0; i < ndts; i++)
902 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
903 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
904 stmt_info, 0, vect_prologue);
906 /* Pass the inside-of-loop statements to the target-specific cost model. */
907 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
908 stmt_info, 0, vect_body);
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE, vect_location,
912 "vect_model_simple_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost, prologue_cost);
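/* For instance, in the non-SLP path an addition of a loop-variant
   operand and a loop-invariant operand with NCOPIES == 2 records one
   scalar_to_vec prologue cost for broadcasting the invariant, plus
   vector_stmt costs for the two copies in the loop body (figures are
   illustrative).  */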
917 /* Model cost for type demotion and promotion operations. PWR is
918 normally zero for single-step promotions and demotions. It will be
919 one if two-step promotion/demotion is required, and so on. NCOPIES
920 is the number of vector results (and thus number of instructions)
921 for the narrowest end of the operation chain. Each additional
922 step doubles the number of instructions required. If WIDEN_ARITH
923 is true the stmt is doing widening arithmetic. */
925 static void
926 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
927 enum vect_def_type *dt,
928 unsigned int ncopies, int pwr,
929 stmt_vector_for_cost *cost_vec,
930 bool widen_arith)
932 int i;
933 int inside_cost = 0, prologue_cost = 0;
935 for (i = 0; i < pwr + 1; i++)
937 inside_cost += record_stmt_cost (cost_vec, ncopies,
938 widen_arith
939 ? vector_stmt : vec_promote_demote,
940 stmt_info, 0, vect_body);
941 ncopies *= 2;
944 /* FORNOW: Assuming a maximum of 2 args per stmt. */
945 for (i = 0; i < 2; i++)
946 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
947 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
948 stmt_info, 0, vect_prologue);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_promotion_demotion_cost: inside_cost = %d, "
953 "prologue_cost = %d .\n", inside_cost, prologue_cost);
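/* For example, with PWR == 2 and NCOPIES == 1 the loop above records
   1 + 2 + 4 = 7 statements in the loop body, reflecting the doubling
   of instructions at each promotion/demotion step.  */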
956 /* Returns true if the current function returns DECL. */
958 static bool
959 cfun_returns (tree decl)
961 edge_iterator ei;
962 edge e;
963 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
965 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
966 if (!ret)
967 continue;
968 if (gimple_return_retval (ret) == decl)
969 return true;
970 /* We often end up with an aggregate copy to the result decl,
971 handle that case as well. First skip intermediate clobbers
972 though. */
973 gimple *def = ret;
974 do
976 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
978 while (gimple_clobber_p (def));
979 if (is_a <gassign *> (def)
980 && gimple_assign_lhs (def) == gimple_return_retval (ret)
981 && gimple_assign_rhs1 (def) == decl)
982 return true;
984 return false;
987 /* Calculate cost of DR's memory access. */
988 void
989 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
990 dr_alignment_support alignment_support_scheme,
991 int misalignment,
992 unsigned int *inside_cost,
993 stmt_vector_for_cost *body_cost_vec)
995 switch (alignment_support_scheme)
997 case dr_aligned:
999 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1000 vector_store, stmt_info, 0,
1001 vect_body);
1003 if (dump_enabled_p ())
1004 dump_printf_loc (MSG_NOTE, vect_location,
1005 "vect_model_store_cost: aligned.\n");
1006 break;
1009 case dr_unaligned_supported:
1011 /* Here, we assign an additional cost for the unaligned store. */
1012 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1013 unaligned_store, stmt_info,
1014 misalignment, vect_body);
1015 if (dump_enabled_p ())
1016 dump_printf_loc (MSG_NOTE, vect_location,
1017 "vect_model_store_cost: unaligned supported by "
1018 "hardware.\n");
1019 break;
1022 case dr_unaligned_unsupported:
1024 *inside_cost = VECT_MAX_COST;
1026 if (dump_enabled_p ())
1027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1028 "vect_model_store_cost: unsupported access.\n");
1029 break;
1032 default:
1033 gcc_unreachable ();
1037 /* Calculate cost of DR's memory access. */
1038 void
1039 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1040 dr_alignment_support alignment_support_scheme,
1041 int misalignment,
1042 bool add_realign_cost, unsigned int *inside_cost,
1043 unsigned int *prologue_cost,
1044 stmt_vector_for_cost *prologue_cost_vec,
1045 stmt_vector_for_cost *body_cost_vec,
1046 bool record_prologue_costs)
1048 switch (alignment_support_scheme)
1050 case dr_aligned:
1052 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1053 stmt_info, 0, vect_body);
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_NOTE, vect_location,
1057 "vect_model_load_cost: aligned.\n");
1059 break;
1061 case dr_unaligned_supported:
1063 /* Here, we assign an additional cost for the unaligned load. */
1064 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1065 unaligned_load, stmt_info,
1066 misalignment, vect_body);
1068 if (dump_enabled_p ())
1069 dump_printf_loc (MSG_NOTE, vect_location,
1070 "vect_model_load_cost: unaligned supported by "
1071 "hardware.\n");
1073 break;
1075 case dr_explicit_realign:
1077 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1078 vector_load, stmt_info, 0, vect_body);
1079 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1080 vec_perm, stmt_info, 0, vect_body);
1082 /* FIXME: If the misalignment remains fixed across the iterations of
1083 the containing loop, the following cost should be added to the
1084 prologue costs. */
1085 if (targetm.vectorize.builtin_mask_for_load)
1086 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1087 stmt_info, 0, vect_body);
1089 if (dump_enabled_p ())
1090 dump_printf_loc (MSG_NOTE, vect_location,
1091 "vect_model_load_cost: explicit realign\n");
1093 break;
1095 case dr_explicit_realign_optimized:
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_NOTE, vect_location,
1099 "vect_model_load_cost: unaligned software "
1100 "pipelined.\n");
1102 /* Unaligned software pipeline has a load of an address, an initial
1103 load, and possibly a mask operation to "prime" the loop. However,
1104 if this is an access in a group of loads, which provide grouped
1105 access, then the above cost should only be considered for one
1106 access in the group. Inside the loop, there is a load op
1107 and a realignment op. */
1109 if (add_realign_cost && record_prologue_costs)
1111 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1112 vector_stmt, stmt_info,
1113 0, vect_prologue);
1114 if (targetm.vectorize.builtin_mask_for_load)
1115 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1116 vector_stmt, stmt_info,
1117 0, vect_prologue);
1120 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1121 stmt_info, 0, vect_body);
1122 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1123 stmt_info, 0, vect_body);
1125 if (dump_enabled_p ())
1126 dump_printf_loc (MSG_NOTE, vect_location,
1127 "vect_model_load_cost: explicit realign optimized"
1128 "\n");
1130 break;
1133 case dr_unaligned_unsupported:
1135 *inside_cost = VECT_MAX_COST;
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1139 "vect_model_load_cost: unsupported access.\n");
1140 break;
1143 default:
1144 gcc_unreachable ();
1148 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1149 the loop preheader for the vectorized stmt STMT_VINFO. */
1151 static void
1152 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1153 gimple_stmt_iterator *gsi)
1155 if (gsi)
1156 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1157 else
1158 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1160 if (dump_enabled_p ())
1161 dump_printf_loc (MSG_NOTE, vect_location,
1162 "created new init_stmt: %G", new_stmt);
1165 /* Function vect_init_vector.
1167 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1168 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1169 vector type a vector with all elements equal to VAL is created first.
1170 Place the initialization at GSI if it is not NULL. Otherwise, place the
1171 initialization at the loop preheader.
1172 Return the DEF of INIT_STMT.
1173 It will be used in the vectorization of STMT_INFO. */
1175 tree
1176 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1177 gimple_stmt_iterator *gsi)
1179 gimple *init_stmt;
1180 tree new_temp;
1182 /* We abuse this function to push something to an SSA name with initial 'val'. */
1183 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1185 gcc_assert (VECTOR_TYPE_P (type));
1186 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1188 /* Scalar boolean value should be transformed into
1189 all zeros or all ones value before building a vector. */
1190 if (VECTOR_BOOLEAN_TYPE_P (type))
1192 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1193 tree false_val = build_zero_cst (TREE_TYPE (type));
1195 if (CONSTANT_CLASS_P (val))
1196 val = integer_zerop (val) ? false_val : true_val;
1197 else
1199 new_temp = make_ssa_name (TREE_TYPE (type));
1200 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1201 val, true_val, false_val);
1202 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1203 val = new_temp;
1206 else
1208 gimple_seq stmts = NULL;
1209 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1210 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1211 TREE_TYPE (type), val);
1212 else
1213 /* ??? Condition vectorization expects us to do
1214 promotion of invariant/external defs. */
1215 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1216 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1217 !gsi_end_p (gsi2); )
1219 init_stmt = gsi_stmt (gsi2);
1220 gsi_remove (&gsi2, false);
1221 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1225 val = build_vector_from_val (type, val);
1228 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1229 init_stmt = gimple_build_assign (new_temp, val);
1230 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1231 return new_temp;
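/* For instance, creating a V4SF invariant from scalar x_1 at the loop
   preheader produces something like (SSA names are illustrative):

     cst_23 = {x_1, x_1, x_1, x_1};

   and cst_23 is returned for use by the vectorized statements.  */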
1235 /* Function vect_get_vec_defs_for_operand.
1237 OP is an operand in STMT_VINFO. This function returns a vector of
1238 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1240 In the case that OP is an SSA_NAME which is defined in the loop, then
1241 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1243 In case OP is an invariant or constant, a new stmt that creates a vector def
1244 needs to be introduced. VECTYPE may be used to specify a required type for
1245 vector invariant. */
1247 void
1248 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1249 unsigned ncopies,
1250 tree op, vec<tree> *vec_oprnds, tree vectype)
1252 gimple *def_stmt;
1253 enum vect_def_type dt;
1254 bool is_simple_use;
1255 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "vect_get_vec_defs_for_operand: %T\n", op);
1261 stmt_vec_info def_stmt_info;
1262 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1263 &def_stmt_info, &def_stmt);
1264 gcc_assert (is_simple_use);
1265 if (def_stmt && dump_enabled_p ())
1266 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1268 vec_oprnds->create (ncopies);
1269 if (dt == vect_constant_def || dt == vect_external_def)
1271 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1272 tree vector_type;
1274 if (vectype)
1275 vector_type = vectype;
1276 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1277 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1278 vector_type = truth_type_for (stmt_vectype);
1279 else
1280 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1282 gcc_assert (vector_type);
1283 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1284 while (ncopies--)
1285 vec_oprnds->quick_push (vop);
1287 else
1289 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1290 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1291 for (unsigned i = 0; i < ncopies; ++i)
1292 vec_oprnds->quick_push (gimple_get_lhs
1293 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1298 /* Get vectorized definitions for OP0 and OP1. */
1300 void
1301 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1302 unsigned ncopies,
1303 tree op0, tree vectype0, vec<tree> *vec_oprnds0,
1304 tree op1, tree vectype1, vec<tree> *vec_oprnds1,
1305 tree op2, tree vectype2, vec<tree> *vec_oprnds2,
1306 tree op3, tree vectype3, vec<tree> *vec_oprnds3)
1308 if (slp_node)
1310 if (op0)
1311 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1312 if (op1)
1313 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1314 if (op2)
1315 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1316 if (op3)
1317 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1319 else
1321 if (op0)
1322 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1323 op0, vec_oprnds0, vectype0);
1324 if (op1)
1325 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1326 op1, vec_oprnds1, vectype1);
1327 if (op2)
1328 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1329 op2, vec_oprnds2, vectype2);
1330 if (op3)
1331 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1332 op3, vec_oprnds3, vectype3);
1336 void
1337 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1338 unsigned ncopies,
1339 tree op0, vec<tree> *vec_oprnds0,
1340 tree op1, vec<tree> *vec_oprnds1,
1341 tree op2, vec<tree> *vec_oprnds2,
1342 tree op3, vec<tree> *vec_oprnds3)
1344 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1345 op0, NULL_TREE, vec_oprnds0,
1346 op1, NULL_TREE, vec_oprnds1,
1347 op2, NULL_TREE, vec_oprnds2,
1348 op3, NULL_TREE, vec_oprnds3);
1351 /* Helper function called by vect_finish_replace_stmt and
1352 vect_finish_stmt_generation. Set the location of the new
1353 statement and, if it can throw, add it to STMT_INFO's EH region. */
1355 static void
1356 vect_finish_stmt_generation_1 (vec_info *,
1357 stmt_vec_info stmt_info, gimple *vec_stmt)
1359 if (dump_enabled_p ())
1360 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1362 if (stmt_info)
1364 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1366 /* While EH edges will generally prevent vectorization, stmt might
1367 e.g. be in a must-not-throw region. Ensure newly created stmts
1368 that could throw are part of the same region. */
1369 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1370 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1371 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1373 else
1374 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1377 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1378 which sets the same scalar result as STMT_INFO did, and record
1379 VEC_STMT's location and EH information. */
1381 void
1382 vect_finish_replace_stmt (vec_info *vinfo,
1383 stmt_vec_info stmt_info, gimple *vec_stmt)
1385 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1386 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1388 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1389 gsi_replace (&gsi, vec_stmt, true);
1391 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1394 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1395 before *GSI, updating virtual SSA operands where necessary. */
1397 void
1398 vect_finish_stmt_generation (vec_info *vinfo,
1399 stmt_vec_info stmt_info, gimple *vec_stmt,
1400 gimple_stmt_iterator *gsi)
1402 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1404 if (!gsi_end_p (*gsi)
1405 && gimple_has_mem_ops (vec_stmt))
1407 gimple *at_stmt = gsi_stmt (*gsi);
1408 tree vuse = gimple_vuse (at_stmt);
1409 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1411 tree vdef = gimple_vdef (at_stmt);
1412 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1413 gimple_set_modified (vec_stmt, true);
1414 /* If we have an SSA vuse and insert a store, update virtual
1415 SSA form to avoid triggering the renamer. Do so only
1416 if we can easily see all uses - which is what almost always
1417 happens with the way vectorized stmts are inserted. */
1418 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1419 && ((is_gimple_assign (vec_stmt)
1420 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1421 || (is_gimple_call (vec_stmt)
1422 && (!(gimple_call_flags (vec_stmt)
1423 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1424 || (gimple_call_lhs (vec_stmt)
1425 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1427 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1428 gimple_set_vdef (vec_stmt, new_vdef);
1429 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1433 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1434 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
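/* For example, when a vector store is inserted before a scalar store
   whose virtual operands are .MEM_5 -> .MEM_6, the new store is given
   vuse .MEM_5 and a fresh vdef, and the scalar store's vuse is rewritten
   to that fresh name, keeping virtual SSA form valid without running
   the renamer (names and numbers are illustrative).  */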
1437 /* We want to vectorize a call to combined function CFN with function
1438 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1439 as the types of all inputs. Check whether this is possible using
1440 an internal function, returning its code if so or IFN_LAST if not. */
1442 static internal_fn
1443 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1444 tree vectype_out, tree vectype_in)
1446 internal_fn ifn;
1447 if (internal_fn_p (cfn))
1448 ifn = as_internal_fn (cfn);
1449 else
1450 ifn = associated_internal_fn (fndecl);
1451 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1453 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1454 if (info.vectorizable)
1456 bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1457 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1458 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1460 /* The type sizes of vectype_in and vectype_out should be
1461 exactly the same when vectype_out does not participate in the
1462 optab query; there is no size restriction when vectype_out
1463 is part of the optab query. */
1464 if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1465 return IFN_LAST;
1467 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1468 OPTIMIZE_FOR_SPEED))
1469 return ifn;
1472 return IFN_LAST;
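/* For example, a scalar call to sqrtf reaches here as a combined
   function; associated_internal_fn maps it to IFN_SQRT, and with V4SF
   as both VECTYPE_IN and VECTYPE_OUT the result is IFN_SQRT if the
   target provides the corresponding optab for that mode, IFN_LAST
   otherwise (an illustrative case, not an exhaustive description).  */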
1476 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1477 gimple_stmt_iterator *);
1479 /* Check whether a load or store statement in the loop described by
1480 LOOP_VINFO is possible in a loop using partial vectors. This is
1481 testing whether the vectorizer pass has the appropriate support,
1482 as well as whether the target does.
1484 VLS_TYPE says whether the statement is a load or store and VECTYPE
1485 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1486 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1487 says how the load or store is going to be implemented and GROUP_SIZE
1488 is the number of load or store statements in the containing group.
1489 If the access is a gather load or scatter store, GS_INFO describes
1490 its arguments. If the load or store is conditional, SCALAR_MASK is the
1491 condition under which it occurs.
1493 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1494 vectors is not supported, otherwise record the required rgroup control
1495 types. */
1497 static void
1498 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1499 slp_tree slp_node,
1500 vec_load_store_type vls_type,
1501 int group_size,
1502 vect_memory_access_type
1503 memory_access_type,
1504 gather_scatter_info *gs_info,
1505 tree scalar_mask)
1507 /* Invariant loads need no special support. */
1508 if (memory_access_type == VMAT_INVARIANT)
1509 return;
1511 unsigned int nvectors = vect_get_num_copies (loop_vinfo, slp_node, vectype);
1512 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1513 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1514 machine_mode vecmode = TYPE_MODE (vectype);
1515 bool is_load = (vls_type == VLS_LOAD);
1516 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1518 if (slp_node)
1519 nvectors /= group_size;
1520 internal_fn ifn
1521 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1522 : vect_store_lanes_supported (vectype, group_size, true));
1523 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1524 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1525 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1526 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1527 scalar_mask);
1528 else
1530 if (dump_enabled_p ())
1531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1532 "can't operate on partial vectors because"
1533 " the target doesn't have an appropriate"
1534 " load/store-lanes instruction.\n");
1535 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1537 return;
1540 if (memory_access_type == VMAT_GATHER_SCATTER)
1542 internal_fn ifn = (is_load
1543 ? IFN_MASK_GATHER_LOAD
1544 : IFN_MASK_SCATTER_STORE);
1545 internal_fn len_ifn = (is_load
1546 ? IFN_MASK_LEN_GATHER_LOAD
1547 : IFN_MASK_LEN_SCATTER_STORE);
1548 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1549 gs_info->memory_type,
1550 gs_info->offset_vectype,
1551 gs_info->scale))
1552 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1553 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1554 gs_info->memory_type,
1555 gs_info->offset_vectype,
1556 gs_info->scale))
1557 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1558 scalar_mask);
1559 else
1561 if (dump_enabled_p ())
1562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1563 "can't operate on partial vectors because"
1564 " the target doesn't have an appropriate"
1565 " gather load or scatter store instruction.\n");
1566 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1568 return;
1571 if (memory_access_type != VMAT_CONTIGUOUS
1572 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1574 /* Element X of the data must come from iteration i * VF + X of the
1575 scalar loop. We need more work to support other mappings. */
1576 if (dump_enabled_p ())
1577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1578 "can't operate on partial vectors because an"
1579 " access isn't contiguous.\n");
1580 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1581 return;
1584 if (!VECTOR_MODE_P (vecmode))
1586 if (dump_enabled_p ())
1587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1588 "can't operate on partial vectors when emulating"
1589 " vector operations.\n");
1590 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1591 return;
1594 /* We might load more scalars than we need for permuting SLP loads.
1595 We checked in get_group_load_store_type that the extra elements
1596 don't leak into a new vector. */
1597 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1599 unsigned int nvectors;
1600 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1601 return nvectors;
1602 gcc_unreachable ();
1605 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1606 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1607 machine_mode mask_mode;
1608 machine_mode vmode;
1609 bool using_partial_vectors_p = false;
1610 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1612 nvectors = group_memory_nvectors (group_size * vf, nunits);
1613 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1614 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1615 using_partial_vectors_p = true;
1617 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1618 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1620 nvectors = group_memory_nvectors (group_size * vf, nunits);
1621 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1622 using_partial_vectors_p = true;
1625 if (!using_partial_vectors_p)
1627 if (dump_enabled_p ())
1628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1629 "can't operate on partial vectors because the"
1630 " target doesn't have the appropriate partial"
1631 " vectorization load or store.\n");
1632 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
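/* As an illustrative summary: for a contiguous conditional load of
   V8HI, a target with len_load support gets a loop length recorded
   (that check comes first), a target with only vec_mask_load_store
   support gets a loop mask recorded, and if neither exists the loop
   gives up on using partial vectors altogether.  */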
1636 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1637 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1638 that needs to be applied to all loads and stores in a vectorized loop.
1639 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1640 otherwise return VEC_MASK & LOOP_MASK.
1642 MASK_TYPE is the type of both masks. If new statements are needed,
1643 insert them before GSI. */
1645 tree
1646 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1647 tree vec_mask, gimple_stmt_iterator *gsi)
1649 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1650 if (!loop_mask)
1651 return vec_mask;
1653 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1655 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1656 return vec_mask;
1658 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1659 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1660 vec_mask, loop_mask);
1662 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1663 return and_res;
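/* For example, for a conditional access under mask _31 in a
   fully-masked loop with loop mask loop_mask_8, this emits (names are
   illustrative):

     vec_mask_and_35 = _31 & loop_mask_8;

   unless _31 is already known to incorporate loop_mask_8, in which
   case it is returned unchanged.  */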
1666 /* Determine whether we can use a gather load or scatter store to vectorize
1667 strided load or store STMT_INFO by truncating the current offset to a
1668 smaller width. We need to be able to construct an offset vector:
1670 { 0, X, X*2, X*3, ... }
1672 without loss of precision, where X is STMT_INFO's DR_STEP.
1674 Return true if this is possible, describing the gather load or scatter
1675 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1677 static bool
1678 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1679 loop_vec_info loop_vinfo, bool masked_p,
1680 gather_scatter_info *gs_info)
1682 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1683 data_reference *dr = dr_info->dr;
1684 tree step = DR_STEP (dr);
1685 if (TREE_CODE (step) != INTEGER_CST)
1687 /* ??? Perhaps we could use range information here? */
1688 if (dump_enabled_p ())
1689 dump_printf_loc (MSG_NOTE, vect_location,
1690 "cannot truncate variable step.\n");
1691 return false;
1694 /* Get the number of bits in an element. */
1695 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1696 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1697 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1699 /* Set COUNT to the upper limit on the number of elements - 1.
1700 Start with the maximum vectorization factor. */
1701 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1703 /* Try lowering COUNT to the number of scalar latch iterations. */
1704 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1705 widest_int max_iters;
1706 if (max_loop_iterations (loop, &max_iters)
1707 && max_iters < count)
1708 count = max_iters.to_shwi ();
1710 /* Try scales of 1 and the element size. */
1711 unsigned int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1712 wi::overflow_type overflow = wi::OVF_NONE;
1713 for (int i = 0; i < 2; ++i)
1715 unsigned int scale = scales[i];
1716 widest_int factor;
1717 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1718 continue;
1720 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1721 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1722 if (overflow)
1723 continue;
1724 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1725 unsigned int min_offset_bits = wi::min_precision (range, sign);
1727 /* Find the narrowest viable offset type. */
1728 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1729 tree offset_type = build_nonstandard_integer_type (offset_bits,
1730 sign == UNSIGNED);
1732 /* See whether the target supports the operation with an offset
1733 no narrower than OFFSET_TYPE. */
1734 tree memory_type = TREE_TYPE (DR_REF (dr));
1735 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1736 vectype, memory_type, offset_type, scale,
1737 &gs_info->ifn, &gs_info->offset_vectype)
1738 || gs_info->ifn == IFN_LAST)
1739 continue;
1741 gs_info->decl = NULL_TREE;
1742 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1743 but we don't need to store that here. */
1744 gs_info->base = NULL_TREE;
1745 gs_info->element_type = TREE_TYPE (vectype);
1746 gs_info->offset = fold_convert (offset_type, step);
1747 gs_info->offset_dt = vect_constant_def;
1748 gs_info->scale = scale;
1749 gs_info->memory_type = memory_type;
1750 return true;
1753 if (overflow && dump_enabled_p ())
1754 dump_printf_loc (MSG_NOTE, vect_location,
1755 "truncating gather/scatter offset to %d bits"
1756 " might change its value.\n", element_bits);
1758 return false;
1761 /* Return true if we can use gather/scatter internal functions to
1762 vectorize STMT_INFO, which is a grouped or strided load or store.
1763 MASKED_P is true if the load or store is conditional. When returning
1764 true, fill in GS_INFO with the information required to perform the
1765 operation. */
1767 static bool
1768 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1769 loop_vec_info loop_vinfo, bool masked_p,
1770 gather_scatter_info *gs_info)
1772 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1773 || gs_info->ifn == IFN_LAST)
1774 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1775 masked_p, gs_info);
1777 tree old_offset_type = TREE_TYPE (gs_info->offset);
1778 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1780 gcc_assert (TYPE_PRECISION (new_offset_type)
1781 >= TYPE_PRECISION (old_offset_type));
1782 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1784 if (dump_enabled_p ())
1785 dump_printf_loc (MSG_NOTE, vect_location,
1786 "using gather/scatter for strided/grouped access,"
1787 " scale = %d\n", gs_info->scale);
1789 return true;
1792 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1793 elements with a known constant step. Return -1 if that step
1794 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1797 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1799 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1800 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1801 size_zero_node);
1804 /* If the target supports a permute mask that reverses the elements in
1805 a vector of type VECTYPE, return that mask, otherwise return null. */
1807 tree
1808 perm_mask_for_reverse (tree vectype)
1810 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1812 /* The encoding has a single stepped pattern. */
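/* For example, for V8SI the three encoded elements are { 7, 6, 5 } and the
   stepped pattern extends them to { 7, 6, 5, 4, 3, 2, 1, 0 }
   (an illustrative case with NUNITS == 8). */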
1813 vec_perm_builder sel (nunits, 1, 3);
1814 for (int i = 0; i < 3; ++i)
1815 sel.quick_push (nunits - 1 - i);
1817 vec_perm_indices indices (sel, 1, nunits);
1818 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1819 indices))
1820 return NULL_TREE;
1821 return vect_gen_perm_mask_checked (vectype, indices);
1824 /* A subroutine of get_load_store_type, with a subset of the same
1825 arguments. Handle the case where STMT_INFO is a load or store that
1826 accesses consecutive elements with a negative step. Sets *POFFSET
1827 to the offset to be applied to the DR for the first access. */
1829 static vect_memory_access_type
1830 get_negative_load_store_type (vec_info *vinfo,
1831 stmt_vec_info stmt_info, tree vectype,
1832 vec_load_store_type vls_type,
1833 unsigned int ncopies, poly_int64 *poffset)
1835 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1836 dr_alignment_support alignment_support_scheme;
1838 if (ncopies > 1)
1840 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1842 "multiple types with negative step.\n");
1843 return VMAT_ELEMENTWISE;
1846 /* For backward running DRs the first access in vectype actually is
1847 N-1 elements before the address of the DR. */
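/* For example, for V4SI with 4-byte elements this offset is
   (-4 + 1) * 4 == -12 bytes (illustrative values). */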
1848 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1849 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1851 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1852 alignment_support_scheme
1853 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1854 if (alignment_support_scheme != dr_aligned
1855 && alignment_support_scheme != dr_unaligned_supported)
1857 if (dump_enabled_p ())
1858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1859 "negative step but alignment required.\n");
1860 *poffset = 0;
1861 return VMAT_ELEMENTWISE;
1864 if (vls_type == VLS_STORE_INVARIANT)
1866 if (dump_enabled_p ())
1867 dump_printf_loc (MSG_NOTE, vect_location,
1868 "negative step with invariant source;"
1869 " no permute needed.\n");
1870 return VMAT_CONTIGUOUS_DOWN;
1873 if (!perm_mask_for_reverse (vectype))
1875 if (dump_enabled_p ())
1876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1877 "negative step and reversing not supported.\n");
1878 *poffset = 0;
1879 return VMAT_ELEMENTWISE;
1882 return VMAT_CONTIGUOUS_REVERSE;
1885 /* STMT_INFO is either a masked or unconditional store. Return the value
1886 being stored. */
1888 tree
1889 vect_get_store_rhs (stmt_vec_info stmt_info)
1891 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1893 gcc_assert (gimple_assign_single_p (assign));
1894 return gimple_assign_rhs1 (assign);
1896 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1898 internal_fn ifn = gimple_call_internal_fn (call);
1899 int index = internal_fn_stored_value_index (ifn);
1900 gcc_assert (index >= 0);
1901 return gimple_call_arg (call, index);
1903 gcc_unreachable ();
1906 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1908 This function returns a vector type which can be composed from NELTS pieces,
1909 whose type is recorded in PTYPE. VTYPE should be a vector type with the
1910 same vector size as the returned vector. It first checks whether the target
1911 supports a pieces-sized vector mode for the construction; if not, it then
1912 tries a pieces-sized scalar mode instead. It returns NULL_TREE if no
1913 suitable composition can be found.
1915 For example, for (vtype=V16QI, nelts=4), we can probably get:
1916 - V16QI with PTYPE V4QI.
1917 - V4SI with PTYPE SI.
1918 - NULL_TREE. */
1920 static tree
1921 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1923 gcc_assert (VECTOR_TYPE_P (vtype));
1924 gcc_assert (known_gt (nelts, 0U));
1926 machine_mode vmode = TYPE_MODE (vtype);
1927 if (!VECTOR_MODE_P (vmode))
1928 return NULL_TREE;
1930 /* When we are asked to compose the vector from its components let
1931 that happen directly. */
1932 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1934 *ptype = TREE_TYPE (vtype);
1935 return vtype;
1938 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1939 unsigned int pbsize;
1940 if (constant_multiple_p (vbsize, nelts, &pbsize))
1942 /* First check if vec_init optab supports construction from
1943 vector pieces directly. */
1944 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1945 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1946 machine_mode rmode;
1947 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1948 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1949 != CODE_FOR_nothing))
1951 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1952 return vtype;
1955 /* Otherwise check whether an integer type of the same piece size exists
1956 and whether the vec_init optab supports construction from it directly. */
1957 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1958 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1959 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1960 != CODE_FOR_nothing))
1962 *ptype = build_nonstandard_integer_type (pbsize, 1);
1963 return build_vector_type (*ptype, nelts);
1967 return NULL_TREE;
1970 /* A subroutine of get_load_store_type, with a subset of the same
1971 arguments. Handle the case where STMT_INFO is part of a grouped load
1972 or store.
1974 For stores, the statements in the group are all consecutive
1975 and there is no gap at the end. For loads, the statements in the
1976 group might not be consecutive; there can be gaps between statements
1977 as well as at the end. */
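/* For example (illustrative only), a group that loads a[4*i] and a[4*i+2]
   has a one-element gap between the two statements and another one-element
   gap at the end of the group. */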
1979 static bool
1980 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1981 tree vectype, slp_tree slp_node,
1982 bool masked_p, vec_load_store_type vls_type,
1983 vect_memory_access_type *memory_access_type,
1984 poly_int64 *poffset,
1985 dr_alignment_support *alignment_support_scheme,
1986 int *misalignment,
1987 gather_scatter_info *gs_info,
1988 internal_fn *lanes_ifn)
1990 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1991 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1992 stmt_vec_info first_stmt_info;
1993 unsigned int group_size;
1994 unsigned HOST_WIDE_INT gap;
1995 bool single_element_p;
1996 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1998 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1999 group_size = DR_GROUP_SIZE (first_stmt_info);
2000 gap = DR_GROUP_GAP (first_stmt_info);
2001 single_element_p = (stmt_info == first_stmt_info
2002 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2004 else
2006 first_stmt_info = stmt_info;
2007 group_size = 1;
2008 gap = 0;
2009 single_element_p = true;
2011 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2012 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2014 /* True if the vectorized statements would access beyond the last
2015 statement in the group. */
2016 bool overrun_p = false;
2018 /* True if we can cope with such overrun by peeling for gaps, so that
2019 there is at least one final scalar iteration after the vector loop. */
2020 bool can_overrun_p = (!masked_p
2021 && vls_type == VLS_LOAD
2022 && loop_vinfo
2023 && !loop->inner);
2025 /* There can only be a gap at the end of the group if the stride is
2026 known at compile time. */
2027 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2029 /* Stores can't yet have gaps. */
2030 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2032 if (slp_node)
2034 /* For SLP vectorization we directly vectorize a subchain
2035 without permutation. */
2036 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2037 first_dr_info
2038 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2039 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2040 /* Try to use consecutive accesses of as many elements as possible,
2041 separated by the stride, until we have a complete vector.
2042 Fall back to scalar accesses if that isn't possible. */
2043 *memory_access_type = VMAT_STRIDED_SLP;
2044 else
2046 int cmp = compare_step_with_zero (vinfo, stmt_info);
2047 if (cmp < 0)
2049 if (single_element_p)
2050 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2051 only correct for single element "interleaving" SLP. */
2052 *memory_access_type = get_negative_load_store_type
2053 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2054 else
2056 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2057 separated by the stride, until we have a complete vector.
2058 Fall back to scalar accesses if that isn't possible. */
2059 if (multiple_p (nunits, group_size))
2060 *memory_access_type = VMAT_STRIDED_SLP;
2061 else
2062 *memory_access_type = VMAT_ELEMENTWISE;
2065 else if (cmp == 0 && loop_vinfo)
2067 gcc_assert (vls_type == VLS_LOAD);
2068 *memory_access_type = VMAT_INVARIANT;
2069 /* Invariant accesses perform only component accesses, alignment
2070 is irrelevant for them. */
2071 *alignment_support_scheme = dr_unaligned_supported;
2073 /* Try using LOAD/STORE_LANES. */
2074 else if (slp_node->ldst_lanes
2075 && (*lanes_ifn
2076 = (vls_type == VLS_LOAD
2077 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2078 : vect_store_lanes_supported (vectype, group_size,
2079 masked_p))) != IFN_LAST)
2080 *memory_access_type = VMAT_LOAD_STORE_LANES;
2081 else
2082 *memory_access_type = VMAT_CONTIGUOUS;
2084 overrun_p = loop_vinfo && gap != 0;
2085 if (overrun_p && vls_type != VLS_LOAD)
2087 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2088 "Grouped store with gaps requires"
2089 " non-consecutive accesses\n");
2090 return false;
2092 /* An overrun is fine if the trailing elements are smaller
2093 than the alignment boundary B. Every vector access will
2094 be a multiple of B and so we are guaranteed to access a
2095 non-gap element in the same B-sized block. */
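/* E.g. with a known 16-byte alignment and 4-byte elements the bound below
   is 4, so a trailing gap of up to 3 elements stays within a 16-byte block
   that is accessed anyway (illustrative numbers). */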
2096 if (overrun_p
2097 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2098 vectype)
2099 / vect_get_scalar_dr_size (first_dr_info)))
2100 overrun_p = false;
2102 /* When we have a contiguous access across loop iterations
2103 but the access in the loop doesn't cover the full vector
2104 we can end up with no gap recorded but still excess
2105 elements accessed, see PR103116. Make sure we peel for
2106 gaps if necessary and sufficient and give up if not.
2108 If there is a combination of the access not covering the full
2109 vector and a gap recorded then we may need to peel twice. */
2110 if (loop_vinfo
2111 && (*memory_access_type == VMAT_CONTIGUOUS
2112 || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
2113 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2114 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2115 nunits))
2116 overrun_p = true;
2118 /* If the gap splits the vector in half and the target
2119 can do half-vector operations avoid the epilogue peeling
2120 by simply loading half of the vector only. Usually
2121 the construction with an upper zero half will be elided. */
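/* For example, if only the low eight elements of a V16QI access are needed,
   the access can be done as a single V8QI load; NUM below would be 2 in
   that case (illustrative only). */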
2122 dr_alignment_support alss;
2123 int misalign = dr_misalignment (first_dr_info, vectype);
2124 tree half_vtype;
2125 poly_uint64 remain;
2126 unsigned HOST_WIDE_INT tem, num;
2127 if (overrun_p
2128 && !masked_p
2129 && *memory_access_type != VMAT_LOAD_STORE_LANES
2130 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2131 vectype, misalign)))
2132 == dr_aligned
2133 || alss == dr_unaligned_supported)
2134 && can_div_trunc_p (group_size
2135 * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
2136 nunits, &tem, &remain)
2137 && (known_eq (remain, 0u)
2138 || (known_ne (remain, 0u)
2139 && constant_multiple_p (nunits, remain, &num)
2140 && (vector_vector_composition_type (vectype, num,
2141 &half_vtype)
2142 != NULL_TREE))))
2143 overrun_p = false;
2145 if (overrun_p && !can_overrun_p)
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2149 "Peeling for outer loop is not supported\n");
2150 return false;
2152 /* Peeling for gaps assumes that a single scalar iteration
2153 is enough to make sure the last vector iteration doesn't
2154 access excess elements. */
2155 if (overrun_p
2156 && (!can_div_trunc_p (group_size
2157 * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
2158 nunits, &tem, &remain)
2159 || maybe_lt (remain + group_size, nunits)))
2161 /* But peeling a single scalar iteration is enough if
2162 we can use the next power-of-two sized partial
2163 access and that is sufficiently small to be covered
2164 by the single scalar iteration. */
2165 unsigned HOST_WIDE_INT cnunits, cvf, cremain, cpart_size;
2166 if (!nunits.is_constant (&cnunits)
2167 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2168 || (((cremain = (group_size * cvf - gap) % cnunits), true)
2169 && ((cpart_size = (1 << ceil_log2 (cremain))) != cnunits)
2170 && (cremain + group_size < cpart_size
2171 || vector_vector_composition_type
2172 (vectype, cnunits / cpart_size,
2173 &half_vtype) == NULL_TREE)))
2175 if (dump_enabled_p ())
2176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2177 "peeling for gaps insufficient for "
2178 "access\n");
2179 return false;
2183 /* If this is single-element interleaving with an element
2184 distance that leaves unused vector loads around punt - we
2185 at least create very sub-optimal code in that case (and
2186 blow up memory, see PR65518). */
2187 if (loop_vinfo
2188 && *memory_access_type == VMAT_CONTIGUOUS
2189 && single_element_p
2190 && maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
2192 if (SLP_TREE_LANES (slp_node) == 1)
2194 *memory_access_type = VMAT_ELEMENTWISE;
2195 if (dump_enabled_p ())
2196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2197 "single-element interleaving not supported "
2198 "for not adjacent vector loads, using "
2199 "elementwise access\n");
2201 else
2203 if (dump_enabled_p ())
2204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2205 "single-element interleaving not supported "
2206 "for not adjacent vector loads\n");
2207 return false;
2212 else
2214 /* We can always handle this case using elementwise accesses,
2215 but see if something more efficient is available. */
2216 *memory_access_type = VMAT_ELEMENTWISE;
2218 /* If there is a gap at the end of the group then these optimizations
2219 would access excess elements in the last iteration. */
2220 bool would_overrun_p = (gap != 0);
2221 /* An overrun is fine if the trailing elements are smaller than the
2222 alignment boundary B. Every vector access will be a multiple of B
2223 and so we are guaranteed to access a non-gap element in the
2224 same B-sized block. */
2225 if (would_overrun_p
2226 && !masked_p
2227 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2228 / vect_get_scalar_dr_size (first_dr_info)))
2229 would_overrun_p = false;
2231 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2232 && (can_overrun_p || !would_overrun_p)
2233 && compare_step_with_zero (vinfo, stmt_info) > 0)
2235 /* First cope with the degenerate case of a single-element
2236 vector. */
2237 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2240 else
2242 /* Otherwise try using LOAD/STORE_LANES. */
2243 *lanes_ifn
2244 = vls_type == VLS_LOAD
2245 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2246 : vect_store_lanes_supported (vectype, group_size,
2247 masked_p);
2248 if (*lanes_ifn != IFN_LAST)
2250 *memory_access_type = VMAT_LOAD_STORE_LANES;
2251 overrun_p = would_overrun_p;
2254 /* If that fails, try using permuting loads. */
2255 else if (vls_type == VLS_LOAD
2256 ? vect_grouped_load_supported (vectype,
2257 single_element_p,
2258 group_size)
2259 : vect_grouped_store_supported (vectype, group_size))
2261 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2262 overrun_p = would_overrun_p;
2268 /* As a last resort, try using a gather load or scatter store.
2270 ??? Although the code can handle all group sizes correctly,
2271 it probably isn't a win to use separate strided accesses based
2272 on nearby locations. Or, even if it's a win over scalar code,
2273 it might not be a win over vectorizing at a lower VF, if that
2274 allows us to use contiguous accesses. */
2275 if (*memory_access_type == VMAT_ELEMENTWISE
2276 && single_element_p
2277 && loop_vinfo
2278 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2279 masked_p, gs_info))
2280 *memory_access_type = VMAT_GATHER_SCATTER;
2282 if (*memory_access_type == VMAT_GATHER_SCATTER
2283 || *memory_access_type == VMAT_ELEMENTWISE)
2285 *alignment_support_scheme = dr_unaligned_supported;
2286 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2288 else
2290 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2291 *alignment_support_scheme
2292 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2293 *misalignment);
2296 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2298 /* STMT is the leader of the group. Check the operands of all the
2299 stmts of the group. */
2300 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2301 while (next_stmt_info)
2303 tree op = vect_get_store_rhs (next_stmt_info);
2304 enum vect_def_type dt;
2305 if (!vect_is_simple_use (op, vinfo, &dt))
2307 if (dump_enabled_p ())
2308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2309 "use not simple.\n");
2310 return false;
2312 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2316 if (overrun_p)
2318 gcc_assert (can_overrun_p);
2319 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2321 "Data access with gaps requires scalar "
2322 "epilogue loop\n");
2323 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2326 return true;
2329 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2330 if there is a memory access type that the vectorized form can use,
2331 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2332 or scatters, fill in GS_INFO accordingly. In addition
2333 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2334 the target does not support the alignment scheme. *MISALIGNMENT
2335 is set according to the alignment of the access (including
2336 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2338 SLP says whether we're performing SLP rather than loop vectorization.
2339 MASKED_P is true if the statement is conditional on a vectorized mask.
2340 VECTYPE is the vector type that the vectorized statements will use.
2341 NCOPIES is the number of vector statements that will be needed. */
2343 static bool
2344 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2345 tree vectype, slp_tree slp_node,
2346 bool masked_p, vec_load_store_type vls_type,
2347 unsigned int ncopies,
2348 vect_memory_access_type *memory_access_type,
2349 poly_int64 *poffset,
2350 dr_alignment_support *alignment_support_scheme,
2351 int *misalignment,
2352 gather_scatter_info *gs_info,
2353 internal_fn *lanes_ifn)
2355 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2356 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2357 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2358 *poffset = 0;
2359 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2361 *memory_access_type = VMAT_GATHER_SCATTER;
2362 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2363 gcc_unreachable ();
2364 /* When using internal functions, we rely on pattern recognition
2365 to convert the type of the offset to the type that the target
2366 requires, with the result being a call to an internal function.
2367 If that failed for some reason (e.g. because another pattern
2368 took priority), just handle cases in which the offset already
2369 has the right type. */
2370 else if (gs_info->ifn != IFN_LAST
2371 && !is_gimple_call (stmt_info->stmt)
2372 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2373 TREE_TYPE (gs_info->offset_vectype)))
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 "%s offset requires a conversion\n",
2378 vls_type == VLS_LOAD ? "gather" : "scatter");
2379 return false;
2381 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2382 &gs_info->offset_dt,
2383 &gs_info->offset_vectype))
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "%s index use not simple.\n",
2388 vls_type == VLS_LOAD ? "gather" : "scatter");
2389 return false;
2391 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2393 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2394 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2395 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2396 (gs_info->offset_vectype),
2397 TYPE_VECTOR_SUBPARTS (vectype)))
2399 if (dump_enabled_p ())
2400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2401 "unsupported vector types for emulated "
2402 "gather.\n");
2403 return false;
2406 /* Gather-scatter accesses perform only component accesses, alignment
2407 is irrelevant for them. */
2408 *alignment_support_scheme = dr_unaligned_supported;
2410 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2412 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2413 masked_p,
2414 vls_type, memory_access_type, poffset,
2415 alignment_support_scheme,
2416 misalignment, gs_info, lanes_ifn))
2417 return false;
2419 else if (STMT_VINFO_STRIDED_P (stmt_info))
2421 gcc_assert (!slp_node);
2422 if (loop_vinfo
2423 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2424 masked_p, gs_info))
2425 *memory_access_type = VMAT_GATHER_SCATTER;
2426 else
2427 *memory_access_type = VMAT_ELEMENTWISE;
2428 /* Alignment is irrelevant here. */
2429 *alignment_support_scheme = dr_unaligned_supported;
2431 else
2433 int cmp = compare_step_with_zero (vinfo, stmt_info);
2434 if (cmp == 0)
2436 gcc_assert (vls_type == VLS_LOAD);
2437 *memory_access_type = VMAT_INVARIANT;
2438 /* Invariant accesses perform only component accesses, alignment
2439 is irrelevant for them. */
2440 *alignment_support_scheme = dr_unaligned_supported;
2442 else
2444 if (cmp < 0)
2445 *memory_access_type = get_negative_load_store_type
2446 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2447 else
2448 *memory_access_type = VMAT_CONTIGUOUS;
2449 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2450 vectype, *poffset);
2451 *alignment_support_scheme
2452 = vect_supportable_dr_alignment (vinfo,
2453 STMT_VINFO_DR_INFO (stmt_info),
2454 vectype, *misalignment);
2458 if ((*memory_access_type == VMAT_ELEMENTWISE
2459 || *memory_access_type == VMAT_STRIDED_SLP)
2460 && !nunits.is_constant ())
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 "Not using elementwise accesses due to variable "
2465 "vectorization factor.\n");
2466 return false;
2469 if (*alignment_support_scheme == dr_unaligned_unsupported)
2471 if (dump_enabled_p ())
2472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2473 "unsupported unaligned access\n");
2474 return false;
2477 /* FIXME: At the moment the cost model seems to underestimate the
2478 cost of using elementwise accesses. This check preserves the
2479 traditional behavior until that can be fixed. */
2480 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2481 if (!first_stmt_info)
2482 first_stmt_info = stmt_info;
2483 if (*memory_access_type == VMAT_ELEMENTWISE
2484 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2485 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2486 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2487 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2489 if (dump_enabled_p ())
2490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2491 "not falling back to elementwise accesses\n");
2492 return false;
2494 return true;
2497 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2498 conditional operation STMT_INFO. When returning true, store the mask
2499 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2500 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2501 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2503 static bool
2504 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2505 slp_tree slp_node, unsigned mask_index,
2506 tree *mask, slp_tree *mask_node,
2507 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2509 enum vect_def_type mask_dt;
2510 tree mask_vectype;
2511 slp_tree mask_node_1;
2512 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2513 mask, &mask_node_1, &mask_dt, &mask_vectype))
2515 if (dump_enabled_p ())
2516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2517 "mask use not simple.\n");
2518 return false;
2521 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2523 if (dump_enabled_p ())
2524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2525 "mask argument is not a boolean.\n");
2526 return false;
2529 /* If the caller is not prepared for adjusting an external/constant
2530 SLP mask vector type fail. */
2531 if (slp_node
2532 && !mask_node
2533 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2535 if (dump_enabled_p ())
2536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2537 "SLP mask argument is not vectorized.\n");
2538 return false;
2541 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2542 if (!mask_vectype)
2543 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2544 mask_node_1);
2546 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2548 if (dump_enabled_p ())
2549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2550 "could not find an appropriate vector mask type.\n");
2551 return false;
2554 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2555 TYPE_VECTOR_SUBPARTS (vectype)))
2557 if (dump_enabled_p ())
2558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2559 "vector mask type %T"
2560 " does not match vector data type %T.\n",
2561 mask_vectype, vectype);
2563 return false;
2566 *mask_dt_out = mask_dt;
2567 *mask_vectype_out = mask_vectype;
2568 if (mask_node)
2569 *mask_node = mask_node_1;
2570 return true;
2573 /* Return true if stored value is suitable for vectorizing store
2574 statement STMT_INFO. When returning true, store the scalar stored
2575 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2576 the type of the vectorized store value in
2577 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2579 static bool
2580 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2581 slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
2582 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2583 vec_load_store_type *vls_type_out)
2585 int op_no = 0;
2586 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2588 if (gimple_call_internal_p (call)
2589 && internal_store_fn_p (gimple_call_internal_fn (call)))
2590 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2592 if (slp_node)
2593 op_no = vect_slp_child_index_for_operand
2594 (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2596 enum vect_def_type rhs_dt;
2597 tree rhs_vectype;
2598 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2599 rhs, rhs_node, &rhs_dt, &rhs_vectype))
2601 if (dump_enabled_p ())
2602 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2603 "use not simple.\n");
2604 return false;
2607 /* If this is a store from a constant, make sure native_encode_expr
2608 can handle it. */
2609 if (rhs_dt == vect_constant_def
2610 && CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
2612 if (dump_enabled_p ())
2613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2614 "cannot encode constant as a byte sequence.\n");
2615 return false;
2618 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2619 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2621 if (dump_enabled_p ())
2622 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2623 "incompatible vector types.\n");
2624 return false;
2627 *rhs_dt_out = rhs_dt;
2628 *rhs_vectype_out = rhs_vectype;
2629 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2630 *vls_type_out = VLS_STORE_INVARIANT;
2631 else
2632 *vls_type_out = VLS_STORE;
2633 return true;
2636 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2637 Note that we support masks with floating-point type, in which case the
2638 floats are interpreted as a bitmask. */
2640 static tree
2641 vect_build_all_ones_mask (vec_info *vinfo,
2642 stmt_vec_info stmt_info, tree masktype)
2644 if (TREE_CODE (masktype) == INTEGER_TYPE)
2645 return build_int_cst (masktype, -1);
2646 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2647 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2649 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2650 mask = build_vector_from_val (masktype, mask);
2651 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2653 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2655 REAL_VALUE_TYPE r;
2656 long tmp[6];
2657 for (int j = 0; j < 6; ++j)
2658 tmp[j] = -1;
2659 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2660 tree mask = build_real (TREE_TYPE (masktype), r);
2661 mask = build_vector_from_val (masktype, mask);
2662 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2664 gcc_unreachable ();
2667 /* Build an all-zero merge value of type VECTYPE while vectorizing
2668 STMT_INFO as a gather load. */
2670 static tree
2671 vect_build_zero_merge_argument (vec_info *vinfo,
2672 stmt_vec_info stmt_info, tree vectype)
2674 tree merge;
2675 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2676 merge = build_int_cst (TREE_TYPE (vectype), 0);
2677 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2679 REAL_VALUE_TYPE r;
2680 long tmp[6];
2681 for (int j = 0; j < 6; ++j)
2682 tmp[j] = 0;
2683 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2684 merge = build_real (TREE_TYPE (vectype), r);
2686 else
2687 gcc_unreachable ();
2688 merge = build_vector_from_val (vectype, merge);
2689 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2692 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2693 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2694 the gather load operation. If the load is conditional, MASK is the
2695 vectorized condition, otherwise MASK is null. PTR is the base
2696 pointer and OFFSET is the vectorized offset. */
2698 static gimple *
2699 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2700 gimple_stmt_iterator *gsi,
2701 gather_scatter_info *gs_info,
2702 tree ptr, tree offset, tree mask)
2704 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2705 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2706 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2707 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2708 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2709 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2710 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2711 tree scaletype = TREE_VALUE (arglist);
2712 tree var;
2713 gcc_checking_assert (types_compatible_p (srctype, rettype)
2714 && (!mask
2715 || TREE_CODE (masktype) == INTEGER_TYPE
2716 || types_compatible_p (srctype, masktype)));
2718 tree op = offset;
2719 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2721 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2722 TYPE_VECTOR_SUBPARTS (idxtype)));
2723 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2724 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2725 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2726 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2727 op = var;
2730 tree src_op = NULL_TREE;
2731 tree mask_op = NULL_TREE;
2732 if (mask)
2734 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2736 tree utype, optype = TREE_TYPE (mask);
2737 if (VECTOR_TYPE_P (masktype)
2738 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2739 utype = masktype;
2740 else
2741 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2742 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2743 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2744 gassign *new_stmt
2745 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2746 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2747 mask_arg = var;
2748 if (!useless_type_conversion_p (masktype, utype))
2750 gcc_assert (TYPE_PRECISION (utype)
2751 <= TYPE_PRECISION (masktype));
2752 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2753 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2754 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2755 mask_arg = var;
2757 src_op = build_zero_cst (srctype);
2758 mask_op = mask_arg;
2760 else
2762 src_op = mask;
2763 mask_op = mask;
2766 else
2768 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2769 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2772 tree scale = build_int_cst (scaletype, gs_info->scale);
2773 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2774 mask_op, scale);
2776 if (!useless_type_conversion_p (vectype, rettype))
2778 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2779 TYPE_VECTOR_SUBPARTS (rettype)));
2780 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2781 gimple_call_set_lhs (new_stmt, op);
2782 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2783 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2784 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2787 return new_stmt;
2790 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2791 instructions before GSI. GS_INFO describes the scatter store operation.
2792 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2793 vectorized data to store.
2794 If the store is conditional, MASK is the vectorized condition, otherwise
2795 MASK is null. */
2797 static gimple *
2798 vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
2799 gimple_stmt_iterator *gsi,
2800 gather_scatter_info *gs_info,
2801 tree ptr, tree offset, tree oprnd, tree mask)
2803 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2804 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2805 /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
2806 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2807 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2808 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2809 tree scaletype = TREE_VALUE (arglist);
2810 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2811 && TREE_CODE (rettype) == VOID_TYPE);
2813 tree mask_arg = NULL_TREE;
2814 if (mask)
2816 mask_arg = mask;
2817 tree optype = TREE_TYPE (mask_arg);
2818 tree utype;
2819 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2820 utype = masktype;
2821 else
2822 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2823 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2824 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2825 gassign *new_stmt
2826 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2827 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2828 mask_arg = var;
2829 if (!useless_type_conversion_p (masktype, utype))
2831 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2832 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2833 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2834 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2835 mask_arg = var;
2838 else
2840 mask_arg = build_int_cst (masktype, -1);
2841 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2844 tree src = oprnd;
2845 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2847 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2848 TYPE_VECTOR_SUBPARTS (srctype)));
2849 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2850 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2851 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2852 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2853 src = var;
2856 tree op = offset;
2857 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2859 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2860 TYPE_VECTOR_SUBPARTS (idxtype)));
2861 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2862 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2863 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2864 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2865 op = var;
2868 tree scale = build_int_cst (scaletype, gs_info->scale);
2869 gcall *new_stmt
2870 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2871 return new_stmt;
2874 /* Prepare the base and offset in GS_INFO for vectorization.
2875 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2876 to the vectorized offset argument for the first copy of STMT_INFO.
2877 STMT_INFO is the statement described by GS_INFO and LOOP is the
2878 containing loop. */
2880 static void
2881 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2882 class loop *loop, stmt_vec_info stmt_info,
2883 slp_tree slp_node, gather_scatter_info *gs_info,
2884 tree *dataref_ptr, vec<tree> *vec_offset)
2886 gimple_seq stmts = NULL;
2887 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2888 if (stmts != NULL)
2890 basic_block new_bb;
2891 edge pe = loop_preheader_edge (loop);
2892 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2893 gcc_assert (!new_bb);
2895 if (slp_node)
2896 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2897 else
2899 unsigned ncopies
2900 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2901 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2902 gs_info->offset, vec_offset,
2903 gs_info->offset_vectype);
2907 /* Prepare to implement a grouped or strided load or store using
2908 the gather load or scatter store operation described by GS_INFO.
2909 STMT_INFO is the load or store statement.
2911 Set *DATAREF_BUMP to the amount that should be added to the base
2912 address after each copy of the vectorized statement. Set *VEC_OFFSET
2913 to an invariant offset vector in which element I has the value
2914 I * DR_STEP / SCALE. */
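/* For example, with DR_STEP 8 and SCALE 4, X is 2 and *VEC_OFFSET is
   { 0, 2, 4, 6, ... } (illustrative values). */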
2916 static void
2917 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2918 loop_vec_info loop_vinfo,
2919 gimple_stmt_iterator *gsi,
2920 gather_scatter_info *gs_info,
2921 tree *dataref_bump, tree *vec_offset,
2922 vec_loop_lens *loop_lens)
2924 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2925 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2927 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2929 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2930 ivtmp_8 = _31 * 16 (step in bytes);
2931 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2932 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2933 tree loop_len
2934 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2935 tree tmp
2936 = fold_build2 (MULT_EXPR, sizetype,
2937 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2938 loop_len);
2939 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2940 GSI_SAME_STMT);
2942 else
2944 tree bump
2945 = size_binop (MULT_EXPR,
2946 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2947 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2948 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2951 /* The offset given in GS_INFO can have pointer type, so use the element
2952 type of the vector instead. */
2953 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2955 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2956 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2957 ssize_int (gs_info->scale));
2958 step = fold_convert (offset_type, step);
2960 /* Create {0, X, X*2, X*3, ...}. */
2961 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2962 build_zero_cst (offset_type), step);
2963 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2966 /* Prepare the pointer IVs which need to be updated by a variable amount.
2967 That variable amount is the outcome of .SELECT_VL. In this case each
2968 iteration may process a flexible number of elements, as long as that
2969 number is <= VF elements.
2971 Return data reference according to SELECT_VL.
2972 If new statements are needed, insert them before GSI. */
2974 static tree
2975 vect_get_loop_variant_data_ptr_increment (
2976 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
2977 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
2978 vect_memory_access_type memory_access_type)
2980 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2981 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2983 /* gather/scatter never reach here. */
2984 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
2986 /* When the SELECT_VL pattern is in use, we adjust the memory address
2987 dynamically by the .SELECT_VL result.
2989 The result of .SELECT_VL is the number of elements to be processed
2990 in each iteration. So the memory address adjustment operation
2991 should be:
2993 addr = addr + .SELECT_VL (ARG..) * step;
2995 tree loop_len
2996 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
2997 tree len_type = TREE_TYPE (loop_len);
2998 /* Since the outcome of .SELECT_VL is a number of elements, convert it
2999 to a byte size so that it can be used to bump the address pointer
3000 IVs by a variable amount. */
3001 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3002 wide_int_to_tree (len_type, wi::to_widest (step)));
3003 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3004 gassign *assign = gimple_build_assign (bump, tmp);
3005 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3006 return bump;
3009 /* Return the amount that should be added to a vector pointer to move
3010 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3011 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3012 vectorization. */
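/* For example, for a contiguous access with AGGR_TYPE V4SI the increment is
   16 bytes, or -16 bytes when the step is negative (illustrative values;
   the SELECT_VL path below computes a variable amount instead). */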
3014 static tree
3015 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3016 dr_vec_info *dr_info, tree aggr_type,
3017 vect_memory_access_type memory_access_type,
3018 vec_loop_lens *loop_lens = nullptr)
3020 if (memory_access_type == VMAT_INVARIANT)
3021 return size_zero_node;
3023 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3024 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3025 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3026 loop_lens, dr_info,
3027 memory_access_type);
3029 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3030 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3031 if (tree_int_cst_sgn (step) == -1)
3032 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3033 return iv_step;
3036 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3038 static bool
3039 vectorizable_bswap (vec_info *vinfo,
3040 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3041 gimple **vec_stmt, slp_tree slp_node,
3042 slp_tree *slp_op,
3043 tree vectype_in, stmt_vector_for_cost *cost_vec)
3045 tree op, vectype;
3046 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3047 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3048 unsigned ncopies;
3050 op = gimple_call_arg (stmt, 0);
3051 vectype = STMT_VINFO_VECTYPE (stmt_info);
3052 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3054 /* Multiple types in SLP are handled by creating the appropriate number of
3055 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3056 case of SLP. */
3057 if (slp_node)
3058 ncopies = 1;
3059 else
3060 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3062 gcc_assert (ncopies >= 1);
3064 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype))
3066 if (dump_enabled_p ())
3067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3068 "mismatched vector sizes %T and %T\n",
3069 vectype_in, vectype);
3070 return false;
3073 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3074 if (! char_vectype)
3075 return false;
3077 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3078 unsigned word_bytes;
3079 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3080 return false;
3082 /* The encoding uses one stepped pattern for each byte in the word. */
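/* For example, for a 32-bit bswap on V16QI data (WORD_BYTES == 4) the
   encoded elements are { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 } and the
   stepped patterns extend them to { ..., 15, 14, 13, 12 }, i.e. a byte
   reversal within each word (illustrative case). */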
3083 vec_perm_builder elts (num_bytes, word_bytes, 3);
3084 for (unsigned i = 0; i < 3; ++i)
3085 for (unsigned j = 0; j < word_bytes; ++j)
3086 elts.quick_push ((i + 1) * word_bytes - j - 1);
3088 vec_perm_indices indices (elts, 1, num_bytes);
3089 machine_mode vmode = TYPE_MODE (char_vectype);
3090 if (!can_vec_perm_const_p (vmode, vmode, indices))
3091 return false;
3093 if (! vec_stmt)
3095 if (slp_node
3096 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3098 if (dump_enabled_p ())
3099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3100 "incompatible vector types for invariants\n");
3101 return false;
3104 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3105 DUMP_VECT_SCOPE ("vectorizable_bswap");
3106 record_stmt_cost (cost_vec,
3107 1, vector_stmt, stmt_info, 0, vect_prologue);
3108 record_stmt_cost (cost_vec,
3109 slp_node
3110 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3111 vec_perm, stmt_info, 0, vect_body);
3112 return true;
3115 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3117 /* Transform. */
3118 vec<tree> vec_oprnds = vNULL;
3119 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3120 op, &vec_oprnds);
3121 /* Arguments are ready. Create the new vector stmt. */
3122 unsigned i;
3123 tree vop;
3124 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3126 gimple *new_stmt;
3127 tree tem = make_ssa_name (char_vectype);
3128 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3129 char_vectype, vop));
3130 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3131 tree tem2 = make_ssa_name (char_vectype);
3132 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3133 tem, tem, bswap_vconst);
3134 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3135 tem = make_ssa_name (vectype);
3136 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3137 vectype, tem2));
3138 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3139 if (slp_node)
3140 slp_node->push_vec_def (new_stmt);
3141 else
3142 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3145 if (!slp_node)
3146 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3148 vec_oprnds.release ();
3149 return true;
3152 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3153 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3154 in a single step. On success, store the binary pack code in
3155 *CONVERT_CODE. */
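/* For example (illustrative only), two V4SI inputs can typically be packed
   into a single V8HI result with one VEC_PACK_TRUNC_EXPR, which is the kind
   of single-step narrowing checked for here. */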
3157 static bool
3158 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3159 code_helper *convert_code)
3161 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3162 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3163 return false;
3165 code_helper code;
3166 int multi_step_cvt = 0;
3167 auto_vec <tree, 8> interm_types;
3168 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3169 &code, &multi_step_cvt, &interm_types)
3170 || multi_step_cvt)
3171 return false;
3173 *convert_code = code;
3174 return true;
3177 /* Function vectorizable_call.
3179 Check if STMT_INFO performs a function call that can be vectorized.
3180 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3181 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3182 Return true if STMT_INFO is vectorizable in this way. */
3184 static bool
3185 vectorizable_call (vec_info *vinfo,
3186 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3187 gimple **vec_stmt, slp_tree slp_node,
3188 stmt_vector_for_cost *cost_vec)
3190 gcall *stmt;
3191 tree vec_dest;
3192 tree scalar_dest;
3193 tree op;
3194 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3195 tree vectype_out, vectype_in;
3196 poly_uint64 nunits_in;
3197 poly_uint64 nunits_out;
3198 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3199 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3200 tree fndecl, new_temp, rhs_type;
3201 enum vect_def_type dt[4]
3202 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3203 vect_unknown_def_type };
3204 tree vectypes[ARRAY_SIZE (dt)] = {};
3205 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3206 int ndts = ARRAY_SIZE (dt);
3207 int ncopies, j;
3208 auto_vec<tree, 8> vargs;
3209 enum { NARROW, NONE, WIDEN } modifier;
3210 size_t i, nargs;
3211 tree lhs;
3212 tree clz_ctz_arg1 = NULL_TREE;
3214 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3215 return false;
3217 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3218 && ! vec_stmt)
3219 return false;
3221 /* Is STMT_INFO a vectorizable call? */
3222 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3223 if (!stmt)
3224 return false;
3226 if (gimple_call_internal_p (stmt)
3227 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3228 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3229 /* Handled by vectorizable_load and vectorizable_store. */
3230 return false;
3232 if (gimple_call_lhs (stmt) == NULL_TREE
3233 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3234 return false;
3236 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3238 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3240 /* Process function arguments. */
3241 rhs_type = NULL_TREE;
3242 vectype_in = NULL_TREE;
3243 nargs = gimple_call_num_args (stmt);
3245 /* Bail out if the function has more than four arguments; we do not have
3246 interesting builtin functions to vectorize with more than two arguments
3247 except for fma. Calls with no arguments are not handled either. */
3248 if (nargs == 0 || nargs > 4)
3249 return false;
3251 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3252 combined_fn cfn = gimple_call_combined_fn (stmt);
3253 if (cfn == CFN_GOMP_SIMD_LANE)
3255 nargs = 0;
3256 rhs_type = unsigned_type_node;
3258 /* Similarly, pretend that IFN_CLZ and IFN_CTZ have only one argument;
3259 the second argument just says whether the operation is well-defined at
3260 zero and what value should be returned for it. */
3261 if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
3263 nargs = 1;
3264 clz_ctz_arg1 = gimple_call_arg (stmt, 1);
3267 int mask_opno = -1;
3268 if (internal_fn_p (cfn))
3269 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3271 for (i = 0; i < nargs; i++)
3273 if ((int) i == mask_opno)
3275 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3276 &op, &slp_op[i], &dt[i], &vectypes[i]))
3277 return false;
3278 continue;
3281 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3282 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3286 "use not simple.\n");
3287 return false;
3290 /* We can only handle calls with arguments of the same type. */
3291 if (rhs_type
3292 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3294 if (dump_enabled_p ())
3295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3296 "argument types differ.\n");
3297 return false;
3299 if (!rhs_type)
3300 rhs_type = TREE_TYPE (op);
3302 if (!vectype_in)
3303 vectype_in = vectypes[i];
3304 else if (vectypes[i]
3305 && !types_compatible_p (vectypes[i], vectype_in))
3307 if (dump_enabled_p ())
3308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3309 "argument vector types differ.\n");
3310 return false;
3313 /* If all arguments are external or constant defs, infer the vector type
3314 from the scalar type. */
3315 if (!vectype_in)
3316 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3317 if (vec_stmt)
3318 gcc_assert (vectype_in);
3319 if (!vectype_in)
3321 if (dump_enabled_p ())
3322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3323 "no vectype for scalar type %T\n", rhs_type);
3325 return false;
3328 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3329 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3331 if (dump_enabled_p ())
3332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3333 "mixed mask and nonmask vector types\n");
3334 return false;
3337 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3339 if (dump_enabled_p ())
3340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3341 "use emulated vector type for call\n");
3342 return false;
3345 /* FORNOW */
3346 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3347 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3348 if (known_eq (nunits_in * 2, nunits_out))
3349 modifier = NARROW;
3350 else if (known_eq (nunits_out, nunits_in))
3351 modifier = NONE;
3352 else if (known_eq (nunits_out * 2, nunits_in))
3353 modifier = WIDEN;
3354 else
3355 return false;
3357 /* We only handle functions that do not read or clobber memory. */
3358 if (gimple_vuse (stmt))
3360 if (dump_enabled_p ())
3361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3362 "function reads from or writes to memory.\n");
3363 return false;
3366 /* For now, we only vectorize functions if a target specific builtin
3367 is available. TODO -- in some cases, it might be profitable to
3368 insert the calls for pieces of the vector, in order to be able
3369 to vectorize other operations in the loop. */
3370 fndecl = NULL_TREE;
3371 internal_fn ifn = IFN_LAST;
3372 tree callee = gimple_call_fndecl (stmt);
3374 /* First try using an internal function. */
3375 code_helper convert_code = MAX_TREE_CODES;
3376 if (cfn != CFN_LAST
3377 && (modifier == NONE
3378 || (modifier == NARROW
3379 && simple_integer_narrowing (vectype_out, vectype_in,
3380 &convert_code))))
3381 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3382 vectype_in);
3384 /* If that fails, try asking for a target-specific built-in function. */
3385 if (ifn == IFN_LAST)
3387 if (cfn != CFN_LAST)
3388 fndecl = targetm.vectorize.builtin_vectorized_function
3389 (cfn, vectype_out, vectype_in);
3390 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3391 fndecl = targetm.vectorize.builtin_md_vectorized_function
3392 (callee, vectype_out, vectype_in);
3395 if (ifn == IFN_LAST && !fndecl)
3397 if (cfn == CFN_GOMP_SIMD_LANE
3398 && (!slp_node || SLP_TREE_LANES (slp_node) == 1)
3399 && loop_vinfo
3400 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3401 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3402 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3403 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3405 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3406 { 0, 1, 2, ... vf - 1 } vector. */
3407 gcc_assert (nargs == 0);
3409 else if (modifier == NONE
3410 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3411 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3412 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3413 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3414 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3415 slp_op, vectype_in, cost_vec);
3416 else
3418 if (dump_enabled_p ())
3419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3420 "function is not vectorizable.\n");
3421 return false;
3425 if (slp_node)
3426 ncopies = 1;
3427 else if (modifier == NARROW && ifn == IFN_LAST)
3428 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3429 else
3430 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3432 /* Sanity check: make sure that at least one copy of the vectorized stmt
3433 needs to be generated. */
3434 gcc_assert (ncopies >= 1);
3436 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
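/* Look up conditional and length-controlled variants of the chosen
internal function up front; one of them is needed if this call is to
participate in a loop using partial vectors (masks or lengths).  */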
3437 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3438 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3439 int len_opno = internal_fn_len_index (cond_len_fn);
3440 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3441 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3442 if (!vec_stmt) /* transformation not required. */
3444 if (slp_node)
3445 for (i = 0; i < nargs; ++i)
3446 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3447 vectypes[i]
3448 ? vectypes[i] : vectype_in))
3450 if (dump_enabled_p ())
3451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3452 "incompatible vector types for invariants\n");
3453 return false;
3455 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3456 DUMP_VECT_SCOPE ("vectorizable_call");
3457 vect_model_simple_cost (vinfo, stmt_info,
3458 ncopies, dt, ndts, slp_node, cost_vec);
3459 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3460 record_stmt_cost (cost_vec, ncopies / 2,
3461 vec_promote_demote, stmt_info, 0, vect_body);
3463 if (loop_vinfo
3464 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3465 && (reduc_idx >= 0 || mask_opno >= 0))
3467 if (reduc_idx >= 0
3468 && (cond_fn == IFN_LAST
3469 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3470 OPTIMIZE_FOR_SPEED))
3471 && (cond_len_fn == IFN_LAST
3472 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3473 OPTIMIZE_FOR_SPEED)))
3475 if (dump_enabled_p ())
3476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3477 "can't use a fully-masked loop because no"
3478 " conditional operation is available.\n");
3479 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3481 else
3483 unsigned int nvectors
3484 = (slp_node
3485 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3486 : ncopies);
3487 tree scalar_mask = NULL_TREE;
3488 if (mask_opno >= 0)
3489 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3490 if (cond_len_fn != IFN_LAST
3491 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3492 OPTIMIZE_FOR_SPEED))
3493 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3495 else
3496 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3497 scalar_mask);
3500 return true;
3503 /* Transform. */
3505 if (dump_enabled_p ())
3506 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3508 /* Handle def. */
3509 scalar_dest = gimple_call_lhs (stmt);
3510 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3512 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3513 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3514 unsigned int vect_nargs = nargs;
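/* In a partially-vectorized loop, switch to the conditional or
length-controlled variant of the internal function and make room for its
extra arguments (mask and else value, or LEN and BIAS).  */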
3515 if (len_loop_p)
3517 if (len_opno >= 0)
3519 ifn = cond_len_fn;
3520 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
3521 vect_nargs += 2;
3523 else if (reduc_idx >= 0)
3524 gcc_unreachable ();
3526 else if (masked_loop_p && reduc_idx >= 0)
3528 ifn = cond_fn;
3529 vect_nargs += 2;
3531 if (clz_ctz_arg1)
3532 ++vect_nargs;
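/* Code generation.  The first branch handles calls whose input and output
vectors have the same number of lanes, plus NARROW calls mapped to an
internal function, where pairs of half-width results are combined
afterwards; the second branch further below instead passes two input
vectors per call for NARROW calls to target builtins.  */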
3534 if (modifier == NONE || ifn != IFN_LAST)
3536 tree prev_res = NULL_TREE;
3537 vargs.safe_grow (vect_nargs, true);
3538 auto_vec<vec<tree> > vec_defs (nargs);
3539 for (j = 0; j < ncopies; ++j)
3541 /* Build argument list for the vectorized call. */
3542 if (slp_node)
3544 if (cfn == CFN_GOMP_SIMD_LANE)
3546 for (i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++i)
3548 /* ??? For multi-lane SLP we'd need to build
3549 { 0, 0, .., 1, 1, ... }. */
3550 tree cst = build_index_vector (vectype_out,
3551 i * nunits_out, 1);
3552 tree new_var
3553 = vect_get_new_ssa_name (vectype_out, vect_simple_var,
3554 "cst_");
3555 gimple *init_stmt = gimple_build_assign (new_var, cst);
3556 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3557 new_temp = make_ssa_name (vec_dest);
3558 gimple *new_stmt
3559 = gimple_build_assign (new_temp, new_var);
3560 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
3561 gsi);
3562 slp_node->push_vec_def (new_stmt);
3564 continue;
3567 vec<tree> vec_oprnds0;
3568 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3569 vec_oprnds0 = vec_defs[0];
3571 /* Arguments are ready. Create the new vector stmt. */
3572 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3574 int varg = 0;
3575 if (masked_loop_p && reduc_idx >= 0)
3577 unsigned int vec_num = vec_oprnds0.length ();
3578 /* Always true for SLP. */
3579 gcc_assert (ncopies == 1);
3580 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3581 gsi, masks, vec_num,
3582 vectype_out, i);
3584 size_t k;
3585 for (k = 0; k < nargs; k++)
3587 vec<tree> vec_oprndsk = vec_defs[k];
3588 vargs[varg++] = vec_oprndsk[i];
3590 if (masked_loop_p && reduc_idx >= 0)
3591 vargs[varg++] = vargs[reduc_idx + 1];
3592 if (clz_ctz_arg1)
3593 vargs[varg++] = clz_ctz_arg1;
3595 gimple *new_stmt;
3596 if (modifier == NARROW)
3598 /* We don't define any narrowing conditional functions
3599 at present. */
3600 gcc_assert (mask_opno < 0);
3601 tree half_res = make_ssa_name (vectype_in);
3602 gcall *call
3603 = gimple_build_call_internal_vec (ifn, vargs);
3604 gimple_call_set_lhs (call, half_res);
3605 gimple_call_set_nothrow (call, true);
3606 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3607 if ((i & 1) == 0)
3609 prev_res = half_res;
3610 continue;
3612 new_temp = make_ssa_name (vec_dest);
3613 new_stmt = vect_gimple_build (new_temp, convert_code,
3614 prev_res, half_res);
3615 vect_finish_stmt_generation (vinfo, stmt_info,
3616 new_stmt, gsi);
3618 else
3620 if (len_opno >= 0 && len_loop_p)
3622 unsigned int vec_num = vec_oprnds0.length ();
3623 /* Always true for SLP. */
3624 gcc_assert (ncopies == 1);
3625 tree len
3626 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3627 vectype_out, i, 1);
3628 signed char biasval
3629 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3630 tree bias = build_int_cst (intQI_type_node, biasval);
3631 vargs[len_opno] = len;
3632 vargs[len_opno + 1] = bias;
3634 else if (mask_opno >= 0 && masked_loop_p)
3636 unsigned int vec_num = vec_oprnds0.length ();
3637 /* Always true for SLP. */
3638 gcc_assert (ncopies == 1);
3639 tree mask = vect_get_loop_mask (loop_vinfo,
3640 gsi, masks, vec_num,
3641 vectype_out, i);
3642 vargs[mask_opno] = prepare_vec_mask
3643 (loop_vinfo, TREE_TYPE (mask), mask,
3644 vargs[mask_opno], gsi);
3647 gcall *call;
3648 if (ifn != IFN_LAST)
3649 call = gimple_build_call_internal_vec (ifn, vargs);
3650 else
3651 call = gimple_build_call_vec (fndecl, vargs);
3652 new_temp = make_ssa_name (vec_dest, call);
3653 gimple_call_set_lhs (call, new_temp);
3654 gimple_call_set_nothrow (call, true);
3655 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3656 new_stmt = call;
3658 slp_node->push_vec_def (new_stmt);
3660 continue;
3663 int varg = 0;
3664 if (masked_loop_p && reduc_idx >= 0)
3665 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3666 vectype_out, j);
3667 for (i = 0; i < nargs; i++)
3669 op = gimple_call_arg (stmt, i);
3670 if (j == 0)
3672 vec_defs.quick_push (vNULL);
3673 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3674 op, &vec_defs[i],
3675 vectypes[i]);
3677 vargs[varg++] = vec_defs[i][j];
3679 if (masked_loop_p && reduc_idx >= 0)
3680 vargs[varg++] = vargs[reduc_idx + 1];
3681 if (clz_ctz_arg1)
3682 vargs[varg++] = clz_ctz_arg1;
3684 if (len_opno >= 0 && len_loop_p)
3686 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3687 vectype_out, j, 1);
3688 signed char biasval
3689 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3690 tree bias = build_int_cst (intQI_type_node, biasval);
3691 vargs[len_opno] = len;
3692 vargs[len_opno + 1] = bias;
3694 else if (mask_opno >= 0 && masked_loop_p)
3696 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3697 vectype_out, j);
3698 vargs[mask_opno]
3699 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3700 vargs[mask_opno], gsi);
3703 gimple *new_stmt;
3704 if (cfn == CFN_GOMP_SIMD_LANE)
3706 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3707 tree new_var
3708 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3709 gimple *init_stmt = gimple_build_assign (new_var, cst);
3710 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3711 new_temp = make_ssa_name (vec_dest);
3712 new_stmt = gimple_build_assign (new_temp, new_var);
3713 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3715 else if (modifier == NARROW)
3717 /* We don't define any narrowing conditional functions at
3718 present. */
3719 gcc_assert (mask_opno < 0);
3720 tree half_res = make_ssa_name (vectype_in);
3721 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3722 gimple_call_set_lhs (call, half_res);
3723 gimple_call_set_nothrow (call, true);
3724 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3725 if ((j & 1) == 0)
3727 prev_res = half_res;
3728 continue;
3730 new_temp = make_ssa_name (vec_dest);
3731 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3732 half_res);
3733 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3735 else
3737 gcall *call;
3738 if (ifn != IFN_LAST)
3739 call = gimple_build_call_internal_vec (ifn, vargs);
3740 else
3741 call = gimple_build_call_vec (fndecl, vargs);
3742 new_temp = make_ssa_name (vec_dest, call);
3743 gimple_call_set_lhs (call, new_temp);
3744 gimple_call_set_nothrow (call, true);
3745 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3746 new_stmt = call;
3749 if (j == (modifier == NARROW ? 1 : 0))
3750 *vec_stmt = new_stmt;
3751 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3753 for (i = 0; i < nargs; i++)
3755 vec<tree> vec_oprndsi = vec_defs[i];
3756 vec_oprndsi.release ();
3759 else if (modifier == NARROW)
3761 auto_vec<vec<tree> > vec_defs (nargs);
3762 /* We don't define any narrowing conditional functions at present. */
3763 gcc_assert (mask_opno < 0);
3764 for (j = 0; j < ncopies; ++j)
3766 /* Build argument list for the vectorized call. */
3767 if (j == 0)
3768 vargs.create (nargs * 2);
3769 else
3770 vargs.truncate (0);
3772 if (slp_node)
3774 vec<tree> vec_oprnds0;
3776 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3777 vec_oprnds0 = vec_defs[0];
3779 /* Arguments are ready. Create the new vector stmt. */
3780 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3782 size_t k;
3783 vargs.truncate (0);
3784 for (k = 0; k < nargs; k++)
3786 vec<tree> vec_oprndsk = vec_defs[k];
3787 vargs.quick_push (vec_oprndsk[i]);
3788 vargs.quick_push (vec_oprndsk[i + 1]);
3790 gcall *call;
3791 if (ifn != IFN_LAST)
3792 call = gimple_build_call_internal_vec (ifn, vargs);
3793 else
3794 call = gimple_build_call_vec (fndecl, vargs);
3795 new_temp = make_ssa_name (vec_dest, call);
3796 gimple_call_set_lhs (call, new_temp);
3797 gimple_call_set_nothrow (call, true);
3798 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3799 slp_node->push_vec_def (call);
3801 continue;
3804 for (i = 0; i < nargs; i++)
3806 op = gimple_call_arg (stmt, i);
3807 if (j == 0)
3809 vec_defs.quick_push (vNULL);
3810 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3811 op, &vec_defs[i], vectypes[i]);
3813 vec_oprnd0 = vec_defs[i][2*j];
3814 vec_oprnd1 = vec_defs[i][2*j+1];
3816 vargs.quick_push (vec_oprnd0);
3817 vargs.quick_push (vec_oprnd1);
3820 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3821 new_temp = make_ssa_name (vec_dest, new_stmt);
3822 gimple_call_set_lhs (new_stmt, new_temp);
3823 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3825 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3828 if (!slp_node)
3829 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3831 for (i = 0; i < nargs; i++)
3833 vec<tree> vec_oprndsi = vec_defs[i];
3834 vec_oprndsi.release ();
3837 else
3838 /* No current target implements this case. */
3839 return false;
3841 vargs.release ();
3843 /* The call in STMT might prevent it from being removed in dce.
3844 We, however, cannot remove it here, due to the way the ssa name
3845 it defines is mapped to the new definition. So just replace the
3846 rhs of the statement with something harmless. */
3848 if (slp_node)
3849 return true;
3851 stmt_info = vect_orig_stmt (stmt_info);
3852 lhs = gimple_get_lhs (stmt_info->stmt);
3854 gassign *new_stmt
3855 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3856 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3858 return true;
3862 struct simd_call_arg_info
3864 tree vectype;
3865 tree op;
3866 HOST_WIDE_INT linear_step;
3867 enum vect_def_type dt;
3868 unsigned int align;
3869 bool simd_lane_linear;
3872 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3873 is linear within simd lane (but not within whole loop), note it in
3874 *ARGINFO. */
3876 static void
3877 vect_simd_lane_linear (tree op, class loop *loop,
3878 struct simd_call_arg_info *arginfo)
3880 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3882 if (!is_gimple_assign (def_stmt)
3883 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3884 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3885 return;
3887 tree base = gimple_assign_rhs1 (def_stmt);
3888 HOST_WIDE_INT linear_step = 0;
3889 tree v = gimple_assign_rhs2 (def_stmt);
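/* Walk the definition chain of the offset: fold constant additions into
BASE, record at most one constant multiplication as the step, look
through non-narrowing integer conversions, and succeed only if the chain
ends in the IFN_GOMP_SIMD_LANE call for this loop's simduid.  */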
3890 while (TREE_CODE (v) == SSA_NAME)
3892 tree t;
3893 def_stmt = SSA_NAME_DEF_STMT (v);
3894 if (is_gimple_assign (def_stmt))
3895 switch (gimple_assign_rhs_code (def_stmt))
3897 case PLUS_EXPR:
3898 t = gimple_assign_rhs2 (def_stmt);
3899 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3900 return;
3901 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3902 v = gimple_assign_rhs1 (def_stmt);
3903 continue;
3904 case MULT_EXPR:
3905 t = gimple_assign_rhs2 (def_stmt);
3906 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3907 return;
3908 linear_step = tree_to_shwi (t);
3909 v = gimple_assign_rhs1 (def_stmt);
3910 continue;
3911 CASE_CONVERT:
3912 t = gimple_assign_rhs1 (def_stmt);
3913 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3914 || (TYPE_PRECISION (TREE_TYPE (v))
3915 < TYPE_PRECISION (TREE_TYPE (t))))
3916 return;
3917 if (!linear_step)
3918 linear_step = 1;
3919 v = t;
3920 continue;
3921 default:
3922 return;
3924 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3925 && loop->simduid
3926 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3927 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3928 == loop->simduid))
3930 if (!linear_step)
3931 linear_step = 1;
3932 arginfo->linear_step = linear_step;
3933 arginfo->op = base;
3934 arginfo->simd_lane_linear = true;
3935 return;
3940 /* Function vectorizable_simd_clone_call.
3942 Check if STMT_INFO performs a function call that can be vectorized
3943 by calling a simd clone of the function.
3944 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3945 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3946 Return true if STMT_INFO is vectorizable in this way. */
3948 static bool
3949 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3950 gimple_stmt_iterator *gsi,
3951 gimple **vec_stmt, slp_tree slp_node,
3952 stmt_vector_for_cost *)
3954 tree vec_dest;
3955 tree scalar_dest;
3956 tree op, type;
3957 tree vec_oprnd0 = NULL_TREE;
3958 tree vectype;
3959 poly_uint64 nunits;
3960 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3961 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3962 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3963 tree fndecl, new_temp;
3964 int ncopies, j;
3965 auto_vec<simd_call_arg_info> arginfo;
3966 vec<tree> vargs = vNULL;
3967 size_t i, nargs;
3968 tree lhs, rtype, ratype;
3969 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3970 int masked_call_offset = 0;
3972 /* Is STMT a vectorizable call? */
3973 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3974 if (!stmt)
3975 return false;
3977 fndecl = gimple_call_fndecl (stmt);
3978 if (fndecl == NULL_TREE
3979 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
3981 fndecl = gimple_call_arg (stmt, 0);
3982 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
3983 fndecl = TREE_OPERAND (fndecl, 0);
3984 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
3985 masked_call_offset = 1;
3987 if (fndecl == NULL_TREE)
3988 return false;
3990 struct cgraph_node *node = cgraph_node::get (fndecl);
3991 if (node == NULL || node->simd_clones == NULL)
3992 return false;
3994 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3995 return false;
3997 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3998 && ! vec_stmt)
3999 return false;
4001 if (gimple_call_lhs (stmt)
4002 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4003 return false;
4005 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4007 vectype = STMT_VINFO_VECTYPE (stmt_info);
4009 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4010 return false;
4012 /* Process function arguments. */
4013 nargs = gimple_call_num_args (stmt) - masked_call_offset;
4015 /* Bail out if the function has zero arguments. */
4016 if (nargs == 0)
4017 return false;
4019 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
4020 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
4021 if (!vec_stmt)
4022 simd_clone_info.truncate (0);
4023 arginfo.reserve (nargs, true);
4024 auto_vec<slp_tree> slp_op;
4025 slp_op.safe_grow_cleared (nargs);
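/* Gather per-argument information: definition kind, vector type, pointer
alignment for invariants, and whether the value is linear in the loop or
linear within a SIMD lane; the clone selection below keys on these.  */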
4027 for (i = 0; i < nargs; i++)
4029 simd_call_arg_info thisarginfo;
4030 affine_iv iv;
4032 thisarginfo.linear_step = 0;
4033 thisarginfo.align = 0;
4034 thisarginfo.op = NULL_TREE;
4035 thisarginfo.simd_lane_linear = false;
4037 int op_no = i + masked_call_offset;
4038 if (slp_node)
4039 op_no = vect_slp_child_index_for_operand (stmt, op_no, false);
4040 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4041 op_no, &op, &slp_op[i],
4042 &thisarginfo.dt, &thisarginfo.vectype)
4043 || thisarginfo.dt == vect_uninitialized_def)
4045 if (dump_enabled_p ())
4046 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4047 "use not simple.\n");
4048 return false;
4051 if (thisarginfo.dt == vect_constant_def
4052 || thisarginfo.dt == vect_external_def)
4054 /* With SLP we determine the vector type of constants/externals
4055 at analysis time, handling conflicts via
4056 vect_maybe_update_slp_op_vectype. At transform time
4057 we have a vector type recorded for SLP. */
4058 gcc_assert (!vec_stmt
4059 || !slp_node
4060 || thisarginfo.vectype != NULL_TREE);
4061 if (!vec_stmt)
4062 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
4063 TREE_TYPE (op),
4064 slp_node);
4066 else
4067 gcc_assert (thisarginfo.vectype != NULL_TREE);
4069 /* For linear arguments, the analyze phase should have saved
4070 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
4071 if (vec_stmt
4072 && i * 3 + 4 <= simd_clone_info.length ()
4073 && simd_clone_info[i * 3 + 2])
4075 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
4076 thisarginfo.op = simd_clone_info[i * 3 + 1];
4077 thisarginfo.simd_lane_linear
4078 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
4079 /* If the loop has been peeled for alignment, we need to adjust the recorded base accordingly. */
4080 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4081 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4082 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4084 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4085 tree step = simd_clone_info[i * 3 + 2];
4086 tree opt = TREE_TYPE (thisarginfo.op);
4087 bias = fold_convert (TREE_TYPE (step), bias);
4088 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4089 thisarginfo.op
4090 = fold_build2 (POINTER_TYPE_P (opt)
4091 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4092 thisarginfo.op, bias);
4095 else if (!vec_stmt
4096 && thisarginfo.dt != vect_constant_def
4097 && thisarginfo.dt != vect_external_def
4098 && loop_vinfo
4099 && TREE_CODE (op) == SSA_NAME
4100 && simple_iv (loop, loop_containing_stmt (stmt), op,
4101 &iv, false)
4102 && tree_fits_shwi_p (iv.step))
4104 thisarginfo.linear_step = tree_to_shwi (iv.step);
4105 thisarginfo.op = iv.base;
4107 else if ((thisarginfo.dt == vect_constant_def
4108 || thisarginfo.dt == vect_external_def)
4109 && POINTER_TYPE_P (TREE_TYPE (op)))
4110 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4111 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4112 linear too. */
4113 if (POINTER_TYPE_P (TREE_TYPE (op))
4114 && !thisarginfo.linear_step
4115 && !vec_stmt
4116 && thisarginfo.dt != vect_constant_def
4117 && thisarginfo.dt != vect_external_def
4118 && loop_vinfo
4119 && TREE_CODE (op) == SSA_NAME)
4120 vect_simd_lane_linear (op, loop, &thisarginfo);
4122 arginfo.quick_push (thisarginfo);
4125 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4126 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4127 unsigned int badness = 0;
4128 struct cgraph_node *bestn = NULL;
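/* At analysis time pick the best simd clone by scoring each candidate
("badness"): prefer clones whose simdlen covers the vectorization factor
in a single call, penalize inbranch clones and argument kinds that only
match approximately, and skip clones the target rejects.  At transform
time the clone recorded during analysis is reused.  */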
4129 if (vec_stmt)
4130 bestn = cgraph_node::get (simd_clone_info[0]);
4131 else
4132 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4133 n = n->simdclone->next_clone)
4135 unsigned int this_badness = 0;
4136 unsigned int num_calls;
4137 /* The number of arguments in the call and the number of parameters in
4138 the simdclone should match. However, when the simdclone is
4139 'inbranch', it can have one more parameter than nargs when using
4140 an inbranch simdclone for a non-inbranch call, either in a
4141 non-masked loop using an all-true constant mask, or inside a masked
4142 loop using its mask. */
4143 size_t simd_nargs = n->simdclone->nargs;
4144 if (!masked_call_offset && n->simdclone->inbranch)
4145 simd_nargs--;
4146 if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4147 &num_calls)
4148 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4149 || (nargs != simd_nargs))
4150 continue;
4151 if (num_calls != 1)
4152 this_badness += floor_log2 (num_calls) * 4096;
4153 if (n->simdclone->inbranch)
4154 this_badness += 8192;
4155 int target_badness = targetm.simd_clone.usable (n);
4156 if (target_badness < 0)
4157 continue;
4158 this_badness += target_badness * 512;
4159 for (i = 0; i < nargs; i++)
4161 switch (n->simdclone->args[i].arg_type)
4163 case SIMD_CLONE_ARG_TYPE_VECTOR:
4164 if (!useless_type_conversion_p
4165 (n->simdclone->args[i].orig_type,
4166 TREE_TYPE (gimple_call_arg (stmt,
4167 i + masked_call_offset))))
4168 i = -1;
4169 else if (arginfo[i].dt == vect_constant_def
4170 || arginfo[i].dt == vect_external_def
4171 || arginfo[i].linear_step)
4172 this_badness += 64;
4173 break;
4174 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4175 if (arginfo[i].dt != vect_constant_def
4176 && arginfo[i].dt != vect_external_def)
4177 i = -1;
4178 break;
4179 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4180 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4181 if (arginfo[i].dt == vect_constant_def
4182 || arginfo[i].dt == vect_external_def
4183 || (arginfo[i].linear_step
4184 != n->simdclone->args[i].linear_step))
4185 i = -1;
4186 break;
4187 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4188 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4189 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4190 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4191 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4193 /* FORNOW */
4194 i = -1;
4195 break;
4196 case SIMD_CLONE_ARG_TYPE_MASK:
4197 /* While we can create a traditional data vector from
4198 an incoming integer-mode mask, we have no good way to
4199 force generation of an integer-mode mask from a traditional
4200 boolean vector input. */
4201 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4202 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4203 i = -1;
4204 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4205 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4206 this_badness += 2048;
4207 break;
4209 if (i == (size_t) -1)
4210 break;
4211 if (n->simdclone->args[i].alignment > arginfo[i].align)
4213 i = -1;
4214 break;
4216 if (arginfo[i].align)
4217 this_badness += (exact_log2 (arginfo[i].align)
4218 - exact_log2 (n->simdclone->args[i].alignment));
4220 if (i == (size_t) -1)
4221 continue;
4222 if (masked_call_offset == 0
4223 && n->simdclone->inbranch
4224 && n->simdclone->nargs > nargs)
4226 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4227 SIMD_CLONE_ARG_TYPE_MASK);
4228 /* Penalize using a masked SIMD clone for a call in a non-masked loop
4229 that is not in a branch, as we'd have to construct an all-true mask. */
4230 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4231 this_badness += 64;
4233 if (bestn == NULL || this_badness < badness)
4235 bestn = n;
4236 badness = this_badness;
4240 if (bestn == NULL)
4241 return false;
4243 unsigned int num_mask_args = 0;
4244 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4245 for (i = 0; i < nargs; i++)
4246 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4247 num_mask_args++;
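/* Validate the chosen clone against the actual arguments: invariant
operands passed in vector form need a usable vector type, vector-boolean
mask arguments are not supported, and mask arguments must match the
vectorized mask input in the number of lanes.  */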
4249 for (i = 0; i < nargs; i++)
4251 if ((arginfo[i].dt == vect_constant_def
4252 || arginfo[i].dt == vect_external_def)
4253 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4255 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4256 i + masked_call_offset));
4257 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4258 slp_node);
4259 if (arginfo[i].vectype == NULL
4260 || !constant_multiple_p (bestn->simdclone->simdlen,
4261 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4262 return false;
4265 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4266 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4268 if (dump_enabled_p ())
4269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4270 "vector mask arguments are not supported.\n");
4271 return false;
4274 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4276 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4277 if (bestn->simdclone->mask_mode == VOIDmode)
4279 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
4280 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4282 /* FORNOW we only have partial support for vector-type masks
4283 that can't hold all of simdlen. */
4284 if (dump_enabled_p ())
4285 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4286 vect_location,
4287 "in-branch vector clones are not yet"
4288 " supported for mismatched vector sizes.\n");
4289 return false;
4291 if (!expand_vec_cond_expr_p (clone_arg_vectype,
4292 arginfo[i].vectype, ERROR_MARK))
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4296 vect_location,
4297 "cannot compute mask argument for"
4298 " in-branch vector clones.\n");
4299 return false;
4302 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4304 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4305 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4306 num_mask_args),
4307 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4309 /* FORNOW we only have partial support for integer-type masks
4310 that represent the same number of lanes as the
4311 vectorized mask inputs. */
4312 if (dump_enabled_p ())
4313 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4314 vect_location,
4315 "in-branch vector clones are not yet "
4316 "supported for mismatched vector sizes.\n");
4317 return false;
4320 else
4322 if (dump_enabled_p ())
4323 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4324 vect_location,
4325 "in-branch vector clones not supported"
4326 " on this target.\n");
4327 return false;
4332 fndecl = bestn->decl;
4333 nunits = bestn->simdclone->simdlen;
4334 if (slp_node)
4335 ncopies = vector_unroll_factor (vf * group_size, nunits);
4336 else
4337 ncopies = vector_unroll_factor (vf, nunits);
4339 /* If the function isn't const, only allow it in simd loops where the
4340 user has asserted that at least nunits consecutive iterations can be
4341 performed using SIMD instructions. */
4342 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4343 && gimple_vuse (stmt))
4344 return false;
4346 /* Sanity check: make sure that at least one copy of the vectorized stmt
4347 needs to be generated. */
4348 gcc_assert (ncopies >= 1);
4350 if (!vec_stmt) /* transformation not required. */
4352 if (slp_node)
4353 for (unsigned i = 0; i < nargs; ++i)
4354 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4356 if (dump_enabled_p ())
4357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4358 "incompatible vector types for invariants\n");
4359 return false;
4361 /* When the original call is pure or const but the SIMD ABI dictates
4362 an aggregate return, we will have to use a virtual definition and
4363 in a loop eventually even need to add a virtual PHI. That's
4364 not straightforward, so allow this to be fixed up via renaming. */
4365 if (gimple_call_lhs (stmt)
4366 && !gimple_vdef (stmt)
4367 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4368 vinfo->any_known_not_updated_vssa = true;
4369 /* ??? For SLP code-gen we end up inserting after the last
4370 vector argument def rather than at the original call position
4371 so automagic virtual operand updating doesn't work. */
4372 if (gimple_vuse (stmt) && slp_node)
4373 vinfo->any_known_not_updated_vssa = true;
4374 simd_clone_info.safe_push (bestn->decl);
4375 for (i = 0; i < bestn->simdclone->nargs; i++)
4377 switch (bestn->simdclone->args[i].arg_type)
4379 default:
4380 continue;
4381 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4382 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4384 simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
4385 simd_clone_info.safe_push (arginfo[i].op);
4386 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4387 ? size_type_node : TREE_TYPE (arginfo[i].op);
4388 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4389 simd_clone_info.safe_push (ls);
4390 tree sll = arginfo[i].simd_lane_linear
4391 ? boolean_true_node : boolean_false_node;
4392 simd_clone_info.safe_push (sll);
4394 break;
4395 case SIMD_CLONE_ARG_TYPE_MASK:
4396 if (loop_vinfo
4397 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4399 unsigned nmasks
4400 = exact_div (ncopies * bestn->simdclone->simdlen,
4401 TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
4402 vect_record_loop_mask (loop_vinfo,
4403 &LOOP_VINFO_MASKS (loop_vinfo),
4404 nmasks, vectype, op);
4407 break;
4411 if (!bestn->simdclone->inbranch && loop_vinfo)
4413 if (dump_enabled_p ()
4414 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4415 dump_printf_loc (MSG_NOTE, vect_location,
4416 "can't use a fully-masked loop because a"
4417 " non-masked simd clone was selected.\n");
4418 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4421 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4422 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4423 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4424 dt, slp_node, cost_vec); */
4425 return true;
4428 /* Transform. */
4430 if (dump_enabled_p ())
4431 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4433 /* Handle def. */
4434 scalar_dest = gimple_call_lhs (stmt);
4435 vec_dest = NULL_TREE;
4436 rtype = NULL_TREE;
4437 ratype = NULL_TREE;
4438 if (scalar_dest)
4440 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4441 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4442 if (TREE_CODE (rtype) == ARRAY_TYPE)
4444 ratype = rtype;
4445 rtype = TREE_TYPE (ratype);
4449 auto_vec<vec<tree> > vec_oprnds;
4450 auto_vec<unsigned> vec_oprnds_i;
4451 vec_oprnds_i.safe_grow_cleared (nargs, true);
4452 if (slp_node)
4454 vec_oprnds.reserve_exact (nargs);
4455 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4457 else
4458 vec_oprnds.safe_grow_cleared (nargs, true);
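/* Emit NCOPIES calls to the selected clone.  Each iteration builds the
argument list, extracting sub-vectors with BIT_FIELD_REFs or collecting
several defs into a CONSTRUCTOR whenever the clone's parameter has a
different number of lanes than the loop's vectors.  */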
4459 for (j = 0; j < ncopies; ++j)
4461 poly_uint64 callee_nelements;
4462 poly_uint64 caller_nelements;
4463 /* Build argument list for the vectorized call. */
4464 if (j == 0)
4465 vargs.create (nargs);
4466 else
4467 vargs.truncate (0);
4469 for (i = 0; i < nargs; i++)
4471 unsigned int k, l, m, o;
4472 tree atype;
4473 op = gimple_call_arg (stmt, i + masked_call_offset);
4474 switch (bestn->simdclone->args[i].arg_type)
4476 case SIMD_CLONE_ARG_TYPE_VECTOR:
4477 atype = bestn->simdclone->args[i].vector_type;
4478 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4479 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4480 o = vector_unroll_factor (nunits, callee_nelements);
4481 for (m = j * o; m < (j + 1) * o; m++)
4483 if (known_lt (callee_nelements, caller_nelements))
4485 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4486 if (!constant_multiple_p (caller_nelements,
4487 callee_nelements, &k))
4488 gcc_unreachable ();
4490 gcc_assert ((k & (k - 1)) == 0);
4491 if (m == 0)
4493 if (!slp_node)
4494 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4495 ncopies * o / k, op,
4496 &vec_oprnds[i]);
4497 vec_oprnds_i[i] = 0;
4498 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4500 else
4502 vec_oprnd0 = arginfo[i].op;
4503 if ((m & (k - 1)) == 0)
4504 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4506 arginfo[i].op = vec_oprnd0;
4507 vec_oprnd0
4508 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4509 bitsize_int (prec),
4510 bitsize_int ((m & (k - 1)) * prec));
4511 gassign *new_stmt
4512 = gimple_build_assign (make_ssa_name (atype),
4513 vec_oprnd0);
4514 vect_finish_stmt_generation (vinfo, stmt_info,
4515 new_stmt, gsi);
4516 vargs.safe_push (gimple_assign_lhs (new_stmt));
4518 else
4520 if (!constant_multiple_p (callee_nelements,
4521 caller_nelements, &k))
4522 gcc_unreachable ();
4523 gcc_assert ((k & (k - 1)) == 0);
4524 vec<constructor_elt, va_gc> *ctor_elts;
4525 if (k != 1)
4526 vec_alloc (ctor_elts, k);
4527 else
4528 ctor_elts = NULL;
4529 for (l = 0; l < k; l++)
4531 if (m == 0 && l == 0)
4533 if (!slp_node)
4534 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4535 k * o * ncopies,
4537 &vec_oprnds[i]);
4538 vec_oprnds_i[i] = 0;
4539 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4541 else
4542 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4543 arginfo[i].op = vec_oprnd0;
4544 if (k == 1)
4545 break;
4546 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4547 vec_oprnd0);
4549 if (k == 1)
4550 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4551 atype))
4553 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4554 vec_oprnd0);
4555 gassign *new_stmt
4556 = gimple_build_assign (make_ssa_name (atype),
4557 vec_oprnd0);
4558 vect_finish_stmt_generation (vinfo, stmt_info,
4559 new_stmt, gsi);
4560 vargs.safe_push (gimple_get_lhs (new_stmt));
4562 else
4563 vargs.safe_push (vec_oprnd0);
4564 else
4566 vec_oprnd0 = build_constructor (atype, ctor_elts);
4567 gassign *new_stmt
4568 = gimple_build_assign (make_ssa_name (atype),
4569 vec_oprnd0);
4570 vect_finish_stmt_generation (vinfo, stmt_info,
4571 new_stmt, gsi);
4572 vargs.safe_push (gimple_assign_lhs (new_stmt));
4576 break;
4577 case SIMD_CLONE_ARG_TYPE_MASK:
4578 if (bestn->simdclone->mask_mode == VOIDmode)
4580 atype = bestn->simdclone->args[i].vector_type;
4581 tree elt_type = TREE_TYPE (atype);
4582 tree one = fold_convert (elt_type, integer_one_node);
4583 tree zero = fold_convert (elt_type, integer_zero_node);
4584 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4585 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4586 o = vector_unroll_factor (nunits, callee_nelements);
4587 for (m = j * o; m < (j + 1) * o; m++)
4589 if (maybe_lt (callee_nelements, caller_nelements))
4591 /* The mask type has fewer elements than simdlen. */
4593 /* FORNOW */
4594 gcc_unreachable ();
4596 else if (known_eq (callee_nelements, caller_nelements))
4598 /* The SIMD clone function has the same number of
4599 elements as the current function. */
4600 if (m == 0)
4602 if (!slp_node)
4603 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4604 o * ncopies,
4606 &vec_oprnds[i]);
4607 vec_oprnds_i[i] = 0;
4609 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4610 if (loop_vinfo
4611 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4613 vec_loop_masks *loop_masks
4614 = &LOOP_VINFO_MASKS (loop_vinfo);
4615 tree loop_mask
4616 = vect_get_loop_mask (loop_vinfo, gsi,
4617 loop_masks, ncopies,
4618 vectype, j);
4619 vec_oprnd0
4620 = prepare_vec_mask (loop_vinfo,
4621 TREE_TYPE (loop_mask),
4622 loop_mask, vec_oprnd0,
4623 gsi);
4624 loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4625 loop_mask });
4628 vec_oprnd0
4629 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4630 build_vector_from_val (atype, one),
4631 build_vector_from_val (atype, zero));
4632 gassign *new_stmt
4633 = gimple_build_assign (make_ssa_name (atype),
4634 vec_oprnd0);
4635 vect_finish_stmt_generation (vinfo, stmt_info,
4636 new_stmt, gsi);
4637 vargs.safe_push (gimple_assign_lhs (new_stmt));
4639 else
4641 /* The mask type has more elements than simdlen. */
4643 /* FORNOW */
4644 gcc_unreachable ();
4648 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4650 atype = bestn->simdclone->args[i].vector_type;
4651 /* Guess the number of lanes represented by atype. */
4652 poly_uint64 atype_subparts
4653 = exact_div (bestn->simdclone->simdlen,
4654 num_mask_args);
4655 o = vector_unroll_factor (nunits, atype_subparts);
4656 for (m = j * o; m < (j + 1) * o; m++)
4658 if (m == 0)
4660 if (!slp_node)
4661 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4662 o * ncopies,
4664 &vec_oprnds[i]);
4665 vec_oprnds_i[i] = 0;
4667 if (maybe_lt (atype_subparts,
4668 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4670 /* The mask argument has fewer elements than the
4671 input vector. */
4672 /* FORNOW */
4673 gcc_unreachable ();
4675 else if (known_eq (atype_subparts,
4676 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4678 /* The vector mask argument matches the input
4679 in the number of lanes, but not necessarily
4680 in the mode. */
4681 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4682 tree st = lang_hooks.types.type_for_mode
4683 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4684 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4685 vec_oprnd0);
4686 gassign *new_stmt
4687 = gimple_build_assign (make_ssa_name (st),
4688 vec_oprnd0);
4689 vect_finish_stmt_generation (vinfo, stmt_info,
4690 new_stmt, gsi);
4691 if (!types_compatible_p (atype, st))
4693 new_stmt
4694 = gimple_build_assign (make_ssa_name (atype),
4695 NOP_EXPR,
4696 gimple_assign_lhs
4697 (new_stmt));
4698 vect_finish_stmt_generation (vinfo, stmt_info,
4699 new_stmt, gsi);
4701 vargs.safe_push (gimple_assign_lhs (new_stmt));
4703 else
4705 /* The mask argument has more elements than the
4706 input vector. */
4707 /* FORNOW */
4708 gcc_unreachable ();
4712 else
4713 gcc_unreachable ();
4714 break;
4715 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4716 vargs.safe_push (op);
4717 break;
4718 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4719 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4720 if (j == 0)
4722 gimple_seq stmts;
4723 arginfo[i].op
4724 = force_gimple_operand (unshare_expr (arginfo[i].op),
4725 &stmts, true, NULL_TREE);
4726 if (stmts != NULL)
4728 basic_block new_bb;
4729 edge pe = loop_preheader_edge (loop);
4730 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4731 gcc_assert (!new_bb);
4733 if (arginfo[i].simd_lane_linear)
4735 vargs.safe_push (arginfo[i].op);
4736 break;
4738 tree phi_res = copy_ssa_name (op);
4739 gphi *new_phi = create_phi_node (phi_res, loop->header);
4740 add_phi_arg (new_phi, arginfo[i].op,
4741 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4742 enum tree_code code
4743 = POINTER_TYPE_P (TREE_TYPE (op))
4744 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4745 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4746 ? sizetype : TREE_TYPE (op);
4747 poly_widest_int cst
4748 = wi::mul (bestn->simdclone->args[i].linear_step,
4749 ncopies * nunits);
4750 tree tcst = wide_int_to_tree (type, cst);
4751 tree phi_arg = copy_ssa_name (op);
4752 gassign *new_stmt
4753 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4754 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4755 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4756 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4757 UNKNOWN_LOCATION);
4758 arginfo[i].op = phi_res;
4759 vargs.safe_push (phi_res);
4761 else
4763 enum tree_code code
4764 = POINTER_TYPE_P (TREE_TYPE (op))
4765 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4766 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4767 ? sizetype : TREE_TYPE (op);
4768 poly_widest_int cst
4769 = wi::mul (bestn->simdclone->args[i].linear_step,
4770 j * nunits);
4771 tree tcst = wide_int_to_tree (type, cst);
4772 new_temp = make_ssa_name (TREE_TYPE (op));
4773 gassign *new_stmt
4774 = gimple_build_assign (new_temp, code,
4775 arginfo[i].op, tcst);
4776 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4777 vargs.safe_push (new_temp);
4779 break;
4780 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4781 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4782 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4783 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4784 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4785 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4786 default:
4787 gcc_unreachable ();
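/* If an inbranch clone was chosen for an unconditional call, the trailing
mask argument has to be synthesized: use the loop mask in a fully-masked
loop and an all-ones mask otherwise, converted to the clone's mask
representation if needed.  */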
4791 if (masked_call_offset == 0
4792 && bestn->simdclone->inbranch
4793 && bestn->simdclone->nargs > nargs)
4795 unsigned long m, o;
4796 size_t mask_i = bestn->simdclone->nargs - 1;
4797 tree mask;
4798 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4799 SIMD_CLONE_ARG_TYPE_MASK);
4801 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4802 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4803 /* Guess the number of lanes represented by masktype. */
4804 callee_nelements = exact_div (bestn->simdclone->simdlen,
4805 bestn->simdclone->nargs - nargs);
4806 else
4807 callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4808 o = vector_unroll_factor (nunits, callee_nelements);
4809 for (m = j * o; m < (j + 1) * o; m++)
4811 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4813 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4814 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4815 ncopies, vectype, j);
4817 else
4818 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4820 gassign *new_stmt;
4821 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4823 /* This means we are dealing with integer mask modes.
4824 First convert to an integer type with the same size as
4825 the current vector type. */
4826 unsigned HOST_WIDE_INT intermediate_size
4827 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4828 tree mid_int_type =
4829 build_nonstandard_integer_type (intermediate_size, 1);
4830 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4831 new_stmt
4832 = gimple_build_assign (make_ssa_name (mid_int_type),
4833 mask);
4834 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4835 /* Then zero-extend to the mask mode. */
4836 mask = fold_build1 (NOP_EXPR, masktype,
4837 gimple_get_lhs (new_stmt));
4839 else if (bestn->simdclone->mask_mode == VOIDmode)
4841 tree one = fold_convert (TREE_TYPE (masktype),
4842 integer_one_node);
4843 tree zero = fold_convert (TREE_TYPE (masktype),
4844 integer_zero_node);
4845 mask = build3 (VEC_COND_EXPR, masktype, mask,
4846 build_vector_from_val (masktype, one),
4847 build_vector_from_val (masktype, zero));
4849 else
4850 gcc_unreachable ();
4852 new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
4853 vect_finish_stmt_generation (vinfo, stmt_info,
4854 new_stmt, gsi);
4855 mask = gimple_assign_lhs (new_stmt);
4856 vargs.safe_push (mask);
4860 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4861 if (vec_dest)
4863 gcc_assert (ratype
4864 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4865 if (ratype)
4866 new_temp = create_tmp_var (ratype);
4867 else if (useless_type_conversion_p (vectype, rtype))
4868 new_temp = make_ssa_name (vec_dest, new_call);
4869 else
4870 new_temp = make_ssa_name (rtype, new_call);
4871 gimple_call_set_lhs (new_call, new_temp);
4873 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4874 gimple *new_stmt = new_call;
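/* Unpack the return value.  If the clone returns more lanes than the
loop's vector type, split the result into pieces (BIT_FIELD_REFs, or
loads from the returned array); if it returns fewer lanes, accumulate the
results of several calls into a CONSTRUCTOR; otherwise a
VIEW_CONVERT_EXPR is enough when the types differ.  */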
4876 if (vec_dest)
4878 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4880 unsigned int k, l;
4881 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4882 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4883 k = vector_unroll_factor (nunits,
4884 TYPE_VECTOR_SUBPARTS (vectype));
4885 gcc_assert ((k & (k - 1)) == 0);
4886 for (l = 0; l < k; l++)
4888 tree t;
4889 if (ratype)
4891 t = build_fold_addr_expr (new_temp);
4892 t = build2 (MEM_REF, vectype, t,
4893 build_int_cst (TREE_TYPE (t), l * bytes));
4895 else
4896 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4897 bitsize_int (prec), bitsize_int (l * prec));
4898 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4899 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4901 if (j == 0 && l == 0)
4902 *vec_stmt = new_stmt;
4903 if (slp_node)
4904 SLP_TREE_VEC_DEFS (slp_node)
4905 .quick_push (gimple_assign_lhs (new_stmt));
4906 else
4907 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4910 if (ratype)
4911 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4912 continue;
4914 else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
4916 unsigned int k;
4917 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
4918 TYPE_VECTOR_SUBPARTS (rtype), &k))
4919 gcc_unreachable ();
4920 gcc_assert ((k & (k - 1)) == 0);
4921 if ((j & (k - 1)) == 0)
4922 vec_alloc (ret_ctor_elts, k);
4923 if (ratype)
4925 unsigned int m, o;
4926 o = vector_unroll_factor (nunits,
4927 TYPE_VECTOR_SUBPARTS (rtype));
4928 for (m = 0; m < o; m++)
4930 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4931 size_int (m), NULL_TREE, NULL_TREE);
4932 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4933 tem);
4934 vect_finish_stmt_generation (vinfo, stmt_info,
4935 new_stmt, gsi);
4936 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4937 gimple_assign_lhs (new_stmt));
4939 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4941 else
4942 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4943 if ((j & (k - 1)) != k - 1)
4944 continue;
4945 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4946 new_stmt
4947 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4948 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4950 if ((unsigned) j == k - 1)
4951 *vec_stmt = new_stmt;
4952 if (slp_node)
4953 SLP_TREE_VEC_DEFS (slp_node)
4954 .quick_push (gimple_assign_lhs (new_stmt));
4955 else
4956 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4957 continue;
4959 else if (ratype)
4961 tree t = build_fold_addr_expr (new_temp);
4962 t = build2 (MEM_REF, vectype, t,
4963 build_int_cst (TREE_TYPE (t), 0));
4964 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4965 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4966 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4968 else if (!useless_type_conversion_p (vectype, rtype))
4970 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4971 new_stmt
4972 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4973 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4977 if (j == 0)
4978 *vec_stmt = new_stmt;
4979 if (slp_node)
4980 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
4981 else
4982 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4985 for (i = 0; i < nargs; ++i)
4987 vec<tree> oprndsi = vec_oprnds[i];
4988 oprndsi.release ();
4990 vargs.release ();
4992 /* Mark the clone as no longer being a candidate for GC. */
4993 bestn->gc_candidate = false;
4995 /* The call in STMT might prevent it from being removed in dce.
4996 We, however, cannot remove it here, due to the way the ssa name
4997 it defines is mapped to the new definition. So just replace the
4998 rhs of the statement with something harmless. */
5000 if (slp_node)
5001 return true;
5003 gimple *new_stmt;
5004 if (scalar_dest)
5006 type = TREE_TYPE (scalar_dest);
5007 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
5008 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
5010 else
5011 new_stmt = gimple_build_nop ();
5012 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
5013 unlink_stmt_vdef (stmt);
5015 return true;
5019 /* Function vect_gen_widened_results_half
5021 Create a vector stmt whose code, number of operands, and result
5022 variable are CH, OP_TYPE, and VEC_DEST, and whose operands are
5023 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
5024 If CH names an internal function rather than a tree code, the
5025 corresponding internal-function call is built instead.
5026 STMT_INFO is the original scalar stmt that we are vectorizing. */
5028 static gimple *
5029 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
5030 tree vec_oprnd0, tree vec_oprnd1, int op_type,
5031 tree vec_dest, gimple_stmt_iterator *gsi,
5032 stmt_vec_info stmt_info)
5034 gimple *new_stmt;
5035 tree new_temp;
5037 /* Generate half of the widened result: */
5038 if (op_type != binary_op)
5039 vec_oprnd1 = NULL;
5040 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
5041 new_temp = make_ssa_name (vec_dest, new_stmt);
5042 gimple_set_lhs (new_stmt, new_temp);
5043 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5045 return new_stmt;
5049 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5050 For multi-step conversions store the resulting vectors and call the function
5051 recursively. When NARROW_SRC_P is true, there is still a conversion
5052 after narrowing, so don't store the vectors in the SLP_NODE or in the
5053 vector info of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
5055 static void
5056 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
5057 int multi_step_cvt,
5058 stmt_vec_info stmt_info,
5059 vec<tree> &vec_dsts,
5060 gimple_stmt_iterator *gsi,
5061 slp_tree slp_node, code_helper code,
5062 bool narrow_src_p)
5064 unsigned int i;
5065 tree vop0, vop1, new_tmp, vec_dest;
5067 vec_dest = vec_dsts.pop ();
5069 for (i = 0; i < vec_oprnds->length (); i += 2)
5071 /* Create demotion operation. */
5072 vop0 = (*vec_oprnds)[i];
5073 vop1 = (*vec_oprnds)[i + 1];
5074 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
5075 new_tmp = make_ssa_name (vec_dest, new_stmt);
5076 gimple_set_lhs (new_stmt, new_tmp);
5077 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5078 if (multi_step_cvt || narrow_src_p)
5079 /* Store the resulting vector for the next recursive call,
5080 or return the resulting vector_tmp for a NARROW FLOAT_EXPR. */
5081 (*vec_oprnds)[i/2] = new_tmp;
5082 else
5084 /* This is the last step of the conversion sequence. Store the
5085 vectors in SLP_NODE or in vector info of the scalar statement
5086 (or in STMT_VINFO_RELATED_STMT chain). */
5087 if (slp_node)
5088 slp_node->push_vec_def (new_stmt);
5089 else
5090 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5094 /* For multi-step demotion operations we first generate demotion operations
5095 from the source type to the intermediate types, and then combine the
5096 results (stored in VEC_OPRNDS) in a demotion operation to the destination
5097 type. */
5098 if (multi_step_cvt)
5100 /* At each level of recursion we have half of the operands we had at the
5101 previous level. */
5102 vec_oprnds->truncate ((i+1)/2);
5103 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5104 multi_step_cvt - 1,
5105 stmt_info, vec_dsts, gsi,
5106 slp_node, VEC_PACK_TRUNC_EXPR,
5107 narrow_src_p);
5110 vec_dsts.quick_push (vec_dest);
5114 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5115 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5116 STMT_INFO. For multi-step conversions store the resulting vectors and
5117 call the function recursively. */
5119 static void
5120 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5121 vec<tree> *vec_oprnds0,
5122 vec<tree> *vec_oprnds1,
5123 stmt_vec_info stmt_info, tree vec_dest,
5124 gimple_stmt_iterator *gsi,
5125 code_helper ch1,
5126 code_helper ch2, int op_type)
5128 int i;
5129 tree vop0, vop1, new_tmp1, new_tmp2;
5130 gimple *new_stmt1, *new_stmt2;
5131 vec<tree> vec_tmp = vNULL;
5133 vec_tmp.create (vec_oprnds0->length () * 2);
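/* Each input vector yields two result vectors, the widened low half and
the widened high half, so reserve twice the number of slots.  */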
5134 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5136 if (op_type == binary_op)
5137 vop1 = (*vec_oprnds1)[i];
5138 else
5139 vop1 = NULL_TREE;
5141 /* Generate the two halves of promotion operation. */
5142 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5143 op_type, vec_dest, gsi,
5144 stmt_info);
5145 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5146 op_type, vec_dest, gsi,
5147 stmt_info);
5148 if (is_gimple_call (new_stmt1))
5150 new_tmp1 = gimple_call_lhs (new_stmt1);
5151 new_tmp2 = gimple_call_lhs (new_stmt2);
5153 else
5155 new_tmp1 = gimple_assign_lhs (new_stmt1);
5156 new_tmp2 = gimple_assign_lhs (new_stmt2);
5159 /* Store the results for the next step. */
5160 vec_tmp.quick_push (new_tmp1);
5161 vec_tmp.quick_push (new_tmp2);
5164 vec_oprnds0->release ();
5165 *vec_oprnds0 = vec_tmp;
5168 /* Create vectorized promotion stmts for widening stmts using only half the
5169 potential vector size for input. */
5170 static void
5171 vect_create_half_widening_stmts (vec_info *vinfo,
5172 vec<tree> *vec_oprnds0,
5173 vec<tree> *vec_oprnds1,
5174 stmt_vec_info stmt_info, tree vec_dest,
5175 gimple_stmt_iterator *gsi,
5176 code_helper code1,
5177 int op_type)
5179 int i;
5180 tree vop0, vop1;
5181 gimple *new_stmt1;
5182 gimple *new_stmt2;
5183 gimple *new_stmt3;
5184 vec<tree> vec_tmp = vNULL;
5186 vec_tmp.create (vec_oprnds0->length ());
5187 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5189 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5191 gcc_assert (op_type == binary_op);
5192 vop1 = (*vec_oprnds1)[i];
5194 /* Widen the first vector input. */
5195 out_type = TREE_TYPE (vec_dest);
5196 new_tmp1 = make_ssa_name (out_type);
5197 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5198 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5199 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5201 /* Widen the second vector input. */
5202 new_tmp2 = make_ssa_name (out_type);
5203 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5204 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5205 /* Perform the operation with both vector inputs widened. */
5206 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5208 else
5210 /* Perform the operation with the single vector input widened. */
5211 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5214 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5215 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5216 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5218 /* Store the results for the next step. */
5219 vec_tmp.quick_push (new_tmp3);
5222 vec_oprnds0->release ();
5223 *vec_oprnds0 = vec_tmp;
5227 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5228 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5229 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5230 Return true if STMT_INFO is vectorizable in this way. */
5232 static bool
5233 vectorizable_conversion (vec_info *vinfo,
5234 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5235 gimple **vec_stmt, slp_tree slp_node,
5236 stmt_vector_for_cost *cost_vec)
5238 tree vec_dest, cvt_op = NULL_TREE;
5239 tree scalar_dest;
5240 tree op0, op1 = NULL_TREE;
5241 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5242 tree_code tc1;
5243 code_helper code, code1, code2;
5244 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5245 tree new_temp;
5246 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5247 int ndts = 2;
5248 poly_uint64 nunits_in;
5249 poly_uint64 nunits_out;
5250 tree vectype_out, vectype_in;
5251 int ncopies, i;
5252 tree lhs_type, rhs_type;
5253 /* For conversions between floating point and integer there are two NARROW
5254 cases. NARROW_SRC is for FLOAT_EXPR and means
5255 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5256 This is safe when the range of the source integer fits into the lower
5257 precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5258 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5259 For other narrowing conversions NARROW_DST is used by
5260 default. */
5261 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
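  /* For example, on a target with V2DF/V2DI/V4SI/V4SF vectors
       int_dst = (int) double_src;
     is a NARROW_DST case: FIX_TRUNC_EXPR to V2DI followed by packing two
     V2DI vectors into one V4SI, whereas
       float_dst = (float) long_src;
     can become NARROW_SRC when the value range of long_src is known to
     fit in int: two V2DI vectors are first packed into V4SI and only then
     converted with FLOAT_EXPR.  */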
5262 vec<tree> vec_oprnds0 = vNULL;
5263 vec<tree> vec_oprnds1 = vNULL;
5264 tree vop0;
5265 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5266 int multi_step_cvt = 0;
5267 vec<tree> interm_types = vNULL;
5268 tree intermediate_type, cvt_type = NULL_TREE;
5269 int op_type;
5270 unsigned short fltsz;
5272 /* Is STMT a vectorizable conversion? */
5274 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5275 return false;
5277 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5278 && ! vec_stmt)
5279 return false;
5281 gimple* stmt = stmt_info->stmt;
5282 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5283 return false;
5285 if (gimple_get_lhs (stmt) == NULL_TREE
5286 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5287 return false;
5292 if (is_gimple_assign (stmt))
5294 code = gimple_assign_rhs_code (stmt);
5295 op_type = TREE_CODE_LENGTH ((tree_code) code);
5297 else if (gimple_call_internal_p (stmt))
5299 code = gimple_call_internal_fn (stmt);
5300 op_type = gimple_call_num_args (stmt);
5302 else
5303 return false;
5305 bool widen_arith = (code == WIDEN_MULT_EXPR
5306 || code == WIDEN_LSHIFT_EXPR
5307 || widening_fn_p (code));
5309 if (!widen_arith
5310 && !CONVERT_EXPR_CODE_P (code)
5311 && code != FIX_TRUNC_EXPR
5312 && code != FLOAT_EXPR)
5313 return false;
5315 /* Check types of lhs and rhs. */
5316 scalar_dest = gimple_get_lhs (stmt);
5317 lhs_type = TREE_TYPE (scalar_dest);
5318 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5320 /* Check the operands of the operation. */
5321 slp_tree slp_op0, slp_op1 = NULL;
5322 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5323 0, &op0, &slp_op0, &dt[0], &vectype_in))
5325 if (dump_enabled_p ())
5326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5327 "use not simple.\n");
5328 return false;
5331 rhs_type = TREE_TYPE (op0);
5332 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5333 && !((INTEGRAL_TYPE_P (lhs_type)
5334 && INTEGRAL_TYPE_P (rhs_type))
5335 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5336 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5337 return false;
5339 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5340 && ((INTEGRAL_TYPE_P (lhs_type)
5341 && !type_has_mode_precision_p (lhs_type))
5342 || (INTEGRAL_TYPE_P (rhs_type)
5343 && !type_has_mode_precision_p (rhs_type))))
5345 if (dump_enabled_p ())
5346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5347 "type conversion to/from bit-precision unsupported."
5348 "\n");
5349 return false;
5352 if (op_type == binary_op)
5354 gcc_assert (code == WIDEN_MULT_EXPR
5355 || code == WIDEN_LSHIFT_EXPR
5356 || widening_fn_p (code));
5358 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5359 gimple_call_arg (stmt, 0);
5360 tree vectype1_in;
5361 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5362 &op1, &slp_op1, &dt[1], &vectype1_in))
5364 if (dump_enabled_p ())
5365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5366 "use not simple.\n");
5367 return false;
5369 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5370 OP1. */
5371 if (!vectype_in)
5372 vectype_in = vectype1_in;
5375 /* If op0 is an external or constant def, infer the vector type
5376 from the scalar type. */
5377 if (!vectype_in)
5378 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5379 if (vec_stmt)
5380 gcc_assert (vectype_in);
5381 if (!vectype_in)
5383 if (dump_enabled_p ())
5384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5385 "no vectype for scalar type %T\n", rhs_type);
5387 return false;
5390 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5391 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5393 if (dump_enabled_p ())
5394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5395 "can't convert between boolean and non "
5396 "boolean vectors %T\n", rhs_type);
5398 return false;
5401 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5402 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5403 if (known_eq (nunits_out, nunits_in))
5404 if (widen_arith)
5405 modifier = WIDEN;
5406 else
5407 modifier = NONE;
5408 else if (multiple_p (nunits_out, nunits_in))
5409 modifier = NARROW_DST;
5410 else
5412 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5413 modifier = WIDEN;
5416 /* Multiple types in SLP are handled by creating the appropriate number of
5417 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5418 case of SLP. */
5419 if (slp_node)
5420 ncopies = 1;
5421 else if (modifier == NARROW_DST)
5422 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5423 else
5424 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5426 /* Sanity check: make sure that at least one copy of the vectorized stmt
5427 needs to be generated. */
5428 gcc_assert (ncopies >= 1);
5430 bool found_mode = false;
5431 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5432 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5433 opt_scalar_mode rhs_mode_iter;
5434 vec<std::pair<tree, tree_code> > converts = vNULL;
5436 /* Supportable by target? */
5437 switch (modifier)
5439 case NONE:
5440 if (code != FIX_TRUNC_EXPR
5441 && code != FLOAT_EXPR
5442 && !CONVERT_EXPR_CODE_P (code))
5443 return false;
5444 gcc_assert (code.is_tree_code ());
5445 if (supportable_indirect_convert_operation (code,
5446 vectype_out,
5447 vectype_in,
5448 &converts,
5449 op0))
5451 gcc_assert (converts.length () <= 2);
5452 if (converts.length () == 1)
5453 code1 = converts[0].second;
5454 else
5456 cvt_type = NULL_TREE;
5457 multi_step_cvt = converts.length () - 1;
5458 codecvt1 = converts[0].second;
5459 code1 = converts[1].second;
5460 interm_types.safe_push (converts[0].first);
5462 break;
5465 /* FALLTHRU */
5466 unsupported:
5467 if (dump_enabled_p ())
5468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5469 "conversion not supported by target.\n");
5470 return false;
5472 case WIDEN:
5473 if (known_eq (nunits_in, nunits_out))
5475 if (!(code.is_tree_code ()
5476 && supportable_half_widening_operation ((tree_code) code,
5477 vectype_out, vectype_in,
5478 &tc1)))
5479 goto unsupported;
5480 code1 = tc1;
5481 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5482 break;
5484 if (supportable_widening_operation (vinfo, code, stmt_info,
5485 vectype_out, vectype_in, &code1,
5486 &code2, &multi_step_cvt,
5487 &interm_types))
5489 /* Binary widening operation can only be supported directly by the
5490 architecture. */
5491 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5492 break;
5495 if (code != FLOAT_EXPR
5496 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5497 goto unsupported;
5499 fltsz = GET_MODE_SIZE (lhs_mode);
5500 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5502 rhs_mode = rhs_mode_iter.require ();
5503 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5504 break;
5506 cvt_type
5507 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5508 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5509 if (cvt_type == NULL_TREE)
5510 goto unsupported;
5512 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5514 tc1 = ERROR_MARK;
5515 gcc_assert (code.is_tree_code ());
5516 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5517 cvt_type, &tc1))
5518 goto unsupported;
5519 codecvt1 = tc1;
5521 else if (!supportable_widening_operation (vinfo, code,
5522 stmt_info, vectype_out,
5523 cvt_type, &codecvt1,
5524 &codecvt2, &multi_step_cvt,
5525 &interm_types))
5526 continue;
5527 else
5528 gcc_assert (multi_step_cvt == 0);
5530 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5531 cvt_type,
5532 vectype_in, &code1,
5533 &code2, &multi_step_cvt,
5534 &interm_types))
5536 found_mode = true;
5537 break;
5541 if (!found_mode)
5542 goto unsupported;
5544 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5545 codecvt2 = ERROR_MARK;
5546 else
5548 multi_step_cvt++;
5549 interm_types.safe_push (cvt_type);
5550 cvt_type = NULL_TREE;
5552 break;
5554 case NARROW_DST:
5555 gcc_assert (op_type == unary_op);
5556 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5557 &code1, &multi_step_cvt,
5558 &interm_types))
5559 break;
5561 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5562 goto unsupported;
5564 if (code == FIX_TRUNC_EXPR)
5566 cvt_type
5567 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5568 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5569 if (cvt_type == NULL_TREE)
5570 goto unsupported;
5571 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5572 &tc1))
5573 codecvt1 = tc1;
5574 else
5575 goto unsupported;
5576 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5577 &code1, &multi_step_cvt,
5578 &interm_types))
5579 break;
5581 /* If op0 can be represented by a lower-precision integer,
5582 truncate it to cvt_type and then do the FLOAT_EXPR. */
5583 else if (code == FLOAT_EXPR)
5585 wide_int op_min_value, op_max_value;
5586 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5587 goto unsupported;
5589 cvt_type
5590 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5591 if (cvt_type == NULL_TREE
5592 || (wi::min_precision (op_max_value, SIGNED)
5593 > TYPE_PRECISION (cvt_type))
5594 || (wi::min_precision (op_min_value, SIGNED)
5595 > TYPE_PRECISION (cvt_type)))
5596 goto unsupported;
5598 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5599 if (cvt_type == NULL_TREE)
5600 goto unsupported;
5601 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5602 &code1, &multi_step_cvt,
5603 &interm_types))
5604 goto unsupported;
5605 if (supportable_convert_operation ((tree_code) code, vectype_out,
5606 cvt_type, &tc1))
5608 codecvt1 = tc1;
5609 modifier = NARROW_SRC;
5610 break;
5614 goto unsupported;
5616 default:
5617 gcc_unreachable ();
5620 if (!vec_stmt) /* transformation not required. */
5622 if (slp_node
5623 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5624 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5626 if (dump_enabled_p ())
5627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5628 "incompatible vector types for invariants\n");
5629 return false;
5631 DUMP_VECT_SCOPE ("vectorizable_conversion");
5632 if (modifier == NONE)
5634 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5635 vect_model_simple_cost (vinfo, stmt_info,
5636 ncopies * (1 + multi_step_cvt),
5637 dt, ndts, slp_node, cost_vec);
5639 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5641 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5642 /* The final packing step produces one vector result per copy. */
5643 unsigned int nvectors
5644 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5645 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5646 multi_step_cvt, cost_vec,
5647 widen_arith);
5649 else
5651 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5652 /* The initial unpacking step produces two vector results
5653 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5654 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5655 unsigned int nvectors
5656 = (slp_node
5657 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5658 : ncopies * 2);
5659 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5660 multi_step_cvt, cost_vec,
5661 widen_arith);
5663 interm_types.release ();
5664 return true;
5667 /* Transform. */
5668 if (dump_enabled_p ())
5669 dump_printf_loc (MSG_NOTE, vect_location,
5670 "transform conversion. ncopies = %d.\n", ncopies);
5672 if (op_type == binary_op)
5674 if (CONSTANT_CLASS_P (op0))
5675 op0 = fold_convert (TREE_TYPE (op1), op0);
5676 else if (CONSTANT_CLASS_P (op1))
5677 op1 = fold_convert (TREE_TYPE (op0), op1);
5680 /* In case of multi-step conversion, we first generate conversion operations
5681 to the intermediate types, and then from those types to the final one.
5682 We create vector destinations for the intermediate type (TYPES) received
5683 from supportable_*_operation, and store them in the correct order
5684 for future use in vect_create_vectorized_*_stmts (). */
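  /* E.g. a (signed char -> int) promotion with 128-bit vectors takes two
     steps through an intermediate V8HI type,
       V16QI --unpack--> 2x V8HI --unpack--> 4x V4SI,
     so MULTI_STEP_CVT is 1 and INTERM_TYPES holds the V8HI vector type.  */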
5685 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5686 bool widen_or_narrow_float_p
5687 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5688 vec_dest = vect_create_destination_var (scalar_dest,
5689 widen_or_narrow_float_p
5690 ? cvt_type : vectype_out);
5691 vec_dsts.quick_push (vec_dest);
5693 if (multi_step_cvt)
5695 for (i = interm_types.length () - 1;
5696 interm_types.iterate (i, &intermediate_type); i--)
5698 vec_dest = vect_create_destination_var (scalar_dest,
5699 intermediate_type);
5700 vec_dsts.quick_push (vec_dest);
5704 if (cvt_type)
5705 vec_dest = vect_create_destination_var (scalar_dest,
5706 widen_or_narrow_float_p
5707 ? vectype_out : cvt_type);
5709 int ninputs = 1;
5710 if (!slp_node)
5712 if (modifier == WIDEN)
5714 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5716 if (multi_step_cvt)
5717 ninputs = vect_pow2 (multi_step_cvt);
5718 ninputs *= 2;
5722 switch (modifier)
5724 case NONE:
5725 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5726 op0, vectype_in, &vec_oprnds0);
5727 /* vec_dest is the intermediate-type operand when multi_step_cvt is set. */
5728 if (multi_step_cvt)
5730 cvt_op = vec_dest;
5731 vec_dest = vec_dsts[0];
5734 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5736 /* Arguments are ready, create the new vector stmt. */
5737 gimple* new_stmt;
5738 if (multi_step_cvt)
5740 gcc_assert (multi_step_cvt == 1);
5741 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5742 new_temp = make_ssa_name (cvt_op, new_stmt);
5743 gimple_assign_set_lhs (new_stmt, new_temp);
5744 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5745 vop0 = new_temp;
5747 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5748 new_temp = make_ssa_name (vec_dest, new_stmt);
5749 gimple_set_lhs (new_stmt, new_temp);
5750 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5752 if (slp_node)
5753 slp_node->push_vec_def (new_stmt);
5754 else
5755 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5757 break;
5759 case WIDEN:
5760 /* In case the vectorization factor (VF) is bigger than the number
5761 of elements that we can fit in a vectype (nunits), we have to
5762 generate more than one vector stmt, i.e. we need to "unroll"
5763 the vector stmt by a factor VF/nunits. */
5764 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5765 op0, vectype_in, &vec_oprnds0,
5766 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5767 vectype_in, &vec_oprnds1);
5768 if (code == WIDEN_LSHIFT_EXPR)
5770 int oprnds_size = vec_oprnds0.length ();
5771 vec_oprnds1.create (oprnds_size);
5772 for (i = 0; i < oprnds_size; ++i)
5773 vec_oprnds1.quick_push (op1);
5775 /* Arguments are ready. Create the new vector stmts. */
5776 for (i = multi_step_cvt; i >= 0; i--)
5778 tree this_dest = vec_dsts[i];
5779 code_helper c1 = code1, c2 = code2;
5780 if (i == 0 && codecvt2 != ERROR_MARK)
5782 c1 = codecvt1;
5783 c2 = codecvt2;
5785 if (known_eq (nunits_out, nunits_in))
5786 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5787 stmt_info, this_dest, gsi, c1,
5788 op_type);
5789 else
5790 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5791 &vec_oprnds1, stmt_info,
5792 this_dest, gsi,
5793 c1, c2, op_type);
5796 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5798 gimple *new_stmt;
5799 if (cvt_type)
5801 new_temp = make_ssa_name (vec_dest);
5802 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5803 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5805 else
5806 new_stmt = SSA_NAME_DEF_STMT (vop0);
5808 if (slp_node)
5809 slp_node->push_vec_def (new_stmt);
5810 else
5811 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5813 break;
5815 case NARROW_SRC:
5816 case NARROW_DST:
5817 /* In case the vectorization factor (VF) is bigger than the number
5818 of elements that we can fit in a vectype (nunits), we have to
5819 generate more than one vector stmt, i.e. we need to "unroll"
5820 the vector stmt by a factor VF/nunits. */
5821 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5822 op0, vectype_in, &vec_oprnds0);
5823 /* Arguments are ready. Create the new vector stmts. */
5824 if (cvt_type && modifier == NARROW_DST)
5825 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5827 new_temp = make_ssa_name (vec_dest);
5828 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5829 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5830 vec_oprnds0[i] = new_temp;
5833 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5834 multi_step_cvt,
5835 stmt_info, vec_dsts, gsi,
5836 slp_node, code1,
5837 modifier == NARROW_SRC);
5838 /* After demoting op0 to cvt_type, convert it to dest. */
5839 if (cvt_type && code == FLOAT_EXPR)
5841 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5843 /* Arguments are ready, create the new vector stmt. */
5844 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5845 gimple *new_stmt
5846 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5847 new_temp = make_ssa_name (vec_dest, new_stmt);
5848 gimple_set_lhs (new_stmt, new_temp);
5849 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5851 /* This is the last step of the conversion sequence. Store the
5852 vectors in SLP_NODE or in vector info of the scalar statement
5853 (or in STMT_VINFO_RELATED_STMT chain). */
5854 if (slp_node)
5855 slp_node->push_vec_def (new_stmt);
5856 else
5857 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5860 break;
5862 if (!slp_node)
5863 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5865 vec_oprnds0.release ();
5866 vec_oprnds1.release ();
5867 interm_types.release ();
5869 return true;
5872 /* Return true if we can assume from the scalar form of STMT_INFO that
5873 neither the scalar nor the vector forms will generate code. STMT_INFO
5874 is known not to involve a data reference. */
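/* E.g. a plain copy, a VIEW_CONVERT_EXPR or a conversion between int and
   unsigned int (same precision) is expected to generate no code in either
   the scalar or the vector form, so it can be treated as free.  */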
5876 bool
5877 vect_nop_conversion_p (stmt_vec_info stmt_info)
5879 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5880 if (!stmt)
5881 return false;
5883 tree lhs = gimple_assign_lhs (stmt);
5884 tree_code code = gimple_assign_rhs_code (stmt);
5885 tree rhs = gimple_assign_rhs1 (stmt);
5887 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5888 return true;
5890 if (CONVERT_EXPR_CODE_P (code))
5891 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5893 return false;
5896 /* Function vectorizable_assignment.
5898 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5899 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5900 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5901 Return true if STMT_INFO is vectorizable in this way. */
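/* The assignments handled here are plain copies, PAREN_EXPRs and no-op
   conversions, e.g.
     int_dst = int_src;
     unsigned_dst = (unsigned int) int_src;
   The vectorized form is simply a copy of the operand vector, wrapped in
   a VIEW_CONVERT_EXPR for the conversion cases.  */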
5903 static bool
5904 vectorizable_assignment (vec_info *vinfo,
5905 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5906 gimple **vec_stmt, slp_tree slp_node,
5907 stmt_vector_for_cost *cost_vec)
5909 tree vec_dest;
5910 tree scalar_dest;
5911 tree op;
5912 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5913 tree new_temp;
5914 enum vect_def_type dt[1] = {vect_unknown_def_type};
5915 int ndts = 1;
5916 int ncopies;
5917 int i;
5918 vec<tree> vec_oprnds = vNULL;
5919 tree vop;
5920 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5921 enum tree_code code;
5922 tree vectype_in;
5924 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5925 return false;
5927 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5928 && ! vec_stmt)
5929 return false;
5931 /* Is vectorizable assignment? */
5932 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5933 if (!stmt)
5934 return false;
5936 scalar_dest = gimple_assign_lhs (stmt);
5937 if (TREE_CODE (scalar_dest) != SSA_NAME)
5938 return false;
5940 if (STMT_VINFO_DATA_REF (stmt_info))
5941 return false;
5943 code = gimple_assign_rhs_code (stmt);
5944 if (!(gimple_assign_single_p (stmt)
5945 || code == PAREN_EXPR
5946 || CONVERT_EXPR_CODE_P (code)))
5947 return false;
5949 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5950 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5952 /* Multiple types in SLP are handled by creating the appropriate number of
5953 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5954 case of SLP. */
5955 if (slp_node)
5956 ncopies = 1;
5957 else
5958 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5960 gcc_assert (ncopies >= 1);
5962 slp_tree slp_op;
5963 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5964 &dt[0], &vectype_in))
5966 if (dump_enabled_p ())
5967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5968 "use not simple.\n");
5969 return false;
5971 if (!vectype_in)
5972 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5974 /* We can handle VIEW_CONVERT conversions that do not change the number
5975 of elements or the vector size, and other conversions when the
5976 component types are nop-convertible. */
5977 if (!vectype_in
5978 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5979 || (code == VIEW_CONVERT_EXPR
5980 && maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5981 GET_MODE_SIZE (TYPE_MODE (vectype_in))))
5982 || (CONVERT_EXPR_CODE_P (code)
5983 && !tree_nop_conversion_p (TREE_TYPE (vectype),
5984 TREE_TYPE (vectype_in))))
5985 return false;
5987 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5989 if (dump_enabled_p ())
5990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5991 "can't convert between boolean and non "
5992 "boolean vectors %T\n", TREE_TYPE (op));
5994 return false;
5997 /* We do not handle bit-precision changes. */
5998 if ((CONVERT_EXPR_CODE_P (code)
5999 || code == VIEW_CONVERT_EXPR)
6000 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6001 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6002 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
6003 && !type_has_mode_precision_p (TREE_TYPE (op))))
6004 /* But a conversion that does not change the bit-pattern is ok. */
6005 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6006 && INTEGRAL_TYPE_P (TREE_TYPE (op))
6007 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
6008 > TYPE_PRECISION (TREE_TYPE (op)))
6009 && TYPE_UNSIGNED (TREE_TYPE (op)))
6010 || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
6011 == TYPE_PRECISION (TREE_TYPE (op))))))
6013 if (dump_enabled_p ())
6014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6015 "type conversion to/from bit-precision "
6016 "unsupported.\n");
6017 return false;
6020 if (!vec_stmt) /* transformation not required. */
6022 if (slp_node
6023 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6025 if (dump_enabled_p ())
6026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6027 "incompatible vector types for invariants\n");
6028 return false;
6030 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6031 DUMP_VECT_SCOPE ("vectorizable_assignment");
6032 if (!vect_nop_conversion_p (stmt_info))
6033 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
6034 cost_vec);
6035 return true;
6038 /* Transform. */
6039 if (dump_enabled_p ())
6040 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6042 /* Handle def. */
6043 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6045 /* Handle use. */
6046 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
6048 /* Arguments are ready. Create the new vector stmt. */
6049 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6051 if (CONVERT_EXPR_CODE_P (code)
6052 || code == VIEW_CONVERT_EXPR)
6053 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6054 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6055 new_temp = make_ssa_name (vec_dest, new_stmt);
6056 gimple_assign_set_lhs (new_stmt, new_temp);
6057 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6058 if (slp_node)
6059 slp_node->push_vec_def (new_stmt);
6060 else
6061 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6063 if (!slp_node)
6064 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6066 vec_oprnds.release ();
6067 return true;
6071 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6072 either as shift by a scalar or by a vector. */
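/* For example, a pattern recognizer that wants to emit a left shift on
   shorts could first check
     vect_supportable_shift (vinfo, LSHIFT_EXPR, short_integer_type_node)
   which succeeds if the target provides either a vector-by-scalar or a
   vector-by-vector shift for the corresponding vector mode.  */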
6074 bool
6075 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6078 machine_mode vec_mode;
6079 optab optab;
6080 int icode;
6081 tree vectype;
6083 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6084 if (!vectype)
6085 return false;
6087 optab = optab_for_tree_code (code, vectype, optab_scalar);
6088 if (!optab
6089 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6091 optab = optab_for_tree_code (code, vectype, optab_vector);
6092 if (!optab
6093 || (optab_handler (optab, TYPE_MODE (vectype))
6094 == CODE_FOR_nothing))
6095 return false;
6098 vec_mode = TYPE_MODE (vectype);
6099 icode = (int) optab_handler (optab, vec_mode);
6100 if (icode == CODE_FOR_nothing)
6101 return false;
6103 return true;
6107 /* Function vectorizable_shift.
6109 Check if STMT_INFO performs a shift operation that can be vectorized.
6110 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6111 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6112 Return true if STMT_INFO is vectorizable in this way. */
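/* Both forms of shift amount are handled here, e.g.
     a[i] = b[i] << 3;      <-- scalar/invariant shift amount
     a[i] = b[i] << c[i];   <-- vector shift amount
   The first form prefers the target's vector-by-scalar shift patterns and
   falls back to vector-by-vector shifts where necessary.  */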
6114 static bool
6115 vectorizable_shift (vec_info *vinfo,
6116 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6117 gimple **vec_stmt, slp_tree slp_node,
6118 stmt_vector_for_cost *cost_vec)
6120 tree vec_dest;
6121 tree scalar_dest;
6122 tree op0, op1 = NULL;
6123 tree vec_oprnd1 = NULL_TREE;
6124 tree vectype;
6125 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6126 enum tree_code code;
6127 machine_mode vec_mode;
6128 tree new_temp;
6129 optab optab;
6130 int icode;
6131 machine_mode optab_op2_mode;
6132 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6133 int ndts = 2;
6134 poly_uint64 nunits_in;
6135 poly_uint64 nunits_out;
6136 tree vectype_out;
6137 tree op1_vectype;
6138 int ncopies;
6139 int i;
6140 vec<tree> vec_oprnds0 = vNULL;
6141 vec<tree> vec_oprnds1 = vNULL;
6142 tree vop0, vop1;
6143 unsigned int k;
6144 bool scalar_shift_arg = true;
6145 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6146 bool incompatible_op1_vectype_p = false;
6148 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6149 return false;
6151 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6152 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6153 && ! vec_stmt)
6154 return false;
6156 /* Is STMT a vectorizable binary/unary operation? */
6157 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6158 if (!stmt)
6159 return false;
6161 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6162 return false;
6164 code = gimple_assign_rhs_code (stmt);
6166 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6167 || code == RROTATE_EXPR))
6168 return false;
6170 scalar_dest = gimple_assign_lhs (stmt);
6171 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6172 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6174 if (dump_enabled_p ())
6175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6176 "bit-precision shifts not supported.\n");
6177 return false;
6180 slp_tree slp_op0;
6181 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6182 0, &op0, &slp_op0, &dt[0], &vectype))
6184 if (dump_enabled_p ())
6185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6186 "use not simple.\n");
6187 return false;
6189 /* If op0 is an external or constant def, infer the vector type
6190 from the scalar type. */
6191 if (!vectype)
6192 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6193 if (vec_stmt)
6194 gcc_assert (vectype);
6195 if (!vectype)
6197 if (dump_enabled_p ())
6198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6199 "no vectype for scalar type\n");
6200 return false;
6203 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6204 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6205 if (maybe_ne (nunits_out, nunits_in))
6206 return false;
6208 stmt_vec_info op1_def_stmt_info;
6209 slp_tree slp_op1;
6210 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6211 &dt[1], &op1_vectype, &op1_def_stmt_info))
6213 if (dump_enabled_p ())
6214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6215 "use not simple.\n");
6216 return false;
6219 /* Multiple types in SLP are handled by creating the appropriate number of
6220 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6221 case of SLP. */
6222 if (slp_node)
6223 ncopies = 1;
6224 else
6225 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6227 gcc_assert (ncopies >= 1);
6229 /* Determine whether the shift amount is a vector, or scalar. If the
6230 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6232 if ((dt[1] == vect_internal_def
6233 || dt[1] == vect_induction_def
6234 || dt[1] == vect_nested_cycle)
6235 && (!slp_node || SLP_TREE_LANES (slp_node) == 1))
6236 scalar_shift_arg = false;
6237 else if (dt[1] == vect_constant_def
6238 || dt[1] == vect_external_def
6239 || dt[1] == vect_internal_def)
6241 /* In SLP we need to check whether the shift count is the same in
6242 all stmts; in loops, if it is a constant or invariant, it is
6243 always a scalar shift. */
6244 if (slp_node)
6246 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6247 stmt_vec_info slpstmt_info;
6249 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6250 if (slpstmt_info)
6252 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6253 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6254 scalar_shift_arg = false;
6257 /* For internal SLP defs we have to make sure we see scalar stmts
6258 for all vector elements.
6259 ??? For different vectors we could resort to a different
6260 scalar shift operand but code-generation below simply always
6261 takes the first. */
6262 if (dt[1] == vect_internal_def
6263 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6264 stmts.length ()))
6265 scalar_shift_arg = false;
6268 /* If the shift amount is computed by a pattern stmt we cannot
6269 use the scalar amount directly thus give up and use a vector
6270 shift. */
6271 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6272 scalar_shift_arg = false;
6274 else
6276 if (dump_enabled_p ())
6277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6278 "operand mode requires invariant argument.\n");
6279 return false;
6282 /* Vector shifted by vector. */
6283 bool was_scalar_shift_arg = scalar_shift_arg;
6284 if (!scalar_shift_arg)
6286 optab = optab_for_tree_code (code, vectype, optab_vector);
6287 if (dump_enabled_p ())
6288 dump_printf_loc (MSG_NOTE, vect_location,
6289 "vector/vector shift/rotate found.\n");
6291 if (!op1_vectype)
6292 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6293 slp_op1);
6294 incompatible_op1_vectype_p
6295 = (op1_vectype == NULL_TREE
6296 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6297 TYPE_VECTOR_SUBPARTS (vectype))
6298 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6299 if (incompatible_op1_vectype_p
6300 && (!slp_node
6301 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6302 || slp_op1->refcnt != 1))
6304 if (dump_enabled_p ())
6305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6306 "unusable type for last operand in"
6307 " vector/vector shift/rotate.\n");
6308 return false;
6311 /* See if the machine has a vector shifted by scalar insn and if not
6312 then see if it has a vector shifted by vector insn. */
6313 else
6315 optab = optab_for_tree_code (code, vectype, optab_scalar);
6316 if (optab
6317 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6319 if (dump_enabled_p ())
6320 dump_printf_loc (MSG_NOTE, vect_location,
6321 "vector/scalar shift/rotate found.\n");
6323 else
6325 optab = optab_for_tree_code (code, vectype, optab_vector);
6326 if (optab
6327 && (optab_handler (optab, TYPE_MODE (vectype))
6328 != CODE_FOR_nothing))
6330 scalar_shift_arg = false;
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_NOTE, vect_location,
6334 "vector/vector shift/rotate found.\n");
6336 if (!op1_vectype)
6337 op1_vectype = get_vectype_for_scalar_type (vinfo,
6338 TREE_TYPE (op1),
6339 slp_op1);
6341 /* Unlike the other binary operators, shifts/rotates have
6342 the rhs being int, instead of the same type as the lhs,
6343 so make sure the scalar is the right type if we are
6344 dealing with vectors of long long/long/short/char. */
6345 incompatible_op1_vectype_p
6346 = (!op1_vectype
6347 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6348 TREE_TYPE (op1)));
6349 if (incompatible_op1_vectype_p
6350 && dt[1] == vect_internal_def)
6352 if (dump_enabled_p ())
6353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6354 "unusable type for last operand in"
6355 " vector/vector shift/rotate.\n");
6356 return false;
6362 /* Supportable by target? */
6363 if (!optab)
6365 if (dump_enabled_p ())
6366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6367 "no optab.\n");
6368 return false;
6370 vec_mode = TYPE_MODE (vectype);
6371 icode = (int) optab_handler (optab, vec_mode);
6372 if (icode == CODE_FOR_nothing)
6374 if (dump_enabled_p ())
6375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6376 "op not supported by target.\n");
6377 return false;
6380 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
6380 if (vect_emulated_vector_p (vectype))
6381 return false;
6383 if (!vec_stmt) /* transformation not required. */
6385 if (slp_node
6386 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6387 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6388 && (!incompatible_op1_vectype_p
6389 || dt[1] == vect_constant_def)
6390 && !vect_maybe_update_slp_op_vectype
6391 (slp_op1,
6392 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6394 if (dump_enabled_p ())
6395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6396 "incompatible vector types for invariants\n");
6397 return false;
6399 /* Now adjust the constant shift amount in place. */
6400 if (slp_node
6401 && incompatible_op1_vectype_p
6402 && dt[1] == vect_constant_def)
6404 for (unsigned i = 0;
6405 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6407 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6408 = fold_convert (TREE_TYPE (vectype),
6409 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6410 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6411 == INTEGER_CST));
6414 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6415 DUMP_VECT_SCOPE ("vectorizable_shift");
6416 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6417 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6418 return true;
6421 /* Transform. */
6423 if (dump_enabled_p ())
6424 dump_printf_loc (MSG_NOTE, vect_location,
6425 "transform binary/unary operation.\n");
6427 if (incompatible_op1_vectype_p && !slp_node)
6429 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6430 op1 = fold_convert (TREE_TYPE (vectype), op1);
6431 if (dt[1] != vect_constant_def)
6432 op1 = vect_init_vector (vinfo, stmt_info, op1,
6433 TREE_TYPE (vectype), NULL);
6436 /* Handle def. */
6437 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6439 if (scalar_shift_arg && dt[1] != vect_internal_def)
6441 /* Vector shl and shr insn patterns can be defined with scalar
6442 operand 2 (shift operand). In this case, use constant or loop
6443 invariant op1 directly, without extending it to vector mode
6444 first. */
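      /* E.g. on targets whose shift patterns take the count in a scalar
         register or as an immediate, optab_op2_mode is not a vector mode
         and op1 can be used directly below.  */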
6445 optab_op2_mode = insn_data[icode].operand[2].mode;
6446 if (!VECTOR_MODE_P (optab_op2_mode))
6448 if (dump_enabled_p ())
6449 dump_printf_loc (MSG_NOTE, vect_location,
6450 "operand 1 using scalar mode.\n");
6451 vec_oprnd1 = op1;
6452 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6453 vec_oprnds1.quick_push (vec_oprnd1);
6454 /* Store vec_oprnd1 for every vector stmt to be created.
6455 We check during the analysis that all the shift arguments
6456 are the same.
6457 TODO: Allow different constants for different vector
6458 stmts generated for an SLP instance. */
6459 for (k = 0;
6460 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6461 vec_oprnds1.quick_push (vec_oprnd1);
6464 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6466 if (was_scalar_shift_arg)
6468 /* If the argument was the same in all lanes, create
6469 the correctly typed vector shift amount directly. */
6470 op1 = fold_convert (TREE_TYPE (vectype), op1);
6471 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6472 !loop_vinfo ? gsi : NULL);
6473 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6474 !loop_vinfo ? gsi : NULL);
6475 vec_oprnds1.create (slp_node->vec_stmts_size);
6476 for (k = 0; k < slp_node->vec_stmts_size; k++)
6477 vec_oprnds1.quick_push (vec_oprnd1);
6479 else if (dt[1] == vect_constant_def)
6480 /* The constant shift amount has been adjusted in place. */
6482 else
6483 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6486 /* vec_oprnd1 is available if operand 1 should be of a scalar type
6487 (a special case for certain kinds of vector shifts); otherwise,
6488 operand 1 should be of a vector type (the usual case). */
6489 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6490 op0, &vec_oprnds0,
6491 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6493 /* Arguments are ready. Create the new vector stmt. */
6494 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6496 /* For internal defs where we need to use a scalar shift arg
6497 extract the first lane. */
6498 if (scalar_shift_arg && dt[1] == vect_internal_def)
6500 vop1 = vec_oprnds1[0];
6501 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6502 gassign *new_stmt
6503 = gimple_build_assign (new_temp,
6504 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6505 vop1,
6506 TYPE_SIZE (TREE_TYPE (new_temp)),
6507 bitsize_zero_node));
6508 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6509 vop1 = new_temp;
6511 else
6512 vop1 = vec_oprnds1[i];
6513 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6514 new_temp = make_ssa_name (vec_dest, new_stmt);
6515 gimple_assign_set_lhs (new_stmt, new_temp);
6516 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6517 if (slp_node)
6518 slp_node->push_vec_def (new_stmt);
6519 else
6520 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6523 if (!slp_node)
6524 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6526 vec_oprnds0.release ();
6527 vec_oprnds1.release ();
6529 return true;
6532 /* Function vectorizable_operation.
6534 Check if STMT_INFO performs a binary, unary or ternary operation that can
6535 be vectorized.
6536 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6537 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6538 Return true if STMT_INFO is vectorizable in this way. */
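/* Typical examples are arithmetic and bitwise statements such as
     a[i] = b[i] + c[i];
     a[i] = b[i] & 0xff;
     a[i] = -b[i];
   Shifts, comparisons, conditions, loads and stores are rejected here and
   handled by their dedicated vectorizable_* routines instead.  */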
6540 static bool
6541 vectorizable_operation (vec_info *vinfo,
6542 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6543 gimple **vec_stmt, slp_tree slp_node,
6544 stmt_vector_for_cost *cost_vec)
6546 tree vec_dest;
6547 tree scalar_dest;
6548 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6549 tree vectype;
6550 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6551 enum tree_code code, orig_code;
6552 machine_mode vec_mode;
6553 tree new_temp;
6554 int op_type;
6555 optab optab;
6556 bool target_support_p;
6557 enum vect_def_type dt[3]
6558 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6559 int ndts = 3;
6560 poly_uint64 nunits_in;
6561 poly_uint64 nunits_out;
6562 tree vectype_out;
6563 unsigned int ncopies;
6564 int vec_num;
6565 int i;
6566 vec<tree> vec_oprnds0 = vNULL;
6567 vec<tree> vec_oprnds1 = vNULL;
6568 vec<tree> vec_oprnds2 = vNULL;
6569 tree vop0, vop1, vop2;
6570 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6572 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6573 return false;
6575 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6576 && ! vec_stmt)
6577 return false;
6579 /* Is STMT a vectorizable binary/unary operation? */
6580 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6581 if (!stmt)
6582 return false;
6584 /* Loads and stores are handled in vectorizable_{load,store}. */
6585 if (STMT_VINFO_DATA_REF (stmt_info))
6586 return false;
6588 orig_code = code = gimple_assign_rhs_code (stmt);
6590 /* Shifts are handled in vectorizable_shift. */
6591 if (code == LSHIFT_EXPR
6592 || code == RSHIFT_EXPR
6593 || code == LROTATE_EXPR
6594 || code == RROTATE_EXPR)
6595 return false;
6597 /* Comparisons are handled in vectorizable_comparison. */
6598 if (TREE_CODE_CLASS (code) == tcc_comparison)
6599 return false;
6601 /* Conditions are handled in vectorizable_condition. */
6602 if (code == COND_EXPR)
6603 return false;
6605 /* For pointer addition and subtraction, we should use the normal
6606 plus and minus for the vector operation. */
6607 if (code == POINTER_PLUS_EXPR)
6608 code = PLUS_EXPR;
6609 if (code == POINTER_DIFF_EXPR)
6610 code = MINUS_EXPR;
6612 /* Support only unary, binary and ternary operations. */
6613 op_type = TREE_CODE_LENGTH (code);
6614 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6616 if (dump_enabled_p ())
6617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6618 "num. args = %d (not unary/binary/ternary op).\n",
6619 op_type);
6620 return false;
6623 scalar_dest = gimple_assign_lhs (stmt);
6624 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6626 /* Most operations cannot handle bit-precision types without extra
6627 truncations. */
6628 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6629 if (!mask_op_p
6630 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6631 /* Exceptions are the bitwise binary operations. */
6632 && code != BIT_IOR_EXPR
6633 && code != BIT_XOR_EXPR
6634 && code != BIT_AND_EXPR)
6636 if (dump_enabled_p ())
6637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6638 "bit-precision arithmetic not supported.\n");
6639 return false;
6642 slp_tree slp_op0;
6643 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6644 0, &op0, &slp_op0, &dt[0], &vectype))
6646 if (dump_enabled_p ())
6647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6648 "use not simple.\n");
6649 return false;
6651 bool is_invariant = (dt[0] == vect_external_def
6652 || dt[0] == vect_constant_def);
6653 /* If op0 is an external or constant def, infer the vector type
6654 from the scalar type. */
6655 if (!vectype)
6657 /* For a boolean type we cannot determine the vectype from an
6658 invariant value (we don't know whether it is a vector
6659 of booleans or a vector of integers). Use the output
6660 vectype because operations on booleans don't change the
6661 type. */
6662 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6664 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6666 if (dump_enabled_p ())
6667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6668 "not supported operation on bool value.\n");
6669 return false;
6671 vectype = vectype_out;
6673 else
6674 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6675 slp_node);
6677 if (vec_stmt)
6678 gcc_assert (vectype);
6679 if (!vectype)
6681 if (dump_enabled_p ())
6682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6683 "no vectype for scalar type %T\n",
6684 TREE_TYPE (op0));
6686 return false;
6689 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6690 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6691 if (maybe_ne (nunits_out, nunits_in)
6692 || !tree_nop_conversion_p (TREE_TYPE (vectype_out), TREE_TYPE (vectype)))
6693 return false;
6695 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6696 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6697 if (op_type == binary_op || op_type == ternary_op)
6699 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6700 1, &op1, &slp_op1, &dt[1], &vectype2))
6702 if (dump_enabled_p ())
6703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6704 "use not simple.\n");
6705 return false;
6707 is_invariant &= (dt[1] == vect_external_def
6708 || dt[1] == vect_constant_def);
6709 if (vectype2
6710 && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2))
6711 || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6712 TREE_TYPE (vectype2))))
6713 return false;
6715 if (op_type == ternary_op)
6717 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6718 2, &op2, &slp_op2, &dt[2], &vectype3))
6720 if (dump_enabled_p ())
6721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6722 "use not simple.\n");
6723 return false;
6725 is_invariant &= (dt[2] == vect_external_def
6726 || dt[2] == vect_constant_def);
6727 if (vectype3
6728 && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3))
6729 || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6730 TREE_TYPE (vectype3))))
6731 return false;
6734 /* Multiple types in SLP are handled by creating the appropriate number of
6735 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6736 case of SLP. */
6737 if (slp_node)
6739 ncopies = 1;
6740 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6742 else
6744 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6745 vec_num = 1;
6748 gcc_assert (ncopies >= 1);
6750 /* Reject attempts to combine mask types with nonmask types, e.g. if
6751 we have an AND between a (nonmask) boolean loaded from memory and
6752 a (mask) boolean result of a comparison.
6754 TODO: We could easily fix these cases up using pattern statements. */
6755 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6756 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6757 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6759 if (dump_enabled_p ())
6760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6761 "mixed mask and nonmask vector types\n");
6762 return false;
6765 /* Supportable by target? */
6767 vec_mode = TYPE_MODE (vectype);
6768 if (code == MULT_HIGHPART_EXPR)
6769 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6770 else
6772 optab = optab_for_tree_code (code, vectype, optab_default);
6773 if (!optab)
6775 if (dump_enabled_p ())
6776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6777 "no optab.\n");
6778 return false;
6780 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6781 || optab_libfunc (optab, vec_mode));
6784 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6785 if (!target_support_p || using_emulated_vectors_p)
6787 if (dump_enabled_p ())
6788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6789 "op not supported by target.\n");
6790 /* When vec_mode is not a vector mode and we have verified that the
6791 ops we do not have to lower (like AND) are natively supported, let
6792 those through even when the mode isn't word_mode. For
6793 ops we do have to lower, the lowering code assumes we are
6794 dealing with word_mode. */
6795 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
6796 || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6797 || !target_support_p)
6798 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6799 /* Check only during analysis. */
6800 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6802 if (dump_enabled_p ())
6803 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6804 return false;
6806 if (dump_enabled_p ())
6807 dump_printf_loc (MSG_NOTE, vect_location,
6808 "proceeding using word mode.\n");
6809 using_emulated_vectors_p = true;
6812 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6813 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6814 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6815 internal_fn cond_fn = get_conditional_internal_fn (code);
6816 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6818 /* If operating on inactive elements could generate spurious traps,
6819 we need to restrict the operation to active lanes. Note that this
6820 specifically doesn't apply to unhoisted invariants, since they
6821 operate on the same value for every lane.
6823 Similarly, if this operation is part of a reduction, a fully-masked
6824 loop should only change the active lanes of the reduction chain,
6825 keeping the inactive lanes as-is. */
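  /* E.g. in a fully-masked loop a potentially trapping floating-point
     addition may be emitted as
       vres = .COND_ADD (loop_mask, va, vb, else_value);
     so that inactive lanes neither trap nor disturb a reduction.  */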
6826 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6827 || reduc_idx >= 0);
6829 if (!vec_stmt) /* transformation not required. */
6831 if (loop_vinfo
6832 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6833 && mask_out_inactive)
6835 if (cond_len_fn != IFN_LAST
6836 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6837 OPTIMIZE_FOR_SPEED))
6838 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6840 else if (cond_fn != IFN_LAST
6841 && direct_internal_fn_supported_p (cond_fn, vectype,
6842 OPTIMIZE_FOR_SPEED))
6843 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6844 vectype, NULL);
6845 else
6847 if (dump_enabled_p ())
6848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6849 "can't use a fully-masked loop because no"
6850 " conditional operation is available.\n");
6851 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6855 /* Put types on constant and invariant SLP children. */
6856 if (slp_node
6857 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6858 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6859 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6861 if (dump_enabled_p ())
6862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6863 "incompatible vector types for invariants\n");
6864 return false;
6867 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6868 DUMP_VECT_SCOPE ("vectorizable_operation");
6869 vect_model_simple_cost (vinfo, stmt_info,
6870 ncopies, dt, ndts, slp_node, cost_vec);
6871 if (using_emulated_vectors_p)
6873 /* The above vect_model_simple_cost call handles constants
6874 in the prologue and (mis-)costs one of the stmts as
6875 vector stmt. See below for the actual lowering that will
6876 be applied. */
6877 unsigned n
6878 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6879 switch (code)
6881 case PLUS_EXPR:
6882 n *= 5;
6883 break;
6884 case MINUS_EXPR:
6885 n *= 6;
6886 break;
6887 case NEGATE_EXPR:
6888 n *= 4;
6889 break;
6890 default:
6891 /* Bit operations do not have extra cost and are accounted
6892 for as a vector stmt by vect_model_simple_cost. */
6893 n = 0;
6894 break;
6896 if (n != 0)
6898 /* We also need to materialize two large constants. */
6899 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6900 0, vect_prologue);
6901 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6902 0, vect_body);
6905 return true;
6908 /* Transform. */
6910 if (dump_enabled_p ())
6911 dump_printf_loc (MSG_NOTE, vect_location,
6912 "transform binary/unary operation.\n");
6914 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6915 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6917 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6918 vectors with unsigned elements, but the result is signed. So, we
6919 need to compute the MINUS_EXPR into vectype temporary and
6920 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6921 tree vec_cvt_dest = NULL_TREE;
6922 if (orig_code == POINTER_DIFF_EXPR)
6924 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6925 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6927 /* Handle def. */
6928 else
6929 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6931 /* In case the vectorization factor (VF) is bigger than the number
6932 of elements that we can fit in a vectype (nunits), we have to generate
6933 more than one vector stmt, i.e. we need to "unroll" the
6934 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6935 from one copy of the vector stmt to the next, in the field
6936 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6937 stages to find the correct vector defs to be used when vectorizing
6938 stmts that use the defs of the current stmt. The example below
6939 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6940 we need to create 4 vectorized stmts):
6942 before vectorization:
6943 RELATED_STMT VEC_STMT
6944 S1: x = memref - -
6945 S2: z = x + 1 - -
6947 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6948 there):
6949 RELATED_STMT VEC_STMT
6950 VS1_0: vx0 = memref0 VS1_1 -
6951 VS1_1: vx1 = memref1 VS1_2 -
6952 VS1_2: vx2 = memref2 VS1_3 -
6953 VS1_3: vx3 = memref3 - -
6954 S1: x = load - VS1_0
6955 S2: z = x + 1 - -
6957 step2: vectorize stmt S2 (done here):
6958 To vectorize stmt S2 we first need to find the relevant vector
6959 def for the first operand 'x'. This is, as usual, obtained from
6960 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6961 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6962 relevant vector def 'vx0'. Having found 'vx0' we can generate
6963 the vector stmt VS2_0, and as usual, record it in the
6964 STMT_VINFO_VEC_STMT of stmt S2.
6965 When creating the second copy (VS2_1), we obtain the relevant vector
6966 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6967 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6968 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6969 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6970 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6971 chain of stmts and pointers:
6972 RELATED_STMT VEC_STMT
6973 VS1_0: vx0 = memref0 VS1_1 -
6974 VS1_1: vx1 = memref1 VS1_2 -
6975 VS1_2: vx2 = memref2 VS1_3 -
6976 VS1_3: vx3 = memref3 - -
6977 S1: x = load - VS1_0
6978 VS2_0: vz0 = vx0 + v1 VS2_1 -
6979 VS2_1: vz1 = vx1 + v1 VS2_2 -
6980 VS2_2: vz2 = vx2 + v1 VS2_3 -
6981 VS2_3: vz3 = vx3 + v1 - -
6982 S2: z = x + 1 - VS2_0 */
6984 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6985 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6986 /* Arguments are ready. Create the new vector stmt. */
6987 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6989 gimple *new_stmt = NULL;
6990 vop1 = ((op_type == binary_op || op_type == ternary_op)
6991 ? vec_oprnds1[i] : NULL_TREE);
6992 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6993 if (using_emulated_vectors_p
6994 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6996 /* Lower the operation. This follows vector lowering. */
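          /* The lowering treats the whole vector as one word_mode integer.
             For PLUS_EXPR the element-wise additions are emulated
             essentially as
               result = ((a & low_bits) + (b & low_bits)) ^ ((a ^ b) & high_bits)
             where high_bits has only each element's sign bit set and
             low_bits has all of the remaining bits set, so carries cannot
             propagate from one element into the next.  */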
6997 unsigned int width = vector_element_bits (vectype);
6998 tree inner_type = TREE_TYPE (vectype);
6999 tree word_type
7000 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
7001 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
7002 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
7003 tree high_bits
7004 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
7005 tree wvop0 = make_ssa_name (word_type);
7006 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
7007 build1 (VIEW_CONVERT_EXPR,
7008 word_type, vop0));
7009 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7010 tree result_low, signs;
7011 if (code == PLUS_EXPR || code == MINUS_EXPR)
7013 tree wvop1 = make_ssa_name (word_type);
7014 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
7015 build1 (VIEW_CONVERT_EXPR,
7016 word_type, vop1));
7017 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7018 signs = make_ssa_name (word_type);
7019 new_stmt = gimple_build_assign (signs,
7020 BIT_XOR_EXPR, wvop0, wvop1);
7021 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7022 tree b_low = make_ssa_name (word_type);
7023 new_stmt = gimple_build_assign (b_low,
7024 BIT_AND_EXPR, wvop1, low_bits);
7025 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7026 tree a_low = make_ssa_name (word_type);
7027 if (code == PLUS_EXPR)
7028 new_stmt = gimple_build_assign (a_low,
7029 BIT_AND_EXPR, wvop0, low_bits);
7030 else
7031 new_stmt = gimple_build_assign (a_low,
7032 BIT_IOR_EXPR, wvop0, high_bits);
7033 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7034 if (code == MINUS_EXPR)
7036 new_stmt = gimple_build_assign (NULL_TREE,
7037 BIT_NOT_EXPR, signs);
7038 signs = make_ssa_name (word_type);
7039 gimple_assign_set_lhs (new_stmt, signs);
7040 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7042 new_stmt = gimple_build_assign (NULL_TREE,
7043 BIT_AND_EXPR, signs, high_bits);
7044 signs = make_ssa_name (word_type);
7045 gimple_assign_set_lhs (new_stmt, signs);
7046 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7047 result_low = make_ssa_name (word_type);
7048 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7049 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7051 else
7053 tree a_low = make_ssa_name (word_type);
7054 new_stmt = gimple_build_assign (a_low,
7055 BIT_AND_EXPR, wvop0, low_bits);
7056 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7057 signs = make_ssa_name (word_type);
7058 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7059 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7060 new_stmt = gimple_build_assign (NULL_TREE,
7061 BIT_AND_EXPR, signs, high_bits);
7062 signs = make_ssa_name (word_type);
7063 gimple_assign_set_lhs (new_stmt, signs);
7064 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7065 result_low = make_ssa_name (word_type);
7066 new_stmt = gimple_build_assign (result_low,
7067 MINUS_EXPR, high_bits, a_low);
7068 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7070 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7071 signs);
7072 result_low = make_ssa_name (word_type);
7073 gimple_assign_set_lhs (new_stmt, result_low);
7074 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7075 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7076 build1 (VIEW_CONVERT_EXPR,
7077 vectype, result_low));
7078 new_temp = make_ssa_name (vectype);
7079 gimple_assign_set_lhs (new_stmt, new_temp);
7080 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7082 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7084 tree mask;
7085 if (masked_loop_p)
7086 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7087 vec_num * ncopies, vectype, i);
7088 else
7089 /* Dummy mask. */
7090 mask = build_minus_one_cst (truth_type_for (vectype));
7091 auto_vec<tree> vops (6);
7092 vops.quick_push (mask);
7093 vops.quick_push (vop0);
7094 if (vop1)
7095 vops.quick_push (vop1);
7096 if (vop2)
7097 vops.quick_push (vop2);
7098 if (reduc_idx >= 0)
7100 /* Perform the operation on active elements only and take
7101 inactive elements from the reduction chain input. */
7102 gcc_assert (!vop2);
7103 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7105 else
7107 auto else_value = targetm.preferred_else_value
7108 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7109 vops.quick_push (else_value);
7111 if (len_loop_p)
7113 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7114 vec_num * ncopies, vectype, i, 1);
7115 signed char biasval
7116 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7117 tree bias = build_int_cst (intQI_type_node, biasval);
7118 vops.quick_push (len);
7119 vops.quick_push (bias);
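/* The internal call built below has roughly the form
   .COND_<OP> (mask, vop0[, vop1[, vop2]], else_value)
   or, when lengths are used,
   .COND_LEN_<OP> (mask, vop0[, vop1[, vop2]], else_value, len, bias).
   For reductions the else value is the reduction chain input, so
   inactive lanes simply pass that input through.  */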
7121 gcall *call
7122 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7123 : cond_len_fn,
7124 vops);
7125 new_temp = make_ssa_name (vec_dest, call);
7126 gimple_call_set_lhs (call, new_temp);
7127 gimple_call_set_nothrow (call, true);
7128 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7129 new_stmt = call;
7131 else
7133 tree mask = NULL_TREE;
7134 /* When combining two masks, check whether either of them is elsewhere
7135 combined with a loop mask; if that is the case, we can mark that the
7136 new combined mask doesn't need to be combined with a loop mask. */
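/* For instance, if the scalar condition behind op0 is AND'ed with the
   loop mask elsewhere, we fold the loop mask into vop0 here and record
   the combined value in vec_cond_masked_set below, so its user does not
   AND it with the loop mask a second time.  */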
7137 if (masked_loop_p
7138 && code == BIT_AND_EXPR
7139 && VECTOR_BOOLEAN_TYPE_P (vectype))
7141 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7142 ncopies}))
7144 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7145 vec_num * ncopies, vectype, i);
7147 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7148 vop0, gsi);
7151 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7152 ncopies }))
7154 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7155 vec_num * ncopies, vectype, i);
7157 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7158 vop1, gsi);
7162 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7163 new_temp = make_ssa_name (vec_dest, new_stmt);
7164 gimple_assign_set_lhs (new_stmt, new_temp);
7165 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7166 if (using_emulated_vectors_p)
7167 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7169 /* Enter the combined value into the vector cond hash so we don't
7170 AND it with a loop mask again. */
7171 if (mask)
7172 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7175 if (vec_cvt_dest)
7177 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7178 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7179 new_temp);
7180 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7181 gimple_assign_set_lhs (new_stmt, new_temp);
7182 vect_finish_stmt_generation (vinfo, stmt_info,
7183 new_stmt, gsi);
7186 if (slp_node)
7187 slp_node->push_vec_def (new_stmt);
7188 else
7189 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7192 if (!slp_node)
7193 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7195 vec_oprnds0.release ();
7196 vec_oprnds1.release ();
7197 vec_oprnds2.release ();
7199 return true;
7202 /* A helper function to ensure data reference DR_INFO's base alignment. */
7204 static void
7205 ensure_base_align (dr_vec_info *dr_info)
7207 /* Alignment is only analyzed for the first element of a DR group;
7208 use that to determine the base alignment we need to enforce. */
7209 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7210 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7212 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7214 if (dr_info->base_misaligned)
7216 tree base_decl = dr_info->base_decl;
7218 // We should only be able to increase the alignment of a base object if
7219 // we know what its new alignment should be at compile time.
7220 unsigned HOST_WIDE_INT align_base_to =
7221 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7223 if (decl_in_symtab_p (base_decl))
7224 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7225 else if (DECL_ALIGN (base_decl) < align_base_to)
7227 SET_DECL_ALIGN (base_decl, align_base_to);
7228 DECL_USER_ALIGN (base_decl) = 1;
7230 dr_info->base_misaligned = false;
7235 /* Function get_group_alias_ptr_type.
7237 Return the alias type for the group starting at FIRST_STMT_INFO. */
7239 static tree
7240 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7242 struct data_reference *first_dr, *next_dr;
7244 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7245 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7246 while (next_stmt_info)
7248 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7249 if (get_alias_set (DR_REF (first_dr))
7250 != get_alias_set (DR_REF (next_dr)))
7252 if (dump_enabled_p ())
7253 dump_printf_loc (MSG_NOTE, vect_location,
7254 "conflicting alias set types.\n");
7255 return ptr_type_node;
7257 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7259 return reference_alias_ptr_type (DR_REF (first_dr));
7263 /* Function scan_operand_equal_p.
7265 Helper function for check_scan_store. Compare two references
7266 with .GOMP_SIMD_LANE bases. */
7268 static bool
7269 scan_operand_equal_p (tree ref1, tree ref2)
7271 tree ref[2] = { ref1, ref2 };
7272 poly_int64 bitsize[2], bitpos[2];
7273 tree offset[2], base[2];
7274 for (int i = 0; i < 2; ++i)
7276 machine_mode mode;
7277 int unsignedp, reversep, volatilep = 0;
7278 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7279 &offset[i], &mode, &unsignedp,
7280 &reversep, &volatilep);
7281 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7282 return false;
7283 if (TREE_CODE (base[i]) == MEM_REF
7284 && offset[i] == NULL_TREE
7285 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7287 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7288 if (is_gimple_assign (def_stmt)
7289 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7290 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7291 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7293 if (maybe_ne (mem_ref_offset (base[i]), 0))
7294 return false;
7295 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7296 offset[i] = gimple_assign_rhs2 (def_stmt);
7301 if (!operand_equal_p (base[0], base[1], 0))
7302 return false;
7303 if (maybe_ne (bitsize[0], bitsize[1]))
7304 return false;
7305 if (offset[0] != offset[1])
7307 if (!offset[0] || !offset[1])
7308 return false;
7309 if (!operand_equal_p (offset[0], offset[1], 0))
7311 tree step[2];
7312 for (int i = 0; i < 2; ++i)
7314 step[i] = integer_one_node;
7315 if (TREE_CODE (offset[i]) == SSA_NAME)
7317 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7318 if (is_gimple_assign (def_stmt)
7319 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7320 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7321 == INTEGER_CST))
7323 step[i] = gimple_assign_rhs2 (def_stmt);
7324 offset[i] = gimple_assign_rhs1 (def_stmt);
7327 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7329 step[i] = TREE_OPERAND (offset[i], 1);
7330 offset[i] = TREE_OPERAND (offset[i], 0);
7332 tree rhs1 = NULL_TREE;
7333 if (TREE_CODE (offset[i]) == SSA_NAME)
7335 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7336 if (gimple_assign_cast_p (def_stmt))
7337 rhs1 = gimple_assign_rhs1 (def_stmt);
7339 else if (CONVERT_EXPR_P (offset[i]))
7340 rhs1 = TREE_OPERAND (offset[i], 0);
7341 if (rhs1
7342 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7343 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7344 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7345 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7346 offset[i] = rhs1;
7348 if (!operand_equal_p (offset[0], offset[1], 0)
7349 || !operand_equal_p (step[0], step[1], 0))
7350 return false;
7353 return true;
7357 enum scan_store_kind {
7358 /* Normal permutation. */
7359 scan_store_kind_perm,
7361 /* Whole vector left shift permutation with zero init. */
7362 scan_store_kind_lshift_zero,
7364 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7365 scan_store_kind_lshift_cond
7368 /* Function scan_store_can_perm_p.
7370 Verify if we can perform the needed permutations or whole vector shifts.
7371 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7372 USE_WHOLE_VECTOR is a vector of enum scan_store_kind specifying which
7373 operation to do at each step. */
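/* As an illustration, for nunits == 4 the selectors built below are
   { 0, 4, 5, 6 } for i == 0, { 0, 1, 4, 5 } for i == 1 and
   { 3, 3, 3, 3 } for i == units_log2 == 2, i.e. whole-vector shifts of
   the running value by 1 and 2 elements (with the low lanes taken from
   the first input) followed by a broadcast of the last element.  */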
7375 static int
7376 scan_store_can_perm_p (tree vectype, tree init,
7377 vec<enum scan_store_kind> *use_whole_vector = NULL)
7379 enum machine_mode vec_mode = TYPE_MODE (vectype);
7380 unsigned HOST_WIDE_INT nunits;
7381 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7382 return -1;
7383 int units_log2 = exact_log2 (nunits);
7384 if (units_log2 <= 0)
7385 return -1;
7387 int i;
7388 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7389 for (i = 0; i <= units_log2; ++i)
7391 unsigned HOST_WIDE_INT j, k;
7392 enum scan_store_kind kind = scan_store_kind_perm;
7393 vec_perm_builder sel (nunits, nunits, 1);
7394 sel.quick_grow (nunits);
7395 if (i == units_log2)
7397 for (j = 0; j < nunits; ++j)
7398 sel[j] = nunits - 1;
7400 else
7402 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7403 sel[j] = j;
7404 for (k = 0; j < nunits; ++j, ++k)
7405 sel[j] = nunits + k;
7407 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7408 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7410 if (i == units_log2)
7411 return -1;
7413 if (whole_vector_shift_kind == scan_store_kind_perm)
7415 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7416 return -1;
7417 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7418 /* Whole vector shifts shift in zeros, so if init is an all-zero
7419 constant, there is no need to do anything further. */
7420 if ((TREE_CODE (init) != INTEGER_CST
7421 && TREE_CODE (init) != REAL_CST)
7422 || !initializer_zerop (init))
7424 tree masktype = truth_type_for (vectype);
7425 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7426 return -1;
7427 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7430 kind = whole_vector_shift_kind;
7432 if (use_whole_vector)
7434 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7435 use_whole_vector->safe_grow_cleared (i, true);
7436 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7437 use_whole_vector->safe_push (kind);
7441 return units_log2;
7445 /* Function check_scan_store.
7447 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7449 static bool
7450 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7451 enum vect_def_type rhs_dt, slp_tree slp_node, tree mask,
7452 vect_memory_access_type memory_access_type)
7454 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7455 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7456 tree ref_type;
7458 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7459 if ((slp_node && SLP_TREE_LANES (slp_node) > 1)
7460 || mask
7461 || memory_access_type != VMAT_CONTIGUOUS
7462 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7463 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7464 || loop_vinfo == NULL
7465 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7466 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7467 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7468 || !integer_zerop (DR_INIT (dr_info->dr))
7469 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7470 || !alias_sets_conflict_p (get_alias_set (vectype),
7471 get_alias_set (TREE_TYPE (ref_type))))
7473 if (dump_enabled_p ())
7474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7475 "unsupported OpenMP scan store.\n");
7476 return false;
7479 /* We need to pattern match code built by OpenMP lowering and simplified
7480 by subsequent optimizations into something we can handle.
7481 #pragma omp simd reduction(inscan,+:r)
7482 for (...)
7484 r += something ();
7485 #pragma omp scan inclusive (r)
7486 use (r);
7488 shall have body with:
7489 // Initialization for input phase, store the reduction initializer:
7490 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7491 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7492 D.2042[_21] = 0;
7493 // Actual input phase:
7495 r.0_5 = D.2042[_20];
7496 _6 = _4 + r.0_5;
7497 D.2042[_20] = _6;
7498 // Initialization for scan phase:
7499 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7500 _26 = D.2043[_25];
7501 _27 = D.2042[_25];
7502 _28 = _26 + _27;
7503 D.2043[_25] = _28;
7504 D.2042[_25] = _28;
7505 // Actual scan phase:
7507 r.1_8 = D.2042[_20];
7509 The "omp simd array" variable D.2042 holds the privatized copy used
7510 inside of the loop and D.2043 is another one that holds copies of
7511 the current original list item. The separate GOMP_SIMD_LANE ifn
7512 kinds are there in order to allow optimizing the initializer store
7513 and combiner sequence, e.g. if it is originally some C++-ish user
7514 defined reduction, while still allowing the vectorizer to pattern
7515 recognize it and turn it into the appropriate vectorized scan.
7517 For exclusive scan, this is slightly different:
7518 #pragma omp simd reduction(inscan,+:r)
7519 for (...)
7521 use (r);
7522 #pragma omp scan exclusive (r)
7523 r += something ();
7525 shall have body with:
7526 // Initialization for input phase, store the reduction initializer:
7527 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7528 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7529 D.2042[_21] = 0;
7530 // Actual input phase:
7532 r.0_5 = D.2042[_20];
7533 _6 = _4 + r.0_5;
7534 D.2042[_20] = _6;
7535 // Initialization for scan phase:
7536 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7537 _26 = D.2043[_25];
7538 D.2044[_25] = _26;
7539 _27 = D.2042[_25];
7540 _28 = _26 + _27;
7541 D.2043[_25] = _28;
7542 // Actual scan phase:
7544 r.1_8 = D.2044[_20];
7545 ... */
7547 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7549 /* Match the D.2042[_21] = 0; store above. Just require that
7550 it is a constant or external definition store. */
7551 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7553 fail_init:
7554 if (dump_enabled_p ())
7555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7556 "unsupported OpenMP scan initializer store.\n");
7557 return false;
7560 if (! loop_vinfo->scan_map)
7561 loop_vinfo->scan_map = new hash_map<tree, tree>;
7562 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7563 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7564 if (cached)
7565 goto fail_init;
7566 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7568 /* These stores can be vectorized normally. */
7569 return true;
7572 if (rhs_dt != vect_internal_def)
7574 fail:
7575 if (dump_enabled_p ())
7576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7577 "unsupported OpenMP scan combiner pattern.\n");
7578 return false;
7581 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7582 tree rhs = gimple_assign_rhs1 (stmt);
7583 if (TREE_CODE (rhs) != SSA_NAME)
7584 goto fail;
7586 gimple *other_store_stmt = NULL;
7587 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7588 bool inscan_var_store
7589 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7591 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7593 if (!inscan_var_store)
7595 use_operand_p use_p;
7596 imm_use_iterator iter;
7597 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7599 gimple *use_stmt = USE_STMT (use_p);
7600 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7601 continue;
7602 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7603 || !is_gimple_assign (use_stmt)
7604 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7605 || other_store_stmt
7606 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7607 goto fail;
7608 other_store_stmt = use_stmt;
7610 if (other_store_stmt == NULL)
7611 goto fail;
7612 rhs = gimple_assign_lhs (other_store_stmt);
7613 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7614 goto fail;
7617 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7619 use_operand_p use_p;
7620 imm_use_iterator iter;
7621 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7623 gimple *use_stmt = USE_STMT (use_p);
7624 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7625 continue;
7626 if (other_store_stmt)
7627 goto fail;
7628 other_store_stmt = use_stmt;
7631 else
7632 goto fail;
7634 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7635 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7636 || !is_gimple_assign (def_stmt)
7637 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7638 goto fail;
7640 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7641 /* For pointer addition, we should use the normal plus for the vector
7642 operation. */
7643 switch (code)
7645 case POINTER_PLUS_EXPR:
7646 code = PLUS_EXPR;
7647 break;
7648 case MULT_HIGHPART_EXPR:
7649 goto fail;
7650 default:
7651 break;
7653 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7654 goto fail;
7656 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7657 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7658 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7659 goto fail;
7661 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7662 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7663 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7664 || !gimple_assign_load_p (load1_stmt)
7665 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7666 || !gimple_assign_load_p (load2_stmt))
7667 goto fail;
7669 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7670 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7671 if (load1_stmt_info == NULL
7672 || load2_stmt_info == NULL
7673 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7674 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7675 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7676 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7677 goto fail;
7679 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7681 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7682 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7683 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7684 goto fail;
7685 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7686 tree lrhs;
7687 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7688 lrhs = rhs1;
7689 else
7690 lrhs = rhs2;
7691 use_operand_p use_p;
7692 imm_use_iterator iter;
7693 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7695 gimple *use_stmt = USE_STMT (use_p);
7696 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7697 continue;
7698 if (other_store_stmt)
7699 goto fail;
7700 other_store_stmt = use_stmt;
7704 if (other_store_stmt == NULL)
7705 goto fail;
7706 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7707 || !gimple_store_p (other_store_stmt))
7708 goto fail;
7710 stmt_vec_info other_store_stmt_info
7711 = loop_vinfo->lookup_stmt (other_store_stmt);
7712 if (other_store_stmt_info == NULL
7713 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7714 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7715 goto fail;
7717 gimple *stmt1 = stmt;
7718 gimple *stmt2 = other_store_stmt;
7719 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7720 std::swap (stmt1, stmt2);
7721 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7722 gimple_assign_rhs1 (load2_stmt)))
7724 std::swap (rhs1, rhs2);
7725 std::swap (load1_stmt, load2_stmt);
7726 std::swap (load1_stmt_info, load2_stmt_info);
7728 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7729 gimple_assign_rhs1 (load1_stmt)))
7730 goto fail;
7732 tree var3 = NULL_TREE;
7733 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7734 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7735 gimple_assign_rhs1 (load2_stmt)))
7736 goto fail;
7737 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7739 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7740 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7741 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7742 goto fail;
7743 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7744 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7745 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7746 || lookup_attribute ("omp simd inscan exclusive",
7747 DECL_ATTRIBUTES (var3)))
7748 goto fail;
7751 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7752 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7753 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7754 goto fail;
7756 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7757 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7758 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7759 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7760 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7761 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7762 goto fail;
7764 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7765 std::swap (var1, var2);
7767 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7769 if (!lookup_attribute ("omp simd inscan exclusive",
7770 DECL_ATTRIBUTES (var1)))
7771 goto fail;
7772 var1 = var3;
7775 if (loop_vinfo->scan_map == NULL)
7776 goto fail;
7777 tree *init = loop_vinfo->scan_map->get (var1);
7778 if (init == NULL)
7779 goto fail;
7781 /* The IL is as expected; now check if we can actually vectorize it.
7782 Inclusive scan:
7783 _26 = D.2043[_25];
7784 _27 = D.2042[_25];
7785 _28 = _26 + _27;
7786 D.2043[_25] = _28;
7787 D.2042[_25] = _28;
7788 should be vectorized as (where _40 is the vectorized rhs
7789 from the D.2042[_21] = 0; store):
7790 _30 = MEM <vector(8) int> [(int *)&D.2043];
7791 _31 = MEM <vector(8) int> [(int *)&D.2042];
7792 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7793 _33 = _31 + _32;
7794 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7795 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7796 _35 = _33 + _34;
7797 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7798 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7799 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7800 _37 = _35 + _36;
7801 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7802 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7803 _38 = _30 + _37;
7804 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7805 MEM <vector(8) int> [(int *)&D.2043] = _39;
7806 MEM <vector(8) int> [(int *)&D.2042] = _38;
7807 Exclusive scan:
7808 _26 = D.2043[_25];
7809 D.2044[_25] = _26;
7810 _27 = D.2042[_25];
7811 _28 = _26 + _27;
7812 D.2043[_25] = _28;
7813 should be vectorized as (where _40 is the vectorized rhs
7814 from the D.2042[_21] = 0; store):
7815 _30 = MEM <vector(8) int> [(int *)&D.2043];
7816 _31 = MEM <vector(8) int> [(int *)&D.2042];
7817 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7818 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7819 _34 = _32 + _33;
7820 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7821 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7822 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7823 _36 = _34 + _35;
7824 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7825 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7826 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7827 _38 = _36 + _37;
7828 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7829 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7830 _39 = _30 + _38;
7831 _50 = _31 + _39;
7832 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7833 MEM <vector(8) int> [(int *)&D.2044] = _39;
7834 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7835 enum machine_mode vec_mode = TYPE_MODE (vectype);
7836 optab optab = optab_for_tree_code (code, vectype, optab_default);
7837 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7838 goto fail;
7840 int units_log2 = scan_store_can_perm_p (vectype, *init);
7841 if (units_log2 == -1)
7842 goto fail;
7844 return true;
7848 /* Function vectorizable_scan_store.
7850 Helper of vectorizable_store; arguments are as for vectorizable_store.
7851 Handle only the transformation, checking is done in check_scan_store. */
7853 static bool
7854 vectorizable_scan_store (vec_info *vinfo, stmt_vec_info stmt_info,
7855 slp_tree slp_node, gimple_stmt_iterator *gsi,
7856 gimple **vec_stmt, int ncopies)
7858 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7859 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7860 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7861 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7863 if (dump_enabled_p ())
7864 dump_printf_loc (MSG_NOTE, vect_location,
7865 "transform scan store. ncopies = %d\n", ncopies);
7867 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7868 tree rhs = gimple_assign_rhs1 (stmt);
7869 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7871 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7872 bool inscan_var_store
7873 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7875 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7877 use_operand_p use_p;
7878 imm_use_iterator iter;
7879 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7881 gimple *use_stmt = USE_STMT (use_p);
7882 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7883 continue;
7884 rhs = gimple_assign_lhs (use_stmt);
7885 break;
7889 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7890 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7891 if (code == POINTER_PLUS_EXPR)
7892 code = PLUS_EXPR;
7893 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7894 && commutative_tree_code (code));
7895 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7896 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7897 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7898 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7899 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7900 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7901 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7902 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7903 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7904 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7905 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7907 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7909 std::swap (rhs1, rhs2);
7910 std::swap (var1, var2);
7911 std::swap (load1_dr_info, load2_dr_info);
7914 tree *init = loop_vinfo->scan_map->get (var1);
7915 gcc_assert (init);
7917 unsigned HOST_WIDE_INT nunits;
7918 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7919 gcc_unreachable ();
7920 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7921 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7922 gcc_assert (units_log2 > 0);
7923 auto_vec<tree, 16> perms;
7924 perms.quick_grow (units_log2 + 1);
7925 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7926 for (int i = 0; i <= units_log2; ++i)
7928 unsigned HOST_WIDE_INT j, k;
7929 vec_perm_builder sel (nunits, nunits, 1);
7930 sel.quick_grow (nunits);
7931 if (i == units_log2)
7932 for (j = 0; j < nunits; ++j)
7933 sel[j] = nunits - 1;
7934 else
7936 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7937 sel[j] = j;
7938 for (k = 0; j < nunits; ++j, ++k)
7939 sel[j] = nunits + k;
7941 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7942 if (!use_whole_vector.is_empty ()
7943 && use_whole_vector[i] != scan_store_kind_perm)
7945 if (zero_vec == NULL_TREE)
7946 zero_vec = build_zero_cst (vectype);
7947 if (masktype == NULL_TREE
7948 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7949 masktype = truth_type_for (vectype);
7950 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7952 else
7953 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7956 tree vec_oprnd1 = NULL_TREE;
7957 tree vec_oprnd2 = NULL_TREE;
7958 tree vec_oprnd3 = NULL_TREE;
7959 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7960 tree dataref_offset = build_int_cst (ref_type, 0);
7961 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7962 vectype, VMAT_CONTIGUOUS);
7963 tree ldataref_ptr = NULL_TREE;
7964 tree orig = NULL_TREE;
7965 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7966 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7967 /* The initialization is invariant. */
7968 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, *init, vectype, NULL);
7969 auto_vec<tree> vec_oprnds2;
7970 auto_vec<tree> vec_oprnds3;
7971 if (ldataref_ptr == NULL)
7973 /* We want to look up the vector operands of the reduction, not those
7974 of the store - for SLP we have to use the proper SLP node for the
7975 lookup, which should be the single child of the scan store. */
7976 vect_get_vec_defs (vinfo, stmt_info, SLP_TREE_CHILDREN (slp_node)[0],
7977 ncopies, rhs1, &vec_oprnds2, rhs2, &vec_oprnds3);
7978 /* ??? For SLP we do not key the def on 'rhs1' or 'rhs2' but get
7979 them in SLP child order. So we have to swap here with logic
7980 similar to above. */
7981 stmt_vec_info load
7982 = SLP_TREE_SCALAR_STMTS (SLP_TREE_CHILDREN
7983 (SLP_TREE_CHILDREN (slp_node)[0])[0])[0];
7984 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (load);
7985 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7986 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)))
7987 for (unsigned i = 0; i < vec_oprnds2.length (); ++i)
7988 std::swap (vec_oprnds2[i], vec_oprnds3[i]);
7990 else
7991 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
7992 rhs2, &vec_oprnds3);
7993 for (unsigned j = 0; j < vec_oprnds3.length (); j++)
7995 if (ldataref_ptr == NULL)
7996 vec_oprnd2 = vec_oprnds2[j];
7997 vec_oprnd3 = vec_oprnds3[j];
7998 if (j == 0)
7999 orig = vec_oprnd3;
8000 else if (!inscan_var_store)
8001 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8003 if (ldataref_ptr)
8005 vec_oprnd2 = make_ssa_name (vectype);
8006 tree data_ref = fold_build2 (MEM_REF, vectype,
8007 unshare_expr (ldataref_ptr),
8008 dataref_offset);
8009 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
8010 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
8011 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8012 if (! slp_node)
8014 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8015 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8019 tree v = vec_oprnd2;
8020 for (int i = 0; i < units_log2; ++i)
8022 tree new_temp = make_ssa_name (vectype);
8023 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
8024 (zero_vec
8025 && (use_whole_vector[i]
8026 != scan_store_kind_perm))
8027 ? zero_vec : vec_oprnd1, v,
8028 perms[i]);
8029 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8030 if (! slp_node)
8032 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8033 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8036 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
8038 /* Whole vector shift shifted in zero bits, but if *init
8039 is not initializer_zerop, we need to replace those elements
8040 with elements from vec_oprnd1. */
8041 tree_vector_builder vb (masktype, nunits, 1);
8042 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8043 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8044 ? boolean_false_node : boolean_true_node);
8046 tree new_temp2 = make_ssa_name (vectype);
8047 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8048 new_temp, vec_oprnd1);
8049 vect_finish_stmt_generation (vinfo, stmt_info,
8050 g, gsi);
8051 if (! slp_node)
8052 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8053 new_temp = new_temp2;
8056 /* For exclusive scan, perform the perms[i] permutation once
8057 more. */
8058 if (i == 0
8059 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8060 && v == vec_oprnd2)
8062 v = new_temp;
8063 --i;
8064 continue;
8067 tree new_temp2 = make_ssa_name (vectype);
8068 g = gimple_build_assign (new_temp2, code, v, new_temp);
8069 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8070 if (! slp_node)
8071 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8073 v = new_temp2;
8076 tree new_temp = make_ssa_name (vectype);
8077 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8078 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8079 if (! slp_node)
8080 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8082 tree last_perm_arg = new_temp;
8083 /* For exclusive scan, new_temp computed above is the exclusive scan
8084 prefix sum. Turn it into inclusive prefix sum for the broadcast
8085 of the last element into orig. */
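/* E.g. if the current input vector is { a0, a1, a2, a3 } and the
   carry-in is c, new_temp holds the exclusive prefix sums
   { c, c+a0, c+a0+a1, c+a0+a1+a2 }; adding the input vector lane-wise
   yields the inclusive prefix sums, whose last lane is the running
   total that is broadcast below (for illustration only).  */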
8086 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8088 last_perm_arg = make_ssa_name (vectype);
8089 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8090 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8091 if (! slp_node)
8092 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8095 orig = make_ssa_name (vectype);
8096 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8097 last_perm_arg, perms[units_log2]);
8098 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8099 if (! slp_node)
8100 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8102 if (!inscan_var_store)
8104 tree data_ref = fold_build2 (MEM_REF, vectype,
8105 unshare_expr (dataref_ptr),
8106 dataref_offset);
8107 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8108 g = gimple_build_assign (data_ref, new_temp);
8109 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8110 if (! slp_node)
8111 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8115 if (inscan_var_store)
8116 for (unsigned j = 0; j < vec_oprnds3.length (); j++)
8118 if (j != 0)
8119 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8121 tree data_ref = fold_build2 (MEM_REF, vectype,
8122 unshare_expr (dataref_ptr),
8123 dataref_offset);
8124 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8125 gimple *g = gimple_build_assign (data_ref, orig);
8126 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8127 if (! slp_node)
8128 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8130 return true;
8134 /* Function vectorizable_store.
8136 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8137 that can be vectorized.
8138 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8139 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8140 Return true if STMT_INFO is vectorizable in this way. */
8142 static bool
8143 vectorizable_store (vec_info *vinfo,
8144 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8145 gimple **vec_stmt, slp_tree slp_node,
8146 stmt_vector_for_cost *cost_vec)
8148 tree data_ref;
8149 tree vec_oprnd = NULL_TREE;
8150 tree elem_type;
8151 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8152 class loop *loop = NULL;
8153 machine_mode vec_mode;
8154 tree dummy;
8155 enum vect_def_type rhs_dt = vect_unknown_def_type;
8156 enum vect_def_type mask_dt = vect_unknown_def_type;
8157 tree dataref_ptr = NULL_TREE;
8158 tree dataref_offset = NULL_TREE;
8159 gimple *ptr_incr = NULL;
8160 int ncopies;
8161 int j;
8162 stmt_vec_info first_stmt_info;
8163 bool grouped_store;
8164 unsigned int group_size, i;
8165 bool slp = (slp_node != NULL);
8166 unsigned int vec_num;
8167 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8168 tree aggr_type;
8169 gather_scatter_info gs_info;
8170 poly_uint64 vf;
8171 vec_load_store_type vls_type;
8172 tree ref_type;
8174 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8175 return false;
8177 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8178 && ! vec_stmt)
8179 return false;
8181 /* Is vectorizable store? */
8183 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8184 slp_tree mask_node = NULL;
8185 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8187 tree scalar_dest = gimple_assign_lhs (assign);
8188 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8189 && is_pattern_stmt_p (stmt_info))
8190 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8191 if (TREE_CODE (scalar_dest) != ARRAY_REF
8192 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8193 && TREE_CODE (scalar_dest) != INDIRECT_REF
8194 && TREE_CODE (scalar_dest) != COMPONENT_REF
8195 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8196 && TREE_CODE (scalar_dest) != REALPART_EXPR
8197 && TREE_CODE (scalar_dest) != MEM_REF)
8198 return false;
8200 else
8202 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8203 if (!call || !gimple_call_internal_p (call))
8204 return false;
8206 internal_fn ifn = gimple_call_internal_fn (call);
8207 if (!internal_store_fn_p (ifn))
8208 return false;
8210 int mask_index = internal_fn_mask_index (ifn);
8211 if (mask_index >= 0 && slp_node)
8212 mask_index = vect_slp_child_index_for_operand
8213 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8214 if (mask_index >= 0
8215 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8216 &mask, &mask_node, &mask_dt,
8217 &mask_vectype))
8218 return false;
8221 /* Cannot have hybrid store SLP -- that would mean storing to the
8222 same location twice. */
8223 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8225 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8226 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8228 if (loop_vinfo)
8230 loop = LOOP_VINFO_LOOP (loop_vinfo);
8231 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8233 else
8234 vf = 1;
8236 /* Multiple types in SLP are handled by creating the appropriate number of
8237 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8238 case of SLP. */
8239 if (slp)
8240 ncopies = 1;
8241 else
8242 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8244 gcc_assert (ncopies >= 1);
8246 /* FORNOW. This restriction should be relaxed. */
8247 if (loop
8248 && nested_in_vect_loop_p (loop, stmt_info)
8249 && (ncopies > 1 || (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)))
8251 if (dump_enabled_p ())
8252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8253 "multiple types in nested loop.\n");
8254 return false;
8257 tree op;
8258 slp_tree op_node;
8259 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8260 &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8261 return false;
8263 elem_type = TREE_TYPE (vectype);
8264 vec_mode = TYPE_MODE (vectype);
8266 if (!STMT_VINFO_DATA_REF (stmt_info))
8267 return false;
8269 vect_memory_access_type memory_access_type;
8270 enum dr_alignment_support alignment_support_scheme;
8271 int misalignment;
8272 poly_int64 poffset;
8273 internal_fn lanes_ifn;
8274 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8275 ncopies, &memory_access_type, &poffset,
8276 &alignment_support_scheme, &misalignment, &gs_info,
8277 &lanes_ifn))
8278 return false;
8280 if (slp_node
8281 && slp_node->ldst_lanes
8282 && memory_access_type != VMAT_LOAD_STORE_LANES)
8284 if (dump_enabled_p ())
8285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8286 "discovered store-lane but cannot use it.\n");
8287 return false;
8290 if (mask)
8292 if (memory_access_type == VMAT_CONTIGUOUS)
8294 if (!VECTOR_MODE_P (vec_mode)
8295 || !can_vec_mask_load_store_p (vec_mode,
8296 TYPE_MODE (mask_vectype), false))
8297 return false;
8299 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8300 && (memory_access_type != VMAT_GATHER_SCATTER
8301 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8303 if (dump_enabled_p ())
8304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8305 "unsupported access type for masked store.\n");
8306 return false;
8308 else if (memory_access_type == VMAT_GATHER_SCATTER
8309 && gs_info.ifn == IFN_LAST
8310 && !gs_info.decl)
8312 if (dump_enabled_p ())
8313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8314 "unsupported masked emulated scatter.\n");
8315 return false;
8318 else
8320 /* FORNOW. In some cases can vectorize even if data-type not supported
8321 (e.g. - array initialization with 0). */
8322 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8323 return false;
8326 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8327 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8328 && memory_access_type != VMAT_GATHER_SCATTER
8329 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8330 if (grouped_store)
8332 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8333 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8334 group_size = DR_GROUP_SIZE (first_stmt_info);
8336 else
8338 first_stmt_info = stmt_info;
8339 first_dr_info = dr_info;
8340 group_size = vec_num = 1;
8343 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8345 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node, mask,
8346 memory_access_type))
8347 return false;
8350 bool costing_p = !vec_stmt;
8351 if (costing_p) /* transformation not required. */
8353 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8354 if (slp_node)
8355 SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
8357 if (loop_vinfo
8358 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8359 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8360 vls_type, group_size,
8361 memory_access_type, &gs_info,
8362 mask);
8364 if (slp_node
8365 && (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8366 || (mask
8367 && !vect_maybe_update_slp_op_vectype (mask_node,
8368 mask_vectype))))
8370 if (dump_enabled_p ())
8371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8372 "incompatible vector types for invariants\n");
8373 return false;
8376 if (dump_enabled_p ()
8377 && memory_access_type != VMAT_ELEMENTWISE
8378 && memory_access_type != VMAT_GATHER_SCATTER
8379 && alignment_support_scheme != dr_aligned)
8380 dump_printf_loc (MSG_NOTE, vect_location,
8381 "Vectorizing an unaligned access.\n");
8383 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8385 /* As function vect_transform_stmt shows, for interleaving stores
8386 the whole chain is vectorized when the last store in the chain
8387 is reached; the other stores in the group are skipped. So we
8388 want to only cost the last one here, but it's not trivial to
8389 get the last one; since using the first one for costing is
8390 equivalent, use the first one instead. */
8391 if (grouped_store
8392 && !slp
8393 && first_stmt_info != stmt_info)
8394 return true;
8396 if (slp_node)
8397 gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (stmt_info));
8398 else
8399 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8401 /* Transform. */
8403 ensure_base_align (dr_info);
8405 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8407 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8408 gcc_assert (!slp || SLP_TREE_LANES (slp_node) == 1);
8409 if (costing_p)
8411 unsigned int inside_cost = 0, prologue_cost = 0;
8412 if (vls_type == VLS_STORE_INVARIANT)
8413 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8414 stmt_info, 0, vect_prologue);
8415 vect_get_store_cost (vinfo, stmt_info, ncopies,
8416 alignment_support_scheme, misalignment,
8417 &inside_cost, cost_vec);
8419 if (dump_enabled_p ())
8420 dump_printf_loc (MSG_NOTE, vect_location,
8421 "vect_model_store_cost: inside_cost = %d, "
8422 "prologue_cost = %d .\n",
8423 inside_cost, prologue_cost);
8425 return true;
8427 return vectorizable_scan_store (vinfo, stmt_info, slp_node,
8428 gsi, vec_stmt, ncopies);
8431 if (grouped_store || slp)
8433 /* FORNOW */
8434 gcc_assert (!grouped_store
8435 || !loop
8436 || !nested_in_vect_loop_p (loop, stmt_info));
8438 if (slp)
8440 grouped_store = false;
8441 /* VEC_NUM is the number of vect stmts to be created for this
8442 group. */
8443 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8444 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8445 gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info)
8446 || (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8447 == first_stmt_info));
8448 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8449 op = vect_get_store_rhs (first_stmt_info);
8451 else
8452 /* VEC_NUM is the number of vect stmts to be created for this
8453 group. */
8454 vec_num = group_size;
8456 ref_type = get_group_alias_ptr_type (first_stmt_info);
8458 else
8459 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8461 if (!costing_p && dump_enabled_p ())
8462 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8463 ncopies);
8465 /* Check if we need to update the prologue cost for an invariant,
8466 and update it accordingly if so. If it's not for an
8467 interleaving store, we can just check vls_type; but if
8468 it's for an interleaving store, we need to check the def_type
8469 of the stored value since the current vls_type is just
8470 for first_stmt_info. */
8471 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8473 gcc_assert (costing_p);
8474 if (slp)
8475 return;
8476 if (grouped_store)
8478 gcc_assert (store_rhs);
8479 enum vect_def_type cdt;
8480 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8481 if (cdt != vect_constant_def && cdt != vect_external_def)
8482 return;
8484 else if (vls_type != VLS_STORE_INVARIANT)
8485 return;
8486 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8487 0, vect_prologue);
8490 if (memory_access_type == VMAT_ELEMENTWISE
8491 || memory_access_type == VMAT_STRIDED_SLP)
8493 unsigned inside_cost = 0, prologue_cost = 0;
8494 gimple_stmt_iterator incr_gsi;
8495 bool insert_after;
8496 gimple *incr;
8497 tree offvar;
8498 tree ivstep;
8499 tree running_off;
8500 tree stride_base, stride_step, alias_off;
8501 tree vec_oprnd = NULL_TREE;
8502 tree dr_offset;
8503 unsigned int g;
8504 /* Checked by get_load_store_type. */
8505 unsigned int const_nunits = nunits.to_constant ();
8507 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8508 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8510 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8511 stride_base
8512 = fold_build_pointer_plus
8513 (DR_BASE_ADDRESS (first_dr_info->dr),
8514 size_binop (PLUS_EXPR,
8515 convert_to_ptrofftype (dr_offset),
8516 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8517 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8519 /* For a store with loop-invariant (but other than power-of-2)
8520 stride (i.e. not a grouped access) like so:
8522 for (i = 0; i < n; i += stride)
8523 array[i] = ...;
8525 we generate a new induction variable and new stores from
8526 the components of the (vectorized) rhs:
8528 for (j = 0; ; j += VF*stride)
8529 vectemp = ...;
8530 tmp1 = vectemp[0];
8531 array[j] = tmp1;
8532 tmp2 = vectemp[1];
8533 array[j + stride] = tmp2;
8537 unsigned nstores = const_nunits;
8538 unsigned lnel = 1;
8539 tree ltype = elem_type;
8540 tree lvectype = vectype;
8541 if (slp)
8543 HOST_WIDE_INT n = gcd (group_size, const_nunits);
8544 if (n == const_nunits)
8546 int mis_align = dr_misalignment (first_dr_info, vectype);
8547 dr_alignment_support dr_align
8548 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8549 mis_align);
8550 if (dr_align == dr_aligned
8551 || dr_align == dr_unaligned_supported)
8553 nstores = 1;
8554 lnel = const_nunits;
8555 ltype = vectype;
8556 lvectype = vectype;
8557 alignment_support_scheme = dr_align;
8558 misalignment = mis_align;
8561 else if (n > 1)
8563 nstores = const_nunits / n;
8564 lnel = n;
8565 ltype = build_vector_type (elem_type, n);
8566 lvectype = vectype;
8568 /* First check if the vec_extract optab doesn't support extracting
8569 the vector elts directly. */
8570 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8571 machine_mode vmode;
8572 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8573 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8574 n).exists (&vmode)
8575 || (convert_optab_handler (vec_extract_optab,
8576 TYPE_MODE (vectype), vmode)
8577 == CODE_FOR_nothing))
8579 /* Try to avoid emitting an extract of vector elements
8580 by performing the extracts using an integer type of the
8581 same size, extracting from a vector of those and then
8582 re-interpreting it as the original vector type if
8583 supported. */
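/* For instance (purely illustrative), when storing pairs of QImode
   elements out of a V8QImode vector and V2QImode extraction is not
   supported, we may instead view the vector as V4HImode and extract
   and store HImode elements, provided that extraction is supported.  */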
8584 unsigned lsize
8585 = n * GET_MODE_BITSIZE (elmode);
8586 unsigned int lnunits = const_nunits / n;
8587 /* If we can't construct such a vector fall back to
8588 element extracts from the original vector type and
8589 element size stores. */
8590 if (int_mode_for_size (lsize, 0).exists (&elmode)
8591 && VECTOR_MODE_P (TYPE_MODE (vectype))
8592 && related_vector_mode (TYPE_MODE (vectype), elmode,
8593 lnunits).exists (&vmode)
8594 && (convert_optab_handler (vec_extract_optab,
8595 vmode, elmode)
8596 != CODE_FOR_nothing))
8598 nstores = lnunits;
8599 lnel = n;
8600 ltype = build_nonstandard_integer_type (lsize, 1);
8601 lvectype = build_vector_type (ltype, nstores);
8603 /* Else fall back to vector extraction anyway.
8604 Fewer stores are more important than avoiding spilling
8605 of the vector we extract from. Compared to the
8606 construction case in vectorizable_load no store-forwarding
8607 issue exists here for reasonable archs. */
8610 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8611 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8614 if (!costing_p)
8616 ivstep = stride_step;
8617 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8618 build_int_cst (TREE_TYPE (ivstep), vf));
8620 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8622 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8623 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8624 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8625 insert_after, &offvar, NULL);
8626 incr = gsi_stmt (incr_gsi);
8628 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8631 alias_off = build_int_cst (ref_type, 0);
8632 stmt_vec_info next_stmt_info = first_stmt_info;
8633 auto_vec<tree> vec_oprnds;
8634 /* For costing some adjacent vector stores, we'd like to cost them
8635 once with their total number instead of costing each one by one. */
8636 unsigned int n_adjacent_stores = 0;
8637 for (g = 0; g < group_size; g++)
8639 running_off = offvar;
8640 if (!costing_p)
8642 if (g)
8644 tree size = TYPE_SIZE_UNIT (ltype);
8645 tree pos
8646 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8647 tree newoff = copy_ssa_name (running_off, NULL);
8648 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8649 running_off, pos);
8650 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8651 running_off = newoff;
8654 if (!slp)
8655 op = vect_get_store_rhs (next_stmt_info);
8656 if (!costing_p)
8657 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8658 &vec_oprnds);
8659 else
8660 update_prologue_cost (&prologue_cost, op);
8661 unsigned int group_el = 0;
8662 unsigned HOST_WIDE_INT
8663 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8664 for (j = 0; j < ncopies; j++)
8666 if (!costing_p)
8668 vec_oprnd = vec_oprnds[j];
8669 /* Pun the vector to extract from if necessary. */
8670 if (lvectype != vectype)
8672 tree tem = make_ssa_name (lvectype);
8673 tree cvt
8674 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8675 gimple *pun = gimple_build_assign (tem, cvt);
8676 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8677 vec_oprnd = tem;
8680 for (i = 0; i < nstores; i++)
8682 if (costing_p)
8684 /* Only need vector extracting when there are more
8685 than one store. */
8686 if (nstores > 1)
8687 inside_cost
8688 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8689 stmt_info, 0, vect_body);
8690 /* Treat a single-lane vector type store as a scalar
8691 store to avoid an ICE like PR 110776. */
8692 if (VECTOR_TYPE_P (ltype)
8693 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8694 n_adjacent_stores++;
8695 else
8696 inside_cost
8697 += record_stmt_cost (cost_vec, 1, scalar_store,
8698 stmt_info, 0, vect_body);
8699 continue;
8701 tree newref, newoff;
8702 gimple *incr, *assign;
8703 tree size = TYPE_SIZE (ltype);
8704 /* Extract the i'th component. */
8705 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8706 bitsize_int (i), size);
8707 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8708 size, pos);
8710 elem = force_gimple_operand_gsi (gsi, elem, true,
8711 NULL_TREE, true,
8712 GSI_SAME_STMT);
8714 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8715 group_el * elsz);
8716 newref = build2 (MEM_REF, ltype,
8717 running_off, this_off);
8718 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8720 /* And store it to *running_off. */
8721 assign = gimple_build_assign (newref, elem);
8722 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8724 group_el += lnel;
8725 if (! slp
8726 || group_el == group_size)
8728 newoff = copy_ssa_name (running_off, NULL);
8729 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8730 running_off, stride_step);
8731 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8733 running_off = newoff;
8734 group_el = 0;
8736 if (g == group_size - 1
8737 && !slp)
8739 if (j == 0 && i == 0)
8740 *vec_stmt = assign;
8741 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8745 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8746 vec_oprnds.truncate(0);
8747 if (slp)
8748 break;
8751 if (costing_p)
8753 if (n_adjacent_stores > 0)
8754 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8755 alignment_support_scheme, misalignment,
8756 &inside_cost, cost_vec);
8757 if (dump_enabled_p ())
8758 dump_printf_loc (MSG_NOTE, vect_location,
8759 "vect_model_store_cost: inside_cost = %d, "
8760 "prologue_cost = %d .\n",
8761 inside_cost, prologue_cost);
8764 return true;
8767 gcc_assert (alignment_support_scheme);
8768 vec_loop_masks *loop_masks
8769 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8770 ? &LOOP_VINFO_MASKS (loop_vinfo)
8771 : NULL);
8772 vec_loop_lens *loop_lens
8773 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8774 ? &LOOP_VINFO_LENS (loop_vinfo)
8775 : NULL);
8777 /* Both vect_analyze_stmt and vect_transform_stmt reach this point, but
8778 with one difference: we cannot enable both lens and masks during the
8779 transform, whereas that is allowed during analysis.
8780 We should not use the length-based approach if fully masked. */
8781 if (cost_vec == NULL)
8782 /* The cost_vec is NULL during the transform. */
8783 gcc_assert ((!loop_lens || !loop_masks));
8785 /* Targets with store-lane instructions must not require explicit
8786 realignment. vect_supportable_dr_alignment always returns either
8787 dr_aligned or dr_unaligned_supported for masked operations. */
8788 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8789 && !mask
8790 && !loop_masks)
8791 || alignment_support_scheme == dr_aligned
8792 || alignment_support_scheme == dr_unaligned_supported);
8794 tree offset = NULL_TREE;
8795 if (!known_eq (poffset, 0))
8796 offset = size_int (poffset);
8798 tree bump;
8799 tree vec_offset = NULL_TREE;
8800 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8802 aggr_type = NULL_TREE;
8803 bump = NULL_TREE;
8805 else if (memory_access_type == VMAT_GATHER_SCATTER)
8807 aggr_type = elem_type;
8808 if (!costing_p)
8809 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8810 &bump, &vec_offset, loop_lens);
8812 else
8814 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8815 aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
8816 else
8817 aggr_type = vectype;
8818 if (!costing_p)
8819 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8820 memory_access_type, loop_lens);
8823 if (mask && !costing_p)
8824 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8826 /* In case the vectorization factor (VF) is bigger than the number
8827 of elements that we can fit in a vectype (nunits), we have to generate
8828 more than one vector stmt - i.e. - we need to "unroll" the
8829 vector stmt by a factor VF/nunits. */
8831 /* In case of interleaving (non-unit grouped access):
8833 S1: &base + 2 = x2
8834 S2: &base = x0
8835 S3: &base + 1 = x1
8836 S4: &base + 3 = x3
8838 We create vectorized stores starting from base address (the access of the
8839 first stmt in the chain (S2 in the above example), when the last store stmt
8840 of the chain (S4) is reached:
8842 VS1: &base = vx2
8843 VS2: &base + vec_size*1 = vx0
8844 VS3: &base + vec_size*2 = vx1
8845 VS4: &base + vec_size*3 = vx3
8847 Then permutation statements are generated:
8849 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8850 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8853 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8854 (the order of the data-refs in the output of vect_permute_store_chain
8855 corresponds to the order of scalar stmts in the interleaving chain - see
8856 the documentation of vect_permute_store_chain()).
8858 In case of both multiple types and interleaving, the above vector stores
8859 and permutation stmts are created for every copy. The result vector stmts are
8860 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8861 STMT_VINFO_RELATED_STMT for the next copies.
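   In the VEC_PERM_EXPR masks above (assuming 8-element vectors as in
   this illustration), indices 0-7 select lanes from the first operand
   and 8-15 from the second, so {0, 8, 1, 9, 2, 10, 3, 11} interleaves
   the low halves of the two inputs and {4, 12, 5, 13, 6, 14, 7, 15}
   interleaves the high halves.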
8864 auto_vec<tree> dr_chain (group_size);
8865 auto_vec<tree> vec_masks;
8866 tree vec_mask = NULL;
8867 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8868 for (i = 0; i < group_size; i++)
8869 gvec_oprnds.quick_push (new auto_vec<tree> ());
8871 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8873 if (costing_p && slp_node)
8874 /* Update all incoming store operand nodes; the general handling
8875 above only handles the mask and the first store operand node. */
8876 for (slp_tree child : SLP_TREE_CHILDREN (slp_node))
8877 if (child != mask_node
8878 && !vect_maybe_update_slp_op_vectype (child, vectype))
8880 if (dump_enabled_p ())
8881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8882 "incompatible vector types for invariants\n");
8883 return false;
8885 unsigned inside_cost = 0, prologue_cost = 0;
8886 /* For costing some adjacent vector stores, we'd like to cost with
8887 the total number of them once instead of costing each one by one. */
8888 unsigned int n_adjacent_stores = 0;
8889 if (slp)
8890 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) / group_size;
8891 for (j = 0; j < ncopies; j++)
8893 gimple *new_stmt;
8894 if (j == 0)
8896 /* For interleaved stores we collect vectorized defs for all
8897 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8898 as an input to vect_permute_store_chain(). */
8899 stmt_vec_info next_stmt_info = first_stmt_info;
8900 for (i = 0; i < group_size; i++)
8902 /* Since gaps are not supported for interleaved stores,
8903 DR_GROUP_SIZE is the exact number of stmts in the
8904 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8905 op = vect_get_store_rhs (next_stmt_info);
8906 if (costing_p)
8907 update_prologue_cost (&prologue_cost, op);
8908 else if (!slp)
8910 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8911 ncopies, op,
8912 gvec_oprnds[i]);
8913 vec_oprnd = (*gvec_oprnds[i])[0];
8914 dr_chain.quick_push (vec_oprnd);
8916 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8919 if (!costing_p)
8921 if (mask)
8923 if (slp_node)
8924 vect_get_slp_defs (mask_node, &vec_masks);
8925 else
8926 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8927 mask, &vec_masks,
8928 mask_vectype);
8929 vec_mask = vec_masks[0];
8932 dataref_ptr
8933 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8934 aggr_type, NULL, offset, &dummy,
8935 gsi, &ptr_incr, false, bump);
8938 else if (!costing_p)
8940 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8941 /* DR_CHAIN is then used as an input to
8942 vect_permute_store_chain(). */
8943 if (!slp)
8945 /* We should have caught mismatched types earlier. */
8946 gcc_assert (
8947 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8948 for (i = 0; i < group_size; i++)
8950 vec_oprnd = (*gvec_oprnds[i])[j];
8951 dr_chain[i] = vec_oprnd;
8954 if (mask)
8955 vec_mask = vec_masks[j];
8956 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8957 stmt_info, bump);
8960 if (costing_p)
8962 n_adjacent_stores += group_size;
8963 continue;
8966 /* Get an array into which we can store the individual vectors. */
8967 tree vec_array = create_vector_array (vectype, group_size);
8969 /* Invalidate the current contents of VEC_ARRAY. This should
8970 become an RTL clobber too, which prevents the vector registers
8971 from being upward-exposed. */
8972 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8974 /* Store the individual vectors into the array. */
8975 for (i = 0; i < group_size; i++)
8977 if (slp)
8979 slp_tree child;
8980 if (i == 0 || !mask_node)
8981 child = SLP_TREE_CHILDREN (slp_node)[i];
8982 else
8983 child = SLP_TREE_CHILDREN (slp_node)[i + 1];
8984 vec_oprnd = SLP_TREE_VEC_DEFS (child)[j];
8986 else
8987 vec_oprnd = dr_chain[i];
8988 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8992 tree final_mask = NULL;
8993 tree final_len = NULL;
8994 tree bias = NULL;
8995 if (loop_masks)
8996 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8997 ncopies, vectype, j);
8998 if (vec_mask)
8999 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9000 vec_mask, gsi);
9002 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
9004 if (loop_lens)
9005 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9006 ncopies, vectype, j, 1);
9007 else
9008 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9009 signed char biasval
9010 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9011 bias = build_int_cst (intQI_type_node, biasval);
9012 if (!final_mask)
9014 mask_vectype = truth_type_for (vectype);
9015 final_mask = build_minus_one_cst (mask_vectype);
9019 gcall *call;
9020 if (final_len && final_mask)
9022 /* Emit:
9023 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
9024 LEN, BIAS, VEC_ARRAY). */
9025 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9026 tree alias_ptr = build_int_cst (ref_type, align);
9027 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
9028 dataref_ptr, alias_ptr,
9029 final_mask, final_len, bias,
9030 vec_array);
9032 else if (final_mask)
9034 /* Emit:
9035 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
9036 VEC_ARRAY). */
9037 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9038 tree alias_ptr = build_int_cst (ref_type, align);
9039 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
9040 dataref_ptr, alias_ptr,
9041 final_mask, vec_array);
9043 else
9045 /* Emit:
9046 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
9047 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9048 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
9049 gimple_call_set_lhs (call, data_ref);
9051 gimple_call_set_nothrow (call, true);
9052 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9053 new_stmt = call;
9055 /* Record that VEC_ARRAY is now dead. */
9056 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9057 if (j == 0 && !slp)
9058 *vec_stmt = new_stmt;
9059 if (!slp)
9060 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9063 if (costing_p)
9065 if (n_adjacent_stores > 0)
9066 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9067 alignment_support_scheme, misalignment,
9068 &inside_cost, cost_vec);
9069 if (dump_enabled_p ())
9070 dump_printf_loc (MSG_NOTE, vect_location,
9071 "vect_model_store_cost: inside_cost = %d, "
9072 "prologue_cost = %d .\n",
9073 inside_cost, prologue_cost);
9076 return true;
9079 if (memory_access_type == VMAT_GATHER_SCATTER)
9081 gcc_assert (!grouped_store);
9082 auto_vec<tree> vec_offsets;
9083 unsigned int inside_cost = 0, prologue_cost = 0;
9084 for (j = 0; j < ncopies; j++)
9086 gimple *new_stmt;
9087 if (j == 0)
9089 if (costing_p && vls_type == VLS_STORE_INVARIANT)
9090 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
9091 stmt_info, 0, vect_prologue);
9092 else if (!costing_p)
9094 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
9095 DR_CHAIN is of size 1. */
9096 gcc_assert (group_size == 1);
9097 if (slp_node)
9098 vect_get_slp_defs (op_node, gvec_oprnds[0]);
9099 else
9100 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
9101 ncopies, op, gvec_oprnds[0]);
9102 if (mask)
9104 if (slp_node)
9105 vect_get_slp_defs (mask_node, &vec_masks);
9106 else
9107 vect_get_vec_defs_for_operand (vinfo, stmt_info,
9108 ncopies,
9109 mask, &vec_masks,
9110 mask_vectype);
9113 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9114 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9115 slp_node, &gs_info,
9116 &dataref_ptr, &vec_offsets);
9117 else
9118 dataref_ptr
9119 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
9120 aggr_type, NULL, offset,
9121 &dummy, gsi, &ptr_incr, false,
9122 bump);
9125 else if (!costing_p)
9127 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9128 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9129 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9130 gsi, stmt_info, bump);
9133 new_stmt = NULL;
9134 for (i = 0; i < vec_num; ++i)
9136 if (!costing_p)
9138 vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i];
9139 if (mask)
9140 vec_mask = vec_masks[vec_num * j + i];
9141 /* We should have caught mismatched types earlier. */
9142 gcc_assert (useless_type_conversion_p (vectype,
9143 TREE_TYPE (vec_oprnd)));
9145 unsigned HOST_WIDE_INT align;
9146 tree final_mask = NULL_TREE;
9147 tree final_len = NULL_TREE;
9148 tree bias = NULL_TREE;
9149 if (!costing_p)
9151 if (loop_masks)
9152 final_mask = vect_get_loop_mask (loop_vinfo, gsi,
9153 loop_masks, ncopies,
9154 vectype, j);
9155 if (vec_mask)
9156 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9157 final_mask, vec_mask, gsi);
9160 if (gs_info.ifn != IFN_LAST)
9162 if (costing_p)
9164 unsigned int cnunits = vect_nunits_for_cost (vectype);
9165 inside_cost
9166 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9167 stmt_info, 0, vect_body);
9168 continue;
9171 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9172 vec_offset = vec_offsets[vec_num * j + i];
9173 tree scale = size_int (gs_info.scale);
9175 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9177 if (loop_lens)
9178 final_len = vect_get_loop_len (loop_vinfo, gsi,
9179 loop_lens, ncopies,
9180 vectype, j, 1);
9181 else
9182 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9183 signed char biasval
9184 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9185 bias = build_int_cst (intQI_type_node, biasval);
9186 if (!final_mask)
9188 mask_vectype = truth_type_for (vectype);
9189 final_mask = build_minus_one_cst (mask_vectype);
9193 gcall *call;
9194 if (final_len && final_mask)
9195 call = gimple_build_call_internal
9196 (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
9197 vec_offset, scale, vec_oprnd, final_mask,
9198 final_len, bias);
9199 else if (final_mask)
9200 call = gimple_build_call_internal
9201 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
9202 vec_offset, scale, vec_oprnd, final_mask);
9203 else
9204 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9205 dataref_ptr, vec_offset,
9206 scale, vec_oprnd);
9207 gimple_call_set_nothrow (call, true);
9208 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9209 new_stmt = call;
9211 else if (gs_info.decl)
9213 /* The builtin-decls path for scatter is legacy and x86-only. */
9214 gcc_assert (nunits.is_constant ()
9215 && (!final_mask
9216 || SCALAR_INT_MODE_P
9217 (TYPE_MODE (TREE_TYPE (final_mask)))));
9218 if (costing_p)
9220 unsigned int cnunits = vect_nunits_for_cost (vectype);
9221 inside_cost
9222 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9223 stmt_info, 0, vect_body);
9224 continue;
9226 poly_uint64 offset_nunits
9227 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
9228 if (known_eq (nunits, offset_nunits))
9230 new_stmt = vect_build_one_scatter_store_call
9231 (vinfo, stmt_info, gsi, &gs_info,
9232 dataref_ptr, vec_offsets[vec_num * j + i],
9233 vec_oprnd, final_mask);
9234 vect_finish_stmt_generation (vinfo, stmt_info,
9235 new_stmt, gsi);
9237 else if (known_eq (nunits, offset_nunits * 2))
9239 /* We have an offset vector with half the number of
9240 lanes, but the builtins will store full vectype
9241 data from the lower lanes. */
9242 new_stmt = vect_build_one_scatter_store_call
9243 (vinfo, stmt_info, gsi, &gs_info,
9244 dataref_ptr,
9245 vec_offsets[2 * vec_num * j + 2 * i],
9246 vec_oprnd, final_mask);
9247 vect_finish_stmt_generation (vinfo, stmt_info,
9248 new_stmt, gsi);
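/* Move the upper half of VEC_OPRND (and of FINAL_MASK, if any) into the
   lower lanes so that the second builtin call below can store the
   remaining elements using the second half of the offsets.  */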
9249 int count = nunits.to_constant ();
9250 vec_perm_builder sel (count, count, 1);
9251 sel.quick_grow (count);
9252 for (int i = 0; i < count; ++i)
9253 sel[i] = i | (count / 2);
9254 vec_perm_indices indices (sel, 2, count);
9255 tree perm_mask
9256 = vect_gen_perm_mask_checked (vectype, indices);
9257 new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9258 vec_oprnd, vec_oprnd,
9259 perm_mask);
9260 vec_oprnd = make_ssa_name (vectype);
9261 gimple_set_lhs (new_stmt, vec_oprnd);
9262 vect_finish_stmt_generation (vinfo, stmt_info,
9263 new_stmt, gsi);
9264 if (final_mask)
9266 new_stmt = gimple_build_assign (NULL_TREE,
9267 VEC_UNPACK_HI_EXPR,
9268 final_mask);
9269 final_mask = make_ssa_name
9270 (truth_type_for (gs_info.offset_vectype));
9271 gimple_set_lhs (new_stmt, final_mask);
9272 vect_finish_stmt_generation (vinfo, stmt_info,
9273 new_stmt, gsi);
9275 new_stmt = vect_build_one_scatter_store_call
9276 (vinfo, stmt_info, gsi, &gs_info,
9277 dataref_ptr,
9278 vec_offsets[2 * vec_num * j + 2 * i + 1],
9279 vec_oprnd, final_mask);
9280 vect_finish_stmt_generation (vinfo, stmt_info,
9281 new_stmt, gsi);
9283 else if (known_eq (nunits * 2, offset_nunits))
9285 /* We have an offset vector with double the number of
9286 lanes. Select the low/high part accordingly. */
9287 vec_offset = vec_offsets[(vec_num * j + i) / 2];
9288 if ((vec_num * j + i) & 1)
9290 int count = offset_nunits.to_constant ();
9291 vec_perm_builder sel (count, count, 1);
9292 sel.quick_grow (count);
9293 for (int i = 0; i < count; ++i)
9294 sel[i] = i | (count / 2);
9295 vec_perm_indices indices (sel, 2, count);
9296 tree perm_mask = vect_gen_perm_mask_checked
9297 (TREE_TYPE (vec_offset), indices);
9298 new_stmt = gimple_build_assign (NULL_TREE,
9299 VEC_PERM_EXPR,
9300 vec_offset,
9301 vec_offset,
9302 perm_mask);
9303 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9304 gimple_set_lhs (new_stmt, vec_offset);
9305 vect_finish_stmt_generation (vinfo, stmt_info,
9306 new_stmt, gsi);
9308 new_stmt = vect_build_one_scatter_store_call
9309 (vinfo, stmt_info, gsi, &gs_info,
9310 dataref_ptr, vec_offset,
9311 vec_oprnd, final_mask);
9312 vect_finish_stmt_generation (vinfo, stmt_info,
9313 new_stmt, gsi);
9315 else
9316 gcc_unreachable ();
9318 else
9320 /* Emulated scatter. */
9321 gcc_assert (!final_mask);
9322 if (costing_p)
9324 unsigned int cnunits = vect_nunits_for_cost (vectype);
9325 /* For an emulated scatter, N offset vector element extracts
9326 (we assume the scalar scaling and ptr + offset add is
9327 consumed by the store). */
9328 inside_cost
9329 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9330 stmt_info, 0, vect_body);
9331 /* N scalar stores plus extracting the elements. */
9332 inside_cost
9333 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9334 stmt_info, 0, vect_body);
9335 inside_cost
9336 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9337 stmt_info, 0, vect_body);
9338 continue;
9341 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9342 unsigned HOST_WIDE_INT const_offset_nunits
9343 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9344 vec<constructor_elt, va_gc> *ctor_elts;
9345 vec_alloc (ctor_elts, const_nunits);
9346 gimple_seq stmts = NULL;
9347 tree elt_type = TREE_TYPE (vectype);
9348 unsigned HOST_WIDE_INT elt_size
9349 = tree_to_uhwi (TYPE_SIZE (elt_type));
9350 /* We support offset vectors with more elements
9351 than the data vector for now. */
9352 unsigned HOST_WIDE_INT factor
9353 = const_offset_nunits / const_nunits;
9354 vec_offset = vec_offsets[(vec_num * j + i) / factor];
9355 unsigned elt_offset
9356 = ((vec_num * j + i) % factor) * const_nunits;
9357 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9358 tree scale = size_int (gs_info.scale);
9359 align = get_object_alignment (DR_REF (first_dr_info->dr));
9360 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
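/* Emulate the scatter element by element: for each lane K extract the
   offset, scale it, add it to the base pointer, extract the corresponding
   data element and emit a plain scalar store.  */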
9361 for (unsigned k = 0; k < const_nunits; ++k)
9363 /* Compute the offsetted pointer. */
9364 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9365 bitsize_int (k + elt_offset));
9366 tree idx
9367 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
9368 vec_offset, TYPE_SIZE (idx_type), boff);
9369 idx = gimple_convert (&stmts, sizetype, idx);
9370 idx = gimple_build (&stmts, MULT_EXPR, sizetype,
9371 idx, scale);
9372 tree ptr
9373 = gimple_build (&stmts, PLUS_EXPR,
9374 TREE_TYPE (dataref_ptr),
9375 dataref_ptr, idx);
9376 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9377 /* Extract the element to be stored. */
9378 tree elt
9379 = gimple_build (&stmts, BIT_FIELD_REF,
9380 TREE_TYPE (vectype),
9381 vec_oprnd, TYPE_SIZE (elt_type),
9382 bitsize_int (k * elt_size));
9383 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9384 stmts = NULL;
9385 tree ref
9386 = build2 (MEM_REF, ltype, ptr,
9387 build_int_cst (ref_type, 0));
9388 new_stmt = gimple_build_assign (ref, elt);
9389 vect_finish_stmt_generation (vinfo, stmt_info,
9390 new_stmt, gsi);
9392 if (slp)
9393 slp_node->push_vec_def (new_stmt);
9396 if (!slp && !costing_p)
9397 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9400 if (!slp && !costing_p)
9401 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9403 if (costing_p && dump_enabled_p ())
9404 dump_printf_loc (MSG_NOTE, vect_location,
9405 "vect_model_store_cost: inside_cost = %d, "
9406 "prologue_cost = %d .\n",
9407 inside_cost, prologue_cost);
9409 return true;
9412 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9413 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9414 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9415 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9417 unsigned inside_cost = 0, prologue_cost = 0;
9418 /* For costing some adjacent vector stores, we'd like to cost with
9419 the total number of them once instead of costing each one by one. */
9420 unsigned int n_adjacent_stores = 0;
9421 auto_vec<tree> result_chain (group_size);
9422 auto_vec<tree, 1> vec_oprnds;
9423 for (j = 0; j < ncopies; j++)
9425 gimple *new_stmt;
9426 if (j == 0)
9428 if (slp && !costing_p)
9430 /* Get vectorized arguments for SLP_NODE. */
9431 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9432 &vec_oprnds, mask, &vec_masks);
9433 vec_oprnd = vec_oprnds[0];
9434 if (mask)
9435 vec_mask = vec_masks[0];
9437 else
9439 /* For interleaved stores we collect vectorized defs for all the
9440 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9441 input to vect_permute_store_chain().
9443 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9444 is of size 1. */
9445 stmt_vec_info next_stmt_info = first_stmt_info;
9446 for (i = 0; i < group_size; i++)
9448 /* Since gaps are not supported for interleaved stores,
9449 DR_GROUP_SIZE is the exact number of stmts in the chain.
9450 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9451 that there is no interleaving, DR_GROUP_SIZE is 1,
9452 and only one iteration of the loop will be executed. */
9453 op = vect_get_store_rhs (next_stmt_info);
9454 if (costing_p)
9455 update_prologue_cost (&prologue_cost, op);
9456 else
9458 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9459 ncopies, op,
9460 gvec_oprnds[i]);
9461 vec_oprnd = (*gvec_oprnds[i])[0];
9462 dr_chain.quick_push (vec_oprnd);
9464 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9466 if (mask && !costing_p)
9468 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9469 mask, &vec_masks,
9470 mask_vectype);
9471 vec_mask = vec_masks[0];
9475 /* We should have caught mismatched types earlier. */
9476 gcc_assert (costing_p
9477 || useless_type_conversion_p (vectype,
9478 TREE_TYPE (vec_oprnd)));
9479 bool simd_lane_access_p
9480 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9481 if (!costing_p
9482 && simd_lane_access_p
9483 && !loop_masks
9484 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9485 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9486 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9487 && integer_zerop (DR_INIT (first_dr_info->dr))
9488 && alias_sets_conflict_p (get_alias_set (aggr_type),
9489 get_alias_set (TREE_TYPE (ref_type))))
9491 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9492 dataref_offset = build_int_cst (ref_type, 0);
9494 else if (!costing_p)
9495 dataref_ptr
9496 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9497 simd_lane_access_p ? loop : NULL,
9498 offset, &dummy, gsi, &ptr_incr,
9499 simd_lane_access_p, bump);
9501 else if (!costing_p)
9503 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9504 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9505 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9506 of size 1. */
9507 for (i = 0; i < group_size; i++)
9509 vec_oprnd = (*gvec_oprnds[i])[j];
9510 dr_chain[i] = vec_oprnd;
9512 if (mask)
9513 vec_mask = vec_masks[j];
9514 if (dataref_offset)
9515 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9516 else
9517 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9518 stmt_info, bump);
9521 new_stmt = NULL;
9522 if (grouped_store)
9524 /* Permute. */
9525 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9526 if (costing_p)
9528 int group_size = DR_GROUP_SIZE (first_stmt_info);
9529 int nstmts = ceil_log2 (group_size) * group_size;
9530 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9531 stmt_info, 0, vect_body);
9532 if (dump_enabled_p ())
9533 dump_printf_loc (MSG_NOTE, vect_location,
9534 "vect_model_store_cost: "
9535 "strided group_size = %d .\n",
9536 group_size);
9538 else
9539 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9540 gsi, &result_chain);
9543 stmt_vec_info next_stmt_info = first_stmt_info;
9544 for (i = 0; i < vec_num; i++)
9546 if (!costing_p)
9548 if (slp)
9549 vec_oprnd = vec_oprnds[i];
9550 else if (grouped_store)
9551 /* For grouped stores vectorized defs are interleaved in
9552 vect_permute_store_chain(). */
9553 vec_oprnd = result_chain[i];
9556 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9558 if (costing_p)
9559 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9560 stmt_info, 0, vect_body);
9561 else
9563 tree perm_mask = perm_mask_for_reverse (vectype);
9564 tree perm_dest = vect_create_destination_var (
9565 vect_get_store_rhs (stmt_info), vectype);
9566 tree new_temp = make_ssa_name (perm_dest);
9568 /* Generate the permute statement. */
9569 gimple *perm_stmt
9570 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9571 vec_oprnd, perm_mask);
9572 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9573 gsi);
9575 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9576 vec_oprnd = new_temp;
9580 if (costing_p)
9582 n_adjacent_stores++;
9584 if (!slp)
9586 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9587 if (!next_stmt_info)
9588 break;
9591 continue;
9594 tree final_mask = NULL_TREE;
9595 tree final_len = NULL_TREE;
9596 tree bias = NULL_TREE;
9597 if (loop_masks)
9598 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9599 vec_num * ncopies, vectype,
9600 vec_num * j + i);
9601 if (slp && vec_mask)
9602 vec_mask = vec_masks[i];
9603 if (vec_mask)
9604 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9605 vec_mask, gsi);
9607 if (i > 0)
9608 /* Bump the vector pointer. */
9609 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9610 stmt_info, bump);
9612 unsigned misalign;
9613 unsigned HOST_WIDE_INT align;
9614 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9615 if (alignment_support_scheme == dr_aligned)
9616 misalign = 0;
9617 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9619 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9620 misalign = 0;
9622 else
9623 misalign = misalignment;
9624 if (dataref_offset == NULL_TREE
9625 && TREE_CODE (dataref_ptr) == SSA_NAME)
9626 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9627 misalign);
9628 align = least_bit_hwi (misalign | align);
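/* ALIGN now holds the alignment in bytes that is actually guaranteed for
   this access, taking the known misalignment into account.  */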
9630 /* Compute the IFN when LOOP_LENS or FINAL_MASK is valid. */
9631 machine_mode vmode = TYPE_MODE (vectype);
9632 machine_mode new_vmode = vmode;
9633 internal_fn partial_ifn = IFN_LAST;
9634 if (loop_lens)
9636 opt_machine_mode new_ovmode
9637 = get_len_load_store_mode (vmode, false, &partial_ifn);
9638 new_vmode = new_ovmode.require ();
9639 unsigned factor
9640 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9641 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9642 vec_num * ncopies, vectype,
9643 vec_num * j + i, factor);
9645 else if (final_mask)
9647 if (!can_vec_mask_load_store_p (
9648 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9649 &partial_ifn))
9650 gcc_unreachable ();
9653 if (partial_ifn == IFN_MASK_LEN_STORE)
9655 if (!final_len)
9657 /* Pass VF value to 'len' argument of
9658 MASK_LEN_STORE if LOOP_LENS is invalid. */
9659 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9661 if (!final_mask)
9663 /* Pass all ones value to 'mask' argument of
9664 MASK_LEN_STORE if final_mask is invalid. */
9665 mask_vectype = truth_type_for (vectype);
9666 final_mask = build_minus_one_cst (mask_vectype);
9669 if (final_len)
9671 signed char biasval
9672 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9674 bias = build_int_cst (intQI_type_node, biasval);
9677 /* Arguments are ready. Create the new vector stmt. */
9678 if (final_len)
9680 gcall *call;
9681 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9682 /* Need conversion if it's wrapped with VnQI. */
9683 if (vmode != new_vmode)
9685 tree new_vtype
9686 = build_vector_type_for_mode (unsigned_intQI_type_node,
9687 new_vmode);
9688 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9689 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9690 gassign *new_stmt
9691 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9692 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9693 vec_oprnd = var;
9696 if (partial_ifn == IFN_MASK_LEN_STORE)
9697 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9698 dataref_ptr, ptr, final_mask,
9699 final_len, bias, vec_oprnd);
9700 else
9701 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9702 dataref_ptr, ptr, final_len,
9703 bias, vec_oprnd);
9704 gimple_call_set_nothrow (call, true);
9705 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9706 new_stmt = call;
9708 else if (final_mask)
9710 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9711 gcall *call
9712 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9713 ptr, final_mask, vec_oprnd);
9714 gimple_call_set_nothrow (call, true);
9715 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9716 new_stmt = call;
9718 else
9720 data_ref
9721 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9722 dataref_offset ? dataref_offset
9723 : build_int_cst (ref_type, 0));
9724 if (alignment_support_scheme == dr_aligned)
9726 else
9727 TREE_TYPE (data_ref)
9728 = build_aligned_type (TREE_TYPE (data_ref),
9729 align * BITS_PER_UNIT);
9730 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9731 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9732 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9735 if (slp)
9736 continue;
9738 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9739 if (!next_stmt_info)
9740 break;
9742 if (!slp && !costing_p)
9744 if (j == 0)
9745 *vec_stmt = new_stmt;
9746 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9750 if (costing_p)
9752 if (n_adjacent_stores > 0)
9753 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9754 alignment_support_scheme, misalignment,
9755 &inside_cost, cost_vec);
9757 /* When vectorizing a store into the function result, assign
9758 a penalty if the function returns in a multi-register location.
9759 In this case we assume we'll end up having to spill the
9760 vector result and do piecewise loads, as a conservative estimate. */
9761 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9762 if (base
9763 && (TREE_CODE (base) == RESULT_DECL
9764 || (DECL_P (base) && cfun_returns (base)))
9765 && !aggregate_value_p (base, cfun->decl))
9767 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9768 /* ??? Handle PARALLEL in some way. */
9769 if (REG_P (reg))
9771 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9772 /* Assume that a single reg-reg move is possible and cheap;
9773 do not account for the vector-to-GP-register move cost. */
9774 if (nregs > 1)
9776 /* Spill. */
9777 prologue_cost
9778 += record_stmt_cost (cost_vec, ncopies, vector_store,
9779 stmt_info, 0, vect_epilogue);
9780 /* Loads. */
9781 prologue_cost
9782 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9783 stmt_info, 0, vect_epilogue);
9787 if (dump_enabled_p ())
9788 dump_printf_loc (MSG_NOTE, vect_location,
9789 "vect_model_store_cost: inside_cost = %d, "
9790 "prologue_cost = %d .\n",
9791 inside_cost, prologue_cost);
9794 return true;
9797 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9798 VECTOR_CST mask. No checks are made that the target platform supports the
9799 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9800 vect_gen_perm_mask_checked. */
9802 tree
9803 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9805 tree mask_type;
9807 poly_uint64 nunits = sel.length ();
9808 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9810 mask_type = build_vector_type (ssizetype, nunits);
9811 return vec_perm_indices_to_tree (mask_type, sel);
9814 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9815 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9817 tree
9818 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9820 machine_mode vmode = TYPE_MODE (vectype);
9821 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9822 return vect_gen_perm_mask_any (vectype, sel);
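/* A typical use, following the pattern used elsewhere in this file
   (an illustrative sketch only, reversing a COUNT-element vector):

     vec_perm_builder sel (count, count, 1);
     sel.quick_grow (count);
     for (int i = 0; i < count; ++i)
       sel[i] = count - 1 - i;
     vec_perm_indices indices (sel, 1, count);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   Use vect_gen_perm_mask_any together with an explicit
   can_vec_perm_const_p check when target support is not already
   known.  */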
9825 /* Given vector variables X and Y that were generated for the scalar
9826 STMT_INFO, generate instructions to permute the vector elements of X and Y
9827 using permutation mask MASK_VEC, insert them at *GSI and return the
9828 permuted vector variable. */
9830 static tree
9831 permute_vec_elements (vec_info *vinfo,
9832 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9833 gimple_stmt_iterator *gsi)
9835 tree vectype = TREE_TYPE (x);
9836 tree perm_dest, data_ref;
9837 gimple *perm_stmt;
9839 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9840 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9841 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9842 else
9843 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9844 data_ref = make_ssa_name (perm_dest);
9846 /* Generate the permute statement. */
9847 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9848 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9850 return data_ref;
9853 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9854 inserting them on the loop's preheader edge. Returns true if we
9855 were successful in doing so (and thus STMT_INFO can then be moved),
9856 otherwise returns false. HOIST_P indicates whether we want to hoist the
9857 definitions of all SSA uses; it is false when we are only costing. */
9859 static bool
9860 hoist_defs_of_uses (gimple *stmt, class loop *loop, bool hoist_p)
9862 ssa_op_iter i;
9863 use_operand_p use_p;
9864 auto_vec<use_operand_p, 8> to_hoist;
9866 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, i, SSA_OP_USE)
9868 gimple *def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
9869 if (!gimple_nop_p (def_stmt)
9870 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9872 /* Make sure we don't need to recurse. While we could do
9873 so in simple cases, for more complex use webs
9874 we don't have an easy way to preserve stmt order to fulfil
9875 dependencies within them. */
9876 tree op2;
9877 ssa_op_iter i2;
9878 if (gimple_code (def_stmt) == GIMPLE_PHI
9879 || (single_ssa_def_operand (def_stmt, SSA_OP_DEF)
9880 == NULL_DEF_OPERAND_P))
9881 return false;
9882 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9884 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9885 if (!gimple_nop_p (def_stmt2)
9886 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9887 return false;
9889 to_hoist.safe_push (use_p);
9893 if (to_hoist.is_empty ())
9894 return true;
9896 if (!hoist_p)
9897 return true;
9899 /* Instead of moving defs we copy them so we can zero their UID to not
9900 confuse dominance queries in the preheader. */
9901 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
9902 for (use_operand_p use_p : to_hoist)
9904 gimple *def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
9905 gimple *copy = gimple_copy (def_stmt);
9906 gimple_set_uid (copy, 0);
9907 def_operand_p def_p = single_ssa_def_operand (def_stmt, SSA_OP_DEF);
9908 tree new_def = duplicate_ssa_name (DEF_FROM_PTR (def_p), copy);
9909 update_stmt (copy);
9910 def_p = single_ssa_def_operand (copy, SSA_OP_DEF);
9911 SET_DEF (def_p, new_def);
9912 SET_USE (use_p, new_def);
9913 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
9916 return true;
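/* For instance (illustrative), for a loop-invariant load such as

     off_1 = n_2 * 4;
     x_3 = MEM[base_ptr + off_1];

   vectorizable_load first copies the load itself to the preheader edge
   and then uses this function to copy the definition of off_1 in front
   of it, so that the operands of the hoisted load still have dominating
   definitions.  */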
9919 /* vectorizable_load.
9921 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9922 that can be vectorized.
9923 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9924 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9925 Return true if STMT_INFO is vectorizable in this way. */
9927 static bool
9928 vectorizable_load (vec_info *vinfo,
9929 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9930 gimple **vec_stmt, slp_tree slp_node,
9931 stmt_vector_for_cost *cost_vec)
9933 tree scalar_dest;
9934 tree vec_dest = NULL;
9935 tree data_ref = NULL;
9936 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9937 class loop *loop = NULL;
9938 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9939 bool nested_in_vect_loop = false;
9940 tree elem_type;
9941 /* Avoid false positive uninitialized warning, see PR110652. */
9942 tree new_temp = NULL_TREE;
9943 machine_mode mode;
9944 tree dummy;
9945 tree dataref_ptr = NULL_TREE;
9946 tree dataref_offset = NULL_TREE;
9947 gimple *ptr_incr = NULL;
9948 int ncopies;
9949 int i, j;
9950 unsigned int group_size;
9951 poly_uint64 group_gap_adj;
9952 tree msq = NULL_TREE, lsq;
9953 tree realignment_token = NULL_TREE;
9954 gphi *phi = NULL;
9955 vec<tree> dr_chain = vNULL;
9956 bool grouped_load = false;
9957 stmt_vec_info first_stmt_info;
9958 stmt_vec_info first_stmt_info_for_drptr = NULL;
9959 bool compute_in_loop = false;
9960 class loop *at_loop;
9961 int vec_num;
9962 bool slp = (slp_node != NULL);
9963 bool slp_perm = false;
9964 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9965 poly_uint64 vf;
9966 tree aggr_type;
9967 gather_scatter_info gs_info;
9968 tree ref_type;
9969 enum vect_def_type mask_dt = vect_unknown_def_type;
9971 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9972 return false;
9974 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9975 && ! vec_stmt)
9976 return false;
9978 if (!STMT_VINFO_DATA_REF (stmt_info))
9979 return false;
9981 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9982 int mask_index = -1;
9983 slp_tree slp_op = NULL;
9984 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9986 scalar_dest = gimple_assign_lhs (assign);
9987 if (TREE_CODE (scalar_dest) != SSA_NAME)
9988 return false;
9990 tree_code code = gimple_assign_rhs_code (assign);
9991 if (code != ARRAY_REF
9992 && code != BIT_FIELD_REF
9993 && code != INDIRECT_REF
9994 && code != COMPONENT_REF
9995 && code != IMAGPART_EXPR
9996 && code != REALPART_EXPR
9997 && code != MEM_REF
9998 && TREE_CODE_CLASS (code) != tcc_declaration)
9999 return false;
10001 else
10003 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10004 if (!call || !gimple_call_internal_p (call))
10005 return false;
10007 internal_fn ifn = gimple_call_internal_fn (call);
10008 if (!internal_load_fn_p (ifn))
10009 return false;
10011 scalar_dest = gimple_call_lhs (call);
10012 if (!scalar_dest)
10013 return false;
10015 mask_index = internal_fn_mask_index (ifn);
10016 if (mask_index >= 0 && slp_node)
10017 mask_index = vect_slp_child_index_for_operand
10018 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
10019 if (mask_index >= 0
10020 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
10021 &mask, &slp_op, &mask_dt, &mask_vectype))
10022 return false;
10025 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10026 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10028 if (loop_vinfo)
10030 loop = LOOP_VINFO_LOOP (loop_vinfo);
10031 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
10032 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
10034 else
10035 vf = 1;
10037 /* Multiple types in SLP are handled by creating the appropriate number of
10038 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
10039 case of SLP. */
10040 if (slp)
10041 ncopies = 1;
10042 else
10043 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10045 gcc_assert (ncopies >= 1);
10047 /* FORNOW. This restriction should be relaxed. */
10048 if (nested_in_vect_loop
10049 && (ncopies > 1 || (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)))
10051 if (dump_enabled_p ())
10052 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10053 "multiple types in nested loop.\n");
10054 return false;
10057 /* Invalidate assumptions made by dependence analysis when vectorization
10058 on the unrolled body effectively re-orders stmts. */
10059 if (ncopies > 1
10060 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
10061 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
10062 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
10064 if (dump_enabled_p ())
10065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10066 "cannot perform implicit CSE when unrolling "
10067 "with negative dependence distance\n");
10068 return false;
10071 elem_type = TREE_TYPE (vectype);
10072 mode = TYPE_MODE (vectype);
10074 /* FORNOW. In some cases we can vectorize even if the data type is not
10075 supported (e.g. data copies). */
10076 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
10078 if (dump_enabled_p ())
10079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10080 "Aligned load, but unsupported type.\n");
10081 return false;
10084 /* Check if the load is a part of an interleaving chain. */
10085 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
10087 grouped_load = true;
10088 /* FORNOW */
10089 gcc_assert (!nested_in_vect_loop);
10090 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
10092 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10093 group_size = DR_GROUP_SIZE (first_stmt_info);
10095 /* Refuse non-SLP vectorization of SLP-only groups. */
10096 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
10098 if (dump_enabled_p ())
10099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10100 "cannot vectorize load in non-SLP mode.\n");
10101 return false;
10104 /* Invalidate assumptions made by dependence analysis when vectorization
10105 on the unrolled body effectively re-orders stmts. */
10106 if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
10107 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
10108 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
10110 if (dump_enabled_p ())
10111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10112 "cannot perform implicit CSE when performing "
10113 "group loads with negative dependence distance\n");
10114 return false;
10117 else
10118 group_size = 1;
10120 vect_memory_access_type memory_access_type;
10121 enum dr_alignment_support alignment_support_scheme;
10122 int misalignment;
10123 poly_int64 poffset;
10124 internal_fn lanes_ifn;
10125 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
10126 ncopies, &memory_access_type, &poffset,
10127 &alignment_support_scheme, &misalignment, &gs_info,
10128 &lanes_ifn))
10129 return false;
10131 /* ??? The following checks should really be part of
10132 get_group_load_store_type. */
10133 if (slp
10134 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
10135 && !((memory_access_type == VMAT_ELEMENTWISE
10136 || memory_access_type == VMAT_GATHER_SCATTER)
10137 && SLP_TREE_LANES (slp_node) == 1))
10139 slp_perm = true;
10141 if (!loop_vinfo)
10143 /* In BB vectorization we may not actually use a loaded vector
10144 accessing elements in excess of DR_GROUP_SIZE. */
10145 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10146 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
10147 unsigned HOST_WIDE_INT nunits;
10148 unsigned j, k, maxk = 0;
10149 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
10150 if (k > maxk)
10151 maxk = k;
10152 tree vectype = SLP_TREE_VECTYPE (slp_node);
10153 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
10154 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
10156 if (dump_enabled_p ())
10157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10158 "BB vectorization with gaps at the end of "
10159 "a load is not supported\n");
10160 return false;
10164 auto_vec<tree> tem;
10165 unsigned n_perms;
10166 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
10167 true, &n_perms))
10169 if (dump_enabled_p ())
10170 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
10171 vect_location,
10172 "unsupported load permutation\n");
10173 return false;
10177 if (slp_node
10178 && slp_node->ldst_lanes
10179 && memory_access_type != VMAT_LOAD_STORE_LANES)
10181 if (dump_enabled_p ())
10182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10183 "discovered load-lane but cannot use it.\n");
10184 return false;
10187 if (mask)
10189 if (memory_access_type == VMAT_CONTIGUOUS)
10191 machine_mode vec_mode = TYPE_MODE (vectype);
10192 if (!VECTOR_MODE_P (vec_mode)
10193 || !can_vec_mask_load_store_p (vec_mode,
10194 TYPE_MODE (mask_vectype), true))
10195 return false;
10197 else if (memory_access_type != VMAT_LOAD_STORE_LANES
10198 && memory_access_type != VMAT_GATHER_SCATTER)
10200 if (dump_enabled_p ())
10201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10202 "unsupported access type for masked load.\n");
10203 return false;
10205 else if (memory_access_type == VMAT_GATHER_SCATTER
10206 && gs_info.ifn == IFN_LAST
10207 && !gs_info.decl)
10209 if (dump_enabled_p ())
10210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10211 "unsupported masked emulated gather.\n");
10212 return false;
10214 else if (memory_access_type == VMAT_ELEMENTWISE
10215 || memory_access_type == VMAT_STRIDED_SLP)
10217 if (dump_enabled_p ())
10218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10219 "unsupported masked strided access.\n");
10220 return false;
10224 bool costing_p = !vec_stmt;
10226 if (costing_p) /* transformation not required. */
10228 if (slp_node
10229 && mask
10230 && !vect_maybe_update_slp_op_vectype (slp_op,
10231 mask_vectype))
10233 if (dump_enabled_p ())
10234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10235 "incompatible vector types for invariants\n");
10236 return false;
10239 if (!slp)
10240 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10241 else
10242 SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
10244 if (loop_vinfo
10245 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10246 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10247 VLS_LOAD, group_size,
10248 memory_access_type, &gs_info,
10249 mask);
10251 if (dump_enabled_p ()
10252 && memory_access_type != VMAT_ELEMENTWISE
10253 && memory_access_type != VMAT_GATHER_SCATTER
10254 && alignment_support_scheme != dr_aligned)
10255 dump_printf_loc (MSG_NOTE, vect_location,
10256 "Vectorizing an unaligned access.\n");
10258 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10259 vinfo->any_known_not_updated_vssa = true;
10261 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10264 if (!slp)
10265 gcc_assert (memory_access_type
10266 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10267 else
10268 gcc_assert (memory_access_type
10269 == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
10271 if (dump_enabled_p () && !costing_p)
10272 dump_printf_loc (MSG_NOTE, vect_location,
10273 "transform load. ncopies = %d\n", ncopies);
10275 /* Transform. */
10277 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10278 ensure_base_align (dr_info);
10280 if (memory_access_type == VMAT_INVARIANT)
10282 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10283 /* If we have versioned for aliasing or the loop doesn't
10284 have any data dependencies that would preclude this,
10285 then we are sure this is a loop invariant load and
10286 thus we can insert it on the preheader edge.
10287 TODO: hoist_defs_of_uses should ideally be computed
10288 once at analysis time, remembered and used at
10289 transform time. */
10290 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10291 && !nested_in_vect_loop
10292 && hoist_defs_of_uses (stmt_info->stmt, loop, false));
10293 if (costing_p)
10295 enum vect_cost_model_location cost_loc
10296 = hoist_p ? vect_prologue : vect_body;
10297 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10298 stmt_info, 0, cost_loc);
10299 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10300 cost_loc);
10301 unsigned int prologue_cost = hoist_p ? cost : 0;
10302 unsigned int inside_cost = hoist_p ? 0 : cost;
10303 if (dump_enabled_p ())
10304 dump_printf_loc (MSG_NOTE, vect_location,
10305 "vect_model_load_cost: inside_cost = %d, "
10306 "prologue_cost = %d .\n",
10307 inside_cost, prologue_cost);
10308 return true;
10310 if (hoist_p)
10312 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10313 if (dump_enabled_p ())
10314 dump_printf_loc (MSG_NOTE, vect_location,
10315 "hoisting out of the vectorized loop: %G",
10316 (gimple *) stmt);
10317 scalar_dest = copy_ssa_name (scalar_dest);
10318 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10319 edge pe = loop_preheader_edge (loop);
10320 gphi *vphi = get_virtual_phi (loop->header);
10321 tree vuse;
10322 if (vphi)
10323 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10324 else
10325 vuse = gimple_vuse (gsi_stmt (*gsi));
10326 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10327 gimple_set_vuse (new_stmt, vuse);
10328 gsi_insert_on_edge_immediate (pe, new_stmt);
10329 hoist_defs_of_uses (new_stmt, loop, true);
10331 /* These copies are all equivalent. */
10332 if (hoist_p)
10333 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10334 vectype, NULL);
10335 else
10337 gimple_stmt_iterator gsi2 = *gsi;
10338 gsi_next (&gsi2);
10339 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10340 vectype, &gsi2);
10342 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10343 if (slp)
10344 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10345 slp_node->push_vec_def (new_stmt);
10346 else
10348 for (j = 0; j < ncopies; ++j)
10349 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10350 *vec_stmt = new_stmt;
10352 return true;
10355 if (memory_access_type == VMAT_ELEMENTWISE
10356 || memory_access_type == VMAT_STRIDED_SLP)
10358 gimple_stmt_iterator incr_gsi;
10359 bool insert_after;
10360 tree offvar;
10361 tree ivstep;
10362 tree running_off;
10363 vec<constructor_elt, va_gc> *v = NULL;
10364 tree stride_base, stride_step, alias_off;
10365 /* Checked by get_load_store_type. */
10366 unsigned int const_nunits = nunits.to_constant ();
10367 unsigned HOST_WIDE_INT cst_offset = 0;
10368 tree dr_offset;
10369 unsigned int inside_cost = 0;
10371 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10372 gcc_assert (!nested_in_vect_loop);
10374 if (grouped_load)
10376 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10377 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10379 else
10381 first_stmt_info = stmt_info;
10382 first_dr_info = dr_info;
10385 if (slp && grouped_load
10386 && memory_access_type == VMAT_STRIDED_SLP)
10388 group_size = DR_GROUP_SIZE (first_stmt_info);
10389 ref_type = get_group_alias_ptr_type (first_stmt_info);
10391 else
10393 if (grouped_load)
10394 cst_offset
10395 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10396 * vect_get_place_in_interleaving_chain (stmt_info,
10397 first_stmt_info));
10398 group_size = 1;
10399 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10402 if (!costing_p)
10404 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10405 stride_base = fold_build_pointer_plus (
10406 DR_BASE_ADDRESS (first_dr_info->dr),
10407 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10408 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10409 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10411 /* For a load with loop-invariant (but other than power-of-2)
10412 stride (i.e. not a grouped access) like so:
10414 for (i = 0; i < n; i += stride)
10415 ... = array[i];
10417 we generate a new induction variable and new accesses to
10418 form a new vector (or vectors, depending on ncopies):
10420 for (j = 0; ; j += VF*stride)
10421 tmp1 = array[j];
10422 tmp2 = array[j + stride];
10424 vectemp = {tmp1, tmp2, ...}
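   For example (illustrative), with VF = 4 the induction variable
   created below advances by 4 * DR_STEP bytes per vector iteration,
   and the NLOADS loads generated further down fill one vector per
   copy.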
10427 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10428 build_int_cst (TREE_TYPE (stride_step), vf));
10430 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10432 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10433 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10434 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10435 loop, &incr_gsi, insert_after,
10436 &offvar, NULL);
10438 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10441 running_off = offvar;
10442 alias_off = build_int_cst (ref_type, 0);
10443 int nloads = const_nunits;
10444 int lnel = 1;
10445 tree ltype = TREE_TYPE (vectype);
10446 tree lvectype = vectype;
10447 auto_vec<tree> dr_chain;
10448 if (memory_access_type == VMAT_STRIDED_SLP)
10450 HOST_WIDE_INT n = gcd (group_size, const_nunits);
10451 /* Use the target vector type if the group size is a multiple
10452 of it. */
10453 if (n == const_nunits)
10455 nloads = 1;
10456 lnel = const_nunits;
10457 ltype = vectype;
10459 /* Else use the biggest vector with which we can load the group
10460 without accessing excess elements. */
10461 else if (n > 1)
10463 tree ptype;
10464 tree vtype
10465 = vector_vector_composition_type (vectype, const_nunits / n,
10466 &ptype);
10467 if (vtype != NULL_TREE)
10469 nloads = const_nunits / n;
10470 lnel = n;
10471 lvectype = vtype;
10472 ltype = ptype;
10475 /* Else fall back to the default element-wise access. */
10476 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10478 /* Load vector(1) scalar_type directly if the vectype has just one element. */
10479 else if (nloads == 1)
10480 ltype = vectype;
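/* For example (illustrative): with a group size of 2 and an 8-lane
   vectype, N = gcd (2, 8) = 2, so four loads of a two-element piece
   type build one vector (provided the target supports that composition
   type); with a group size of 3 and 8 lanes N = 1 and we fall back to
   eight element-wise loads.  */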
10482 if (slp)
10484 /* For SLP permutation support we need to load the whole group,
10485 not only the number of vector stmts the permutation result
10486 fits in. */
10487 if (slp_perm)
10489 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10490 variable VF. */
10491 unsigned int const_vf = vf.to_constant ();
10492 ncopies = CEIL (group_size * const_vf, const_nunits);
10493 dr_chain.create (ncopies);
10495 else
10496 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10498 unsigned int group_el = 0;
10499 unsigned HOST_WIDE_INT
10500 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10501 unsigned int n_groups = 0;
10502 /* For costing some adjacent vector loads, we'd like to cost with
10503 the total number of them once instead of costing each one by one. */
10504 unsigned int n_adjacent_loads = 0;
10505 for (j = 0; j < ncopies; j++)
10507 if (nloads > 1 && !costing_p)
10508 vec_alloc (v, nloads);
10509 gimple *new_stmt = NULL;
10510 for (i = 0; i < nloads; i++)
10512 if (costing_p)
10514 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10515 avoid an ICE; see PR110776. */
10516 if (VECTOR_TYPE_P (ltype)
10517 && memory_access_type != VMAT_ELEMENTWISE)
10518 n_adjacent_loads++;
10519 else
10520 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10521 stmt_info, 0, vect_body);
10522 continue;
10524 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10525 group_el * elsz + cst_offset);
10526 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10527 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10528 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10529 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10530 if (nloads > 1)
10531 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10532 gimple_assign_lhs (new_stmt));
10534 group_el += lnel;
10535 if (! slp
10536 || group_el == group_size)
10538 n_groups++;
10539 /* When doing SLP, make sure not to load elements from
10540 the next vector iteration; those will not be accessed,
10541 so just use the last element again. See PR107451. */
10542 if (!slp || known_lt (n_groups, vf))
10544 tree newoff = copy_ssa_name (running_off);
10545 gimple *incr
10546 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10547 running_off, stride_step);
10548 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10549 running_off = newoff;
10551 group_el = 0;
10555 if (nloads > 1)
10557 if (costing_p)
10558 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10559 stmt_info, 0, vect_body);
10560 else
10562 tree vec_inv = build_constructor (lvectype, v);
10563 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10564 lvectype, gsi);
10565 new_stmt = SSA_NAME_DEF_STMT (new_temp);
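/* The pieces were assembled in LVECTYPE (possibly a composition type of
   smaller vectors); if that differs from VECTYPE, view-convert the result back below. */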
10566 if (lvectype != vectype)
10568 new_stmt
10569 = gimple_build_assign (make_ssa_name (vectype),
10570 VIEW_CONVERT_EXPR,
10571 build1 (VIEW_CONVERT_EXPR,
10572 vectype, new_temp));
10573 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10574 gsi);
10579 if (!costing_p)
10581 if (slp)
10583 if (slp_perm)
10584 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10585 else
10586 slp_node->push_vec_def (new_stmt);
10588 else
10590 if (j == 0)
10591 *vec_stmt = new_stmt;
10592 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10596 if (slp_perm)
10598 unsigned n_perms;
10599 if (costing_p)
10601 unsigned n_loads;
10602 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10603 true, &n_perms, &n_loads);
10604 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10605 first_stmt_info, 0, vect_body);
10607 else
10608 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10609 false, &n_perms);
10612 if (costing_p)
10614 if (n_adjacent_loads > 0)
10615 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10616 alignment_support_scheme, misalignment, false,
10617 &inside_cost, nullptr, cost_vec, cost_vec,
10618 true);
10619 if (dump_enabled_p ())
10620 dump_printf_loc (MSG_NOTE, vect_location,
10621 "vect_model_load_cost: inside_cost = %u, "
10622 "prologue_cost = 0 .\n",
10623 inside_cost);
10626 return true;
10629 if (memory_access_type == VMAT_GATHER_SCATTER
10630 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10631 grouped_load = false;
10633 if (grouped_load
10634 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10636 if (grouped_load)
10638 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10639 group_size = DR_GROUP_SIZE (first_stmt_info);
10641 else
10643 first_stmt_info = stmt_info;
10644 group_size = 1;
10646 /* For SLP vectorization we directly vectorize a subchain
10647 without permutation. */
10648 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10649 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10650 /* For BB vectorization always use the first stmt to base
10651 the data ref pointer on. */
10652 if (bb_vinfo)
10653 first_stmt_info_for_drptr
10654 = vect_find_first_scalar_stmt_in_slp (slp_node);
10656 /* Check if the chain of loads is already vectorized. */
10657 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10658 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10659 ??? But we can only do so if there is exactly one
10660 as we have no way to get at the rest. Leave the CSE
10661 opportunity alone.
10662 ??? With the group load eventually participating
10663 in multiple different permutations (having multiple
10664 slp nodes which refer to the same group) the CSE
10665 is even wrong code. See PR56270. */
10666 && !slp)
10668 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10669 return true;
10671 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10672 group_gap_adj = 0;
10674 /* VEC_NUM is the number of vect stmts to be created for this group. */
10675 if (slp)
10677 grouped_load = false;
10678 /* If an SLP permutation is from N elements to N elements,
10679 and if one vector holds a whole number of N, we can load
10680 the inputs to the permutation in the same way as an
10681 unpermuted sequence. In other cases we need to load the
10682 whole group, not only the number of vector stmts the
10683 permutation result fits in. */
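/* For example, with DR_GROUP_SIZE == 4 but only 2 SLP lanes used and no
   permutation, group_gap_adj below becomes 2, and that many elements are
   skipped after each group when bumping the data pointer. */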
10684 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10685 if (nested_in_vect_loop)
10686 /* We do not support grouped accesses in a nested loop,
10687 instead the access is contiguous but it might be
10688 permuted. No gap adjustment is needed though. */
10689 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10690 else if (slp_perm
10691 && (group_size != scalar_lanes
10692 || !multiple_p (nunits, group_size)))
10694 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10695 variable VF; see vect_transform_slp_perm_load. */
10696 unsigned int const_vf = vf.to_constant ();
10697 unsigned int const_nunits = nunits.to_constant ();
10698 vec_num = CEIL (group_size * const_vf, const_nunits);
10699 group_gap_adj = vf * group_size - nunits * vec_num;
10701 else
10703 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10704 group_gap_adj
10705 = group_size - scalar_lanes;
10708 else
10709 vec_num = group_size;
10711 ref_type = get_group_alias_ptr_type (first_stmt_info);
10713 else
10715 first_stmt_info = stmt_info;
10716 first_dr_info = dr_info;
10717 group_size = vec_num = 1;
10718 group_gap_adj = 0;
10719 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10720 if (slp)
10721 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10724 gcc_assert (alignment_support_scheme);
10725 vec_loop_masks *loop_masks
10726 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10727 ? &LOOP_VINFO_MASKS (loop_vinfo)
10728 : NULL);
10729 vec_loop_lens *loop_lens
10730 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10731 ? &LOOP_VINFO_LENS (loop_vinfo)
10732 : NULL);
10734 /* Both vect_transform_stmt and vect_analyze_stmt reach this point, but
10735 there is a difference: we cannot enable both lens and masks during the
10736 transform, whereas that is allowed during analysis.
10737 Don't go with the length-based approach if fully masked. */
10738 if (cost_vec == NULL)
10739 /* The cost_vec is NULL during transform. */
10740 gcc_assert ((!loop_lens || !loop_masks));
10742 /* Targets with load-lanes instructions must not require explicit
10743 realignment. vect_supportable_dr_alignment always returns either
10744 dr_aligned or dr_unaligned_supported for masked operations. */
10745 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10746 && !mask
10747 && !loop_masks)
10748 || alignment_support_scheme == dr_aligned
10749 || alignment_support_scheme == dr_unaligned_supported);
10751 /* In case the vectorization factor (VF) is bigger than the number
10752 of elements that we can fit in a vectype (nunits), we have to generate
10753 more than one vector stmt - i.e., we need to "unroll" the
10754 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10755 from one copy of the vector stmt to the next, in the field
10756 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10757 stages to find the correct vector defs to be used when vectorizing
10758 stmts that use the defs of the current stmt. The example below
10759 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10760 need to create 4 vectorized stmts):
10762 before vectorization:
10763 RELATED_STMT VEC_STMT
10764 S1: x = memref - -
10765 S2: z = x + 1 - -
10767 step 1: vectorize stmt S1:
10768 We first create the vector stmt VS1_0, and, as usual, record a
10769 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10770 Next, we create the vector stmt VS1_1, and record a pointer to
10771 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10772 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10773 stmts and pointers:
10774 RELATED_STMT VEC_STMT
10775 VS1_0: vx0 = memref0 VS1_1 -
10776 VS1_1: vx1 = memref1 VS1_2 -
10777 VS1_2: vx2 = memref2 VS1_3 -
10778 VS1_3: vx3 = memref3 - -
10779 S1: x = load - VS1_0
10780 S2: z = x + 1 - -
10783 /* In case of interleaving (non-unit grouped access):
10785 S1: x2 = &base + 2
10786 S2: x0 = &base
10787 S3: x1 = &base + 1
10788 S4: x3 = &base + 3
10790 Vectorized loads are created in the order of memory accesses
10791 starting from the access of the first stmt of the chain:
10793 VS1: vx0 = &base
10794 VS2: vx1 = &base + vec_size*1
10795 VS3: vx3 = &base + vec_size*2
10796 VS4: vx4 = &base + vec_size*3
10798 Then permutation statements are generated:
10800 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10801 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10804 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10805 (the order of the data-refs in the output of vect_permute_load_chain
10806 corresponds to the order of scalar stmts in the interleaving chain - see
10807 the documentation of vect_permute_load_chain()).
10808 The generation of permutation stmts and recording them in
10809 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10811 In case of both multiple types and interleaving, the vector loads and
10812 permutation stmts above are created for every copy. The result vector
10813 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10814 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10816 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10817 on a target that supports unaligned accesses (dr_unaligned_supported)
10818 we generate the following code:
10819 p = initial_addr;
10820 indx = 0;
10821 loop {
10822 p = p + indx * vectype_size;
10823 vec_dest = *(p);
10824 indx = indx + 1;
10827 Otherwise, the data reference is potentially unaligned on a target that
10828 does not support unaligned accesses (dr_explicit_realign_optimized) -
10829 then generate the following code, in which the data in each iteration is
10830 obtained by two vector loads, one from the previous iteration, and one
10831 from the current iteration:
10832 p1 = initial_addr;
10833 msq_init = *(floor(p1))
10834 p2 = initial_addr + VS - 1;
10835 realignment_token = call target_builtin;
10836 indx = 0;
10837 loop {
10838 p2 = p2 + indx * vectype_size
10839 lsq = *(floor(p2))
10840 vec_dest = realign_load (msq, lsq, realignment_token)
10841 indx = indx + 1;
10842 msq = lsq;
10843 } */
10845 /* If the misalignment remains the same throughout the execution of the
10846 loop, we can create the init_addr and permutation mask at the loop
10847 preheader. Otherwise, it needs to be created inside the loop.
10848 This can only occur when vectorizing memory accesses in the inner-loop
10849 nested within an outer-loop that is being vectorized. */
10851 if (nested_in_vect_loop
10852 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10853 GET_MODE_SIZE (TYPE_MODE (vectype))))
10855 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10856 compute_in_loop = true;
10859 bool diff_first_stmt_info
10860 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10862 tree offset = NULL_TREE;
10863 if ((alignment_support_scheme == dr_explicit_realign_optimized
10864 || alignment_support_scheme == dr_explicit_realign)
10865 && !compute_in_loop)
10867 /* If we have a different first_stmt_info, we can't set up realignment
10868 here, since we can't guarantee the first_stmt_info DR has been
10869 initialized yet; instead use the first_stmt_info_for_drptr DR, bumping it
10870 by the distance from the first_stmt_info DR, as done below. */
10871 if (!costing_p)
10873 if (!diff_first_stmt_info)
10874 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10875 &realignment_token,
10876 alignment_support_scheme, NULL_TREE,
10877 &at_loop);
10878 if (alignment_support_scheme == dr_explicit_realign_optimized)
10880 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10881 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10882 size_one_node);
10883 gcc_assert (!first_stmt_info_for_drptr);
10887 else
10888 at_loop = loop;
10890 if (!known_eq (poffset, 0))
10891 offset = (offset
10892 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10893 : size_int (poffset));
10895 tree bump;
10896 tree vec_offset = NULL_TREE;
10897 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10899 aggr_type = NULL_TREE;
10900 bump = NULL_TREE;
10902 else if (memory_access_type == VMAT_GATHER_SCATTER)
10904 aggr_type = elem_type;
10905 if (!costing_p)
10906 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10907 &bump, &vec_offset, loop_lens);
10909 else
10911 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10912 aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
10913 else
10914 aggr_type = vectype;
10915 if (!costing_p)
10916 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10917 memory_access_type, loop_lens);
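/* E.g. for VMAT_LOAD_STORE_LANES with group_size == 2 and a 4-lane vector of
   ints, AGGR_TYPE is an int[8] array covering the whole interleaved group;
   otherwise AGGR_TYPE is just the vector type and BUMP is the matching data
   pointer increment. */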
10920 auto_vec<tree> vec_offsets;
10921 auto_vec<tree> vec_masks;
10922 if (mask && !costing_p)
10924 if (slp_node)
10925 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10926 &vec_masks);
10927 else
10928 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10929 &vec_masks, mask_vectype);
10932 tree vec_mask = NULL_TREE;
10933 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10935 gcc_assert (alignment_support_scheme == dr_aligned
10936 || alignment_support_scheme == dr_unaligned_supported);
10938 unsigned int inside_cost = 0, prologue_cost = 0;
10939 /* For costing some adjacent vector loads, we'd like to cost with
10940 the total number of them once instead of costing each one by one. */
10941 unsigned int n_adjacent_loads = 0;
10942 if (slp_node)
10943 ncopies = slp_node->vec_stmts_size / group_size;
10944 for (j = 0; j < ncopies; j++)
10946 if (costing_p)
10948 /* An IFN_LOAD_LANES will load all its vector results,
10949 regardless of which ones we actually need. Account
10950 for the cost of unused results. */
10951 if (first_stmt_info == stmt_info)
10953 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10954 stmt_vec_info next_stmt_info = first_stmt_info;
10957 gaps -= 1;
10958 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10960 while (next_stmt_info);
10961 if (gaps)
10963 if (dump_enabled_p ())
10964 dump_printf_loc (MSG_NOTE, vect_location,
10965 "vect_model_load_cost: %d "
10966 "unused vectors.\n",
10967 gaps);
10968 vect_get_load_cost (vinfo, stmt_info, gaps,
10969 alignment_support_scheme,
10970 misalignment, false, &inside_cost,
10971 &prologue_cost, cost_vec, cost_vec,
10972 true);
10975 n_adjacent_loads++;
10976 continue;
10979 /* 1. Create the vector or array pointer update chain. */
10980 if (j == 0)
10981 dataref_ptr
10982 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10983 at_loop, offset, &dummy, gsi,
10984 &ptr_incr, false, bump);
10985 else
10987 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10988 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10989 stmt_info, bump);
10991 if (mask)
10992 vec_mask = vec_masks[j];
10994 tree vec_array = create_vector_array (vectype, group_size);
10996 tree final_mask = NULL_TREE;
10997 tree final_len = NULL_TREE;
10998 tree bias = NULL_TREE;
10999 if (loop_masks)
11000 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11001 ncopies, vectype, j);
11002 if (vec_mask)
11003 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
11004 vec_mask, gsi);
11006 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
11008 if (loop_lens)
11009 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11010 ncopies, vectype, j, 1);
11011 else
11012 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11013 signed char biasval
11014 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11015 bias = build_int_cst (intQI_type_node, biasval);
11016 if (!final_mask)
11018 mask_vectype = truth_type_for (vectype);
11019 final_mask = build_minus_one_cst (mask_vectype);
11023 gcall *call;
11024 if (final_len && final_mask)
11026 /* Emit:
11027 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
11028 VEC_MASK, LEN, BIAS). */
11029 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
11030 tree alias_ptr = build_int_cst (ref_type, align);
11031 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
11032 dataref_ptr, alias_ptr,
11033 final_mask, final_len, bias);
11035 else if (final_mask)
11037 /* Emit:
11038 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
11039 VEC_MASK). */
11040 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
11041 tree alias_ptr = build_int_cst (ref_type, align);
11042 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
11043 dataref_ptr, alias_ptr,
11044 final_mask);
11046 else
11048 /* Emit:
11049 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
11050 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
11051 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
11053 gimple_call_set_lhs (call, vec_array);
11054 gimple_call_set_nothrow (call, true);
11055 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
11057 if (!slp)
11058 dr_chain.create (group_size);
11059 /* Extract each vector into an SSA_NAME. */
11060 for (unsigned i = 0; i < group_size; i++)
11062 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
11063 vec_array, i);
11064 if (slp)
11065 slp_node->push_vec_def (new_temp);
11066 else
11067 dr_chain.quick_push (new_temp);
11070 if (!slp)
11071 /* Record the mapping between SSA_NAMEs and statements. */
11072 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
11074 /* Record that VEC_ARRAY is now dead. */
11075 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
11077 if (!slp)
11078 dr_chain.release ();
11080 if (!slp_node)
11081 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11084 if (costing_p)
11086 if (n_adjacent_loads > 0)
11087 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
11088 alignment_support_scheme, misalignment, false,
11089 &inside_cost, &prologue_cost, cost_vec,
11090 cost_vec, true);
11091 if (dump_enabled_p ())
11092 dump_printf_loc (MSG_NOTE, vect_location,
11093 "vect_model_load_cost: inside_cost = %u, "
11094 "prologue_cost = %u .\n",
11095 inside_cost, prologue_cost);
11098 return true;
11101 if (memory_access_type == VMAT_GATHER_SCATTER)
11103 gcc_assert (alignment_support_scheme == dr_aligned
11104 || alignment_support_scheme == dr_unaligned_supported);
11105 gcc_assert (!grouped_load && !slp_perm);
11107 unsigned int inside_cost = 0, prologue_cost = 0;
11108 for (j = 0; j < ncopies; j++)
11110 /* 1. Create the vector or array pointer update chain. */
11111 if (j == 0 && !costing_p)
11113 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11114 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
11115 slp_node, &gs_info, &dataref_ptr,
11116 &vec_offsets);
11117 else
11118 dataref_ptr
11119 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11120 at_loop, offset, &dummy, gsi,
11121 &ptr_incr, false, bump);
11123 else if (!costing_p)
11125 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11126 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11127 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11128 gsi, stmt_info, bump);
11131 gimple *new_stmt = NULL;
11132 for (i = 0; i < vec_num; i++)
11134 tree final_mask = NULL_TREE;
11135 tree final_len = NULL_TREE;
11136 tree bias = NULL_TREE;
11137 if (!costing_p)
11139 if (mask)
11140 vec_mask = vec_masks[vec_num * j + i];
11141 if (loop_masks)
11142 final_mask
11143 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11144 vec_num * ncopies, vectype,
11145 vec_num * j + i);
11146 if (vec_mask)
11147 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11148 final_mask, vec_mask, gsi);
11150 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11151 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11152 gsi, stmt_info, bump);
11155 /* 2. Create the vector-load in the loop. */
11156 unsigned HOST_WIDE_INT align;
11157 if (gs_info.ifn != IFN_LAST)
11159 if (costing_p)
11161 unsigned int cnunits = vect_nunits_for_cost (vectype);
11162 inside_cost
11163 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11164 stmt_info, 0, vect_body);
11165 continue;
11167 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11168 vec_offset = vec_offsets[vec_num * j + i];
11169 tree zero = build_zero_cst (vectype);
11170 tree scale = size_int (gs_info.scale);
11172 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
11174 if (loop_lens)
11175 final_len
11176 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11177 vec_num * ncopies, vectype,
11178 vec_num * j + i, 1);
11179 else
11180 final_len
11181 = build_int_cst (sizetype,
11182 TYPE_VECTOR_SUBPARTS (vectype));
11183 signed char biasval
11184 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11185 bias = build_int_cst (intQI_type_node, biasval);
11186 if (!final_mask)
11188 mask_vectype = truth_type_for (vectype);
11189 final_mask = build_minus_one_cst (mask_vectype);
11193 gcall *call;
11194 if (final_len && final_mask)
11195 call
11196 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
11197 dataref_ptr, vec_offset,
11198 scale, zero, final_mask,
11199 final_len, bias);
11200 else if (final_mask)
11201 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
11202 dataref_ptr, vec_offset,
11203 scale, zero, final_mask);
11204 else
11205 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
11206 dataref_ptr, vec_offset,
11207 scale, zero);
11208 gimple_call_set_nothrow (call, true);
11209 new_stmt = call;
11210 data_ref = NULL_TREE;
11212 else if (gs_info.decl)
11214 /* The builtin decls path for gather is legacy, x86 only. */
11215 gcc_assert (!final_len && nunits.is_constant ());
11216 if (costing_p)
11218 unsigned int cnunits = vect_nunits_for_cost (vectype);
11219 inside_cost
11220 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11221 stmt_info, 0, vect_body);
11222 continue;
11224 poly_uint64 offset_nunits
11225 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
11226 if (known_eq (nunits, offset_nunits))
11228 new_stmt = vect_build_one_gather_load_call
11229 (vinfo, stmt_info, gsi, &gs_info,
11230 dataref_ptr, vec_offsets[vec_num * j + i],
11231 final_mask);
11232 data_ref = NULL_TREE;
11234 else if (known_eq (nunits, offset_nunits * 2))
11236 /* We have an offset vector with half the number of
11237 lanes but the builtins will produce full vectype
11238 data with just the lower lanes filled. */
11239 new_stmt = vect_build_one_gather_load_call
11240 (vinfo, stmt_info, gsi, &gs_info,
11241 dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
11242 final_mask);
11243 tree low = make_ssa_name (vectype);
11244 gimple_set_lhs (new_stmt, low);
11245 vect_finish_stmt_generation (vinfo, stmt_info,
11246 new_stmt, gsi);
11248 /* Now put the upper half of final_mask into the low half of final_mask. */
11249 if (final_mask
11250 && !SCALAR_INT_MODE_P
11251 (TYPE_MODE (TREE_TYPE (final_mask))))
11253 int count = nunits.to_constant ();
11254 vec_perm_builder sel (count, count, 1);
11255 sel.quick_grow (count);
11256 for (int i = 0; i < count; ++i)
11257 sel[i] = i | (count / 2);
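/* E.g. for count == 8 the selector is { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the
   upper half of FINAL_MASK replicated into the low lanes. */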
11258 vec_perm_indices indices (sel, 2, count);
11259 tree perm_mask = vect_gen_perm_mask_checked
11260 (TREE_TYPE (final_mask), indices);
11261 new_stmt = gimple_build_assign (NULL_TREE,
11262 VEC_PERM_EXPR,
11263 final_mask,
11264 final_mask,
11265 perm_mask);
11266 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11267 gimple_set_lhs (new_stmt, final_mask);
11268 vect_finish_stmt_generation (vinfo, stmt_info,
11269 new_stmt, gsi);
11271 else if (final_mask)
11273 new_stmt = gimple_build_assign (NULL_TREE,
11274 VEC_UNPACK_HI_EXPR,
11275 final_mask);
11276 final_mask = make_ssa_name
11277 (truth_type_for (gs_info.offset_vectype));
11278 gimple_set_lhs (new_stmt, final_mask);
11279 vect_finish_stmt_generation (vinfo, stmt_info,
11280 new_stmt, gsi);
11283 new_stmt = vect_build_one_gather_load_call
11284 (vinfo, stmt_info, gsi, &gs_info,
11285 dataref_ptr,
11286 vec_offsets[2 * vec_num * j + 2 * i + 1],
11287 final_mask);
11288 tree high = make_ssa_name (vectype);
11289 gimple_set_lhs (new_stmt, high);
11290 vect_finish_stmt_generation (vinfo, stmt_info,
11291 new_stmt, gsi);
11293 /* compose low + high. */
11294 int count = nunits.to_constant ();
11295 vec_perm_builder sel (count, count, 1);
11296 sel.quick_grow (count);
11297 for (int i = 0; i < count; ++i)
11298 sel[i] = i < count / 2 ? i : i + count / 2;
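/* E.g. for count == 8 the selector is { 0, 1, 2, 3, 8, 9, 10, 11 }: the low
   lanes of LOW followed by the low lanes of HIGH, which hold the gathered
   data since the builtins only fill the lower lanes. */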
11299 vec_perm_indices indices (sel, 2, count);
11300 tree perm_mask
11301 = vect_gen_perm_mask_checked (vectype, indices);
11302 new_stmt = gimple_build_assign (NULL_TREE,
11303 VEC_PERM_EXPR,
11304 low, high, perm_mask);
11305 data_ref = NULL_TREE;
11307 else if (known_eq (nunits * 2, offset_nunits))
11309 /* We have an offset vector with double the number of
11310 lanes. Select the low/high part accordingly. */
11311 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11312 if ((vec_num * j + i) & 1)
11314 int count = offset_nunits.to_constant ();
11315 vec_perm_builder sel (count, count, 1);
11316 sel.quick_grow (count);
11317 for (int i = 0; i < count; ++i)
11318 sel[i] = i | (count / 2);
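/* As for the mask above: e.g. for count == 8 this selects
   { 4, 5, 6, 7, 4, 5, 6, 7 }, moving the upper half of the offset vector
   into the low lanes. */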
11319 vec_perm_indices indices (sel, 2, count);
11320 tree perm_mask = vect_gen_perm_mask_checked
11321 (TREE_TYPE (vec_offset), indices);
11322 new_stmt = gimple_build_assign (NULL_TREE,
11323 VEC_PERM_EXPR,
11324 vec_offset,
11325 vec_offset,
11326 perm_mask);
11327 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11328 gimple_set_lhs (new_stmt, vec_offset);
11329 vect_finish_stmt_generation (vinfo, stmt_info,
11330 new_stmt, gsi);
11332 new_stmt = vect_build_one_gather_load_call
11333 (vinfo, stmt_info, gsi, &gs_info,
11334 dataref_ptr, vec_offset, final_mask);
11335 data_ref = NULL_TREE;
11337 else
11338 gcc_unreachable ();
11340 else
11342 /* Emulated gather-scatter. */
11343 gcc_assert (!final_mask);
11344 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11345 if (costing_p)
11347 /* For emulated gathers N offset vector element extracts (we assume
11348 the scalar scaling and ptr + offset add is consumed by the load). */
11349 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11350 vec_to_scalar, stmt_info,
11351 slp_node, 0, vect_body);
11352 /* N scalar loads plus gathering them into a
11353 vector. */
11354 inside_cost
11355 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11356 stmt_info, 0, vect_body);
11357 inside_cost
11358 = record_stmt_cost (cost_vec, 1, vec_construct,
11359 stmt_info, slp_node, 0, vect_body);
11360 continue;
11362 unsigned HOST_WIDE_INT const_offset_nunits
11363 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11364 .to_constant ();
11365 vec<constructor_elt, va_gc> *ctor_elts;
11366 vec_alloc (ctor_elts, const_nunits);
11367 gimple_seq stmts = NULL;
11368 /* We support offset vectors with more elements
11369 than the data vector for now. */
11370 unsigned HOST_WIDE_INT factor
11371 = const_offset_nunits / const_nunits;
11372 vec_offset = vec_offsets[(vec_num * j + i) / factor];
11373 unsigned elt_offset
11374 = ((vec_num * j + i) % factor) * const_nunits;
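/* E.g. with const_offset_nunits == 8 and const_nunits == 4, factor == 2:
   data vector 0 uses offset elements 0..3 and data vector 1 uses offset
   elements 4..7 of the same offset vector. */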
11375 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11376 tree scale = size_int (gs_info.scale);
11377 align = get_object_alignment (DR_REF (first_dr_info->dr));
11378 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11379 for (unsigned k = 0; k < const_nunits; ++k)
11381 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11382 bitsize_int (k + elt_offset));
11383 tree idx
11384 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11385 vec_offset, TYPE_SIZE (idx_type), boff);
11386 idx = gimple_convert (&stmts, sizetype, idx);
11387 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11388 scale);
11389 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11390 TREE_TYPE (dataref_ptr),
11391 dataref_ptr, idx);
11392 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11393 tree elt = make_ssa_name (TREE_TYPE (vectype));
11394 tree ref = build2 (MEM_REF, ltype, ptr,
11395 build_int_cst (ref_type, 0));
11396 new_stmt = gimple_build_assign (elt, ref);
11397 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11398 gimple_seq_add_stmt (&stmts, new_stmt);
11399 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11401 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11402 new_stmt = gimple_build_assign (
11403 NULL_TREE, build_constructor (vectype, ctor_elts));
11404 data_ref = NULL_TREE;
11407 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11408 /* DATA_REF is null if we've already built the statement. */
11409 if (data_ref)
11411 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11412 new_stmt = gimple_build_assign (vec_dest, data_ref);
11414 new_temp = make_ssa_name (vec_dest, new_stmt);
11415 gimple_set_lhs (new_stmt, new_temp);
11416 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11418 /* Store vector loads in the corresponding SLP_NODE. */
11419 if (slp)
11420 slp_node->push_vec_def (new_stmt);
11423 if (!slp && !costing_p)
11424 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11427 if (!slp && !costing_p)
11428 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11430 if (costing_p && dump_enabled_p ())
11431 dump_printf_loc (MSG_NOTE, vect_location,
11432 "vect_model_load_cost: inside_cost = %u, "
11433 "prologue_cost = %u .\n",
11434 inside_cost, prologue_cost);
11435 return true;
11438 poly_uint64 group_elt = 0;
11439 unsigned int inside_cost = 0, prologue_cost = 0;
11440 /* For costing some adjacent vector loads, we'd like to cost with
11441 the total number of them once instead of costing each one by one. */
11442 unsigned int n_adjacent_loads = 0;
11443 for (j = 0; j < ncopies; j++)
11445 /* 1. Create the vector or array pointer update chain. */
11446 if (j == 0 && !costing_p)
11448 bool simd_lane_access_p
11449 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11450 if (simd_lane_access_p
11451 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11452 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11453 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11454 && integer_zerop (DR_INIT (first_dr_info->dr))
11455 && alias_sets_conflict_p (get_alias_set (aggr_type),
11456 get_alias_set (TREE_TYPE (ref_type)))
11457 && (alignment_support_scheme == dr_aligned
11458 || alignment_support_scheme == dr_unaligned_supported))
11460 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11461 dataref_offset = build_int_cst (ref_type, 0);
11463 else if (diff_first_stmt_info)
11465 dataref_ptr
11466 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11467 aggr_type, at_loop, offset, &dummy,
11468 gsi, &ptr_incr, simd_lane_access_p,
11469 bump);
11470 /* Adjust the pointer by the difference to first_stmt. */
11471 data_reference_p ptrdr
11472 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11473 tree diff
11474 = fold_convert (sizetype,
11475 size_binop (MINUS_EXPR,
11476 DR_INIT (first_dr_info->dr),
11477 DR_INIT (ptrdr)));
11478 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11479 stmt_info, diff);
11480 if (alignment_support_scheme == dr_explicit_realign)
11482 msq = vect_setup_realignment (vinfo,
11483 first_stmt_info_for_drptr, gsi,
11484 &realignment_token,
11485 alignment_support_scheme,
11486 dataref_ptr, &at_loop);
11487 gcc_assert (!compute_in_loop);
11490 else
11491 dataref_ptr
11492 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11493 at_loop,
11494 offset, &dummy, gsi, &ptr_incr,
11495 simd_lane_access_p, bump);
11497 else if (!costing_p)
11499 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11500 if (dataref_offset)
11501 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11502 bump);
11503 else
11504 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11505 stmt_info, bump);
11508 if (grouped_load || slp_perm)
11509 dr_chain.create (vec_num);
11511 gimple *new_stmt = NULL;
11512 for (i = 0; i < vec_num; i++)
11514 tree final_mask = NULL_TREE;
11515 tree final_len = NULL_TREE;
11516 tree bias = NULL_TREE;
11517 if (!costing_p)
11519 if (mask)
11520 vec_mask = vec_masks[vec_num * j + i];
11521 if (loop_masks)
11522 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11523 vec_num * ncopies, vectype,
11524 vec_num * j + i);
11525 if (vec_mask)
11526 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11527 final_mask, vec_mask, gsi);
11529 if (i > 0)
11530 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11531 gsi, stmt_info, bump);
11534 /* 2. Create the vector-load in the loop. */
11535 switch (alignment_support_scheme)
11537 case dr_aligned:
11538 case dr_unaligned_supported:
11540 if (costing_p)
11541 break;
11543 unsigned int misalign;
11544 unsigned HOST_WIDE_INT align;
11545 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11546 if (alignment_support_scheme == dr_aligned)
11547 misalign = 0;
11548 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11550 align
11551 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11552 misalign = 0;
11554 else
11555 misalign = misalignment;
11556 if (dataref_offset == NULL_TREE
11557 && TREE_CODE (dataref_ptr) == SSA_NAME)
11558 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11559 misalign);
11560 align = least_bit_hwi (misalign | align);
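/* E.g. a target alignment of 16 combined with a known misalignment of 4
   yields an effective alignment of 4 bytes for the access. */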
11562 /* Compute the IFN to use when LOOP_LENS or final_mask is valid. */
11563 machine_mode vmode = TYPE_MODE (vectype);
11564 machine_mode new_vmode = vmode;
11565 internal_fn partial_ifn = IFN_LAST;
11566 if (loop_lens)
11568 opt_machine_mode new_ovmode
11569 = get_len_load_store_mode (vmode, true, &partial_ifn);
11570 new_vmode = new_ovmode.require ();
11571 unsigned factor
11572 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11573 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11574 vec_num * ncopies, vectype,
11575 vec_num * j + i, factor);
11577 else if (final_mask)
11579 if (!can_vec_mask_load_store_p (
11580 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11581 &partial_ifn))
11582 gcc_unreachable ();
11585 if (partial_ifn == IFN_MASK_LEN_LOAD)
11587 if (!final_len)
11589 /* Pass VF value to 'len' argument of
11590 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11591 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11593 if (!final_mask)
11595 /* Pass all ones value to 'mask' argument of
11596 MASK_LEN_LOAD if final_mask is invalid. */
11597 mask_vectype = truth_type_for (vectype);
11598 final_mask = build_minus_one_cst (mask_vectype);
11601 if (final_len)
11603 signed char biasval
11604 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11606 bias = build_int_cst (intQI_type_node, biasval);
11609 if (final_len)
11611 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11612 gcall *call;
11613 if (partial_ifn == IFN_MASK_LEN_LOAD)
11614 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11615 dataref_ptr, ptr,
11616 final_mask, final_len,
11617 bias);
11618 else
11619 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11620 dataref_ptr, ptr,
11621 final_len, bias);
11622 gimple_call_set_nothrow (call, true);
11623 new_stmt = call;
11624 data_ref = NULL_TREE;
11626 /* Need conversion if it's wrapped with VnQI. */
11627 if (vmode != new_vmode)
11629 tree new_vtype = build_vector_type_for_mode (
11630 unsigned_intQI_type_node, new_vmode);
11631 tree var
11632 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11633 gimple_set_lhs (call, var);
11634 vect_finish_stmt_generation (vinfo, stmt_info, call,
11635 gsi);
11636 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11637 new_stmt = gimple_build_assign (vec_dest,
11638 VIEW_CONVERT_EXPR, op);
11641 else if (final_mask)
11643 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11644 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11645 dataref_ptr, ptr,
11646 final_mask);
11647 gimple_call_set_nothrow (call, true);
11648 new_stmt = call;
11649 data_ref = NULL_TREE;
11651 else
11653 tree ltype = vectype;
11654 tree new_vtype = NULL_TREE;
11655 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11656 unsigned int vect_align
11657 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11658 /* Try to use a single smaller load when we are about
11659 to load excess elements compared to the unrolled
11660 scalar loop. */
11661 if (known_gt ((vec_num * j + i + 1) * nunits,
11662 (group_size * vf - gap)))
11664 poly_uint64 remain = ((group_size * vf - gap)
11665 - (vec_num * j + i) * nunits);
11666 if (known_ge ((vec_num * j + i + 1) * nunits
11667 - (group_size * vf - gap), nunits))
11668 /* DR will be unused. */
11669 ltype = NULL_TREE;
11670 else if (known_ge (vect_align,
11671 tree_to_poly_uint64
11672 (TYPE_SIZE_UNIT (vectype))))
11673 /* Aligned access to excess elements is OK if
11674 at least one element is accessed in the
11675 scalar loop. */
11677 else if (known_gt (vect_align,
11678 ((nunits - remain)
11679 * vect_get_scalar_dr_size
11680 (first_dr_info))))
11681 /* Aligned access to the gap area when there's
11682 at least one element in it is OK. */
11684 else
11686 /* remain should now be > 0 and < nunits. */
11687 unsigned num;
11688 if (known_ne (remain, 0u)
11689 && constant_multiple_p (nunits, remain, &num))
11691 tree ptype;
11692 new_vtype
11693 = vector_vector_composition_type (vectype,
11694 num,
11695 &ptype);
11696 if (new_vtype)
11697 ltype = ptype;
11699 /* Else use multiple loads or a masked load? */
11700 /* For loop vectorization we should now have
11701 an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
11702 set. */
11703 if (loop_vinfo)
11704 gcc_assert (new_vtype
11705 || LOOP_VINFO_PEELING_FOR_GAPS
11706 (loop_vinfo));
11707 /* But still reduce the access size to the next
11708 required power-of-two so peeling a single
11709 scalar iteration is sufficient. */
11710 unsigned HOST_WIDE_INT cremain;
11711 if (remain.is_constant (&cremain))
11713 unsigned HOST_WIDE_INT cpart_size
11714 = 1 << ceil_log2 (cremain);
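/* E.g. cremain == 3 rounds up to cpart_size == 4, so with an 8-element
   vectype we retry the composition with num == 2 pieces of 4 elements. */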
11715 if (known_gt (nunits, cpart_size)
11716 && constant_multiple_p (nunits, cpart_size,
11717 &num))
11719 tree ptype;
11720 new_vtype
11721 = vector_vector_composition_type (vectype,
11722 num,
11723 &ptype);
11724 if (new_vtype)
11725 ltype = ptype;
11730 tree offset
11731 = (dataref_offset ? dataref_offset
11732 : build_int_cst (ref_type, 0));
11733 if (!ltype)
11735 else if (ltype != vectype
11736 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11738 poly_uint64 gap_offset
11739 = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
11740 - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
11741 tree gapcst = build_int_cstu (ref_type, gap_offset);
11742 offset = size_binop (PLUS_EXPR, offset, gapcst);
11744 if (ltype)
11746 data_ref
11747 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11748 if (alignment_support_scheme == dr_aligned)
11750 else
11751 TREE_TYPE (data_ref)
11752 = build_aligned_type (TREE_TYPE (data_ref),
11753 align * BITS_PER_UNIT);
11755 if (!ltype)
11756 data_ref = build_constructor (vectype, NULL);
11757 else if (ltype != vectype)
11759 vect_copy_ref_info (data_ref,
11760 DR_REF (first_dr_info->dr));
11761 tree tem = make_ssa_name (ltype);
11762 new_stmt = gimple_build_assign (tem, data_ref);
11763 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11764 gsi);
11765 data_ref = NULL;
11766 vec<constructor_elt, va_gc> *v;
11767 /* We've computed 'num' above to be statically two
11768 or via constant_multiple_p. */
11769 unsigned num
11770 = (exact_div (tree_to_poly_uint64
11771 (TYPE_SIZE_UNIT (vectype)),
11772 tree_to_poly_uint64
11773 (TYPE_SIZE_UNIT (ltype)))
11774 .to_constant ());
11775 vec_alloc (v, num);
11776 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11778 while (--num)
11779 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11780 build_zero_cst (ltype));
11781 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11783 else
11785 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11786 while (--num)
11787 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11788 build_zero_cst (ltype));
11790 gcc_assert (new_vtype != NULL_TREE);
11791 if (new_vtype == vectype)
11792 new_stmt = gimple_build_assign (
11793 vec_dest, build_constructor (vectype, v));
11794 else
11796 tree new_vname = make_ssa_name (new_vtype);
11797 new_stmt = gimple_build_assign (
11798 new_vname, build_constructor (new_vtype, v));
11799 vect_finish_stmt_generation (vinfo, stmt_info,
11800 new_stmt, gsi);
11801 new_stmt = gimple_build_assign (
11802 vec_dest,
11803 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11807 break;
11809 case dr_explicit_realign:
11811 if (costing_p)
11812 break;
11813 tree ptr, bump;
11815 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11817 if (compute_in_loop)
11818 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11819 &realignment_token,
11820 dr_explicit_realign,
11821 dataref_ptr, NULL);
11823 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11824 ptr = copy_ssa_name (dataref_ptr);
11825 else
11826 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11827 // For explicit realign the target alignment should be
11828 // known at compile time.
11829 unsigned HOST_WIDE_INT align
11830 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11831 new_stmt = gimple_build_assign (
11832 ptr, BIT_AND_EXPR, dataref_ptr,
11833 build_int_cst (TREE_TYPE (dataref_ptr),
11834 -(HOST_WIDE_INT) align));
11835 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11836 data_ref
11837 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11838 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11839 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11840 new_stmt = gimple_build_assign (vec_dest, data_ref);
11841 new_temp = make_ssa_name (vec_dest, new_stmt);
11842 gimple_assign_set_lhs (new_stmt, new_temp);
11843 gimple_move_vops (new_stmt, stmt_info->stmt);
11844 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11845 msq = new_temp;
11847 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11848 bump = size_binop (MINUS_EXPR, bump, size_one_node);
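/* Advance by VS elements minus one byte so that flooring the bumped pointer
   to the target alignment yields the second (LSQ) chunk, matching the
   p2 = initial_addr + VS - 1 scheme described above. */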
11849 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11850 bump);
11851 new_stmt = gimple_build_assign (
11852 NULL_TREE, BIT_AND_EXPR, ptr,
11853 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11854 if (TREE_CODE (ptr) == SSA_NAME)
11855 ptr = copy_ssa_name (ptr, new_stmt);
11856 else
11857 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11858 gimple_assign_set_lhs (new_stmt, ptr);
11859 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11860 data_ref
11861 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11862 break;
11864 case dr_explicit_realign_optimized:
11866 if (costing_p)
11867 break;
11868 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11869 new_temp = copy_ssa_name (dataref_ptr);
11870 else
11871 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11872 // We should only be doing this if we know the target
11873 // alignment at compile time.
11874 unsigned HOST_WIDE_INT align
11875 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11876 new_stmt = gimple_build_assign (
11877 new_temp, BIT_AND_EXPR, dataref_ptr,
11878 build_int_cst (TREE_TYPE (dataref_ptr),
11879 -(HOST_WIDE_INT) align));
11880 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11881 data_ref = build2 (MEM_REF, vectype, new_temp,
11882 build_int_cst (ref_type, 0));
11883 break;
11885 default:
11886 gcc_unreachable ();
11889 /* One common place to cost the vector load above for the different
11890 alignment support schemes. */
11891 if (costing_p)
11893 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load, we
11894 only need to take care of the first stmt, whose
11895 stmt_info is first_stmt_info; iterating vec_num on it
11896 covers the cost of the remaining ones, consistent
11897 with the transform. For the realign prologue cost,
11898 we only need to count it once for the whole group. */
11899 bool first_stmt_info_p = first_stmt_info == stmt_info;
11900 bool add_realign_cost = first_stmt_info_p && i == 0;
11901 if (memory_access_type == VMAT_CONTIGUOUS
11902 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11903 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11904 && (!grouped_load || first_stmt_info_p)))
11906 /* Leave realign cases alone to keep them simple. */
11907 if (alignment_support_scheme == dr_explicit_realign_optimized
11908 || alignment_support_scheme == dr_explicit_realign)
11909 vect_get_load_cost (vinfo, stmt_info, 1,
11910 alignment_support_scheme, misalignment,
11911 add_realign_cost, &inside_cost,
11912 &prologue_cost, cost_vec, cost_vec,
11913 true);
11914 else
11915 n_adjacent_loads++;
11918 else
11920 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11921 /* DATA_REF is null if we've already built the statement. */
11922 if (data_ref)
11924 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11925 new_stmt = gimple_build_assign (vec_dest, data_ref);
11927 new_temp = make_ssa_name (vec_dest, new_stmt);
11928 gimple_set_lhs (new_stmt, new_temp);
11929 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11932 /* 3. Handle explicit realignment if necessary/supported.
11933 Create in loop:
11934 vec_dest = realign_load (msq, lsq, realignment_token) */
11935 if (!costing_p
11936 && (alignment_support_scheme == dr_explicit_realign_optimized
11937 || alignment_support_scheme == dr_explicit_realign))
11939 lsq = gimple_assign_lhs (new_stmt);
11940 if (!realignment_token)
11941 realignment_token = dataref_ptr;
11942 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11943 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11944 lsq, realignment_token);
11945 new_temp = make_ssa_name (vec_dest, new_stmt);
11946 gimple_assign_set_lhs (new_stmt, new_temp);
11947 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11949 if (alignment_support_scheme == dr_explicit_realign_optimized)
11951 gcc_assert (phi);
11952 if (i == vec_num - 1 && j == ncopies - 1)
11953 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11954 UNKNOWN_LOCATION);
11955 msq = lsq;
11959 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11961 if (costing_p)
11962 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11963 stmt_info, 0, vect_body);
11964 else
11966 tree perm_mask = perm_mask_for_reverse (vectype);
11967 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11968 perm_mask, stmt_info, gsi);
11969 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11973 /* Collect vector loads and later create their permutation in
11974 vect_transform_grouped_load (). */
11975 if (!costing_p && (grouped_load || slp_perm))
11976 dr_chain.quick_push (new_temp);
11978 /* Store vector loads in the corresponding SLP_NODE. */
11979 if (!costing_p && slp && !slp_perm)
11980 slp_node->push_vec_def (new_stmt);
11982 /* With an SLP permutation we load the gaps as well; without
11983 one we need to skip the gaps after we manage to fully load
11984 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11985 group_elt += nunits;
11986 if (!costing_p
11987 && maybe_ne (group_gap_adj, 0U)
11988 && !slp_perm
11989 && known_eq (group_elt, group_size - group_gap_adj))
11991 poly_wide_int bump_val
11992 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11993 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11994 == -1)
11995 bump_val = -bump_val;
11996 tree bump = wide_int_to_tree (sizetype, bump_val);
11997 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11998 stmt_info, bump);
11999 group_elt = 0;
12002 /* Bump the vector pointer to account for a gap or for excess
12003 elements loaded for a permuted SLP load. */
12004 if (!costing_p
12005 && maybe_ne (group_gap_adj, 0U)
12006 && slp_perm)
12008 poly_wide_int bump_val
12009 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
12010 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
12011 bump_val = -bump_val;
12012 tree bump = wide_int_to_tree (sizetype, bump_val);
12013 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
12014 stmt_info, bump);
12017 if (slp && !slp_perm)
12018 continue;
12020 if (slp_perm)
12022 unsigned n_perms;
12023 /* For SLP we know we've seen all possible uses of dr_chain so
12024 direct vect_transform_slp_perm_load to DCE the unused parts.
12025 ??? This is a hack to prevent compile-time issues as seen
12026 in PR101120 and friends. */
12027 if (costing_p)
12029 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
12030 true, &n_perms, nullptr);
12031 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
12032 stmt_info, 0, vect_body);
12034 else
12036 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
12037 gsi, vf, false, &n_perms,
12038 nullptr, true);
12039 gcc_assert (ok);
12042 else
12044 if (grouped_load)
12046 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
12047 /* We assume that the cost of a single load-lanes instruction
12048 is equivalent to the cost of DR_GROUP_SIZE separate loads.
12049 If a grouped access is instead being provided by a
12050 load-and-permute operation, include the cost of the
12051 permutes. */
12052 if (costing_p && first_stmt_info == stmt_info)
12054 /* Uses even and odd extract operations or shuffle
12055 operations for each needed permute. */
12056 int group_size = DR_GROUP_SIZE (first_stmt_info);
12057 int nstmts = ceil_log2 (group_size) * group_size;
12058 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
12059 stmt_info, 0, vect_body);
12061 if (dump_enabled_p ())
12062 dump_printf_loc (MSG_NOTE, vect_location,
12063 "vect_model_load_cost:"
12064 "strided group_size = %d .\n",
12065 group_size);
12067 else if (!costing_p)
12069 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
12070 group_size, gsi);
12071 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12074 else if (!costing_p)
12075 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12077 dr_chain.release ();
12079 if (!slp && !costing_p)
12080 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12082 if (costing_p)
12084 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
12085 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
12086 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
12087 if (n_adjacent_loads > 0)
12088 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
12089 alignment_support_scheme, misalignment, false,
12090 &inside_cost, &prologue_cost, cost_vec, cost_vec,
12091 true);
12092 if (dump_enabled_p ())
12093 dump_printf_loc (MSG_NOTE, vect_location,
12094 "vect_model_load_cost: inside_cost = %u, "
12095 "prologue_cost = %u .\n",
12096 inside_cost, prologue_cost);
12099 return true;
12102 /* Function vect_is_simple_cond.
12104 Input:
12105 LOOP - the loop that is being vectorized.
12106 COND - Condition that is checked for simple use.
12108 Output:
12109 *COMP_VECTYPE - the vector type for the comparison.
12110 *DTS - The def types for the arguments of the comparison
12112 Returns whether a COND can be vectorized. Checks whether
12113 condition operands are supportable using vect_is_simple_use. */
12115 static bool
12116 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
12117 slp_tree slp_node, tree *comp_vectype,
12118 enum vect_def_type *dts, tree vectype)
12120 tree lhs, rhs;
12121 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12122 slp_tree slp_op;
12124 /* Mask case. */
12125 if (TREE_CODE (cond) == SSA_NAME
12126 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
12128 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
12129 &slp_op, &dts[0], comp_vectype)
12130 || !*comp_vectype
12131 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
12132 return false;
12133 return true;
12136 if (!COMPARISON_CLASS_P (cond))
12137 return false;
12139 lhs = TREE_OPERAND (cond, 0);
12140 rhs = TREE_OPERAND (cond, 1);
12142 if (TREE_CODE (lhs) == SSA_NAME)
12144 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
12145 &lhs, &slp_op, &dts[0], &vectype1))
12146 return false;
12148 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
12149 || TREE_CODE (lhs) == FIXED_CST)
12150 dts[0] = vect_constant_def;
12151 else
12152 return false;
12154 if (TREE_CODE (rhs) == SSA_NAME)
12156 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
12157 &rhs, &slp_op, &dts[1], &vectype2))
12158 return false;
12160 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
12161 || TREE_CODE (rhs) == FIXED_CST)
12162 dts[1] = vect_constant_def;
12163 else
12164 return false;
12166 if (vectype1 && vectype2
12167 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12168 TYPE_VECTOR_SUBPARTS (vectype2)))
12169 return false;
12171 *comp_vectype = vectype1 ? vectype1 : vectype2;
12172 /* Invariant comparison. */
12173 if (! *comp_vectype)
12175 tree scalar_type = TREE_TYPE (lhs);
12176 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12177 *comp_vectype = truth_type_for (vectype);
12178 else
12180 /* If we can widen the comparison to match vectype do so. */
12181 if (INTEGRAL_TYPE_P (scalar_type)
12182 && !slp_node
12183 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
12184 TYPE_SIZE (TREE_TYPE (vectype))))
12185 scalar_type = build_nonstandard_integer_type
12186 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
12187 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12188 slp_node);
12192 return true;
12195 /* vectorizable_condition.
12197 Check if STMT_INFO is a conditional modify expression that can be vectorized.
12198 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12199 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
12200 at GSI.
12202 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
12204 Return true if STMT_INFO is vectorizable in this way. */
12206 static bool
12207 vectorizable_condition (vec_info *vinfo,
12208 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12209 gimple **vec_stmt,
12210 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12212 tree scalar_dest = NULL_TREE;
12213 tree vec_dest = NULL_TREE;
12214 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
12215 tree then_clause, else_clause;
12216 tree comp_vectype = NULL_TREE;
12217 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
12218 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
12219 tree vec_compare;
12220 tree new_temp;
12221 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12222 enum vect_def_type dts[4]
12223 = {vect_unknown_def_type, vect_unknown_def_type,
12224 vect_unknown_def_type, vect_unknown_def_type};
12225 int ndts = 4;
12226 int ncopies;
12227 int vec_num;
12228 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12229 int i;
12230 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12231 vec<tree> vec_oprnds0 = vNULL;
12232 vec<tree> vec_oprnds1 = vNULL;
12233 vec<tree> vec_oprnds2 = vNULL;
12234 vec<tree> vec_oprnds3 = vNULL;
12235 tree vec_cmp_type;
12236 bool masked = false;
12238 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12239 return false;
12241 /* Is this a vectorizable conditional operation? */
12242 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12243 if (!stmt)
12244 return false;
12246 code = gimple_assign_rhs_code (stmt);
12247 if (code != COND_EXPR)
12248 return false;
12250 stmt_vec_info reduc_info = NULL;
12251 int reduc_index = -1;
12252 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
12253 bool for_reduction
12254 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
12255 if (for_reduction)
12257 if (slp_node && SLP_TREE_LANES (slp_node) > 1)
12258 return false;
12259 reduc_info = info_for_reduction (vinfo, stmt_info);
12260 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
12261 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
12262 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
12263 || reduc_index != -1);
12265 else
12267 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12268 return false;
12271 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12272 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12274 if (slp_node)
12276 ncopies = 1;
12277 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
12279 else
12281 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12282 vec_num = 1;
12285 gcc_assert (ncopies >= 1);
12286 if (for_reduction && ncopies > 1)
12287 return false; /* FORNOW */
12289 cond_expr = gimple_assign_rhs1 (stmt);
12291 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
12292 &comp_vectype, &dts[0], vectype)
12293 || !comp_vectype)
12294 return false;
12296 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12297 slp_tree then_slp_node, else_slp_node;
12298 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
12299 &then_clause, &then_slp_node, &dts[2], &vectype1))
12300 return false;
12301 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
12302 &else_clause, &else_slp_node, &dts[3], &vectype2))
12303 return false;
12305 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12306 return false;
12308 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12309 return false;
12311 masked = !COMPARISON_CLASS_P (cond_expr);
12312 vec_cmp_type = truth_type_for (comp_vectype);
12314 if (vec_cmp_type == NULL_TREE)
12315 return false;
12317 cond_code = TREE_CODE (cond_expr);
12318 if (!masked)
12320 cond_expr0 = TREE_OPERAND (cond_expr, 0);
12321 cond_expr1 = TREE_OPERAND (cond_expr, 1);
12324 /* For conditional reductions, the "then" value needs to be the candidate
12325 value calculated by this iteration while the "else" value needs to be
12326 the result carried over from previous iterations. If the COND_EXPR
12327 is the other way around, we need to swap it. */
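/* For example (illustrative only): for a conditional reduction written as

     res = a[i] < b[i] ? res : a[i];

   the carried value RES sits in the "then" arm, so we invert the comparison
   (or negate the mask) and swap the arms to obtain the canonical form
   res = !(a[i] < b[i]) ? a[i] : res. */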
12328 bool must_invert_cmp_result = false;
12329 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12331 if (masked)
12332 must_invert_cmp_result = true;
12333 else
12335 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12336 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12337 if (new_code == ERROR_MARK)
12338 must_invert_cmp_result = true;
12339 else
12341 cond_code = new_code;
12342 /* Make sure we don't accidentally use the old condition. */
12343 cond_expr = NULL_TREE;
12346 /* ??? The vectorized operand query below doesn't allow swapping
12347 this way for SLP. */
12348 if (slp_node)
12349 return false;
12350 std::swap (then_clause, else_clause);
12353 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12355 /* Boolean values may have another representation in vectors
12356 and therefore we prefer bit operations over comparison for
12357 them (which also works for scalar masks). We store opcodes
12358 to use in bitop1 and bitop2. Statement is vectorized as
12359 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12360 depending on bitop1 and bitop2 arity. */
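/* Illustrative mapping for boolean operands A and B (true == 1):
     A >  B  ->  A & ~B    (bitop1 = BIT_NOT_EXPR on rhs2, bitop2 = BIT_AND_EXPR)
     A == B  ->  ~(A ^ B)  (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)
   LT_EXPR and LE_EXPR reuse the GT_EXPR/GE_EXPR mappings with the
   operands swapped. */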
12361 switch (cond_code)
12363 case GT_EXPR:
12364 bitop1 = BIT_NOT_EXPR;
12365 bitop2 = BIT_AND_EXPR;
12366 break;
12367 case GE_EXPR:
12368 bitop1 = BIT_NOT_EXPR;
12369 bitop2 = BIT_IOR_EXPR;
12370 break;
12371 case LT_EXPR:
12372 bitop1 = BIT_NOT_EXPR;
12373 bitop2 = BIT_AND_EXPR;
12374 std::swap (cond_expr0, cond_expr1);
12375 break;
12376 case LE_EXPR:
12377 bitop1 = BIT_NOT_EXPR;
12378 bitop2 = BIT_IOR_EXPR;
12379 std::swap (cond_expr0, cond_expr1);
12380 break;
12381 case NE_EXPR:
12382 bitop1 = BIT_XOR_EXPR;
12383 break;
12384 case EQ_EXPR:
12385 bitop1 = BIT_XOR_EXPR;
12386 bitop2 = BIT_NOT_EXPR;
12387 break;
12388 default:
12389 return false;
12391 cond_code = SSA_NAME;
12394 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12395 && reduction_type == EXTRACT_LAST_REDUCTION
12396 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12398 if (dump_enabled_p ())
12399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12400 "reduction comparison operation not supported.\n");
12401 return false;
12404 if (!vec_stmt)
12406 if (bitop1 != NOP_EXPR)
12408 machine_mode mode = TYPE_MODE (comp_vectype);
12409 optab optab;
12411 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12412 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12413 return false;
12415 if (bitop2 != NOP_EXPR)
12417 optab = optab_for_tree_code (bitop2, comp_vectype,
12418 optab_default);
12419 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12420 return false;
12424 vect_cost_for_stmt kind = vector_stmt;
12425 if (reduction_type == EXTRACT_LAST_REDUCTION)
12426 /* Count one reduction-like operation per vector. */
12427 kind = vec_to_scalar;
12428 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12429 && (masked
12430 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12431 cond_code)
12432 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12433 ERROR_MARK))))
12434 return false;
12436 if (slp_node
12437 && (!vect_maybe_update_slp_op_vectype
12438 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12439 || (op_adjust == 1
12440 && !vect_maybe_update_slp_op_vectype
12441 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12442 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12443 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12445 if (dump_enabled_p ())
12446 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12447 "incompatible vector types for invariants\n");
12448 return false;
12451 if (loop_vinfo && for_reduction
12452 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12454 if (reduction_type == EXTRACT_LAST_REDUCTION)
12456 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12457 vectype, OPTIMIZE_FOR_SPEED))
12458 vect_record_loop_len (loop_vinfo,
12459 &LOOP_VINFO_LENS (loop_vinfo),
12460 ncopies * vec_num, vectype, 1);
12461 else
12462 vect_record_loop_mask (loop_vinfo,
12463 &LOOP_VINFO_MASKS (loop_vinfo),
12464 ncopies * vec_num, vectype, NULL);
12466 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12467 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12469 if (dump_enabled_p ())
12470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12471 "conditional reduction prevents the use"
12472 " of partial vectors.\n");
12473 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12477 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12478 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12479 cost_vec, kind);
12480 return true;
12483 /* Transform. */
12485 /* Handle def. */
12486 scalar_dest = gimple_assign_lhs (stmt);
12487 if (reduction_type != EXTRACT_LAST_REDUCTION)
12488 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12490 bool swap_cond_operands = false;
12492 /* See whether another part of the vectorized code applies a loop
12493 mask to the condition, or to its inverse. */
12495 vec_loop_masks *masks = NULL;
12496 vec_loop_lens *lens = NULL;
12497 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12499 if (reduction_type == EXTRACT_LAST_REDUCTION)
12500 lens = &LOOP_VINFO_LENS (loop_vinfo);
12502 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12504 if (reduction_type == EXTRACT_LAST_REDUCTION)
12505 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12506 else
12508 scalar_cond_masked_key cond (cond_expr, ncopies);
12509 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12510 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12511 else
12513 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12514 tree_code orig_code = cond.code;
12515 cond.code = invert_tree_comparison (cond.code, honor_nans);
12516 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12518 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12519 cond_code = cond.code;
12520 swap_cond_operands = true;
12522 else
12524 /* Try the inverse of the current mask. We check if the
12525 inverse mask is live and if so we generate a negate of
12526 the current mask such that we still honor NaNs. */
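/* In particular, when NaNs are honored ~(a < b) is not equivalent to
   a >= b (both comparisons are false for unordered operands), so a
   BIT_NOT of the existing mask is the only safe way to reuse it. */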
12527 cond.inverted_p = true;
12528 cond.code = orig_code;
12529 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12531 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12532 cond_code = cond.code;
12533 swap_cond_operands = true;
12534 must_invert_cmp_result = true;
12541 /* Handle cond expr. */
12542 if (masked)
12543 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12544 cond_expr, comp_vectype, &vec_oprnds0,
12545 then_clause, vectype, &vec_oprnds2,
12546 reduction_type != EXTRACT_LAST_REDUCTION
12547 ? else_clause : NULL, vectype, &vec_oprnds3);
12548 else
12549 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12550 cond_expr0, comp_vectype, &vec_oprnds0,
12551 cond_expr1, comp_vectype, &vec_oprnds1,
12552 then_clause, vectype, &vec_oprnds2,
12553 reduction_type != EXTRACT_LAST_REDUCTION
12554 ? else_clause : NULL, vectype, &vec_oprnds3);
12556 if (reduction_type == EXTRACT_LAST_REDUCTION)
12557 vec_else_clause = else_clause;
12559 /* Arguments are ready. Create the new vector stmt. */
12560 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12562 vec_then_clause = vec_oprnds2[i];
12563 if (reduction_type != EXTRACT_LAST_REDUCTION)
12564 vec_else_clause = vec_oprnds3[i];
12566 if (swap_cond_operands)
12567 std::swap (vec_then_clause, vec_else_clause);
12569 if (masked)
12570 vec_compare = vec_cond_lhs;
12571 else
12573 vec_cond_rhs = vec_oprnds1[i];
12574 if (bitop1 == NOP_EXPR)
12576 gimple_seq stmts = NULL;
12577 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12578 vec_cond_lhs, vec_cond_rhs);
12579 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12581 else
12583 new_temp = make_ssa_name (vec_cmp_type);
12584 gassign *new_stmt;
12585 if (bitop1 == BIT_NOT_EXPR)
12586 new_stmt = gimple_build_assign (new_temp, bitop1,
12587 vec_cond_rhs);
12588 else
12589 new_stmt
12590 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12591 vec_cond_rhs);
12592 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12593 if (bitop2 == NOP_EXPR)
12594 vec_compare = new_temp;
12595 else if (bitop2 == BIT_NOT_EXPR
12596 && reduction_type != EXTRACT_LAST_REDUCTION)
12598 /* Instead of doing ~x ? y : z do x ? z : y. */
12599 vec_compare = new_temp;
12600 std::swap (vec_then_clause, vec_else_clause);
12602 else
12604 vec_compare = make_ssa_name (vec_cmp_type);
12605 if (bitop2 == BIT_NOT_EXPR)
12606 new_stmt
12607 = gimple_build_assign (vec_compare, bitop2, new_temp);
12608 else
12609 new_stmt
12610 = gimple_build_assign (vec_compare, bitop2,
12611 vec_cond_lhs, new_temp);
12612 vect_finish_stmt_generation (vinfo, stmt_info,
12613 new_stmt, gsi);
12618 /* If we decided to apply a loop mask to the result of the vector
12619 comparison, AND the comparison with the mask now. Later passes
12620 should then be able to reuse the AND results between multiple
12621 vector statements.
12623 For example:
12624 for (int i = 0; i < 100; ++i)
12625 x[i] = y[i] ? z[i] : 10;
12627 results in following optimized GIMPLE:
12629 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12630 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12631 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12632 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12633 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12634 vect_iftmp.11_47, { 10, ... }>;
12636 instead of using masked and unmasked forms of
12637 vec != { 0, ... } (masked in the MASK_LOAD,
12638 unmasked in the VEC_COND_EXPR). */
12640 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12641 in cases where that's necessary. */
12643 tree len = NULL_TREE, bias = NULL_TREE;
12644 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12646 if (!is_gimple_val (vec_compare))
12648 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12649 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12650 vec_compare);
12651 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12652 vec_compare = vec_compare_name;
12655 if (must_invert_cmp_result)
12657 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12658 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12659 BIT_NOT_EXPR,
12660 vec_compare);
12661 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12662 vec_compare = vec_compare_name;
12665 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12666 vectype, OPTIMIZE_FOR_SPEED))
12668 if (lens)
12670 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12671 vec_num * ncopies, vectype, i, 1);
12672 signed char biasval
12673 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12674 bias = build_int_cst (intQI_type_node, biasval);
12676 else
12678 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12679 bias = build_int_cst (intQI_type_node, 0);
12682 if (masks)
12684 tree loop_mask
12685 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12686 vectype, i);
12687 tree tmp2 = make_ssa_name (vec_cmp_type);
12688 gassign *g
12689 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12690 loop_mask);
12691 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12692 vec_compare = tmp2;
12696 gimple *new_stmt;
12697 if (reduction_type == EXTRACT_LAST_REDUCTION)
12699 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12700 tree lhs = gimple_get_lhs (old_stmt);
12701 if ((unsigned)i != vec_oprnds0.length () - 1)
12702 lhs = copy_ssa_name (lhs);
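/* Roughly, .FOLD_EXTRACT_LAST (ELSE, MASK, VEC) yields the element of
   VEC in the last lane where MASK is true, or ELSE if no lane is
   active; the LEN variant only considers the first LEN + BIAS lanes. */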
12703 if (len)
12704 new_stmt = gimple_build_call_internal
12705 (IFN_LEN_FOLD_EXTRACT_LAST, 5, vec_else_clause, vec_compare,
12706 vec_then_clause, len, bias);
12707 else
12708 new_stmt = gimple_build_call_internal
12709 (IFN_FOLD_EXTRACT_LAST, 3, vec_else_clause, vec_compare,
12710 vec_then_clause);
12711 gimple_call_set_lhs (new_stmt, lhs);
12712 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12713 if ((unsigned)i != vec_oprnds0.length () - 1)
12715 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12716 vec_else_clause = lhs;
12718 else if (old_stmt == gsi_stmt (*gsi))
12719 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12720 else
12722 /* In this case we're moving the definition to later in the
12723 block. That doesn't matter because the only uses of the
12724 lhs are in phi statements. */
12725 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12726 gsi_remove (&old_gsi, true);
12727 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12730 else
12732 new_temp = make_ssa_name (vec_dest);
12733 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12734 vec_then_clause, vec_else_clause);
12735 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12737 if (slp_node)
12738 slp_node->push_vec_def (new_stmt);
12739 else
12740 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12743 if (!slp_node)
12744 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12746 vec_oprnds0.release ();
12747 vec_oprnds1.release ();
12748 vec_oprnds2.release ();
12749 vec_oprnds3.release ();
12751 return true;
12754 /* Helper of vectorizable_comparison.
12756 Check if STMT_INFO is comparison expression CODE that can be vectorized.
12757 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12758 comparison, put it in VEC_STMT, and insert it at GSI.
12760 Return true if STMT_INFO is vectorizable in this way. */
12762 static bool
12763 vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12764 stmt_vec_info stmt_info, tree_code code,
12765 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12766 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12768 tree lhs, rhs1, rhs2;
12769 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12770 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12771 tree new_temp;
12772 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12773 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12774 int ndts = 2;
12775 poly_uint64 nunits;
12776 int ncopies;
12777 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12778 int i;
12779 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12780 vec<tree> vec_oprnds0 = vNULL;
12781 vec<tree> vec_oprnds1 = vNULL;
12782 tree mask_type;
12783 tree mask = NULL_TREE;
12785 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12786 return false;
12788 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12789 return false;
12791 mask_type = vectype;
12792 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12794 if (slp_node)
12795 ncopies = 1;
12796 else
12797 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12799 gcc_assert (ncopies >= 1);
12801 if (TREE_CODE_CLASS (code) != tcc_comparison)
12802 return false;
12804 slp_tree slp_rhs1, slp_rhs2;
12805 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12806 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12807 return false;
12809 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12810 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12811 return false;
12813 if (vectype1 && vectype2
12814 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12815 TYPE_VECTOR_SUBPARTS (vectype2)))
12816 return false;
12818 vectype = vectype1 ? vectype1 : vectype2;
12820 /* Invariant comparison. */
12821 if (!vectype)
12823 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node);
12824 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12825 return false;
12827 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12828 return false;
12830 /* Can't compare mask and non-mask types. */
12831 if (vectype1 && vectype2
12832 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12833 return false;
12835 /* Boolean values may have another representation in vectors
12836 and therefore we prefer bit operations over comparison for
12837 them (which also works for scalar masks). We store opcodes
12838 to use in bitop1 and bitop2. Statement is vectorized as
12839 BITOP2 (rhs1 BITOP1 rhs2) or
12840 rhs1 BITOP2 (BITOP1 rhs2)
12841 depending on bitop1 and bitop2 arity. */
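/* E.g. for boolean A and B this turns A >= B into A | ~B and A != B
   into A ^ B (see the equivalent mapping in vectorizable_condition). */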
12842 bool swap_p = false;
12843 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12845 if (code == GT_EXPR)
12847 bitop1 = BIT_NOT_EXPR;
12848 bitop2 = BIT_AND_EXPR;
12850 else if (code == GE_EXPR)
12852 bitop1 = BIT_NOT_EXPR;
12853 bitop2 = BIT_IOR_EXPR;
12855 else if (code == LT_EXPR)
12857 bitop1 = BIT_NOT_EXPR;
12858 bitop2 = BIT_AND_EXPR;
12859 swap_p = true;
12861 else if (code == LE_EXPR)
12863 bitop1 = BIT_NOT_EXPR;
12864 bitop2 = BIT_IOR_EXPR;
12865 swap_p = true;
12867 else
12869 bitop1 = BIT_XOR_EXPR;
12870 if (code == EQ_EXPR)
12871 bitop2 = BIT_NOT_EXPR;
12875 if (!vec_stmt)
12877 if (bitop1 == NOP_EXPR)
12879 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12880 return false;
12882 else
12884 machine_mode mode = TYPE_MODE (vectype);
12885 optab optab;
12887 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12888 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12889 return false;
12891 if (bitop2 != NOP_EXPR)
12893 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12894 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12895 return false;
12899 /* Put types on constant and invariant SLP children. */
12900 if (slp_node
12901 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12902 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12904 if (dump_enabled_p ())
12905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12906 "incompatible vector types for invariants\n");
12907 return false;
12910 vect_model_simple_cost (vinfo, stmt_info,
12911 ncopies * (1 + (bitop2 != NOP_EXPR)),
12912 dts, ndts, slp_node, cost_vec);
12913 return true;
12916 /* Transform. */
12918 /* Handle def. */
12919 lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));
12920 if (lhs)
12921 mask = vect_create_destination_var (lhs, mask_type);
12923 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12924 rhs1, vectype, &vec_oprnds0,
12925 rhs2, vectype, &vec_oprnds1);
12926 if (swap_p)
12927 std::swap (vec_oprnds0, vec_oprnds1);
12929 /* Arguments are ready. Create the new vector stmt. */
12930 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12932 gimple *new_stmt;
12933 vec_rhs2 = vec_oprnds1[i];
12935 if (lhs)
12936 new_temp = make_ssa_name (mask);
12937 else
12938 new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
12939 if (bitop1 == NOP_EXPR)
12941 new_stmt = gimple_build_assign (new_temp, code,
12942 vec_rhs1, vec_rhs2);
12943 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12945 else
12947 if (bitop1 == BIT_NOT_EXPR)
12948 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12949 else
12950 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12951 vec_rhs2);
12952 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12953 if (bitop2 != NOP_EXPR)
12955 tree res = make_ssa_name (mask);
12956 if (bitop2 == BIT_NOT_EXPR)
12957 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12958 else
12959 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12960 new_temp);
12961 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12964 if (slp_node)
12965 slp_node->push_vec_def (new_stmt);
12966 else
12967 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12970 if (!slp_node)
12971 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12973 vec_oprnds0.release ();
12974 vec_oprnds1.release ();
12976 return true;
12979 /* vectorizable_comparison.
12981 Check if STMT_INFO is comparison expression that can be vectorized.
12982 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12983 comparison, put it in VEC_STMT, and insert it at GSI.
12985 Return true if STMT_INFO is vectorizable in this way. */
12987 static bool
12988 vectorizable_comparison (vec_info *vinfo,
12989 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12990 gimple **vec_stmt,
12991 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12993 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12995 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12996 return false;
12998 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12999 return false;
13001 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
13002 if (!stmt)
13003 return false;
13005 enum tree_code code = gimple_assign_rhs_code (stmt);
13006 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
13007 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
13008 vec_stmt, slp_node, cost_vec))
13009 return false;
13011 if (!vec_stmt)
13012 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
13014 return true;
13017 /* Check to see if the current early break given in STMT_INFO is valid for
13018 vectorization. */
13020 bool
13021 vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
13022 gimple_stmt_iterator *gsi, gimple **vec_stmt,
13023 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
13025 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
13026 if (!loop_vinfo
13027 || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
13028 return false;
13030 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
13031 return false;
13033 if (!STMT_VINFO_RELEVANT_P (stmt_info))
13034 return false;
13036 DUMP_VECT_SCOPE ("vectorizable_early_exit");
13038 auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info));
13040 tree vectype = NULL_TREE;
13041 slp_tree slp_op0;
13042 tree op0;
13043 enum vect_def_type dt0;
13045 /* Early break gcond SLP trees can be root-only and have no children,
13046 for instance when the argument is an external. In that case there
13047 is no operand whose use we need to analyze.
13048 if ((!slp_node || !SLP_TREE_CHILDREN (slp_node).is_empty ())
13049 && !vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0,
13050 &vectype))
13052 if (dump_enabled_p ())
13053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13054 "use not simple.\n");
13055 return false;
13058 /* For SLP we don't want to use the type of the operands of the SLP node:
13059 when vectorizing using SLP, SLP_NODE will be the children of the gcond,
13060 and we want to use the type of the direct children, which, since the
13061 gcond is the root, will be the current node rather than a child node as
13062 vect_is_simple_use assumes. */
13063 if (slp_node)
13064 vectype = SLP_TREE_VECTYPE (slp_node);
13066 if (!vectype)
13067 return false;
13069 machine_mode mode = TYPE_MODE (vectype);
13070 int ncopies, vec_num;
13072 if (slp_node)
13074 ncopies = 1;
13075 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
13077 else
13079 ncopies = vect_get_num_copies (loop_vinfo, vectype);
13080 vec_num = 1;
13083 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
13084 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
13085 bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
13086 bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
13088 /* Now build the new conditional. Pattern gimple_conds get dropped during
13089 codegen so we must replace the original insn. */
13090 gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
13091 gcond *cond_stmt = as_a <gcond *>(orig_stmt);
13092 /* When vectorizing we assume that if the branch edge is taken we're
13093 exiting the loop. This is, however, not always the case, as the compiler
13094 will rewrite conditions to always be a comparison against 0. To do this
13095 it sometimes flips the edges. This is fine for scalar code, but for
13096 vector code we then have to flip the test, as we're still assuming that
13097 taking the branch edge means we found the exit condition, i.e. we need
13098 to know whether we are generating a `forall` or an `exist` condition. */
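/* Concretely: for an "exist" test we IOR the per-copy masks and exit once
   the reduction is nonzero; when the TRUE edge stays inside the loop
   ("forall") we AND the masks and only keep looping while the reduction is
   all-ones, hence the EQ_EXPR comparison against the minus-one vector below. */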
13099 auto new_code = NE_EXPR;
13100 auto reduc_optab = ior_optab;
13101 auto reduc_op = BIT_IOR_EXPR;
13102 tree cst = build_zero_cst (vectype);
13103 edge exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 0);
13104 if (exit_true_edge->flags & EDGE_FALSE_VALUE)
13105 exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 1);
13106 gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE);
13107 if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
13108 exit_true_edge->dest))
13110 new_code = EQ_EXPR;
13111 reduc_optab = and_optab;
13112 reduc_op = BIT_AND_EXPR;
13113 cst = build_minus_one_cst (vectype);
13116 /* Analyze only. */
13117 if (!vec_stmt)
13119 if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
13121 if (dump_enabled_p ())
13122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13123 "can't vectorize early exit because the "
13124 "target doesn't support flag setting vector "
13125 "comparisons.\n");
13126 return false;
13129 if (ncopies > 1
13130 && direct_optab_handler (reduc_optab, mode) == CODE_FOR_nothing)
13132 if (dump_enabled_p ())
13133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13134 "can't vectorize early exit because the "
13135 "target does not support boolean vector %s "
13136 "for type %T.\n",
13137 reduc_optab == ior_optab ? "OR" : "AND",
13138 vectype);
13139 return false;
13142 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
13143 vec_stmt, slp_node, cost_vec))
13144 return false;
13146 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
13148 if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
13149 OPTIMIZE_FOR_SPEED))
13150 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num,
13151 vectype, 1);
13152 else
13153 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
13154 vectype, NULL);
13157 return true;
13160 /* Transform. */
13162 tree new_temp = NULL_TREE;
13163 gimple *new_stmt = NULL;
13165 if (dump_enabled_p ())
13166 dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
13168 /* For SLP we don't do codegen of the body starting from the gcond: the gconds
13169 are roots, so by the time we get to them we have already codegened the SLP
13170 tree and shouldn't try to do so again; the arguments have already been
13171 vectorized. It's not very clean to do this here, but the masking code below
13172 is complex and this keeps it all in one place to ease fixes and backports.
13173 Once we drop the non-SLP loop vect or split vectorizable_* this can be simplified. */
13174 if (!slp_node)
13176 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
13177 vec_stmt, slp_node, cost_vec))
13178 gcc_unreachable ();
13181 gimple *stmt = STMT_VINFO_STMT (stmt_info);
13182 basic_block cond_bb = gimple_bb (stmt);
13183 gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
13185 auto_vec<tree> stmts;
13187 if (slp_node)
13188 stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node));
13189 else
13191 auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info);
13192 stmts.reserve_exact (vec_stmts.length ());
13193 for (auto stmt : vec_stmts)
13194 stmts.quick_push (gimple_assign_lhs (stmt));
13197 /* Determine if we need to reduce the final value. */
13198 if (stmts.length () > 1)
13200 /* We build the reductions in a way that maintains as much parallelism
13201 as possible. */
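/* E.g. with four partial masks M0..M3 the loop below combines M2 with M3
   and M0 with M1 first, and then the two intermediate results, rather
   than reducing them in a serial chain. */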
13202 auto_vec<tree> workset (stmts.length ());
13204 /* Mask the statements as we queue them up. Normally we loop over
13205 vec_num, but since we inspect the exact results of vectorization
13206 we don't need to and instead can just use the stmts themselves. */
13207 if (masked_loop_p)
13208 for (unsigned i = 0; i < stmts.length (); i++)
13210 tree stmt_mask
13211 = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies * vec_num,
13212 vectype, i);
13213 stmt_mask
13214 = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask,
13215 stmts[i], &cond_gsi);
13216 workset.quick_push (stmt_mask);
13218 else if (len_loop_p)
13219 for (unsigned i = 0; i < stmts.length (); i++)
13221 tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
13222 lens, ncopies * vec_num,
13223 vectype, stmts[i], i, 1);
13225 workset.quick_push (len_mask);
13227 else
13228 workset.splice (stmts);
13230 while (workset.length () > 1)
13232 new_temp = make_temp_ssa_name (vectype, NULL, "vexit_reduc");
13233 tree arg0 = workset.pop ();
13234 tree arg1 = workset.pop ();
13235 new_stmt = gimple_build_assign (new_temp, reduc_op, arg0, arg1);
13236 vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
13237 &cond_gsi);
13238 workset.quick_insert (0, new_temp);
13241 else
13243 new_temp = stmts[0];
13244 if (masked_loop_p)
13246 tree mask
13247 = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, 0);
13248 new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
13249 new_temp, &cond_gsi);
13251 else if (len_loop_p)
13252 new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
13253 ncopies, vectype, new_temp, 0, 1);
13256 gcc_assert (new_temp);
13258 gimple_cond_set_condition (cond_stmt, new_code, new_temp, cst);
13259 update_stmt (orig_stmt);
13261 if (slp_node)
13262 SLP_TREE_VEC_DEFS (slp_node).truncate (0);
13263 else
13264 STMT_VINFO_VEC_STMTS (stmt_info).truncate (0);
13266 if (!slp_node)
13267 *vec_stmt = orig_stmt;
13269 return true;
13272 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
13273 can handle all live statements in the node. Otherwise return true
13274 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
13275 VEC_STMT_P is as for vectorizable_live_operation. */
13277 static bool
13278 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
13279 slp_tree slp_node, slp_instance slp_node_instance,
13280 bool vec_stmt_p,
13281 stmt_vector_for_cost *cost_vec)
13283 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
13284 if (slp_node)
13286 stmt_vec_info slp_stmt_info;
13287 unsigned int i;
13288 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
13290 if (slp_stmt_info
13291 && (STMT_VINFO_LIVE_P (slp_stmt_info)
13292 || (loop_vinfo
13293 && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
13294 && STMT_VINFO_DEF_TYPE (slp_stmt_info)
13295 == vect_induction_def))
13296 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
13297 slp_node_instance, i,
13298 vec_stmt_p, cost_vec))
13299 return false;
13302 else if ((STMT_VINFO_LIVE_P (stmt_info)
13303 || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
13304 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def))
13305 && !vectorizable_live_operation (vinfo, stmt_info,
13306 slp_node, slp_node_instance, -1,
13307 vec_stmt_p, cost_vec))
13308 return false;
13310 return true;
13313 /* Make sure the statement is vectorizable. */
13315 opt_result
13316 vect_analyze_stmt (vec_info *vinfo,
13317 stmt_vec_info stmt_info, bool *need_to_vectorize,
13318 slp_tree node, slp_instance node_instance,
13319 stmt_vector_for_cost *cost_vec)
13321 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
13322 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
13323 bool ok;
13324 gimple_seq pattern_def_seq;
13326 if (dump_enabled_p ())
13327 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
13328 stmt_info->stmt);
13330 if (gimple_has_volatile_ops (stmt_info->stmt))
13331 return opt_result::failure_at (stmt_info->stmt,
13332 "not vectorized:"
13333 " stmt has volatile operands: %G\n",
13334 stmt_info->stmt);
13336 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13337 && node == NULL
13338 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
13340 gimple_stmt_iterator si;
13342 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
13344 stmt_vec_info pattern_def_stmt_info
13345 = vinfo->lookup_stmt (gsi_stmt (si));
13346 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
13347 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
13349 /* Analyze def stmt of STMT if it's a pattern stmt. */
13350 if (dump_enabled_p ())
13351 dump_printf_loc (MSG_NOTE, vect_location,
13352 "==> examining pattern def statement: %G",
13353 pattern_def_stmt_info->stmt);
13355 opt_result res
13356 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
13357 need_to_vectorize, node, node_instance,
13358 cost_vec);
13359 if (!res)
13360 return res;
13365 /* Skip stmts that do not need to be vectorized. In loops this is expected
13366 to include:
13367 - the COND_EXPR which is the loop exit condition
13368 - any LABEL_EXPRs in the loop
13369 - computations that are used only for array indexing or loop control.
13370 In basic blocks we only analyze statements that are a part of some SLP
13371 instance, therefore, all the statements are relevant.
13373 A pattern statement needs to be analyzed instead of the original statement
13374 if the original statement is not relevant. Otherwise, we analyze both
13375 statements. In basic blocks we are called from some SLP instance
13376 traversal, so don't analyze pattern stmts instead; the pattern stmts
13377 will already be part of an SLP instance. */
13379 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
13380 if (!STMT_VINFO_RELEVANT_P (stmt_info)
13381 && !STMT_VINFO_LIVE_P (stmt_info))
13383 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13384 && pattern_stmt_info
13385 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
13386 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
13388 /* Analyze PATTERN_STMT instead of the original stmt. */
13389 stmt_info = pattern_stmt_info;
13390 if (dump_enabled_p ())
13391 dump_printf_loc (MSG_NOTE, vect_location,
13392 "==> examining pattern statement: %G",
13393 stmt_info->stmt);
13395 else
13397 if (dump_enabled_p ())
13398 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
13400 if (node)
13401 return opt_result::failure_at (stmt_info->stmt,
13402 "not vectorized:"
13403 " irrelevant stmt as SLP node %p "
13404 "representative.\n",
13405 (void *)node);
13406 return opt_result::success ();
13409 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13410 && node == NULL
13411 && pattern_stmt_info
13412 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
13413 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
13415 /* Analyze PATTERN_STMT too. */
13416 if (dump_enabled_p ())
13417 dump_printf_loc (MSG_NOTE, vect_location,
13418 "==> examining pattern statement: %G",
13419 pattern_stmt_info->stmt);
13421 opt_result res
13422 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
13423 node_instance, cost_vec);
13424 if (!res)
13425 return res;
13428 switch (STMT_VINFO_DEF_TYPE (stmt_info))
13430 case vect_internal_def:
13431 case vect_condition_def:
13432 break;
13434 case vect_reduction_def:
13435 case vect_nested_cycle:
13436 gcc_assert (!bb_vinfo
13437 && (relevance == vect_used_in_outer
13438 || relevance == vect_used_in_outer_by_reduction
13439 || relevance == vect_used_by_reduction
13440 || relevance == vect_unused_in_scope
13441 || relevance == vect_used_only_live));
13442 break;
13444 case vect_double_reduction_def:
13445 gcc_assert (!bb_vinfo && node);
13446 break;
13448 case vect_induction_def:
13449 case vect_first_order_recurrence:
13450 gcc_assert (!bb_vinfo);
13451 break;
13453 case vect_constant_def:
13454 case vect_external_def:
13455 case vect_unknown_def_type:
13456 default:
13457 gcc_unreachable ();
13460 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13461 if (node)
13462 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
13464 if (STMT_VINFO_RELEVANT_P (stmt_info))
13466 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
13467 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
13468 || gimple_code (stmt_info->stmt) == GIMPLE_COND
13469 || (call && gimple_call_lhs (call) == NULL_TREE));
13470 *need_to_vectorize = true;
13473 if (PURE_SLP_STMT (stmt_info) && !node)
13475 if (dump_enabled_p ())
13476 dump_printf_loc (MSG_NOTE, vect_location,
13477 "handled only by SLP analysis\n");
13478 return opt_result::success ();
13481 /* When we arrive here with a non-SLP statement and we are supposed
13482 to use SLP for everything, fail vectorization. */
13483 if (!node && param_vect_force_slp)
13484 return opt_result::failure_at (stmt_info->stmt,
13485 "needs non-SLP handling\n");
13487 ok = true;
13488 if (!bb_vinfo
13489 && (STMT_VINFO_RELEVANT_P (stmt_info)
13490 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
13491 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
13492 -mveclibabi= takes preference over library functions with
13493 the simd attribute. */
13494 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13495 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
13496 cost_vec)
13497 || vectorizable_conversion (vinfo, stmt_info,
13498 NULL, NULL, node, cost_vec)
13499 || vectorizable_operation (vinfo, stmt_info,
13500 NULL, NULL, node, cost_vec)
13501 || vectorizable_assignment (vinfo, stmt_info,
13502 NULL, NULL, node, cost_vec)
13503 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13504 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13505 || vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo),
13506 stmt_info, node, cost_vec)
13507 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13508 node, node_instance, cost_vec)
13509 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
13510 NULL, node, cost_vec)
13511 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13512 || vectorizable_condition (vinfo, stmt_info,
13513 NULL, NULL, node, cost_vec)
13514 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
13515 cost_vec)
13516 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13517 stmt_info, NULL, node)
13518 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13519 stmt_info, NULL, node, cost_vec)
13520 || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
13521 cost_vec));
13522 else
13524 if (bb_vinfo)
13525 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13526 || vectorizable_simd_clone_call (vinfo, stmt_info,
13527 NULL, NULL, node, cost_vec)
13528 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
13529 cost_vec)
13530 || vectorizable_shift (vinfo, stmt_info,
13531 NULL, NULL, node, cost_vec)
13532 || vectorizable_operation (vinfo, stmt_info,
13533 NULL, NULL, node, cost_vec)
13534 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
13535 cost_vec)
13536 || vectorizable_load (vinfo, stmt_info,
13537 NULL, NULL, node, cost_vec)
13538 || vectorizable_store (vinfo, stmt_info,
13539 NULL, NULL, node, cost_vec)
13540 || vectorizable_condition (vinfo, stmt_info,
13541 NULL, NULL, node, cost_vec)
13542 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
13543 cost_vec)
13544 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)
13545 || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
13546 cost_vec));
13550 if (node)
13551 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13553 if (!ok)
13554 return opt_result::failure_at (stmt_info->stmt,
13555 "not vectorized:"
13556 " relevant stmt not supported: %G",
13557 stmt_info->stmt);
13559 /* Stmts that are (also) "live" (i.e. used outside the loop)
13560 need extra handling, except for vectorizable reductions. */
13561 if (!bb_vinfo
13562 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
13563 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
13564 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
13565 stmt_info, node, node_instance,
13566 false, cost_vec))
13567 return opt_result::failure_at (stmt_info->stmt,
13568 "not vectorized:"
13569 " live stmt not supported: %G",
13570 stmt_info->stmt);
13572 return opt_result::success ();
13576 /* Function vect_transform_stmt.
13578 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
13580 bool
13581 vect_transform_stmt (vec_info *vinfo,
13582 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
13583 slp_tree slp_node, slp_instance slp_node_instance)
13585 bool is_store = false;
13586 gimple *vec_stmt = NULL;
13587 bool done;
13589 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
13591 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13592 if (slp_node)
13593 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
13595 switch (STMT_VINFO_TYPE (stmt_info))
13597 case type_demotion_vec_info_type:
13598 case type_promotion_vec_info_type:
13599 case type_conversion_vec_info_type:
13600 done = vectorizable_conversion (vinfo, stmt_info,
13601 gsi, &vec_stmt, slp_node, NULL);
13602 gcc_assert (done);
13603 break;
13605 case induc_vec_info_type:
13606 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
13607 stmt_info, &vec_stmt, slp_node,
13608 NULL);
13609 gcc_assert (done);
13610 break;
13612 case shift_vec_info_type:
13613 done = vectorizable_shift (vinfo, stmt_info,
13614 gsi, &vec_stmt, slp_node, NULL);
13615 gcc_assert (done);
13616 break;
13618 case op_vec_info_type:
13619 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13620 NULL);
13621 gcc_assert (done);
13622 break;
13624 case assignment_vec_info_type:
13625 done = vectorizable_assignment (vinfo, stmt_info,
13626 gsi, &vec_stmt, slp_node, NULL);
13627 gcc_assert (done);
13628 break;
13630 case load_vec_info_type:
13631 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13632 NULL);
13633 gcc_assert (done);
13634 break;
13636 case store_vec_info_type:
13637 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
13638 && !slp_node
13639 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
13640 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
13641 /* In case of interleaving, the whole chain is vectorized when the
13642 last store in the chain is reached. Store stmts before the last
13643 one are skipped, and their vec_stmt_info shouldn't be freed
13644 meanwhile. */
13646 else
13648 done = vectorizable_store (vinfo, stmt_info,
13649 gsi, &vec_stmt, slp_node, NULL);
13650 gcc_assert (done);
13651 is_store = true;
13653 break;
13655 case condition_vec_info_type:
13656 done = vectorizable_condition (vinfo, stmt_info,
13657 gsi, &vec_stmt, slp_node, NULL);
13658 gcc_assert (done);
13659 break;
13661 case comparison_vec_info_type:
13662 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13663 slp_node, NULL);
13664 gcc_assert (done);
13665 break;
13667 case call_vec_info_type:
13668 done = vectorizable_call (vinfo, stmt_info,
13669 gsi, &vec_stmt, slp_node, NULL);
13670 break;
13672 case call_simd_clone_vec_info_type:
13673 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13674 slp_node, NULL);
13675 break;
13677 case reduc_vec_info_type:
13678 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13679 gsi, &vec_stmt, slp_node);
13680 gcc_assert (done);
13681 break;
13683 case cycle_phi_info_type:
13684 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13685 &vec_stmt, slp_node, slp_node_instance);
13686 gcc_assert (done);
13687 break;
13689 case lc_phi_info_type:
13690 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13691 stmt_info, &vec_stmt, slp_node);
13692 gcc_assert (done);
13693 break;
13695 case recurr_info_type:
13696 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13697 stmt_info, &vec_stmt, slp_node, NULL);
13698 gcc_assert (done);
13699 break;
13701 case phi_info_type:
13702 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13703 gcc_assert (done);
13704 break;
13706 case loop_exit_ctrl_vec_info_type:
13707 done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
13708 slp_node, NULL);
13709 gcc_assert (done);
13710 break;
13712 default:
13713 if (!STMT_VINFO_LIVE_P (stmt_info))
13715 if (dump_enabled_p ())
13716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13717 "stmt not supported.\n");
13718 gcc_unreachable ();
13720 done = true;
13723 if (!slp_node && vec_stmt)
13724 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13726 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13728 /* Handle stmts whose DEF is used outside the loop-nest that is
13729 being vectorized. */
13730 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13731 slp_node_instance, true, NULL);
13732 gcc_assert (done);
13735 if (slp_node)
13736 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13738 return is_store;
13742 /* Remove a group of stores (for SLP or interleaving), free their
13743 stmt_vec_info. */
13745 void
13746 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13748 stmt_vec_info next_stmt_info = first_stmt_info;
13750 while (next_stmt_info)
13752 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13753 next_stmt_info = vect_orig_stmt (next_stmt_info);
13754 /* Free the attached stmt_vec_info and remove the stmt. */
13755 vinfo->remove_stmt (next_stmt_info);
13756 next_stmt_info = tmp;
13760 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13761 elements of type SCALAR_TYPE, or null if the target doesn't support
13762 such a type.
13764 If NUNITS is zero, return a vector type that contains elements of
13765 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13767 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13768 for this vectorization region and want to "autodetect" the best choice.
13769 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13770 and we want the new type to be interoperable with it. PREVAILING_MODE
13771 in this case can be a scalar integer mode or a vector mode; when it
13772 is a vector mode, the function acts like a tree-level version of
13773 related_vector_mode. */
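/* For example (illustrative only): with PREVAILING_MODE == VOIDmode,
   SCALAR_TYPE == int and NUNITS == 0, a hypothetical target whose
   preferred SIMD mode for SImode is V4SImode would get a vector(4) int
   back. */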
13775 tree
13776 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13777 tree scalar_type, poly_uint64 nunits)
13779 tree orig_scalar_type = scalar_type;
13780 scalar_mode inner_mode;
13781 machine_mode simd_mode;
13782 tree vectype;
13784 if ((!INTEGRAL_TYPE_P (scalar_type)
13785 && !POINTER_TYPE_P (scalar_type)
13786 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13787 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13788 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13789 return NULL_TREE;
13791 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13793 /* Interoperability between modes requires one to be a constant multiple
13794 of the other, so that the number of vectors required for each operation
13795 is a compile-time constant. */
13796 if (prevailing_mode != VOIDmode
13797 && !constant_multiple_p (nunits * nbytes,
13798 GET_MODE_SIZE (prevailing_mode))
13799 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13800 nunits * nbytes))
13801 return NULL_TREE;
13803 /* For vector types of elements whose mode precision doesn't
13804 match their type's precision we use an element type of mode
13805 precision. The vectorization routines will have to make sure
13806 they support the proper result truncation/extension.
13807 We also make sure to build vector types with INTEGER_TYPE
13808 component type only. */
13809 if (INTEGRAL_TYPE_P (scalar_type)
13810 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13811 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13812 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13813 TYPE_UNSIGNED (scalar_type));
13815 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13816 When the component mode passes the above test simply use a type
13817 corresponding to that mode. The theory is that any use that
13818 would cause problems with this will disable vectorization anyway. */
13819 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13820 && !INTEGRAL_TYPE_P (scalar_type))
13821 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13823 /* We can't build a vector type of elements with alignment bigger than
13824 their size. */
13825 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13826 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13827 TYPE_UNSIGNED (scalar_type));
13829 /* If we fell back to using the mode, fail if there was
13830 no scalar type for it. */
13831 if (scalar_type == NULL_TREE)
13832 return NULL_TREE;
13834 /* If no prevailing mode was supplied, use the mode the target prefers.
13835 Otherwise lookup a vector mode based on the prevailing mode. */
13836 if (prevailing_mode == VOIDmode)
13838 gcc_assert (known_eq (nunits, 0U));
13839 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13840 if (SCALAR_INT_MODE_P (simd_mode))
13842 /* Traditional behavior is not to take the integer mode
13843 literally, but simply to use it as a way of determining
13844 the vector size. It is up to mode_for_vector to decide
13845 what the TYPE_MODE should be.
13847 Note that nunits == 1 is allowed in order to support single
13848 element vector types. */
13849 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13850 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13851 return NULL_TREE;
13854 else if (SCALAR_INT_MODE_P (prevailing_mode)
13855 || !related_vector_mode (prevailing_mode,
13856 inner_mode, nunits).exists (&simd_mode))
13858 /* Fall back to using mode_for_vector, mostly in the hope of being
13859 able to use an integer mode. */
13860 if (known_eq (nunits, 0U)
13861 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13862 return NULL_TREE;
13864 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13865 return NULL_TREE;
13868 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13870 /* In cases where the mode was chosen by mode_for_vector, check that
13871 the target actually supports the chosen mode, or that it at least
13872 allows the vector mode to be replaced by a like-sized integer. */
13873 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13874 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13875 return NULL_TREE;
13877 /* Re-attach the address-space qualifier if we canonicalized the scalar
13878 type. */
13879 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13880 return build_qualified_type
13881 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13883 return vectype;
13886 /* Function get_vectype_for_scalar_type.
13888 Returns the vector type corresponding to SCALAR_TYPE as supported
13889 by the target. If GROUP_SIZE is nonzero and we're performing BB
13890 vectorization, make sure that the number of elements in the vector
13891 is no bigger than GROUP_SIZE. */
13893 tree
13894 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13895 unsigned int group_size)
13897 /* For BB vectorization, we should always have a group size once we've
13898 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13899 are tentative requests during things like early data reference
13900 analysis and pattern recognition. */
13901 if (is_a <bb_vec_info> (vinfo))
13902 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13903 else
13904 group_size = 0;
13906 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13907 scalar_type);
13908 if (vectype && vinfo->vector_mode == VOIDmode)
13909 vinfo->vector_mode = TYPE_MODE (vectype);
13911 /* Register the natural choice of vector type, before the group size
13912 has been applied. */
13913 if (vectype)
13914 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13916 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13917 try again with an explicit number of elements. */
13918 if (vectype
13919 && group_size
13920 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13922 /* Start with the biggest number of units that fits within
13923 GROUP_SIZE and halve it until we find a valid vector type.
13924 Usually either the first attempt will succeed or all will
13925 fail (in the latter case because GROUP_SIZE is too small
13926 for the target), but it's possible that a target could have
13927 a hole between supported vector types.
13929 If GROUP_SIZE is not a power of 2, this has the effect of
13930 trying the largest power of 2 that fits within the group,
13931 even though the group is not a multiple of that vector size.
13932 The BB vectorizer will then try to carve up the group into
13933 smaller pieces. */
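/* E.g. for GROUP_SIZE == 6 this tries 4 units first and then 2,
   stopping before a single-element vector. */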
13934 unsigned int nunits = 1 << floor_log2 (group_size);
13937 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13938 scalar_type, nunits);
13939 nunits /= 2;
13941 while (nunits > 1 && !vectype);
13944 return vectype;
13947 /* Return the vector type corresponding to SCALAR_TYPE as supported
13948 by the target. NODE, if nonnull, is the SLP tree node that will
13949 use the returned vector type. */
13951 tree
13952 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13954 unsigned int group_size = 0;
13955 if (node)
13956 group_size = SLP_TREE_LANES (node);
13957 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13960 /* Function get_mask_type_for_scalar_type.
13962 Returns the mask type corresponding to a result of comparison
13963 of vectors of specified SCALAR_TYPE as supported by target.
13964 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13965 make sure that the number of elements in the vector is no bigger
13966 than GROUP_SIZE. */
13968 tree
13969 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13970 unsigned int group_size)
13972 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13974 if (!vectype)
13975 return NULL;
13977 return truth_type_for (vectype);
13980 /* Function get_mask_type_for_scalar_type.
13982 Returns the mask type corresponding to a result of comparison
13983 of vectors of specified SCALAR_TYPE as supported by target.
13984 NODE, if nonnull, is the SLP tree node that will use the returned
13985 vector type. */
13987 tree
13988 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13989 slp_tree node)
13991 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13993 if (!vectype)
13994 return NULL;
13996 return truth_type_for (vectype);
13999 /* Function get_same_sized_vectype
14001 Returns a vector type corresponding to SCALAR_TYPE of size
14002 VECTOR_TYPE if supported by the target. */
14004 tree
14005 get_same_sized_vectype (tree scalar_type, tree vector_type)
14007 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
14008 return truth_type_for (vector_type);
14010 poly_uint64 nunits;
14011 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
14012 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
14013 return NULL_TREE;
14015 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
14016 scalar_type, nunits);
14019 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
14020 would not change the chosen vector modes. */
14022 bool
14023 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
14025 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
14026 i != vinfo->used_vector_modes.end (); ++i)
14027 if (!VECTOR_MODE_P (*i)
14028 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
14029 return false;
14030 return true;
14033 /* Function vect_is_simple_use.
14035 Input:
14036 VINFO - the vect info of the loop or basic block that is being vectorized.
14037 OPERAND - operand in the loop or bb.
14038 Output:
14039 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
14040 case OPERAND is an SSA_NAME that is defined in the vectorizable region
14041 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
14042 the definition could be anywhere in the function
14043 DT - the type of definition
14045 Returns whether a stmt with OPERAND can be vectorized.
14046 For loops, supportable operands are constants, loop invariants, and operands
14047 that are defined by the current iteration of the loop. Unsupportable
14048 operands are those that are defined by a previous iteration of the loop (as
14049 is the case in reduction/induction computations).
14050 For basic blocks, supportable operands are constants and bb invariants.
14051 For now, operands defined outside the basic block are not supported. */
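/* For instance (illustrative only), in

     for (i = 0; i < n; i++)
       a[i] = b[i] + x + 1;

   the loaded value b[i] is a vect_internal_def, the loop-invariant X is a
   vect_external_def and the literal 1 is a vect_constant_def. */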
14053 bool
14054 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
14055 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
14057 if (def_stmt_info_out)
14058 *def_stmt_info_out = NULL;
14059 if (def_stmt_out)
14060 *def_stmt_out = NULL;
14061 *dt = vect_unknown_def_type;
14063 if (dump_enabled_p ())
14065 dump_printf_loc (MSG_NOTE, vect_location,
14066 "vect_is_simple_use: operand ");
14067 if (TREE_CODE (operand) == SSA_NAME
14068 && !SSA_NAME_IS_DEFAULT_DEF (operand))
14069 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
14070 else
14071 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
14074 if (CONSTANT_CLASS_P (operand))
14075 *dt = vect_constant_def;
14076 else if (is_gimple_min_invariant (operand))
14077 *dt = vect_external_def;
14078 else if (TREE_CODE (operand) != SSA_NAME)
14079 *dt = vect_unknown_def_type;
14080 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
14081 *dt = vect_external_def;
14082 else
14084 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
14085 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
14086 if (!stmt_vinfo)
14087 *dt = vect_external_def;
14088 else
14090 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
14091 def_stmt = stmt_vinfo->stmt;
14092 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
14093 if (def_stmt_info_out)
14094 *def_stmt_info_out = stmt_vinfo;
14096 if (def_stmt_out)
14097 *def_stmt_out = def_stmt;
14100 if (dump_enabled_p ())
14102 dump_printf (MSG_NOTE, ", type of def: ");
14103 switch (*dt)
14105 case vect_uninitialized_def:
14106 dump_printf (MSG_NOTE, "uninitialized\n");
14107 break;
14108 case vect_constant_def:
14109 dump_printf (MSG_NOTE, "constant\n");
14110 break;
14111 case vect_external_def:
14112 dump_printf (MSG_NOTE, "external\n");
14113 break;
14114 case vect_internal_def:
14115 dump_printf (MSG_NOTE, "internal\n");
14116 break;
14117 case vect_induction_def:
14118 dump_printf (MSG_NOTE, "induction\n");
14119 break;
14120 case vect_reduction_def:
14121 dump_printf (MSG_NOTE, "reduction\n");
14122 break;
14123 case vect_double_reduction_def:
14124 dump_printf (MSG_NOTE, "double reduction\n");
14125 break;
14126 case vect_nested_cycle:
14127 dump_printf (MSG_NOTE, "nested cycle\n");
14128 break;
14129 case vect_first_order_recurrence:
14130 dump_printf (MSG_NOTE, "first order recurrence\n");
14131 break;
14132 case vect_condition_def:
14133 dump_printf (MSG_NOTE, "control flow\n");
14134 break;
14135 case vect_unknown_def_type:
14136 dump_printf (MSG_NOTE, "unknown\n");
14137 break;
14141 if (*dt == vect_unknown_def_type)
14143 if (dump_enabled_p ())
14144 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
14145 "Unsupported pattern.\n");
14146 return false;
14149 return true;
14152 /* Function vect_is_simple_use.
14154 Same as vect_is_simple_use but also determines the vector operand
14155 type of OPERAND and stores it to *VECTYPE. If the definition of
14156 OPERAND is vect_uninitialized_def, vect_constant_def or
14157 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
14158 is responsible for computing the best-suited vector type for the
14159 scalar operand. */
14161 bool
14162 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
14163 tree *vectype, stmt_vec_info *def_stmt_info_out,
14164 gimple **def_stmt_out)
14166 stmt_vec_info def_stmt_info;
14167 gimple *def_stmt;
14168 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
14169 return false;
14171 if (def_stmt_out)
14172 *def_stmt_out = def_stmt;
14173 if (def_stmt_info_out)
14174 *def_stmt_info_out = def_stmt_info;
14176 /* Now get a vector type if the def is internal, otherwise supply
14177 NULL_TREE and leave it up to the caller to figure out a proper
14178 type for the use stmt. */
14179 if (*dt == vect_internal_def
14180 || *dt == vect_induction_def
14181 || *dt == vect_reduction_def
14182 || *dt == vect_double_reduction_def
14183 || *dt == vect_nested_cycle
14184 || *dt == vect_first_order_recurrence)
14186 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
14187 gcc_assert (*vectype != NULL_TREE);
14188 if (dump_enabled_p ())
14189 dump_printf_loc (MSG_NOTE, vect_location,
14190 "vect_is_simple_use: vectype %T\n", *vectype);
14192 else if (*dt == vect_uninitialized_def
14193 || *dt == vect_constant_def
14194 || *dt == vect_external_def)
14195 *vectype = NULL_TREE;
14196 else
14197 gcc_unreachable ();
14199 return true;
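/* A minimal sketch of a typical use of the overload above from a
   vectorizable_* routine (OP and VINFO are assumed to come from the
   caller):

     enum vect_def_type dt;
     tree op_vectype;
     stmt_vec_info def_info;
     gimple *def_stmt;
     if (!vect_is_simple_use (op, vinfo, &dt, &op_vectype,
			      &def_info, &def_stmt))
       return false;

   For constant and external defs OP_VECTYPE comes back as NULL_TREE and the
   caller is expected to pick a suitable vector type itself.  */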
14202 /* Function vect_is_simple_use.
14204 Same as vect_is_simple_use but determines the operand from its
14205 position OPERAND in either STMT or SLP_NODE, filling in *OP
14206 and *SLP_DEF (when SLP_NODE is not NULL). */
14208 bool
14209 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
14210 unsigned operand, tree *op, slp_tree *slp_def,
14211 enum vect_def_type *dt,
14212 tree *vectype, stmt_vec_info *def_stmt_info_out)
14214 if (slp_node)
14216 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
14217 *slp_def = child;
14218 *vectype = SLP_TREE_VECTYPE (child);
14219 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
14221 /* ??? VEC_PERM nodes might be intermediate and their lane values
14222 have no representative (nor do we build a VEC_PERM stmt for
14223 the actual operation). Note for two-operator nodes we set
14224 a representative but leave scalar stmts empty as we'd only
14225 have one for a subset of lanes. Ideally no caller would
14226 require *op for internal defs. */
14227 if (SLP_TREE_REPRESENTATIVE (child))
14229 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
14230 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
14232 else
14234 gcc_assert (SLP_TREE_CODE (child) == VEC_PERM_EXPR);
14235 *op = error_mark_node;
14236 *dt = vect_internal_def;
14237 if (def_stmt_info_out)
14238 *def_stmt_info_out = NULL;
14239 return true;
14242 else
14244 if (def_stmt_info_out)
14245 *def_stmt_info_out = NULL;
14246 *op = SLP_TREE_SCALAR_OPS (child)[0];
14247 *dt = SLP_TREE_DEF_TYPE (child);
14248 return true;
14251 else
14253 *slp_def = NULL;
14254 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
14256 if (gimple_assign_rhs_code (ass) == COND_EXPR
14257 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
14259 if (operand < 2)
14260 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
14261 else
14262 *op = gimple_op (ass, operand);
14264 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
14265 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
14266 else
14267 *op = gimple_op (ass, operand + 1);
14269 else if (gcond *cond = dyn_cast <gcond *> (stmt->stmt))
14270 *op = gimple_op (cond, operand);
14271 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
14272 *op = gimple_call_arg (call, operand);
14273 else
14274 gcc_unreachable ();
14275 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
14279 /* If OP is not NULL and is external or constant, update its vector
14280 type with VECTYPE. Returns true if successful or false if not,
14281 for example when conflicting vector types are present. */
14283 bool
14284 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
14286 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
14287 return true;
14288 if (SLP_TREE_VECTYPE (op))
14289 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
14290 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P; those
14291 should be handled by patterns. Allow vect_constant_def for now. */
14292 if (VECTOR_BOOLEAN_TYPE_P (vectype)
14293 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
14294 return false;
14295 SLP_TREE_VECTYPE (op) = vectype;
14296 return true;
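/* A rough sketch of how the SLP-aware overload of vect_is_simple_use and
   vect_maybe_update_slp_op_vectype above are commonly combined in a
   vectorizable_* routine (VINFO, STMT_INFO, SLP_NODE, OP_NO and VECTYPE are
   assumed to come from the caller):

     tree op;
     slp_tree slp_op;
     enum vect_def_type dt;
     tree op_vectype;
     if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
			      &op, &slp_op, &dt, &op_vectype, NULL)
	 || !vect_maybe_update_slp_op_vectype (slp_op, vectype))
       return false;

   The second call records VECTYPE on constant and external SLP children and
   fails if a conflicting vector type has already been chosen.  */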
14299 /* Function supportable_widening_operation
14301 Check whether an operation represented by the code CODE is a
14302 widening operation that is supported by the target platform in
14303 vector form (i.e., when operating on arguments of type VECTYPE_IN
14304 producing a result of type VECTYPE_OUT).
14306 Widening operations we currently support are NOP (CONVERT), FLOAT,
14307 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
14308 are supported by the target platform either directly (via vector
14309 tree-codes), or via target builtins.
14311 Output:
14312 - CODE1 and CODE2 are codes of vector operations to be used when
14313 vectorizing the operation, if available.
14314 - MULTI_STEP_CVT determines the number of required intermediate steps in
14315 case of multi-step conversion (like char->short->int - in that case
14316 MULTI_STEP_CVT will be 1).
14317 - INTERM_TYPES contains the intermediate type required to perform the
14318 widening operation (short in the above example). */
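/* As a worked example matching the char->short->int case above (VINFO,
   STMT_INFO and the char/int vector types are assumed to come from the
   caller; the exact codes are target-dependent):

     code_helper code1, code2;
     int steps;
     auto_vec<tree> interm;
     bool ok = supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
					       vectype_int, vectype_char,
					       &code1, &code2, &steps,
					       &interm);

   On a target with only single-step unpacks, success would leave STEPS at 1
   with INTERM holding the intermediate short vector type, and CODE1/CODE2
   would typically be VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR (swapped on
   big-endian targets).  */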
14320 bool
14321 supportable_widening_operation (vec_info *vinfo,
14322 code_helper code,
14323 stmt_vec_info stmt_info,
14324 tree vectype_out, tree vectype_in,
14325 code_helper *code1,
14326 code_helper *code2,
14327 int *multi_step_cvt,
14328 vec<tree> *interm_types)
14330 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
14331 class loop *vect_loop = NULL;
14332 machine_mode vec_mode;
14333 enum insn_code icode1, icode2;
14334 optab optab1 = unknown_optab, optab2 = unknown_optab;
14335 tree vectype = vectype_in;
14336 tree wide_vectype = vectype_out;
14337 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
14338 int i;
14339 tree prev_type, intermediate_type;
14340 machine_mode intermediate_mode, prev_mode;
14341 optab optab3, optab4;
14343 *multi_step_cvt = 0;
14344 if (loop_info)
14345 vect_loop = LOOP_VINFO_LOOP (loop_info);
14347 switch (code.safe_as_tree_code ())
14349 case MAX_TREE_CODES:
14350 /* Don't set c1 and c2 if code is not a tree_code. */
14351 break;
14353 case WIDEN_MULT_EXPR:
14354 /* The result of a vectorized widening operation usually requires
14355 two vectors (because the widened results do not fit into one vector).
14356 The generated vector results would normally be expected to be
14357 generated in the same order as in the original scalar computation,
14358 i.e. if 8 results are generated in each vector iteration, they are
14359 to be organized as follows:
14360 vect1: [res1,res2,res3,res4],
14361 vect2: [res5,res6,res7,res8].
14363 However, in the special case that the result of the widening
14364 operation is used in a reduction computation only, the order doesn't
14365 matter (because when vectorizing a reduction we change the order of
14366 the computation). Some targets can take advantage of this and
14367 generate more efficient code. For example, targets like Altivec,
14368 that support widen_mult using a sequence of {mult_even,mult_odd}
14369 generate the following vectors:
14370 vect1: [res1,res3,res5,res7],
14371 vect2: [res2,res4,res6,res8].
14373 When vectorizing outer-loops, we execute the inner-loop sequentially
14374 (each vectorized inner-loop iteration contributes to VF outer-loop
14375 iterations in parallel). We therefore don't allow changing the
14376 order of the computation in the inner-loop during outer-loop
14377 vectorization. */
14378 /* TODO: Another case in which order doesn't *really* matter is when we
14379 widen and then contract again, e.g. (short)((int)x * y >> 8).
14380 Normally, pack_trunc performs an even/odd permute, whereas the
14381 repack from an even/odd expansion would be an interleave, which
14382 would be significantly simpler for e.g. AVX2. */
14383 /* In any case, in order to avoid duplicating the code below, recurse
14384 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
14385 are properly set up for the caller. If we fail, we'll continue with
14386 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
14387 if (vect_loop
14388 && !nested_in_vect_loop_p (vect_loop, stmt_info)
14389 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
14390 stmt_info, vectype_out,
14391 vectype_in, code1,
14392 code2, multi_step_cvt,
14393 interm_types))
14395 /* Elements in a vector with vect_used_by_reduction property cannot
14396 be reordered if the use chain with this property does not have the
14397 same operation. One such example is s += a * b, where elements
14398 in a and b cannot be reordered. Here we check if the vector defined
14399 by STMT is only directly used in the reduction statement. */
14400 tree lhs = gimple_assign_lhs (vect_orig_stmt (stmt_info)->stmt);
14401 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
14402 if (use_stmt_info && STMT_VINFO_REDUC_DEF (use_stmt_info))
14403 return true;
14405 c1 = VEC_WIDEN_MULT_LO_EXPR;
14406 c2 = VEC_WIDEN_MULT_HI_EXPR;
14407 break;
14409 case DOT_PROD_EXPR:
14410 c1 = DOT_PROD_EXPR;
14411 c2 = DOT_PROD_EXPR;
14412 break;
14414 case SAD_EXPR:
14415 c1 = SAD_EXPR;
14416 c2 = SAD_EXPR;
14417 break;
14419 case VEC_WIDEN_MULT_EVEN_EXPR:
14420 /* Support the recursion induced just above. */
14421 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
14422 c2 = VEC_WIDEN_MULT_ODD_EXPR;
14423 break;
14425 case WIDEN_LSHIFT_EXPR:
14426 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
14427 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
14428 break;
14430 CASE_CONVERT:
14431 c1 = VEC_UNPACK_LO_EXPR;
14432 c2 = VEC_UNPACK_HI_EXPR;
14433 break;
14435 case FLOAT_EXPR:
14436 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
14437 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
14438 break;
14440 case FIX_TRUNC_EXPR:
14441 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
14442 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
14443 break;
14445 default:
14446 gcc_unreachable ();
14449 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
14450 std::swap (c1, c2);
14452 if (code == FIX_TRUNC_EXPR)
14454 /* The signedness is determined from the output operand. */
14455 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14456 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14458 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
14459 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14460 && VECTOR_BOOLEAN_TYPE_P (vectype)
14461 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14462 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14464 /* If the input and result modes are the same, a different optab
14465 is needed where we pass in the number of units in vectype. */
14466 optab1 = vec_unpacks_sbool_lo_optab;
14467 optab2 = vec_unpacks_sbool_hi_optab;
14470 vec_mode = TYPE_MODE (vectype);
14471 if (widening_fn_p (code))
14473 /* If this is an internal fn then we must check whether the target
14474 supports either a low-high split or an even-odd split. */
14475 internal_fn ifn = as_internal_fn ((combined_fn) code);
14477 internal_fn lo, hi, even, odd;
14478 lookup_hilo_internal_fn (ifn, &lo, &hi);
14479 *code1 = as_combined_fn (lo);
14480 *code2 = as_combined_fn (hi);
14481 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
14482 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
14484 /* If we don't support low-high, then check for even-odd. */
14485 if (!optab1
14486 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14487 || !optab2
14488 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14490 lookup_evenodd_internal_fn (ifn, &even, &odd);
14491 *code1 = as_combined_fn (even);
14492 *code2 = as_combined_fn (odd);
14493 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
14494 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
14497 else if (code.is_tree_code ())
14499 if (code == FIX_TRUNC_EXPR)
14501 /* The signedness is determined from the output operand. */
14502 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14503 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14505 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
14506 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14507 && VECTOR_BOOLEAN_TYPE_P (vectype)
14508 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14509 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14511 /* If the input and result modes are the same, a different optab
14512 is needed where we pass in the number of units in vectype. */
14513 optab1 = vec_unpacks_sbool_lo_optab;
14514 optab2 = vec_unpacks_sbool_hi_optab;
14516 else
14518 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14519 optab2 = optab_for_tree_code (c2, vectype, optab_default);
14521 *code1 = c1;
14522 *code2 = c2;
14525 if (!optab1 || !optab2)
14526 return false;
14528 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14529 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14530 return false;
14533 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14534 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14536 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14537 return true;
14538 /* For scalar masks we may have different boolean
14539 vector types having the same QImode. Thus we
14540 add an additional check on the number of elements. */
14541 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
14542 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14543 return true;
14546 /* Check if it's a multi-step conversion that can be done using intermediate
14547 types. */
14549 prev_type = vectype;
14550 prev_mode = vec_mode;
14552 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
14553 return false;
14555 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14556 intermediate steps in the promotion sequence. We try
14557 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we
14558 cannot. */
14559 interm_types->create (MAX_INTERM_CVT_STEPS);
14560 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14562 intermediate_mode = insn_data[icode1].operand[0].mode;
14563 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14564 intermediate_type
14565 = vect_halve_mask_nunits (prev_type, intermediate_mode);
14566 else if (VECTOR_MODE_P (intermediate_mode))
14568 tree intermediate_element_type
14569 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
14570 TYPE_UNSIGNED (prev_type));
14571 intermediate_type
14572 = build_vector_type_for_mode (intermediate_element_type,
14573 intermediate_mode);
14575 else
14576 intermediate_type
14577 = lang_hooks.types.type_for_mode (intermediate_mode,
14578 TYPE_UNSIGNED (prev_type));
14580 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14581 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14582 && intermediate_mode == prev_mode
14583 && SCALAR_INT_MODE_P (prev_mode))
14585 /* If the input and result modes are the same, a different optab
14586 is needed where we pass in the number of units in vectype. */
14587 optab3 = vec_unpacks_sbool_lo_optab;
14588 optab4 = vec_unpacks_sbool_hi_optab;
14590 else
14592 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
14593 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
14596 if (!optab3 || !optab4
14597 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
14598 || insn_data[icode1].operand[0].mode != intermediate_mode
14599 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
14600 || insn_data[icode2].operand[0].mode != intermediate_mode
14601 || ((icode1 = optab_handler (optab3, intermediate_mode))
14602 == CODE_FOR_nothing)
14603 || ((icode2 = optab_handler (optab4, intermediate_mode))
14604 == CODE_FOR_nothing))
14605 break;
14607 interm_types->quick_push (intermediate_type);
14608 (*multi_step_cvt)++;
14610 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14611 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14613 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14614 return true;
14615 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
14616 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14617 return true;
14620 prev_type = intermediate_type;
14621 prev_mode = intermediate_mode;
14624 interm_types->release ();
14625 return false;
14629 /* Function supportable_narrowing_operation
14631 Check whether an operation represented by the code CODE is a
14632 narrowing operation that is supported by the target platform in
14633 vector form (i.e., when operating on arguments of type VECTYPE_IN
14634 and producing a result of type VECTYPE_OUT).
14636 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14637 and FLOAT. This function checks if these operations are supported by
14638 the target platform directly via vector tree-codes.
14640 Output:
14641 - CODE1 is the code of a vector operation to be used when
14642 vectorizing the operation, if available.
14643 - MULTI_STEP_CVT determines the number of required intermediate steps in
14644 case of multi-step conversion (like int->short->char - in that case
14645 MULTI_STEP_CVT will be 1).
14646 - INTERM_TYPES contains the intermediate type required to perform the
14647 narrowing operation (short in the above example). */
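/* An illustrative sketch for the int->short->char case mentioned above
   (vectype_int and vectype_char stand for the caller's input and output
   vector types):

     code_helper code1;
     int steps;
     auto_vec<tree> interm;
     bool ok = supportable_narrowing_operation (NOP_EXPR, vectype_char,
						vectype_int, &code1, &steps,
						&interm);

   With only single-step packs available, success would set CODE1 to
   VEC_PACK_TRUNC_EXPR, STEPS to 1 and leave the intermediate short vector
   type in INTERM.  */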
14649 bool
14650 supportable_narrowing_operation (code_helper code,
14651 tree vectype_out, tree vectype_in,
14652 code_helper *code1, int *multi_step_cvt,
14653 vec<tree> *interm_types)
14655 machine_mode vec_mode;
14656 enum insn_code icode1;
14657 optab optab1, interm_optab;
14658 tree vectype = vectype_in;
14659 tree narrow_vectype = vectype_out;
14660 enum tree_code c1;
14661 tree intermediate_type, prev_type;
14662 machine_mode intermediate_mode, prev_mode;
14663 int i;
14664 unsigned HOST_WIDE_INT n_elts;
14665 bool uns;
14667 if (!code.is_tree_code ())
14668 return false;
14670 *multi_step_cvt = 0;
14671 switch ((tree_code) code)
14673 CASE_CONVERT:
14674 c1 = VEC_PACK_TRUNC_EXPR;
14675 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14676 && VECTOR_BOOLEAN_TYPE_P (vectype)
14677 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14678 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
14679 && n_elts < BITS_PER_UNIT)
14680 optab1 = vec_pack_sbool_trunc_optab;
14681 else
14682 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14683 break;
14685 case FIX_TRUNC_EXPR:
14686 c1 = VEC_PACK_FIX_TRUNC_EXPR;
14687 /* The signedness is determined from the output operand. */
14688 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14689 break;
14691 case FLOAT_EXPR:
14692 c1 = VEC_PACK_FLOAT_EXPR;
14693 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14694 break;
14696 default:
14697 gcc_unreachable ();
14700 if (!optab1)
14701 return false;
14703 vec_mode = TYPE_MODE (vectype);
14704 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14705 return false;
14707 *code1 = c1;
14709 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14711 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14712 return true;
14713 /* For scalar masks we may have different boolean
14714 vector types having the same QImode. Thus we
14715 add an additional check on the number of elements. */
14716 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14717 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14718 return true;
14721 if (code == FLOAT_EXPR)
14722 return false;
14724 /* Check if it's a multi-step conversion that can be done using intermediate
14725 types. */
14726 prev_mode = vec_mode;
14727 prev_type = vectype;
14728 if (code == FIX_TRUNC_EXPR)
14729 uns = TYPE_UNSIGNED (vectype_out);
14730 else
14731 uns = TYPE_UNSIGNED (vectype);
14733 /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
14734 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
14735 more costly than signed. */
14736 if (code == FIX_TRUNC_EXPR && uns)
14738 enum insn_code icode2;
14740 intermediate_type
14741 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14742 interm_optab
14743 = optab_for_tree_code (c1, intermediate_type, optab_default);
14744 if (interm_optab != unknown_optab
14745 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
14746 && insn_data[icode1].operand[0].mode
14747 == insn_data[icode2].operand[0].mode)
14749 uns = false;
14750 optab1 = interm_optab;
14751 icode1 = icode2;
14755 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14756 intermediate steps in the narrowing sequence. We try
14757 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we cannot. */
14758 interm_types->create (MAX_INTERM_CVT_STEPS);
14759 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14761 intermediate_mode = insn_data[icode1].operand[0].mode;
14762 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14763 intermediate_type
14764 = vect_double_mask_nunits (prev_type, intermediate_mode);
14765 else
14766 intermediate_type
14767 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14768 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14769 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14770 && SCALAR_INT_MODE_P (prev_mode)
14771 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14772 && n_elts < BITS_PER_UNIT)
14773 interm_optab = vec_pack_sbool_trunc_optab;
14774 else
14775 interm_optab
14776 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14777 optab_default);
14778 if (!interm_optab
14779 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14780 || insn_data[icode1].operand[0].mode != intermediate_mode
14781 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14782 == CODE_FOR_nothing))
14783 break;
14785 interm_types->quick_push (intermediate_type);
14786 (*multi_step_cvt)++;
14788 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14790 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14791 return true;
14792 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14793 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14794 return true;
14797 prev_mode = intermediate_mode;
14798 prev_type = intermediate_type;
14799 optab1 = interm_optab;
14802 interm_types->release ();
14803 return false;
14806 /* Function supportable_indirect_convert_operation
14808 Check whether an operation represented by the code CODE can be carried
14809 out as a single operation or as a sequence of operations supported by
14810 the target platform in vector form (i.e., when operating on arguments
14811 of type VECTYPE_IN producing a result of type VECTYPE_OUT).
14813 Convert operations we currently support directly are FIX_TRUNC and FLOAT.
14814 This function checks if these operations are supported
14815 by the target platform directly (via vector tree-codes).
14817 Output:
14818 - CONVERTS contains the pairs needed to perform the conversion; each
14819 pair's first element is the intermediate type, and its second is the
14820 code of the vector operation to be used when converting from the
14821 previous type to the intermediate type. */
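/* A rough sketch of a typical call (vectype_out, vectype_in and op0 are the
   caller's vector types and the scalar operand being converted; whether one
   or two steps are needed depends entirely on the target):

     auto_vec<std::pair<tree, tree_code> > converts;
     bool ok = supportable_indirect_convert_operation (FLOAT_EXPR,
						       vectype_out,
						       vectype_in,
						       &converts, op0);

   On success CONVERTS holds a single pair for a direct conversion, or two
   pairs when an intermediate signed integer vector type is required; each
   pair names the type to convert to and the tree code used for that
   step.  */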
14822 bool
14823 supportable_indirect_convert_operation (code_helper code,
14824 tree vectype_out,
14825 tree vectype_in,
14826 vec<std::pair<tree, tree_code> > *converts,
14827 tree op0)
14829 bool found_mode = false;
14830 scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
14831 scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
14832 opt_scalar_mode mode_iter;
14833 tree_code tc1, tc2, code1, code2;
14835 tree cvt_type = NULL_TREE;
14836 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
14838 if (supportable_convert_operation ((tree_code) code,
14839 vectype_out,
14840 vectype_in,
14841 &tc1))
14843 converts->safe_push (std::make_pair (vectype_out, tc1));
14844 return true;
14847 /* For conversions between float and integer types check whether
14848 we can use intermediate signed integer types to support the
14849 conversion. */
14850 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
14851 && (code == FLOAT_EXPR
14852 || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
14854 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
14855 bool float_expr_p = code == FLOAT_EXPR;
14856 unsigned short target_size;
14857 scalar_mode intermediate_mode;
14858 if (demotion)
14860 intermediate_mode = lhs_mode;
14861 target_size = GET_MODE_SIZE (rhs_mode);
14863 else
14865 target_size = GET_MODE_SIZE (lhs_mode);
14866 if (!int_mode_for_size
14867 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
14868 return false;
14870 code1 = float_expr_p ? (tree_code) code : NOP_EXPR;
14871 code2 = float_expr_p ? NOP_EXPR : (tree_code) code;
14872 opt_scalar_mode mode_iter;
14873 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
14875 intermediate_mode = mode_iter.require ();
14877 if (GET_MODE_SIZE (intermediate_mode) > target_size)
14878 break;
14880 scalar_mode cvt_mode;
14881 if (!int_mode_for_size
14882 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
14883 break;
14885 cvt_type = build_nonstandard_integer_type
14886 (GET_MODE_BITSIZE (cvt_mode), 0);
14888 /* Check if the intermediate type can hold OP0's range.
14889 When converting from float to integer this is not necessary
14890 because values that do not fit the (smaller) target type are
14891 unspecified anyway. */
14892 if (demotion && float_expr_p)
14894 wide_int op_min_value, op_max_value;
14895 /* For vector form, it looks like op0 doesn't have RANGE_INFO.
14896 In the future, if it is supported, changes may need to be made
14897 to this part, such as checking the RANGE of each element
14898 in the vector. */
14899 if (TREE_CODE (op0) != SSA_NAME
14900 || !SSA_NAME_RANGE_INFO (op0)
14901 || !vect_get_range_info (op0, &op_min_value,
14902 &op_max_value))
14903 break;
14905 if (cvt_type == NULL_TREE
14906 || (wi::min_precision (op_max_value, SIGNED)
14907 > TYPE_PRECISION (cvt_type))
14908 || (wi::min_precision (op_min_value, SIGNED)
14909 > TYPE_PRECISION (cvt_type)))
14910 continue;
14913 cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE (vectype_in),
14914 cvt_type,
14915 nelts);
14916 /* This should only happen for SLP as long as the loop vectorizer
14917 only supports same-sized vectors. */
14918 if (cvt_type == NULL_TREE
14919 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
14920 || !supportable_convert_operation ((tree_code) code1,
14921 vectype_out,
14922 cvt_type, &tc1)
14923 || !supportable_convert_operation ((tree_code) code2,
14924 cvt_type,
14925 vectype_in, &tc2))
14926 continue;
14928 found_mode = true;
14929 break;
14932 if (found_mode)
14934 converts->safe_push (std::make_pair (cvt_type, tc2));
14935 if (TYPE_MODE (cvt_type) != TYPE_MODE (vectype_out))
14936 converts->safe_push (std::make_pair (vectype_out, tc1));
14937 return true;
14940 return false;
14943 /* Generate and return a vector mask of MASK_TYPE such that
14944 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14945 Add the statements to SEQ. */
14947 tree
14948 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14949 tree end_index, const char *name)
14951 tree cmp_type = TREE_TYPE (start_index);
14952 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14953 cmp_type, mask_type,
14954 OPTIMIZE_FOR_SPEED));
14955 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14956 start_index, end_index,
14957 build_zero_cst (mask_type));
14958 tree tmp;
14959 if (name)
14960 tmp = make_temp_ssa_name (mask_type, NULL, name);
14961 else
14962 tmp = make_ssa_name (mask_type);
14963 gimple_call_set_lhs (call, tmp);
14964 gimple_seq_add_stmt (seq, call);
14965 return tmp;
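/* As a small worked example (illustrative only; SEQ, MASK_TYPE and CMP_TYPE
   are assumed to exist in the caller, with MASK_TYPE having four elements):

     tree mask = vect_gen_while (&seq, mask_type,
				 build_int_cst (cmp_type, 0),
				 build_int_cst (cmp_type, 3), "loop_mask");

   produces the mask { true, true, true, false }: lane I is active exactly
   when 0 + I < 3.  */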
14968 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14969 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14971 tree
14972 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14973 tree end_index)
14975 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14976 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14979 /* Try to compute the vector types required to vectorize STMT_INFO,
14980 returning true on success and false if vectorization isn't possible.
14981 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14982 make sure that the number of elements in the vectors is no bigger
14983 than GROUP_SIZE.
14985 On success:
14987 - Set *STMT_VECTYPE_OUT to:
14988 - NULL_TREE if the statement doesn't need to be vectorized;
14989 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14991 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14992 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14993 statement does not help to determine the overall number of units. */
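/* For instance (an illustrative, target-dependent sketch), for a statement
   such as

     int_dest = (int) char_src;

   *STMT_VECTYPE_OUT could be a 4 x int vector while *NUNITS_VECTYPE_OUT is
   the 16 x char vector derived from the smallest scalar type, so that the
   chosen vectorization factor also covers the narrower char accesses.  */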
14995 opt_result
14996 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14997 tree *stmt_vectype_out,
14998 tree *nunits_vectype_out,
14999 unsigned int group_size)
15001 gimple *stmt = stmt_info->stmt;
15003 /* For BB vectorization, we should always have a group size once we've
15004 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
15005 are tentative requests during things like early data reference
15006 analysis and pattern recognition. */
15007 if (is_a <bb_vec_info> (vinfo))
15008 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
15009 else
15010 group_size = 0;
15012 *stmt_vectype_out = NULL_TREE;
15013 *nunits_vectype_out = NULL_TREE;
15015 if (gimple_get_lhs (stmt) == NULL_TREE
15016 /* Allow vector conditionals through here. */
15017 && !is_a <gcond *> (stmt)
15018 /* MASK_STORE and friends have no lhs, but are ok. */
15019 && !(is_gimple_call (stmt)
15020 && gimple_call_internal_p (stmt)
15021 && internal_store_fn_p (gimple_call_internal_fn (stmt))))
15023 if (is_a <gcall *> (stmt))
15025 /* Ignore calls with no lhs. These must be calls to
15026 #pragma omp simd functions, and the vectorization factor
15027 they really need can't be determined until
15028 vectorizable_simd_clone_call. */
15029 if (dump_enabled_p ())
15030 dump_printf_loc (MSG_NOTE, vect_location,
15031 "defer to SIMD clone analysis.\n");
15032 return opt_result::success ();
15035 return opt_result::failure_at (stmt,
15036 "not vectorized: irregular stmt: %G", stmt);
15039 tree vectype;
15040 tree scalar_type = NULL_TREE;
15041 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
15043 vectype = STMT_VINFO_VECTYPE (stmt_info);
15044 if (dump_enabled_p ())
15045 dump_printf_loc (MSG_NOTE, vect_location,
15046 "precomputed vectype: %T\n", vectype);
15048 else if (vect_use_mask_type_p (stmt_info))
15050 unsigned int precision = stmt_info->mask_precision;
15051 scalar_type = build_nonstandard_integer_type (precision, 1);
15052 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
15053 if (!vectype)
15054 return opt_result::failure_at (stmt, "not vectorized: unsupported"
15055 " data-type %T\n", scalar_type);
15056 if (dump_enabled_p ())
15057 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
15059 else
15061 /* If we got here with a gcond, it means that the target had no available
15062 vector mode for the scalar type. We can't vectorize, so abort. */
15063 if (is_a <gcond *> (stmt))
15064 return opt_result::failure_at (stmt,
15065 "not vectorized:"
15066 " unsupported data-type for gcond %T\n",
15067 scalar_type);
15069 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
15070 scalar_type = TREE_TYPE (DR_REF (dr));
15071 else
15072 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
15074 if (dump_enabled_p ())
15076 if (group_size)
15077 dump_printf_loc (MSG_NOTE, vect_location,
15078 "get vectype for scalar type (group size %d):"
15079 " %T\n", group_size, scalar_type);
15080 else
15081 dump_printf_loc (MSG_NOTE, vect_location,
15082 "get vectype for scalar type: %T\n", scalar_type);
15084 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
15085 if (!vectype)
15086 return opt_result::failure_at (stmt,
15087 "not vectorized:"
15088 " unsupported data-type %T\n",
15089 scalar_type);
15091 if (dump_enabled_p ())
15092 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
15095 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
15096 return opt_result::failure_at (stmt,
15097 "not vectorized: vector stmt in loop:%G",
15098 stmt);
15100 *stmt_vectype_out = vectype;
15102 /* Don't try to compute scalar types if the stmt produces a boolean
15103 vector; use the existing vector type instead. */
15104 tree nunits_vectype = vectype;
15105 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
15107 /* The number of units is set according to the smallest scalar
15108 type (or the largest vector size, but we only support one
15109 vector size per vectorization). */
15110 scalar_type = vect_get_smallest_scalar_type (stmt_info,
15111 TREE_TYPE (vectype));
15112 if (!types_compatible_p (scalar_type, TREE_TYPE (vectype)))
15114 if (dump_enabled_p ())
15115 dump_printf_loc (MSG_NOTE, vect_location,
15116 "get vectype for smallest scalar type: %T\n",
15117 scalar_type);
15118 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
15119 group_size);
15120 if (!nunits_vectype)
15121 return opt_result::failure_at
15122 (stmt, "not vectorized: unsupported data-type %T\n",
15123 scalar_type);
15124 if (dump_enabled_p ())
15125 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
15126 nunits_vectype);
15130 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
15131 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
15132 return opt_result::failure_at (stmt,
15133 "Not vectorized: Incompatible number "
15134 "of vector subparts between %T and %T\n",
15135 nunits_vectype, *stmt_vectype_out);
15137 if (dump_enabled_p ())
15139 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
15140 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
15141 dump_printf (MSG_NOTE, "\n");
15144 *nunits_vectype_out = nunits_vectype;
15145 return opt_result::success ();
15148 /* Generate and return a statement sequence that sets the vector length LEN, computed as:
15150 min_of_start_and_end = min (START_INDEX, END_INDEX);
15151 left_len = END_INDEX - min_of_start_and_end;
15152 rhs = min (left_len, LEN_LIMIT);
15153 LEN = rhs;
15155 Note: the cost of the code generated by this function is modeled
15156 by vect_estimate_min_profitable_iters, so changes here may need
15157 corresponding changes there. */
15159 gimple_seq
15160 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
15162 gimple_seq stmts = NULL;
15163 tree len_type = TREE_TYPE (len);
15164 gcc_assert (TREE_TYPE (start_index) == len_type);
15166 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
15167 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
15168 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
15169 gimple* stmt = gimple_build_assign (len, rhs);
15170 gimple_seq_add_stmt (&stmts, stmt);
15172 return stmts;
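/* As a small numeric example (illustrative only): with START_INDEX = 8,
   END_INDEX = 10 and LEN_LIMIT = 4 the generated sequence computes
   min (10 - min (8, 10), 4) = min (2, 4) = 2, i.e. only two lanes are
   active in that final iteration.  */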