/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2025 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "omp-simd-clone.h"
#include "tree-vector-builder.h"
#include "tree-ssa-loop-ivopts.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
/* TODO:  Note the vectorizer still builds COND_EXPRs with GENERIC compares
   in the first operand.  Disentangling this is future work, the
   IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  int_range_max vr;
  tree vr_min, vr_max;
  get_range_query (cfun)->range_of_expr (vr, var);
  if (vr.undefined_p ())
    vr.set_varying (TREE_TYPE (var));
  value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
  *min_value = wi::to_wide (vr_min);
  *max_value = wi::to_wide (vr_max);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
                                         nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
        {
          dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
          dump_printf (MSG_NOTE, " has range [");
          dump_hex (MSG_NOTE, *min_value);
          dump_printf (MSG_NOTE, ", ");
          dump_hex (MSG_NOTE, *max_value);
          dump_printf (MSG_NOTE, "]\n");
        }
      return true;
    }
  else
    {
      if (dump_enabled_p ())
        {
          dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
          dump_printf (MSG_NOTE, " has no range info\n");
        }
      return false;
    }
}
/* Report that we've found an instance of pattern PATTERN in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}
/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
                        stmt_vec_info orig_stmt_info, tree vectype)
{
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    {
      gcc_assert (!vectype
                  || is_a <gcond *> (pattern_stmt)
                  || (VECTOR_BOOLEAN_TYPE_P (vectype)
                      == vect_use_mask_type_p (orig_stmt_info)));
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
    }
  return pattern_stmt_info;
}
/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
                       stmt_vec_info orig_stmt_info, tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
}
/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.
   If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
   from which it was derived.  */

static void
append_pattern_def_seq (vec_info *vinfo,
                        stmt_vec_info stmt_info, gimple *new_stmt,
                        tree vectype = NULL_TREE,
                        tree scalar_type_for_mask = NULL_TREE)
{
  gcc_assert (!scalar_type_for_mask
              == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
      if (scalar_type_for_mask)
        new_stmt_info->mask_precision
          = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      new_stmt);
}
/* Add NEW_STMT to VINFO's invariant pattern definition statements.  These
   statements are not vectorized but are materialized as scalar in the loop
   preheader.  */

static void
append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
{
  gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
}
/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
        {
          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
          if (bb == NULL
              || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
            e = NULL;
        }
    }
  return e;
}
/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab with the given SUBTYPE.
   ITYPE specifies the type of (some of) the scalar inputs and OTYPE
   specifies the type of the scalar result.

   If CODE allows the inputs and outputs to have different type
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
                                 tree itype, tree *vecotype_out,
                                 tree *vecitype_out = NULL,
                                 enum optab_subtype subtype = optab_default)
{
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, subtype);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}
/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a conversion optab with the given SUBTYPE.
   ITYPE specifies the type of (some of) the scalar inputs and OTYPE
   specifies the type of the scalar result.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_conv_optab_p (vec_info *vinfo, tree otype, tree_code code,
                               tree itype, tree *vecotype_out,
                               tree *vecitype_out = NULL,
                               enum optab_subtype subtype = optab_default)
{
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  if (!vecitype || !vecotype)
    return false;

  if (!directly_supported_p (code, vecotype, vecitype, subtype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}
/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}
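/* As a worked illustration, assuming BITS_PER_UNIT is 8: a required
   precision of 17 rounds up to 1 << ceil_log2 (17) == 32, a precision of 8
   stays at 8, and a precision of 3 rounds up to the smallest full
   element, 8.  */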
/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return vect_stmt_to_vectorize (def_stmt_info);
  return NULL;
}
/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};
inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
                               stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}
/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a combination of the two.  E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
            +------------+ +--------------+
               promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
                                      vect_unpromoted_value *unprom,
                                      bool *single_use_p = NULL)
{
  tree op_type = TREE_TYPE (op);
  if (!INTEGRAL_TYPE_P (op_type))
    return NULL_TREE;

  tree res = NULL_TREE;
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
        break;

      /* If OP is the input of a demotion, skip over it to see whether
         OP is itself the result of a promotion.  If so, the combined
         effect of the promotion and the demotion might fit the required
         pattern, otherwise neither operation fits.

         This copes with cases such as the result of an arithmetic
         operation being truncated before being stored, and where that
         arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
        {
          /* Use OP as the UNPROM described above if we haven't yet
             found a promotion, or if using the new input preserves the
             sign of the previous promotion.  */
          if (!res
              || TYPE_PRECISION (unprom->type) == orig_precision
              || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
              || (TYPE_UNSIGNED (op_type)
                  && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
            {
              unprom->set_op (op, dt, caster);
              min_precision = TYPE_PRECISION (op_type);
            }
          /* Stop if we've already seen a promotion and if this
             conversion does more than change the sign.  */
          else if (TYPE_PRECISION (op_type)
                   != TYPE_PRECISION (unprom->type))
            break;

          /* The sequence now extends to OP.  */
          res = op;
        }

      /* See whether OP is defined by a cast.  Record it as CASTER if
         the cast is potentially vectorizable.  */
      if (!def_stmt)
        break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
          && single_use_p
          && !STMT_VINFO_RELATED_STMT (caster)
          && !has_single_use (res))
        *single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
        break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}
/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
                            tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
        return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
                                     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
        return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}
/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
                                TYPE_PRECISION (new_type));
  precision *= 2;

  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}
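/* As an illustration, assuming a 32-bit TYPE: jousting *COMMON_TYPE ==
   unsigned char against NEW_TYPE == signed char takes the mismatched-sign
   path and yields a 16-bit signed *COMMON_TYPE, which is still narrower
   than TYPE.  Jousting unsigned short against signed short would instead
   need a 32-bit signed type, which is not narrower than TYPE, so the
   joust fails.  */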
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   If SUBTYPE then allow that the operands may differ in sign but not
   in precision.  SUBTYPE is updated to reflect this.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
                      code_helper widened_code, bool shift_p,
                      unsigned int max_nops,
                      vect_unpromoted_value *unprom, tree *common_type,
                      enum optab_subtype *subtype = NULL)
{
  /* Check for an integer operation with the right code.  */
  gimple *stmt = stmt_info->stmt;
  if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
    return 0;

  code_helper rhs_code;
  if (is_gimple_assign (stmt))
    rhs_code = gimple_assign_rhs_code (stmt);
  else if (is_gimple_call (stmt))
    rhs_code = gimple_call_combined_fn (stmt);
  else
    return 0;

  if (rhs_code != code
      && rhs_code != widened_code)
    return 0;

  tree lhs = gimple_get_lhs (stmt);
  tree type = TREE_TYPE (lhs);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_arg (stmt, i);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
        {
          /* We already have a common type from earlier operands.
             Update it to account for OP.  */
          this_unprom->set_op (op, vect_constant_def);
          if (!vect_joust_widened_integer (type, shift_p, op, common_type))
            return 0;
        }
      else
        {
          /* Only allow shifts by constants.  */
          if (shift_p && i == 1)
            return 0;

          if (rhs_code != code)
            {
              /* If rhs_code is widened_code, don't look through further
                 possible promotions, there is a promotion already embedded
                 in the WIDEN_*_EXPR.  */
              if (TREE_CODE (op) != SSA_NAME
                  || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
                return 0;

              stmt_vec_info def_stmt_info;
              gimple *def_stmt;
              vect_def_type dt;
              if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
                                       &def_stmt))
                return 0;
              this_unprom->set_op (op, dt, NULL);
            }
          else if (!vect_look_through_possible_promotion (vinfo, op,
                                                          this_unprom))
            return 0;

          if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
            {
              /* The operand isn't widened.  If STMT_INFO has the code
                 for an unwidened operation, recursively check whether
                 this operand is a node of the tree.  */
              if (rhs_code != code
                  || max_nops == 0
                  || this_unprom->dt != vect_internal_def)
                return 0;

              /* Give back the leaf slot allocated above now that we're
                 not treating this as a leaf operand.  */
              max_nops += 1;

              /* Recursively process the definition of the operand.  */
              stmt_vec_info def_stmt_info
                = vect_get_internal_def (vinfo, this_unprom->op);
              nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
                                           widened_code, shift_p, max_nops,
                                           this_unprom, common_type,
                                           subtype);
              if (nops == 0)
                return 0;

              max_nops -= nops;
            }
          else
            {
              /* Make sure that the operand is narrower than the result.  */
              if (TYPE_PRECISION (this_unprom->type) * 2
                  > TYPE_PRECISION (type))
                return 0;

              /* Update COMMON_TYPE for the new operand.  */
              if (i == 0)
                *common_type = this_unprom->type;
              else if (!vect_joust_widened_type (type, this_unprom->type,
                                                 common_type))
                {
                  if (subtype)
                    {
                      /* See if we can sign extend the smaller type.  */
                      if (TYPE_PRECISION (this_unprom->type)
                          > TYPE_PRECISION (*common_type))
                        *common_type = this_unprom->type;
                      *subtype = optab_vector_mixed_sign;
                    }
                  else
                    return 0;
                }
            }
        }
      next_op += nops;
    }
  return next_op;
}
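/* As a small illustration, with hypothetical CODE == PLUS_EXPR,
   WIDENED_CODE == WIDEN_PLUS_EXPR and MAX_NOPS == 4, a statement computing

     ((int) a + (int) b) + ((int) c + (int) d)

   from four shorts is such a tree: it has four leaf operands and
   *COMMON_TYPE ends up as short.  */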
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
                      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
         the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
         in-situ without worrying about changing the code for the
         containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
          dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
                           stmt2_info->stmt);
        }

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
        /* STMT2_INFO is the actual pattern statement.  Add STMT1
           to the end of the definition sequence.  */
        gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
        {
          /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
             before it.  */
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
          gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
        }
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
         two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
      if (!lhs_vectype)
        return false;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "into pattern statements: %G", stmt1);
          dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
                           (gimple *) new_stmt2);
        }
      return true;
    }
}
/* Look for the following pattern
        X = x[i]
        Y = y[i]
        DIFF = X - Y
        DAD = ABS_EXPR <DIFF>

   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
   HALF_TYPE and UNPROM will be set should the statement be found to
   be a widened operation.
   DIFF_STMT will be set to the MINUS_EXPR
   statement that precedes the ABS_STMT if it is a MINUS_EXPR.  */

static bool
vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
                                tree *half_type,
                                vect_unpromoted_value unprom[2],
                                gassign **diff_stmt)
{
  if (!abs_stmt)
    return false;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
  if (code != ABS_EXPR && code != ABSU_EXPR)
    return false;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);

  if (!ANY_INTEGRAL_TYPE_P (abs_type)
      || TYPE_OVERFLOW_WRAPS (abs_type)
      || TYPE_UNSIGNED (abs_type))
    return false;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
                                                    &unprom_diff);
  if (!abs_oprnd)
    return false;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return false;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return false;

  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
  if (diff_stmt && diff
      && gimple_assign_rhs_code (diff) == MINUS_EXPR
      && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
    *diff_stmt = diff;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
                            MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
                            false, 2, unprom, half_type))
    return true;

  return false;
}
/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.

   If SUBTYPE then convert the type based on the subtype.  */

static tree
vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
                    vect_unpromoted_value *unprom, tree vectype,
                    enum optab_subtype subtype = optab_default)
{
  /* Update the type if the signs differ.  */
  if (subtype == optab_vector_mixed_sign)
    {
      gcc_assert (!TYPE_UNSIGNED (type));
      if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
        {
          type = unsigned_type_for (type);
          vectype = unsigned_type_for (vectype);
        }
    }

  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
         instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
        input = lhs;
      /* If the precision we want is between the source and result
         precisions of the existing cast, try splitting the cast into
         two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
               && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
        {
          /* In order to preserve the semantics of the original cast,
             give the mid-way point the same signedness as the input value.

             It would be possible to use a signed type here instead if
             TYPE is signed and UNPROM->TYPE is unsigned, but that would
             make the sign of the midtype sensitive to the order in
             which we process the statements, since the signedness of
             TYPE is the signedness required by just one of possibly
             many users.  Also, unsigned promotions are usually as cheap
             as or cheaper than signed ones, so it's better to keep an
             unsigned promotion.  */
          tree midtype = build_nonstandard_integer_type
            (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
          tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
          if (vec_midtype)
            {
              input = vect_recog_temp_ssa_var (midtype, NULL);
              gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
                                                       unprom->op);
              if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
                                         vec_midtype))
                append_pattern_def_seq (vinfo, stmt_info,
                                        new_stmt, vec_midtype);
            }
        }

      /* See if we can reuse an existing result.  */
      if (types_compatible_p (type, TREE_TYPE (input)))
        return input;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (vinfo, input))
      {
        basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
        gcc_assert (!new_bb);
        return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
  return new_op;
}
/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.

   If SUBTYPE then convert the type based on the subtype.  */

static void
vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
                     tree *result, tree type, vect_unpromoted_value *unprom,
                     tree vectype, enum optab_subtype subtype = optab_default)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
        if (unprom[j].op == unprom[i].op)
          break;

      if (j < i)
        result[i] = result[j];
      else
        result[i] = vect_convert_input (vinfo, stmt_info,
                                        type, &unprom[i], vectype, subtype);
    }
}
/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
                     gimple *pattern_stmt, tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}
/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.
   Also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (vec_info *vinfo,
                                stmt_vec_info stmt_info, tree_code code,
                                tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    {
      if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
                                       code))
        return false;
    }
  else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    std::swap (*op0_out, *op1_out);
  return true;
}
/* match.pd function to match
   (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
   with conditions:
   1) @1, @2, c, d, a, b are all integral type.
   2) There's single_use for both @1 and @2.
   3) a, c have same precision.
   4) c and @1 have different precision.
   5) c, d are the same type or they can differ in sign when convert is
   truncation.

   record a and c and d and @3.  */

extern bool gimple_cond_expr_convert_p (tree, tree *, tree (*)(tree));
/* Function vect_recog_cond_expr_convert

   Try to find the following pattern:

   TYPE_AB A, B;
   TYPE_CD C, D;
   TYPE_E E;
   TYPE_E op_true = (TYPE_E) A;
   TYPE_E op_false = (TYPE_E) B;

   E = C cmp D ? op_true : op_false;

   where
   TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
   TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
   single_use of op_true and op_false.
   TYPE_AB could differ in sign when (TYPE_E) A is a truncation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with E = C cmp D ? op_true : op_false;

   Output:

   TYPE1 E' = C cmp D ? A : B;
   TYPE3 E = (TYPE3) E';

   There may be an extra nop_convert for A or B to handle different
   signedness.

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
   E = (TYPE3) E';
   E' = C cmp D ? A : B; is recorded in pattern definition statements;  */

static gimple *
vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
                                      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
  tree lhs, match[4], temp, type, new_lhs, op2;
  gimple *cond_stmt;
  gimple *pattern_stmt;

  if (!last_stmt)
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);

  /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
     TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
  if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    return NULL;

  vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);

  op2 = match[2];
  type = TREE_TYPE (match[1]);
  if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    {
      op2 = vect_recog_temp_ssa_var (type, NULL);
      gimple *nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
      append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
                              get_vectype_for_scalar_type (vinfo, type));
    }

  temp = vect_recog_temp_ssa_var (type, NULL);
  cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
                                                 match[1], op2));
  append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
                          get_vectype_for_scalar_type (vinfo, type));
  new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created pattern stmt: %G", pattern_stmt);
  return pattern_stmt;
}
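/* For illustration, a hypothetical scalar fragment of the shape targeted
   above, with 32-bit int A, B, C, D and 64-bit long E:

     E = C < D ? (long) A : (long) B;

   The select is rewritten to operate on the 32-bit values and a single
   conversion of the selected result to long is kept.  */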
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type1a x_t;
     type1b y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
   the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
   'type1a' and 'type1b' can differ.  'TYPE2' is the same size as 'TYPE1'
   or bigger.  This is a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results. It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (vec_info *vinfo,
                             stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type but the sign
       between X, Y and DPROD can differ.
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
                                       &oprnd0, &oprnd1))
    return NULL;

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  enum optab_subtype subtype = optab_vector;
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
                             false, 2, unprom0, &half_type, &subtype))
    return NULL;

  /* If there are two widening operations, make sure they agree on the sign
     of the extension.  The result of an optab_vector_mixed_sign operation
     is signed; otherwise, the result has the same sign as the operands.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && (subtype == optab_vector_mixed_sign
          ? TYPE_UNSIGNED (unprom_mult.type)
          : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  /* If the inputs have mixed signs, canonicalize on using the signed
     input type for analysis.  This also helps when emulating mixed-sign
     operations using signed operations.  */
  if (subtype == optab_vector_mixed_sign)
    half_type = signed_type_for (half_type);

  tree half_vectype;
  if (!vect_supportable_conv_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
                                      type_out, &half_vectype, subtype))
    {
      /* We can emulate a mixed-sign dot-product using a sequence of
         signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
      if (subtype != optab_vector_mixed_sign
          || !vect_supportable_conv_optab_p (vinfo, signed_type_for (type),
                                             DOT_PROD_EXPR, half_type,
                                             type_out, &half_vectype,
                                             optab_vector))
        return NULL;

      *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
                                               *type_out);
    }

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
                       unprom0, half_vectype, subtype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
                                      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}
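/* For illustration, a hypothetical scalar loop of the shape recognized
   above (signed char inputs, int accumulator):

     signed char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += (int) x[i] * (int) y[i];

   The multiply-and-accumulate becomes DOT_PROD_EXPR <x_t, y_t, sum_0>
   when the target provides the corresponding optab.  */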
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     unsigned type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (vec_info *vinfo,
                        stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree half_type;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
                                       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
                                                      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  vect_unpromoted_value unprom[2];

  if (!abs_stmt)
    {
      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
      if (!abd_stmt
          || !gimple_call_internal_p (abd_stmt)
          || gimple_call_num_args (abd_stmt) != 2)
        return NULL;

      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);

      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
        {
          if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
                                                     &unprom[0])
              || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
                                                        &unprom[1]))
            return NULL;
        }
      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
        {
          unprom[0].op = abd_oprnd0;
          unprom[0].type = TREE_TYPE (abd_oprnd0);
          unprom[1].op = abd_oprnd1;
          unprom[1].type = TREE_TYPE (abd_oprnd1);
        }
      else
        return NULL;

      half_type = unprom[0].type;
    }
  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
                                            unprom, NULL))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
                                        type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
                       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
                                              sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
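/* For illustration, a hypothetical scalar loop of the shape recognized
   above (unsigned char inputs widened to int, int accumulator):

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += abs ((int) x[i] - (int) y[i]);

   which becomes SAD_EXPR <x_t, y_t, sum_0> when the target supports it.  */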
/* Function vect_recog_abd_pattern

   Try to find the following ABsolute Difference (ABD) or
   widening ABD (WIDEN_ABD) pattern:

     TYPE1 x;
     TYPE2 y;
     TYPE3 x_cast = (TYPE3) x;             // widening or no-op
     TYPE3 y_cast = (TYPE3) y;             // widening or no-op
     TYPE3 diff = x_cast - y_cast;
     TYPE4 diff_cast = (TYPE4) diff;       // widening or no-op
     TYPE5 abs = ABS(U)_EXPR <diff_cast>;

   WIDEN_ABD exists to optimize the case where TYPE4 is at least
   twice as wide as TYPE3.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins

   Output:

   * TYPE_OUT: The type of the output of this pattern

   * Return value: A new stmt that will be used to replace the sequence of
     stmts that constitute the pattern, principally:
        out = IFN_ABD (x, y)
        out = IFN_WIDEN_ABD (x, y)
 */

static gimple *
vect_recog_abd_pattern (vec_info *vinfo,
                        stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
  if (!last_stmt)
    return NULL;

  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));

  vect_unpromoted_value unprom[2];
  gassign *diff_stmt = NULL;
  tree abd_in_type;
  if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
                                       unprom, &diff_stmt))
    {
      /* We cannot try further without having a non-widening MINUS.  */
      if (!diff_stmt)
        return NULL;

      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
      abd_in_type = signed_type_for (out_type);
    }

  tree abd_out_type = abd_in_type;

  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
  if (!vectype_in)
    return NULL;

  internal_fn ifn = IFN_ABD;
  tree vectype_out = vectype_in;

  if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
      && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
    {
      tree mid_type
        = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
                                          TYPE_UNSIGNED (abd_in_type));
      tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);

      code_helper dummy_code;
      int dummy_int;
      auto_vec<tree> dummy_vec;
      if (mid_vectype
          && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
                                             stmt_vinfo, mid_vectype,
                                             vectype_in,
                                             &dummy_code, &dummy_code,
                                             &dummy_int, &dummy_vec))
        {
          ifn = IFN_VEC_WIDEN_ABD;
          abd_out_type = mid_type;
          vectype_out = mid_vectype;
        }
    }

  if (ifn == IFN_ABD
      && !direct_internal_fn_supported_p (ifn, vectype_in,
                                          OPTIMIZE_FOR_SPEED))
    return NULL;

  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);

  tree abd_oprnds[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
                       abd_in_type, unprom, vectype_in);

  *type_out = get_vectype_for_scalar_type (vinfo, out_type);

  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
                                                abd_oprnds[0], abd_oprnds[1]);
  gimple_call_set_lhs (abd_stmt, abd_result);
  gimple_set_location (abd_stmt, gimple_location (last_stmt));

  gimple *stmt = abd_stmt;
  if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
      && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
      && !TYPE_UNSIGNED (abd_out_type))
    {
      tree unsign = unsigned_type_for (abd_out_type);
      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
      vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
    }

  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
}
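/* For illustration, a hypothetical scalar statement of the kind handled
   above (signed char inputs, the subtraction done on the promoted values):

     out[i] = abs (x[i] - y[i]);

   is replaced by IFN_ABD (x[i], y[i]), or by IFN_VEC_WIDEN_ABD when the
   result is required in a type at least twice as wide as the inputs.  */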
/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (vec_info *vinfo,
                             stmt_vec_info last_stmt_info, tree *type_out,
                             tree_code orig_code, code_helper wide_code,
                             bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
                             shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
                                            TYPE_UNSIGNED (half_type));

  /* Check target support  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  tree ctype = itype;
  tree vecctype = vecitype;
  if (orig_code == MINUS_EXPR
      && TYPE_UNSIGNED (itype)
      && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    {
      /* Subtraction is special, even if half_type is unsigned and no matter
         whether type is signed or unsigned, if type is wider than itype,
         we need to sign-extend from the widening operation result to the
         result type.
         Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
         itype unsigned short and type either int or unsigned int.
         Widened (unsigned short) 0xfe - (unsigned short) 0xff is
         (unsigned short) 0xffff, but for type int we want the result -1
         and for type unsigned int 0xffffffff rather than 0xffff.  */
      ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
      vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    }

  code_helper dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !vecctype
      || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
                                          vecitype, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (vinfo, last_stmt_info,
                       2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);

  if (vecctype != vecitype)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
                                        pattern_stmt, vecitype);

  return vect_convert_output (vinfo, last_stmt_info,
                              type, pattern_stmt, vecctype);
}
/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
                               tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
                                      MULT_EXPR, WIDEN_MULT_EXPR, false,
                                      "vect_recog_widen_mult_pattern");
}
/* Try to detect addition on widened inputs, converting PLUS_EXPR
   to IFN_VEC_WIDEN_PLUS.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
                               tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
                                      PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
                                      false, "vect_recog_widen_plus_pattern");
}

/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
   to IFN_VEC_WIDEN_MINUS.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
                                tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
                                      MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
                                      false, "vect_recog_widen_minus_pattern");
}
/* Try to detect abd on widened inputs, converting IFN_ABD
   to IFN_VEC_WIDEN_ABD.  */

static gimple *
vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                              tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
  if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
    return NULL;

  tree last_rhs = gimple_assign_rhs1 (last_stmt);

  tree in_type = TREE_TYPE (last_rhs);
  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
  if (!INTEGRAL_TYPE_P (in_type)
      || !INTEGRAL_TYPE_P (out_type)
      || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
      || !TYPE_UNSIGNED (in_type))
    return NULL;

  vect_unpromoted_value unprom;
  tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
  if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
    return NULL;

  stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
  if (!abd_pattern_vinfo)
    return NULL;

  gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
  if (!abd_stmt
      || !gimple_call_internal_p (abd_stmt)
      || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
    return NULL;

  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);

  code_helper dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
                                       vectype_out, vectype_in,
                                       &dummy_code, &dummy_code,
                                       &dummy_int, &dummy_vec))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);

  *type_out = vectype_out;

  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
  tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
  gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
                                                      abd_oprnd0, abd_oprnd1);
  gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
  gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
  return widen_abd_stmt;
}
/* Function vect_recog_ctz_ffs_pattern

   Try to find the following pattern:

   TYPE1 A;
   TYPE1 B;

   B = __builtin_ctz{,l,ll} (A);

   or

   B = __builtin_ffs{,l,ll} (A);

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with B = __builtin_* (A);

   Output:

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern, using clz or popcount builtins.  */

static gimple *
vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                            tree *type_out)
{
  gimple *call_stmt = stmt_vinfo->stmt;
  gimple *pattern_stmt;
  tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
  tree new_var;
  internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
  bool defined_at_zero = true, defined_at_zero_new = false;
  int val = 0, val_new = 0, val_cmp = 0;
  int prec;
  int sub = 0, add = 0;
  location_t loc;

  if (!is_gimple_call (call_stmt))
    return NULL;

  if (gimple_call_num_args (call_stmt) != 1
      && gimple_call_num_args (call_stmt) != 2)
    return NULL;

  rhs_oprnd = gimple_call_arg (call_stmt, 0);
  rhs_type = TREE_TYPE (rhs_oprnd);
  lhs_oprnd = gimple_call_lhs (call_stmt);
  if (!lhs_oprnd)
    return NULL;
  lhs_type = TREE_TYPE (lhs_oprnd);
  if (!INTEGRAL_TYPE_P (lhs_type)
      || !INTEGRAL_TYPE_P (rhs_type)
      || !type_has_mode_precision_p (rhs_type)
      || TREE_CODE (rhs_oprnd) != SSA_NAME)
    return NULL;

  switch (gimple_call_combined_fn (call_stmt))
    {
    CASE_CFN_CTZ:
      ifn = IFN_CTZ;
      if (!gimple_call_internal_p (call_stmt)
          || gimple_call_num_args (call_stmt) != 2)
        defined_at_zero = false;
      else
        val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
      break;
    CASE_CFN_FFS:
      ifn = IFN_FFS;
      break;
    default:
      return NULL;
    }

  prec = TYPE_PRECISION (rhs_type);
  loc = gimple_location (call_stmt);

  vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);

  vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
  if (!vec_rhs_type)
    return NULL;

  /* Do it only if the backend doesn't have ctz<vector_mode>2 or
     ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
     popcount<vector_mode>2.  */
  if (!vec_type
      || direct_internal_fn_supported_p (ifn, vec_rhs_type,
                                         OPTIMIZE_FOR_SPEED))
    return NULL;

  if (ifn == IFN_FFS
      && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
                                         OPTIMIZE_FOR_SPEED))
    {
      ifnnew = IFN_CTZ;
      defined_at_zero_new
        = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
                                     val_new) == 2;
    }
  else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
                                           OPTIMIZE_FOR_SPEED))
    {
      ifnnew = IFN_CLZ;
      defined_at_zero_new
        = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
                                     val_new) == 2;
    }
  if ((ifnnew == IFN_LAST
       || (defined_at_zero && !defined_at_zero_new))
      && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
                                         OPTIMIZE_FOR_SPEED))
    {
      ifnnew = IFN_POPCOUNT;
      defined_at_zero_new = true;
      val_new = prec;
    }
  if (ifnnew == IFN_LAST)
    return NULL;

  vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);

  if ((ifnnew == IFN_CLZ
       && defined_at_zero
       && defined_at_zero_new
       && val == prec
       && val_new == prec)
      || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
    {
      /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
         .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
      if (ifnnew == IFN_CLZ)
        sub = prec;
      val_cmp = prec;

      if (!TYPE_UNSIGNED (rhs_type))
        {
          rhs_type = unsigned_type_for (rhs_type);
          vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
          new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
          pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
          append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
                                  vec_rhs_type);
          rhs_oprnd = new_var;
        }

      tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
                                          build_int_cst (rhs_type, -1));
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
                                          m1, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
    }
  else if (ifnnew == IFN_CLZ)
    {
      /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
         .FFS (X) = PREC - .CLZ (X & -X).  */
      sub = prec - (ifn == IFN_CTZ);
      val_cmp = sub - val_new;

      tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
                                          rhs_oprnd, neg);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
    }
  else if (ifnnew == IFN_POPCOUNT)
    {
      /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
         .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).  */
      sub = prec + (ifn == IFN_FFS);
      val_cmp = sub;

      tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
                                          rhs_oprnd, neg);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
    }
  else if (ifnnew == IFN_CTZ)
    {
      /* .FFS (X) = .CTZ (X) + 1.  */
      add = 1;
      val_cmp++;
    }

  /* Create B = .IFNNEW (A).  */
  new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
  if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
    pattern_stmt
      = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
                                    build_int_cst (integer_type_node,
                                                   val_new));
  else
    pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
  gimple_call_set_lhs (pattern_stmt, new_var);
  gimple_set_location (pattern_stmt, loc);
  *type_out = vec_type;

  if (sub)
    {
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
      tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
      pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
                                          build_int_cst (lhs_type, sub),
                                          new_var);
      gimple_set_location (pattern_stmt, loc);
      new_var = ret_var;
    }
  else if (add)
    {
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
, vec_type
);
2011 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2012 pattern_stmt
= gimple_build_assign (ret_var
, PLUS_EXPR
, new_var
,
2013 build_int_cst (lhs_type
, add
));
2014 gimple_set_location (pattern_stmt
, loc
);
2019 && (!defined_at_zero_new
|| val
!= val_cmp
))
2021 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2022 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2023 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
2024 rhs_type
= TREE_TYPE (rhs_oprnd
);
2025 tree cmp
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
2026 pattern_stmt
= gimple_build_assign (cmp
, NE_EXPR
, rhs_oprnd
,
2027 build_zero_cst (rhs_type
));
2028 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
,
2029 truth_type_for (vec_type
), rhs_type
);
2030 pattern_stmt
= gimple_build_assign (ret_var
, COND_EXPR
, cmp
,
2032 build_int_cst (lhs_type
, val
));
2035 if (dump_enabled_p ())
2036 dump_printf_loc (MSG_NOTE
, vect_location
,
2037 "created pattern stmt: %G", pattern_stmt
);
2039 return pattern_stmt
;
/* Function vect_recog_popcount_clz_ctz_ffs_pattern

   Try to find the following pattern:

   temp_in = (UTYPE2) A;
   temp_out = __builtin_popcount{,l,ll} (temp_in);
   B = (TYPE1) temp_out;

   TYPE2 may or may not be equal to TYPE3.
   i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
   i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll

   * STMT_VINFO: The stmt from which the pattern search begins,
   here it starts with B = (TYPE1) temp_out;

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   B = .POPCOUNT (A);

   Similarly for clz, ctz and ffs.  */
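/* Editor's illustrative sketch, not taken from the surrounding sources: the
   scalar shape matched here is a builtin applied to a widened operand whose
   result is immediately narrowed again, e.g.

     int count_bits (unsigned int x)
     {
       return __builtin_popcountll (x);
     }

   where x is implicitly widened to unsigned long long and the int result is
   used directly; the pattern replaces this with a single-width
   B = .POPCOUNT (x) internal call.  The name count_bits is made up for the
   example.  */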
2076 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info
*vinfo
,
2077 stmt_vec_info stmt_vinfo
,
2080 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
);
2081 gimple
*call_stmt
, *pattern_stmt
;
2082 tree rhs_oprnd
, rhs_origin
, lhs_oprnd
, lhs_type
, vec_type
, new_var
;
2083 internal_fn ifn
= IFN_LAST
;
2086 /* Find B = (TYPE1) temp_out. */
2089 tree_code code
= gimple_assign_rhs_code (last_stmt
);
2090 if (!CONVERT_EXPR_CODE_P (code
))
2093 lhs_oprnd
= gimple_assign_lhs (last_stmt
);
2094 lhs_type
= TREE_TYPE (lhs_oprnd
);
2095 if (!INTEGRAL_TYPE_P (lhs_type
))
2098 rhs_oprnd
= gimple_assign_rhs1 (last_stmt
);
2099 if (TREE_CODE (rhs_oprnd
) != SSA_NAME
2100 || !has_single_use (rhs_oprnd
))
2102 call_stmt
= SSA_NAME_DEF_STMT (rhs_oprnd
);
2104 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2105 if (!is_gimple_call (call_stmt
))
2107 switch (gimple_call_combined_fn (call_stmt
))
2115 /* Punt if call result is unsigned and defined value at zero
2116 is negative, as the negative value doesn't extend correctly. */
2117 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd
))
2118 && gimple_call_internal_p (call_stmt
)
2119 && CLZ_DEFINED_VALUE_AT_ZERO
2120 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val
) == 2
2126 /* Punt if call result is unsigned and defined value at zero
2127 is negative, as the negative value doesn't extend correctly. */
2128 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd
))
2129 && gimple_call_internal_p (call_stmt
)
2130 && CTZ_DEFINED_VALUE_AT_ZERO
2131 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val
) == 2
2142 if (gimple_call_num_args (call_stmt
) != 1
2143 && gimple_call_num_args (call_stmt
) != 2)
2146 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
2147 vect_unpromoted_value unprom_diff
;
2149 = vect_look_through_possible_promotion (vinfo
, rhs_oprnd
, &unprom_diff
);
2154 /* Input and output of .POPCOUNT should be same-precision integer. */
2155 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (lhs_type
))
2158 /* Also A should be unsigned or same precision as temp_in, otherwise
2159 different builtins/internal functions have different behaviors. */
2160 if (TYPE_PRECISION (unprom_diff
.type
)
2161 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd
)))
2165 /* For popcount require zero extension, which doesn't add any
2166 further bits to the count. */
2167 if (!TYPE_UNSIGNED (unprom_diff
.type
))
2171 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2172 if it is undefined at zero or if it matches also for the
2173 defined value there. */
2174 if (!TYPE_UNSIGNED (unprom_diff
.type
))
2176 if (!type_has_mode_precision_p (lhs_type
)
2177 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd
)))
2179 addend
= (TYPE_PRECISION (TREE_TYPE (rhs_oprnd
))
2180 - TYPE_PRECISION (lhs_type
));
2181 if (gimple_call_internal_p (call_stmt
)
2182 && gimple_call_num_args (call_stmt
) == 2)
2185 val1
= tree_to_shwi (gimple_call_arg (call_stmt
, 1));
2187 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2189 if (d2
!= 2 || val1
!= val2
+ addend
)
2194 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2195 if it is undefined at zero or if it matches also for the
2196 defined value there. */
2197 if (gimple_call_internal_p (call_stmt
)
2198 && gimple_call_num_args (call_stmt
) == 2)
2201 val1
= tree_to_shwi (gimple_call_arg (call_stmt
, 1));
2203 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2205 if (d2
!= 2 || val1
!= val2
)
2210 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2216 vec_type
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
2217 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2222 = direct_internal_fn_supported_p (ifn
, vec_type
, OPTIMIZE_FOR_SPEED
);
2230 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2231 if (direct_internal_fn_supported_p (IFN_CTZ
, vec_type
,
2232 OPTIMIZE_FOR_SPEED
))
2236 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2238 if (direct_internal_fn_supported_p (IFN_CLZ
, vec_type
,
2239 OPTIMIZE_FOR_SPEED
))
2241 if (direct_internal_fn_supported_p (IFN_POPCOUNT
, vec_type
,
2242 OPTIMIZE_FOR_SPEED
))
2249 vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2252 /* Create B = .POPCOUNT (A). */
2253 new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2254 tree arg2
= NULL_TREE
;
2257 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2259 arg2
= build_int_cst (integer_type_node
, val
);
2260 else if (ifn
== IFN_CTZ
2261 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2263 arg2
= build_int_cst (integer_type_node
, val
);
2265 pattern_stmt
= gimple_build_call_internal (ifn
, 2, unprom_diff
.op
, arg2
);
2267 pattern_stmt
= gimple_build_call_internal (ifn
, 1, unprom_diff
.op
);
2268 gimple_call_set_lhs (pattern_stmt
, new_var
);
2269 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
2270 *type_out
= vec_type
;
2272 if (dump_enabled_p ())
2273 dump_printf_loc (MSG_NOTE
, vect_location
,
2274 "created pattern stmt: %G", pattern_stmt
);
2278 gcc_assert (supported
);
2279 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2280 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2281 pattern_stmt
= gimple_build_assign (ret_var
, PLUS_EXPR
, new_var
,
2282 build_int_cst (lhs_type
, addend
));
2284 else if (!supported
)
2286 stmt_vec_info new_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
2287 STMT_VINFO_VECTYPE (new_stmt_info
) = vec_type
;
2289 = vect_recog_ctz_ffs_pattern (vinfo
, new_stmt_info
, type_out
);
2290 if (pattern_stmt
== NULL
)
2292 if (gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info
))
2294 gimple_seq
*pseq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
2295 gimple_seq_add_seq_without_update (pseq
, seq
);
2298 return pattern_stmt
;
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

   x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   * STMT_VINFO: The stmt from which the pattern search begins.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   x = x * x
   or
   x = sqrt (x)  */
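/* Editor's illustrative sketch, not taken from the surrounding sources: for
   a constant exponent of 2 the call collapses to a multiply, e.g.

     void square_all (double *x, int n)
     {
       for (int i = 0; i < n; i++)
	 x[i] = __builtin_pow (x[i], 2.0);
     }

   becomes x[i] = x[i] * x[i] per element, while an exponent of 0.5 becomes a
   square root.  The names square_all, x and n are made up for the
   example.  */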
2326 vect_recog_pow_pattern (vec_info
*vinfo
,
2327 stmt_vec_info stmt_vinfo
, tree
*type_out
)
2329 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2334 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
2337 switch (gimple_call_combined_fn (last_stmt
))
2347 base
= gimple_call_arg (last_stmt
, 0);
2348 exp
= gimple_call_arg (last_stmt
, 1);
2349 if (TREE_CODE (exp
) != REAL_CST
2350 && TREE_CODE (exp
) != INTEGER_CST
)
2352 if (flag_unsafe_math_optimizations
2353 && TREE_CODE (base
) == REAL_CST
2354 && gimple_call_builtin_p (last_stmt
, BUILT_IN_NORMAL
))
2356 combined_fn log_cfn
;
2357 built_in_function exp_bfn
;
2358 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
2361 log_cfn
= CFN_BUILT_IN_LOG
;
2362 exp_bfn
= BUILT_IN_EXP
;
2365 log_cfn
= CFN_BUILT_IN_LOGF
;
2366 exp_bfn
= BUILT_IN_EXPF
;
2369 log_cfn
= CFN_BUILT_IN_LOGL
;
2370 exp_bfn
= BUILT_IN_EXPL
;
2375 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
2376 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
2377 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2378 does that, but if C is a power of 2, we want to use
2379 exp2 (log2 (C) * x) in the non-vectorized version, but for
2380 vectorization we don't have vectorized exp2. */
2382 && TREE_CODE (logc
) == REAL_CST
2384 && lookup_attribute ("omp declare simd",
2385 DECL_ATTRIBUTES (exp_decl
)))
2387 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
2388 if (node
->simd_clones
== NULL
)
2390 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
2391 || node
->definition
)
2393 expand_simd_clones (node
);
2394 if (node
->simd_clones
== NULL
)
2397 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
2400 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2401 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
2402 append_pattern_def_seq (vinfo
, stmt_vinfo
, g
);
2403 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2404 g
= gimple_build_call (exp_decl
, 1, def
);
2405 gimple_call_set_lhs (g
, res
);
2413 /* We now have a pow or powi builtin function call with a constant
2416 /* Catch squaring. */
2417 if ((tree_fits_shwi_p (exp
)
2418 && tree_to_shwi (exp
) == 2)
2419 || (TREE_CODE (exp
) == REAL_CST
2420 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
2422 if (!vect_supportable_direct_optab_p (vinfo
, TREE_TYPE (base
), MULT_EXPR
,
2423 TREE_TYPE (base
), type_out
))
2426 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2427 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
2431 /* Catch square root. */
2432 if (TREE_CODE (exp
) == REAL_CST
2433 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
2435 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
2437 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
2438 OPTIMIZE_FOR_SPEED
))
2440 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
2441 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
2442 gimple_call_set_lhs (stmt
, var
);
2443 gimple_call_set_nothrow (stmt
, true);
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S3, the pattern {S2,S3} will be
   detected.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
     WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
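/* Editor's illustrative sketch, not taken from the surrounding sources: the
   classic instance is summing narrow elements into a wide accumulator, e.g.

     int sum_bytes (const unsigned char *p, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += p[i];
       return sum;
     }

   where the implicit widening of p[i] to int followed by the addition maps
   onto WIDEN_SUM <p[i], sum>.  The names sum_bytes, p and n are made up for
   the example.  */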
static gimple *
vect_recog_widen_sum_pattern (vec_info *vinfo,
			      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1)
      || TREE_CODE (oprnd0) != SSA_NAME
      || !vinfo->lookup_def (oprnd0))
    return NULL;

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
					unprom0.type, type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}
/* Function vect_recog_bitfield_ref_pattern

   Try to find the following pattern:

   bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
   result = (type_out) bf_value;

   or

   if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)

   where type_out is a non-bitfield type, that is to say, its precision matches
   2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with:
   result = (type_out) bf_value;

   or

   if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  If the precision of type_out is bigger
   than the precision type of _1 we perform the widening before the shifting,
   since the new precision will be large enough to shift the value and moving
   widening operations up the statement chain enables the generation of
   widening loads.  If we are widening and the operation after the pattern is
   an addition then we mask first and shift later, to enable the generation of
   shifting adds.  In the case of narrowing we will always mask first, shift
   last and then perform a narrowing operation.  This will enable the
   generation of narrowing shifts.

   Widening with mask first, shift later:
   container = (type_out) container;
   masked = container & (((1 << bitsize) - 1) << bitpos);
   result = masked >> bitpos;

   Widening with shift first, mask last:
   container = (type_out) container;
   shifted = container >> bitpos;
   result = shifted & ((1 << bitsize) - 1);

   Narrowing:
   masked = container & (((1 << bitsize) - 1) << bitpos);
   result = masked >> bitpos;
   result = (type_out) result;

   If the bitfield is signed and it's wider than type_out, we need to
   keep the result sign-extended:
   container = (type) container;
   masked = container << (prec - bitsize - bitpos);
   result = (type_out) (masked >> (prec - bitsize));

   Here type is the signed variant of the wider of type_out and the type
   of container.

   The shifting is always optional depending on whether bitpos != 0.

   When the original bitfield was inside a gcond then a new gcond is also
   generated with the new `result` as the operand to the comparison.  */
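/* Editor's illustrative sketch, not taken from the surrounding sources: a
   loop reading a bit-field, e.g.

     struct rec { unsigned int tag : 3, payload : 13, pad : 16; };

     void get_payload (const struct rec *r, unsigned short *out, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = r[i].payload;
     }

   is lowered (once if-conversion has turned the bit-field read into a
   BIT_FIELD_REF of the 32-bit container) into one of the shift/mask
   sequences described above.  The struct and names are made up for the
   example.  */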
2611 vect_recog_bitfield_ref_pattern (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2614 gimple
*bf_stmt
= NULL
;
2615 tree lhs
= NULL_TREE
;
2616 tree ret_type
= NULL_TREE
;
2617 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
2618 if (gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt
))
2620 tree op
= gimple_cond_lhs (cond_stmt
);
2621 if (TREE_CODE (op
) != SSA_NAME
)
2623 bf_stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (op
));
2624 if (TREE_CODE (gimple_cond_rhs (cond_stmt
)) != INTEGER_CST
)
2627 else if (is_gimple_assign (stmt
)
2628 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt
))
2629 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
2631 gimple
*second_stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
2632 bf_stmt
= dyn_cast
<gassign
*> (second_stmt
);
2633 lhs
= gimple_assign_lhs (stmt
);
2634 ret_type
= TREE_TYPE (lhs
);
2638 || gimple_assign_rhs_code (bf_stmt
) != BIT_FIELD_REF
)
2641 tree bf_ref
= gimple_assign_rhs1 (bf_stmt
);
2642 tree container
= TREE_OPERAND (bf_ref
, 0);
2643 ret_type
= ret_type
? ret_type
: TREE_TYPE (container
);
2645 if (!bit_field_offset (bf_ref
).is_constant ()
2646 || !bit_field_size (bf_ref
).is_constant ()
2647 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container
))))
2650 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref
))
2651 || !INTEGRAL_TYPE_P (TREE_TYPE (container
))
2652 || TYPE_MODE (TREE_TYPE (container
)) == E_BLKmode
)
2655 gimple
*use_stmt
, *pattern_stmt
;
2656 use_operand_p use_p
;
2657 bool shift_first
= true;
2658 tree container_type
= TREE_TYPE (container
);
2659 tree vectype
= get_vectype_for_scalar_type (vinfo
, container_type
);
2661 /* Calculate shift_n before the adjustments for widening loads, otherwise
2662 the container may change and we have to consider offset change for
2663 widening loads on big endianness. The shift_n calculated here can be
2664 independent of widening. */
2665 unsigned HOST_WIDE_INT shift_n
= bit_field_offset (bf_ref
).to_constant ();
2666 unsigned HOST_WIDE_INT mask_width
= bit_field_size (bf_ref
).to_constant ();
2667 unsigned HOST_WIDE_INT prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2668 if (BYTES_BIG_ENDIAN
)
2669 shift_n
= prec
- shift_n
- mask_width
;
2671 bool ref_sext
= (!TYPE_UNSIGNED (TREE_TYPE (bf_ref
)) &&
2672 TYPE_PRECISION (ret_type
) > mask_width
);
2673 bool load_widen
= (TYPE_PRECISION (TREE_TYPE (container
)) <
2674 TYPE_PRECISION (ret_type
));
2676 /* We move the conversion earlier if the loaded type is smaller than the
2677 return type to enable the use of widening loads. And if we need a
2678 sign extension, we need to convert the loaded value early to a signed
2680 if (ref_sext
|| load_widen
)
2682 tree type
= load_widen
? ret_type
: container_type
;
2684 type
= gimple_signed_type (type
);
2685 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
),
2686 NOP_EXPR
, container
);
2687 container
= gimple_get_lhs (pattern_stmt
);
2688 container_type
= TREE_TYPE (container
);
2689 prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2690 vectype
= get_vectype_for_scalar_type (vinfo
, container_type
);
2691 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2693 else if (!useless_type_conversion_p (TREE_TYPE (container
), ret_type
))
2694 /* If we are doing the conversion last then also delay the shift as we may
2695 be able to combine the shift and conversion in certain cases. */
2696 shift_first
= false;
2698 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2699 PLUS_EXPR then do the shift last as some targets can combine the shift and
2700 add into a single instruction. */
2701 if (lhs
&& !is_pattern_stmt_p (stmt_info
)
2702 && single_imm_use (lhs
, &use_p
, &use_stmt
))
2704 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
2705 && gimple_assign_rhs_code (use_stmt
) == PLUS_EXPR
)
2706 shift_first
= false;
2709 /* If we don't have to shift we only generate the mask, so just fix the
2710 code-path to shift_first. */
2715 if (shift_first
&& !ref_sext
)
2717 tree shifted
= container
;
2721 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2722 RSHIFT_EXPR
, container
,
2723 build_int_cst (sizetype
, shift_n
));
2724 shifted
= gimple_assign_lhs (pattern_stmt
);
2725 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2728 tree mask
= wide_int_to_tree (container_type
,
2729 wi::mask (mask_width
, false, prec
));
2732 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2733 BIT_AND_EXPR
, shifted
, mask
);
2734 result
= gimple_assign_lhs (pattern_stmt
);
2738 tree temp
= vect_recog_temp_ssa_var (container_type
);
2741 tree mask
= wide_int_to_tree (container_type
,
2742 wi::shifted_mask (shift_n
,
2745 pattern_stmt
= gimple_build_assign (temp
, BIT_AND_EXPR
,
2750 HOST_WIDE_INT shl
= prec
- shift_n
- mask_width
;
2752 pattern_stmt
= gimple_build_assign (temp
, LSHIFT_EXPR
,
2754 build_int_cst (sizetype
,
2758 tree masked
= gimple_assign_lhs (pattern_stmt
);
2759 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2761 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2762 RSHIFT_EXPR
, masked
,
2763 build_int_cst (sizetype
, shift_n
));
2764 result
= gimple_assign_lhs (pattern_stmt
);
2767 if (!useless_type_conversion_p (TREE_TYPE (result
), ret_type
))
2769 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2771 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type
),
2780 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2781 vectype
= truth_type_for (vectype
);
2783 /* FIXME: This part extracts the boolean value out of the bitfield in the
2784 same way as vect_recog_gcond_pattern does. However because
2785 patterns cannot match the same root twice, when we handle and
2786 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2787 apply anymore. We should really fix it so that we don't need to
2788 duplicate transformations like these. */
2789 tree new_lhs
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
2790 gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt_info
->stmt
);
2791 tree cond_cst
= gimple_cond_rhs (cond_stmt
);
2793 = gimple_build_assign (new_lhs
, gimple_cond_code (cond_stmt
),
2794 gimple_get_lhs (pattern_stmt
),
2795 fold_convert (container_type
, cond_cst
));
2796 append_pattern_def_seq (vinfo
, stmt_info
, new_stmt
, vectype
, container_type
);
2798 = gimple_build_cond (NE_EXPR
, new_lhs
,
2799 build_zero_cst (TREE_TYPE (new_lhs
)),
2800 NULL_TREE
, NULL_TREE
);
2803 *type_out
= STMT_VINFO_VECTYPE (stmt_info
);
2804 vect_pattern_detected ("bitfield_ref pattern", stmt_info
->stmt
);
2806 return pattern_stmt
;
/* Function vect_recog_bit_insert_pattern

   Try to find the following pattern:

   written = BIT_INSERT_EXPR (container, value, bitpos);

   * STMT_VINFO: The stmt we want to replace.

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   value = (container_type) value;	    // Make sure the value has the
					    // container's type.
   shifted = value << bitpos;		    // Shift value into place.
   masked = shifted & (mask << bitpos);	    // Mask off the non-relevant bits
					    // in the 'to-write value'.
   cleared = container & ~(mask << bitpos); // Clear, in the container, the
					    // bits we are about to write.
   written = cleared | masked;		    // Write bits.

   where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number
   of bits corresponding to the real size of the bitfield value we are writing
   to.  The shifting is always optional depending on whether bitpos != 0.  */
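/* Editor's illustrative sketch, not taken from the surrounding sources: the
   corresponding store side, e.g.

     struct rec2 { unsigned int tag : 3, payload : 13, pad : 16; };

     void set_payload (struct rec2 *r, const unsigned short *in, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i].payload = in[i];
     }

   becomes (after if-conversion lowers the bit-field write to a
   BIT_INSERT_EXPR) the shift, mask, clear and ior statements listed above,
   i.e. a read-modify-write of the 32-bit container.  The struct and names
   are made up for the example.  */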
2842 vect_recog_bit_insert_pattern (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2845 gassign
*bf_stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
2846 if (!bf_stmt
|| gimple_assign_rhs_code (bf_stmt
) != BIT_INSERT_EXPR
)
2849 tree container
= gimple_assign_rhs1 (bf_stmt
);
2850 tree value
= gimple_assign_rhs2 (bf_stmt
);
2851 tree shift
= gimple_assign_rhs3 (bf_stmt
);
2853 tree bf_type
= TREE_TYPE (value
);
2854 tree container_type
= TREE_TYPE (container
);
2856 if (!INTEGRAL_TYPE_P (container_type
)
2857 || !tree_fits_uhwi_p (TYPE_SIZE (container_type
)))
2860 gimple
*pattern_stmt
;
2862 vect_unpromoted_value unprom
;
2863 unprom
.set_op (value
, vect_internal_def
);
2864 value
= vect_convert_input (vinfo
, stmt_info
, container_type
, &unprom
,
2865 get_vectype_for_scalar_type (vinfo
,
2868 unsigned HOST_WIDE_INT mask_width
= TYPE_PRECISION (bf_type
);
2869 unsigned HOST_WIDE_INT prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2870 unsigned HOST_WIDE_INT shift_n
= tree_to_uhwi (shift
);
2871 if (BYTES_BIG_ENDIAN
)
2873 shift_n
= prec
- shift_n
- mask_width
;
2874 shift
= build_int_cst (TREE_TYPE (shift
), shift_n
);
2877 if (!useless_type_conversion_p (TREE_TYPE (value
), container_type
))
2880 gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2882 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2883 value
= gimple_get_lhs (pattern_stmt
);
2886 /* Shift VALUE into place. */
2887 tree shifted
= value
;
2890 gimple_seq stmts
= NULL
;
2892 = gimple_build (&stmts
, LSHIFT_EXPR
, container_type
, value
, shift
);
2893 if (!gimple_seq_empty_p (stmts
))
2894 append_pattern_def_seq (vinfo
, stmt_info
,
2895 gimple_seq_first_stmt (stmts
));
2899 = wide_int_to_tree (container_type
,
2900 wi::shifted_mask (shift_n
, mask_width
, false, prec
));
2902 /* Clear bits we don't want to write back from SHIFTED. */
2903 gimple_seq stmts
= NULL
;
2904 tree masked
= gimple_build (&stmts
, BIT_AND_EXPR
, container_type
, shifted
,
2906 if (!gimple_seq_empty_p (stmts
))
2908 pattern_stmt
= gimple_seq_first_stmt (stmts
);
2909 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2912 /* Mask off the bits in the container that we are to write to. */
2913 mask_t
= wide_int_to_tree (container_type
,
2914 wi::shifted_mask (shift_n
, mask_width
, true, prec
));
2915 tree cleared
= vect_recog_temp_ssa_var (container_type
);
2916 pattern_stmt
= gimple_build_assign (cleared
, BIT_AND_EXPR
, container
, mask_t
);
2917 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2919 /* Write MASKED into CLEARED. */
2921 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2922 BIT_IOR_EXPR
, cleared
, masked
);
2924 *type_out
= STMT_VINFO_VECTYPE (stmt_info
);
2925 vect_pattern_detected ("bit_insert pattern", stmt_info
->stmt
);
2927 return pattern_stmt
;
/* Recognize cases in which an operation is performed in one type WTYPE
   but could be done more efficiently in a narrower type NTYPE.  For example,
   if we have:

     ATYPE a;  // narrower than NTYPE
     BTYPE b;  // narrower than NTYPE
     WTYPE aw = (WTYPE) a;
     WTYPE bw = (WTYPE) b;
     WTYPE res = aw + bw;  // only uses of aw and bw

   then it would be more efficient to do:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   Other situations include things like:

     ATYPE a;  // NTYPE or narrower
     WTYPE aw = (WTYPE) a;
     WTYPE res = aw + b;

   when only "(NTYPE) res" is significant.  In that case it's more efficient
   to truncate "b" and do the operation on NTYPE instead:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;  // truncation
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   All users of "res" should then use "resn" instead, making the final
   statement dead (not marked as relevant).  The final statement is still
   needed to maintain the type correctness of the IR.

   vect_determine_precisions has already determined the minimum
   precision of the operation and the minimum precision required
   by users of the result.  */
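/* Editor's illustrative sketch, not taken from the surrounding sources: a
   typical over-widened loop is

     void add_bytes (unsigned char *r, const unsigned char *a,
		     const unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = a[i] + b[i];
     }

   where C's integer promotions make the addition happen in int even though
   only the low 8 bits are ever stored; the pattern below narrows the
   operation back to a byte-sized type so that more elements fit per vector.
   The names add_bytes, r, a, b and n are made up for the example.  */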
2971 vect_recog_over_widening_pattern (vec_info
*vinfo
,
2972 stmt_vec_info last_stmt_info
, tree
*type_out
)
2974 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
2978 /* See whether we have found that this operation can be done on a
2979 narrower type without changing its semantics. */
2980 unsigned int new_precision
= last_stmt_info
->operation_precision
;
2984 tree lhs
= gimple_assign_lhs (last_stmt
);
2985 tree type
= TREE_TYPE (lhs
);
2986 tree_code code
= gimple_assign_rhs_code (last_stmt
);
2988 /* Punt for reductions where we don't handle the type conversions. */
2989 if (STMT_VINFO_DEF_TYPE (last_stmt_info
) == vect_reduction_def
)
2992 /* Keep the first operand of a COND_EXPR as-is: only the other two
2993 operands are interesting. */
2994 unsigned int first_op
= (code
== COND_EXPR
? 2 : 1);
2996 /* Check the operands. */
2997 unsigned int nops
= gimple_num_ops (last_stmt
) - first_op
;
2998 auto_vec
<vect_unpromoted_value
, 3> unprom (nops
);
2999 unprom
.quick_grow_cleared (nops
);
3000 unsigned int min_precision
= 0;
3001 bool single_use_p
= false;
3002 for (unsigned int i
= 0; i
< nops
; ++i
)
3004 tree op
= gimple_op (last_stmt
, first_op
+ i
);
3005 if (TREE_CODE (op
) == INTEGER_CST
)
3006 unprom
[i
].set_op (op
, vect_constant_def
);
3007 else if (TREE_CODE (op
) == SSA_NAME
)
3009 bool op_single_use_p
= true;
3010 if (!vect_look_through_possible_promotion (vinfo
, op
, &unprom
[i
],
3015 (1) N bits of the result are needed;
3016 (2) all inputs are widened from M<N bits; and
3017 (3) one operand OP is a single-use SSA name
3019 we can shift the M->N widening from OP to the output
3020 without changing the number or type of extensions involved.
3021 This then reduces the number of copies of STMT_INFO.
3023 If instead of (3) more than one operand is a single-use SSA name,
3024 shifting the extension to the output is even more of a win.
3028 (1) N bits of the result are needed;
3029 (2) one operand OP2 is widened from M2<N bits;
3030 (3) another operand OP1 is widened from M1<M2 bits; and
3031 (4) both OP1 and OP2 are single-use
3033 the choice is between:
3035 (a) truncating OP2 to M1, doing the operation on M1,
3036 and then widening the result to N
3038 (b) widening OP1 to M2, doing the operation on M2, and then
3039 widening the result to N
3041 Both shift the M2->N widening of the inputs to the output.
3042 (a) additionally shifts the M1->M2 widening to the output;
3043 it requires fewer copies of STMT_INFO but requires an extra
3046 Which is better will depend on the complexity and cost of
3047 STMT_INFO, which is hard to predict at this stage. However,
3048 a clear tie-breaker in favor of (b) is the fact that the
3049 truncation in (a) increases the length of the operation chain.
3051 If instead of (4) only one of OP1 or OP2 is single-use,
3052 (b) is still a win over doing the operation in N bits:
3053 it still shifts the M2->N widening on the single-use operand
3054 to the output and reduces the number of STMT_INFO copies.
3056 If neither operand is single-use then operating on fewer than
3057 N bits might lead to more extensions overall. Whether it does
3058 or not depends on global information about the vectorization
3059 region, and whether that's a good trade-off would again
3060 depend on the complexity and cost of the statements involved,
3061 as well as things like register pressure that are not normally
3062 modelled at this stage. We therefore ignore these cases
3063 and just optimize the clear single-use wins above.
3065 Thus we take the maximum precision of the unpromoted operands
3066 and record whether any operand is single-use. */
3067 if (unprom
[i
].dt
== vect_internal_def
)
3069 min_precision
= MAX (min_precision
,
3070 TYPE_PRECISION (unprom
[i
].type
));
3071 single_use_p
|= op_single_use_p
;
3078 /* Although the operation could be done in operation_precision, we have
3079 to balance that against introducing extra truncations or extensions.
3080 Calculate the minimum precision that can be handled efficiently.
3082 The loop above determined that the operation could be handled
3083 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3084 extension from the inputs to the output without introducing more
3085 instructions, and would reduce the number of instructions required
3086 for STMT_INFO itself.
3088 vect_determine_precisions has also determined that the result only
3089 needs min_output_precision bits. Truncating by a factor of N times
3090 requires a tree of N - 1 instructions, so if TYPE is N times wider
3091 than min_output_precision, doing the operation in TYPE and truncating
3092 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3095 - truncating the input to a unary operation and doing the operation
3096 in the new type requires at most N - 1 + 1 = N instructions per
3099 - doing the same for a binary operation requires at most
3100 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3102 Both unary and binary operations require fewer instructions than
3103 this if the operands were extended from a suitable truncated form.
3104 Thus there is usually nothing to lose by doing operations in
3105 min_output_precision bits, but there can be something to gain. */
3107 min_precision
= last_stmt_info
->min_output_precision
;
3109 min_precision
= MIN (min_precision
, last_stmt_info
->min_output_precision
);
3111 /* Apply the minimum efficient precision we just calculated. */
3112 if (new_precision
< min_precision
)
3113 new_precision
= min_precision
;
3114 new_precision
= vect_element_precision (new_precision
);
3115 if (new_precision
>= TYPE_PRECISION (type
))
3118 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt
);
3120 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
3124 /* We've found a viable pattern. Get the new type of the operation. */
3125 bool unsigned_p
= (last_stmt_info
->operation_sign
== UNSIGNED
);
3126 tree new_type
= build_nonstandard_integer_type (new_precision
, unsigned_p
);
3128 /* If we're truncating an operation, we need to make sure that we
3129 don't introduce new undefined overflow. The codes tested here are
3130 a subset of those accepted by vect_truncatable_operation_p. */
3131 tree op_type
= new_type
;
3132 if (TYPE_OVERFLOW_UNDEFINED (new_type
)
3133 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== MULT_EXPR
))
3134 op_type
= build_nonstandard_integer_type (new_precision
, true);
3136 /* We specifically don't check here whether the target supports the
3137 new operation, since it might be something that a later pattern
3138 wants to rewrite anyway. If targets have a minimum element size
3139 for some optabs, we should pattern-match smaller ops to larger ops
3140 where beneficial. */
3141 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3142 tree op_vectype
= get_vectype_for_scalar_type (vinfo
, op_type
);
3143 if (!new_vectype
|| !op_vectype
)
3146 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_NOTE
, vect_location
, "demoting %T to %T\n",
3150 /* Calculate the rhs operands for an operation on OP_TYPE. */
3152 for (unsigned int i
= 1; i
< first_op
; ++i
)
3153 ops
[i
- 1] = gimple_op (last_stmt
, i
);
3154 vect_convert_inputs (vinfo
, last_stmt_info
, nops
, &ops
[first_op
- 1],
3155 op_type
, &unprom
[0], op_vectype
);
3157 /* Use the operation to produce a result of type OP_TYPE. */
3158 tree new_var
= vect_recog_temp_ssa_var (op_type
, NULL
);
3159 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
,
3160 ops
[0], ops
[1], ops
[2]);
3161 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3163 if (dump_enabled_p ())
3164 dump_printf_loc (MSG_NOTE
, vect_location
,
3165 "created pattern stmt: %G", pattern_stmt
);
3167 /* Convert back to the original signedness, if OP_TYPE is different
3169 if (op_type
!= new_type
)
3170 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, new_type
,
3171 pattern_stmt
, op_vectype
);
3173 /* Promote the result to the original type. */
3174 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, type
,
3175 pattern_stmt
, new_vectype
);
3177 return pattern_stmt
;
/* Recognize the following patterns:

     ATYPE a;  // narrower than TYPE
     BTYPE b;  // narrower than TYPE

   1) Multiply high with scaling
     TYPE res = ((TYPE) a * (TYPE) b) >> c;
     Here, c is bitsize (TYPE) / 2 - 1.

   2) ... or also with rounding
     TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
     Here, d is bitsize (TYPE) / 2 - 2.

   3) Normal multiply high
     TYPE res = ((TYPE) a * (TYPE) b) >> e;
     Here, e is bitsize (TYPE) / 2.

   where only the bottom half of res is used.  */
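/* Editor's illustrative sketch, not taken from the surrounding sources:
   pattern 3 corresponds to scalar code such as

     void mul_high (short *r, const short *a, const short *b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = ((int) a[i] * (int) b[i]) >> 16;
     }

   which can be replaced by the target's vector multiply-high support
   (internal functions IFN_MULH, or IFN_MULHS / IFN_MULHRS for the scaling
   and rounding forms 1 and 2).  The names mul_high, r, a, b and n are made
   up for the example.  */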
3200 vect_recog_mulhs_pattern (vec_info
*vinfo
,
3201 stmt_vec_info last_stmt_info
, tree
*type_out
)
3203 /* Check for a right shift. */
3204 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3206 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
)
3209 /* Check that the shift result is wider than the users of the
3210 result need (i.e. that narrowing would be a natural choice). */
3211 tree lhs_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
3212 unsigned int target_precision
3213 = vect_element_precision (last_stmt_info
->min_output_precision
);
3214 if (!INTEGRAL_TYPE_P (lhs_type
)
3215 || target_precision
>= TYPE_PRECISION (lhs_type
))
3218 /* Look through any change in sign on the outer shift input. */
3219 vect_unpromoted_value unprom_rshift_input
;
3220 tree rshift_input
= vect_look_through_possible_promotion
3221 (vinfo
, gimple_assign_rhs1 (last_stmt
), &unprom_rshift_input
);
3223 || TYPE_PRECISION (TREE_TYPE (rshift_input
))
3224 != TYPE_PRECISION (lhs_type
))
3227 /* Get the definition of the shift input. */
3228 stmt_vec_info rshift_input_stmt_info
3229 = vect_get_internal_def (vinfo
, rshift_input
);
3230 if (!rshift_input_stmt_info
)
3232 gassign
*rshift_input_stmt
3233 = dyn_cast
<gassign
*> (rshift_input_stmt_info
->stmt
);
3234 if (!rshift_input_stmt
)
3237 stmt_vec_info mulh_stmt_info
;
3239 bool rounding_p
= false;
3241 /* Check for the presence of the rounding term. */
3242 if (gimple_assign_rhs_code (rshift_input_stmt
) == PLUS_EXPR
)
3244 /* Check that the outer shift was by 1. */
3245 if (!integer_onep (gimple_assign_rhs2 (last_stmt
)))
3248 /* Check that the second operand of the PLUS_EXPR is 1. */
3249 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt
)))
3252 /* Look through any change in sign on the addition input. */
3253 vect_unpromoted_value unprom_plus_input
;
3254 tree plus_input
= vect_look_through_possible_promotion
3255 (vinfo
, gimple_assign_rhs1 (rshift_input_stmt
), &unprom_plus_input
);
3257 || TYPE_PRECISION (TREE_TYPE (plus_input
))
3258 != TYPE_PRECISION (TREE_TYPE (rshift_input
)))
3261 /* Get the definition of the multiply-high-scale part. */
3262 stmt_vec_info plus_input_stmt_info
3263 = vect_get_internal_def (vinfo
, plus_input
);
3264 if (!plus_input_stmt_info
)
3266 gassign
*plus_input_stmt
3267 = dyn_cast
<gassign
*> (plus_input_stmt_info
->stmt
);
3268 if (!plus_input_stmt
3269 || gimple_assign_rhs_code (plus_input_stmt
) != RSHIFT_EXPR
)
3272 /* Look through any change in sign on the scaling input. */
3273 vect_unpromoted_value unprom_scale_input
;
3274 tree scale_input
= vect_look_through_possible_promotion
3275 (vinfo
, gimple_assign_rhs1 (plus_input_stmt
), &unprom_scale_input
);
3277 || TYPE_PRECISION (TREE_TYPE (scale_input
))
3278 != TYPE_PRECISION (TREE_TYPE (plus_input
)))
3281 /* Get the definition of the multiply-high part. */
3282 mulh_stmt_info
= vect_get_internal_def (vinfo
, scale_input
);
3283 if (!mulh_stmt_info
)
3286 /* Get the scaling term. */
3287 scale_term
= gimple_assign_rhs2 (plus_input_stmt
);
3292 mulh_stmt_info
= rshift_input_stmt_info
;
3293 scale_term
= gimple_assign_rhs2 (last_stmt
);
3296 /* Check that the scaling factor is constant. */
3297 if (TREE_CODE (scale_term
) != INTEGER_CST
)
3300 /* Check whether the scaling input term can be seen as two widened
3301 inputs multiplied together. */
3302 vect_unpromoted_value unprom_mult
[2];
3305 = vect_widened_op_tree (vinfo
, mulh_stmt_info
, MULT_EXPR
, WIDEN_MULT_EXPR
,
3306 false, 2, unprom_mult
, &new_type
);
3310 /* Adjust output precision. */
3311 if (TYPE_PRECISION (new_type
) < target_precision
)
3312 new_type
= build_nonstandard_integer_type
3313 (target_precision
, TYPE_UNSIGNED (new_type
));
3315 unsigned mult_precision
= TYPE_PRECISION (new_type
);
3317 /* Check that the scaling factor is expected. Instead of
3318 target_precision, we should use the one that we actually
3319 use for internal function. */
3322 /* Check pattern 2). */
3323 if (wi::to_widest (scale_term
) + mult_precision
+ 2
3324 != TYPE_PRECISION (lhs_type
))
3331 /* Check for pattern 1). */
3332 if (wi::to_widest (scale_term
) + mult_precision
+ 1
3333 == TYPE_PRECISION (lhs_type
))
3335 /* Check for pattern 3). */
3336 else if (wi::to_widest (scale_term
) + mult_precision
3337 == TYPE_PRECISION (lhs_type
))
3343 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt
);
3345 /* Check for target support. */
3346 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3348 || !direct_internal_fn_supported_p
3349 (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
3352 /* The IR requires a valid vector type for the cast result, even though
3353 it's likely to be discarded. */
3354 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
3358 /* Generate the IFN_MULHRS call. */
3359 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
3361 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
3362 unprom_mult
, new_vectype
);
3364 = gimple_build_call_internal (ifn
, 2, new_ops
[0], new_ops
[1]);
3365 gimple_call_set_lhs (mulhrs_stmt
, new_var
);
3366 gimple_set_location (mulhrs_stmt
, gimple_location (last_stmt
));
3368 if (dump_enabled_p ())
3369 dump_printf_loc (MSG_NOTE
, vect_location
,
3370 "created pattern stmt: %G", (gimple
*) mulhrs_stmt
);
3372 return vect_convert_output (vinfo
, last_stmt_info
, lhs_type
,
3373 mulhrs_stmt
, new_vectype
);
/* Recognize the patterns:

	   ATYPE a;  // narrower than TYPE
	   BTYPE b;  // narrower than TYPE
   (1)     TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
   or (2)  TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;

   where only the bottom half of avg is used.  Try to transform them into:

   (1)     NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
   or (2)  NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);

   followed by:

	   TYPE avg = (TYPE) avg';

   where NTYPE is no wider than half of TYPE.  Since only the bottom half
   of avg is used, all or part of the cast of avg' should become redundant.

   If there is no target support available, generate code to distribute rshift
   over plus and add a carry.  */
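/* Editor's illustrative sketch, not taken from the surrounding sources: the
   rounding form (2) is the usual pixel-averaging idiom

     void avg_round (unsigned char *r, const unsigned char *a,
		     const unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = (a[i] + b[i] + 1) >> 1;
     }

   which becomes an .AVG_CEIL call on byte elements, or the open-coded
   shift-and-carry fallback described above when the target has no such
   instruction.  The names avg_round, r, a, b and n are made up for the
   example.  */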
3399 vect_recog_average_pattern (vec_info
*vinfo
,
3400 stmt_vec_info last_stmt_info
, tree
*type_out
)
3402 /* Check for a shift right by one bit. */
3403 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3405 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
3406 || !integer_onep (gimple_assign_rhs2 (last_stmt
)))
3409 /* Check that the shift result is wider than the users of the
3410 result need (i.e. that narrowing would be a natural choice). */
3411 tree lhs
= gimple_assign_lhs (last_stmt
);
3412 tree type
= TREE_TYPE (lhs
);
3413 unsigned int target_precision
3414 = vect_element_precision (last_stmt_info
->min_output_precision
);
3415 if (!INTEGRAL_TYPE_P (type
) || target_precision
>= TYPE_PRECISION (type
))
3418 /* Look through any change in sign on the shift input. */
3419 tree rshift_rhs
= gimple_assign_rhs1 (last_stmt
);
3420 vect_unpromoted_value unprom_plus
;
3421 rshift_rhs
= vect_look_through_possible_promotion (vinfo
, rshift_rhs
,
3424 || TYPE_PRECISION (TREE_TYPE (rshift_rhs
)) != TYPE_PRECISION (type
))
3427 /* Get the definition of the shift input. */
3428 stmt_vec_info plus_stmt_info
= vect_get_internal_def (vinfo
, rshift_rhs
);
3429 if (!plus_stmt_info
)
3432 /* Check whether the shift input can be seen as a tree of additions on
3433 2 or 3 widened inputs.
3435 Note that the pattern should be a win even if the result of one or
3436 more additions is reused elsewhere: if the pattern matches, we'd be
3437 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3438 internal_fn ifn
= IFN_AVG_FLOOR
;
3439 vect_unpromoted_value unprom
[3];
3441 unsigned int nops
= vect_widened_op_tree (vinfo
, plus_stmt_info
, PLUS_EXPR
,
3442 IFN_VEC_WIDEN_PLUS
, false, 3,
3448 /* Check that one operand is 1. */
3450 for (i
= 0; i
< 3; ++i
)
3451 if (integer_onep (unprom
[i
].op
))
3455 /* Throw away the 1 operand and keep the other two. */
3457 unprom
[i
] = unprom
[2];
3461 vect_pattern_detected ("vect_recog_average_pattern", last_stmt
);
3465 (a) the operation can be viewed as:
3467 TYPE widened0 = (TYPE) UNPROM[0];
3468 TYPE widened1 = (TYPE) UNPROM[1];
3469 TYPE tmp1 = widened0 + widened1 {+ 1};
3470 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3472 (b) the first two statements are equivalent to:
3474 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3475 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3477 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3480 (d) all the operations can be performed correctly at twice the width of
3481 NEW_TYPE, due to the nature of the average operation; and
3483 (e) users of the result of the right shift need only TARGET_PRECISION
3484 bits, where TARGET_PRECISION is no more than half of TYPE's
3487 Under these circumstances, the only situation in which NEW_TYPE
3488 could be narrower than TARGET_PRECISION is if widened0, widened1
3489 and an addition result are all used more than once. Thus we can
3490 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3491 as "free", whereas widening the result of the average instruction
3492 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3493 therefore better not to go narrower than TARGET_PRECISION. */
3494 if (TYPE_PRECISION (new_type
) < target_precision
)
3495 new_type
= build_nonstandard_integer_type (target_precision
,
3496 TYPE_UNSIGNED (new_type
));
3498 /* Check for target support. */
3499 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3503 bool fallback_p
= false;
3505 if (direct_internal_fn_supported_p (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
3507 else if (TYPE_UNSIGNED (new_type
)
3508 && optab_for_tree_code (RSHIFT_EXPR
, new_vectype
, optab_scalar
)
3509 && optab_for_tree_code (PLUS_EXPR
, new_vectype
, optab_default
)
3510 && optab_for_tree_code (BIT_IOR_EXPR
, new_vectype
, optab_default
)
3511 && optab_for_tree_code (BIT_AND_EXPR
, new_vectype
, optab_default
))
3516 /* The IR requires a valid vector type for the cast result, even though
3517 it's likely to be discarded. */
3518 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
3522 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
3524 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
3525 unprom
, new_vectype
);
3529 /* As a fallback, generate code for following sequence:
3531 shifted_op0 = new_ops[0] >> 1;
3532 shifted_op1 = new_ops[1] >> 1;
3533 sum_of_shifted = shifted_op0 + shifted_op1;
3534 unmasked_carry = new_ops[0] and/or new_ops[1];
3535 carry = unmasked_carry & 1;
3536 new_var = sum_of_shifted + carry;
3539 tree one_cst
= build_one_cst (new_type
);
3542 tree shifted_op0
= vect_recog_temp_ssa_var (new_type
, NULL
);
3543 g
= gimple_build_assign (shifted_op0
, RSHIFT_EXPR
, new_ops
[0], one_cst
);
3544 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3546 tree shifted_op1
= vect_recog_temp_ssa_var (new_type
, NULL
);
3547 g
= gimple_build_assign (shifted_op1
, RSHIFT_EXPR
, new_ops
[1], one_cst
);
3548 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3550 tree sum_of_shifted
= vect_recog_temp_ssa_var (new_type
, NULL
);
3551 g
= gimple_build_assign (sum_of_shifted
, PLUS_EXPR
,
3552 shifted_op0
, shifted_op1
);
3553 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3555 tree unmasked_carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
3556 tree_code c
= (ifn
== IFN_AVG_CEIL
) ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3557 g
= gimple_build_assign (unmasked_carry
, c
, new_ops
[0], new_ops
[1]);
3558 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3560 tree carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
3561 g
= gimple_build_assign (carry
, BIT_AND_EXPR
, unmasked_carry
, one_cst
);
3562 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3564 g
= gimple_build_assign (new_var
, PLUS_EXPR
, sum_of_shifted
, carry
);
3565 return vect_convert_output (vinfo
, last_stmt_info
, type
, g
, new_vectype
);
3568 /* Generate the IFN_AVG* call. */
3569 gcall
*average_stmt
= gimple_build_call_internal (ifn
, 2, new_ops
[0],
3571 gimple_call_set_lhs (average_stmt
, new_var
);
3572 gimple_set_location (average_stmt
, gimple_location (last_stmt
));
3574 if (dump_enabled_p ())
3575 dump_printf_loc (MSG_NOTE
, vect_location
,
3576 "created pattern stmt: %G", (gimple
*) average_stmt
);
3578 return vect_convert_output (vinfo
, last_stmt_info
,
3579 type
, average_stmt
, new_vectype
);
/* Recognize cases in which the input to a cast is wider than its
   output, and the input is fed by a widening operation.  Fold this
   by removing the unnecessary intermediate widening.  E.g.:

     unsigned char a;
     unsigned int b = (unsigned int) a;
     unsigned short c = (unsigned short) b;

   -->

     unsigned short c = (unsigned short) a;

   Although this is rare in input IR, it is an expected side-effect
   of the over-widening pattern above.

   This is beneficial also for integer-to-float conversions, if the
   widened integer has more bits than the float, and if the unwidened
   input doesn't.  */
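/* Editor's illustrative sketch, not taken from the surrounding sources: a
   (contrived) integer-to-float instance would be

     void to_float (float *r, const unsigned short *a, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = (float) (unsigned long long) a[i];
     }

   where the 64-bit widened value has more bits than the float, while the
   original 16-bit value does not, so the conversion can be done directly
   from the narrower value.  The names to_float, r, a and n are made up for
   the example.  */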
static gimple *
vect_recog_cast_forwprop_pattern (vec_info *vinfo,
				  stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a cast, including an integer-to-float conversion.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;
  tree_code code = gimple_assign_rhs_code (last_stmt);
  if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    return NULL;

  /* Make sure that the rhs is a scalar with a natural bitsize.  */
  tree lhs = gimple_assign_lhs (last_stmt);
  if (!lhs)
    return NULL;
  tree lhs_type = TREE_TYPE (lhs);
  scalar_mode lhs_mode;
  if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
      || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    return NULL;

  /* Check for a narrowing operation (from a vector point of view).  */
  tree rhs = gimple_assign_rhs1 (last_stmt);
  tree rhs_type = TREE_TYPE (rhs);
  if (!INTEGRAL_TYPE_P (rhs_type)
      || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
      || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    return NULL;

  /* Try to find an unpromoted input.  */
  vect_unpromoted_value unprom;
  if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
      || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    return NULL;

  /* If the bits above RHS_TYPE matter, make sure that they're the
     same when extending from UNPROM as they are when extending from RHS.  */
  if (!INTEGRAL_TYPE_P (lhs_type)
      && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    return NULL;

  /* We can get the same result by casting UNPROM directly, to avoid
     the unnecessary widening and narrowing.  */
  vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);

  *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
  if (!*type_out)
    return NULL;

  tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

  return pattern_stmt;
}
/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
   to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_shift_pattern (vec_info *vinfo,
				stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
				      "vect_recog_widen_shift_pattern");
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

   * STMT_VINFO: The stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
     by a series of stmts, e.g.:

   S1 d_t = -c_t;
   S2 e_t = d_t & (B - 1);
   S3 f_t = b_t << c_t;
   S4 g_t = b_t >> e_t;
   S0 a_t = f_t | g_t;

   where B is element bitsize of type.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the rotate
     S0 stmt.  */
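/* Editor's illustrative sketch, not taken from the surrounding sources: a
   rotate on a target without a vector rotate instruction, e.g.

     void rol3 (unsigned int *x, int n)
     {
       for (int i = 0; i < n; i++)
	 x[i] = (x[i] << 3) | (x[i] >> 29);
     }

   (already recognized as x[i] r<< 3 earlier in the middle end) is expanded
   here into the shift/or series S1-S4 above instead of being left
   unvectorized.  The names rol3, x and n are made up for the example.  */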
3698 vect_recog_rotate_pattern (vec_info
*vinfo
,
3699 stmt_vec_info stmt_vinfo
, tree
*type_out
)
3701 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3702 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
3703 gimple
*pattern_stmt
, *def_stmt
;
3704 enum tree_code rhs_code
;
3705 enum vect_def_type dt
;
3706 optab optab1
, optab2
;
3707 edge ext_def
= NULL
;
3708 bool bswap16_p
= false;
3710 if (is_gimple_assign (last_stmt
))
3712 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3722 lhs
= gimple_assign_lhs (last_stmt
);
3723 oprnd0
= gimple_assign_rhs1 (last_stmt
);
3724 type
= TREE_TYPE (oprnd0
);
3725 oprnd1
= gimple_assign_rhs2 (last_stmt
);
3727 else if (gimple_call_builtin_p (last_stmt
, BUILT_IN_BSWAP16
))
3729 /* __builtin_bswap16 (x) is another form of x r>> 8.
3730 The vectorizer has bswap support, but only if the argument isn't
3732 lhs
= gimple_call_lhs (last_stmt
);
3733 oprnd0
= gimple_call_arg (last_stmt
, 0);
3734 type
= TREE_TYPE (oprnd0
);
3736 || TYPE_PRECISION (TREE_TYPE (lhs
)) != 16
3737 || TYPE_PRECISION (type
) <= 16
3738 || TREE_CODE (oprnd0
) != SSA_NAME
3739 || BITS_PER_UNIT
!= 8)
3742 stmt_vec_info def_stmt_info
;
3743 if (!vect_is_simple_use (oprnd0
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
3746 if (dt
!= vect_internal_def
)
3749 if (gimple_assign_cast_p (def_stmt
))
3751 def
= gimple_assign_rhs1 (def_stmt
);
3752 if (INTEGRAL_TYPE_P (TREE_TYPE (def
))
3753 && TYPE_PRECISION (TREE_TYPE (def
)) == 16)
3757 type
= TREE_TYPE (lhs
);
3758 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3759 if (vectype
== NULL_TREE
)
3762 if (tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype
))
3764 /* The encoding uses one stepped pattern for each byte in the
3766 vec_perm_builder
elts (TYPE_VECTOR_SUBPARTS (char_vectype
), 2, 3);
3767 for (unsigned i
= 0; i
< 3; ++i
)
3768 for (unsigned j
= 0; j
< 2; ++j
)
3769 elts
.quick_push ((i
+ 1) * 2 - j
- 1);
3771 vec_perm_indices
indices (elts
, 1,
3772 TYPE_VECTOR_SUBPARTS (char_vectype
));
3773 machine_mode vmode
= TYPE_MODE (char_vectype
);
3774 if (can_vec_perm_const_p (vmode
, vmode
, indices
))
3776 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3777 undo the argument promotion. */
3778 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
3780 def
= vect_recog_temp_ssa_var (type
, NULL
);
3781 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3782 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3786 /* Pattern detected. */
3787 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3789 *type_out
= vectype
;
3791 /* Pattern supported. Create a stmt to be used to replace the
3792 pattern, with the unpromoted argument. */
3793 var
= vect_recog_temp_ssa_var (type
, NULL
);
3794 pattern_stmt
= gimple_build_call (gimple_call_fndecl (last_stmt
),
3796 gimple_call_set_lhs (pattern_stmt
, var
);
3797 gimple_call_set_fntype (as_a
<gcall
*> (pattern_stmt
),
3798 gimple_call_fntype (last_stmt
));
3799 return pattern_stmt
;
3803 oprnd1
= build_int_cst (integer_type_node
, 8);
3804 rhs_code
= LROTATE_EXPR
;
3810 if (TREE_CODE (oprnd0
) != SSA_NAME
3811 || !INTEGRAL_TYPE_P (type
)
3812 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
))
3815 stmt_vec_info def_stmt_info
;
3816 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
3819 if (dt
!= vect_internal_def
3820 && dt
!= vect_constant_def
3821 && dt
!= vect_external_def
)
3824 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3825 if (vectype
== NULL_TREE
)
3828 /* If vector/vector or vector/scalar rotate is supported by the target,
3829 don't do anything here. */
3830 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
3832 && can_implement_p (optab1
, TYPE_MODE (vectype
)))
3837 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
3839 def
= vect_recog_temp_ssa_var (type
, NULL
);
3840 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3841 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3845 /* Pattern detected. */
3846 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3848 *type_out
= vectype
;
3850 /* Pattern supported. Create a stmt to be used to replace the
3852 var
= vect_recog_temp_ssa_var (type
, NULL
);
3853 pattern_stmt
= gimple_build_assign (var
, LROTATE_EXPR
, oprnd0
,
3855 return pattern_stmt
;
3860 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
3862 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
3864 && can_implement_p (optab2
, TYPE_MODE (vectype
)))
3868 tree utype
= unsigned_type_for (type
);
3869 tree uvectype
= get_vectype_for_scalar_type (vinfo
, utype
);
3873 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3874 don't do anything here either. */
3875 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_vector
);
3876 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_vector
);
3878 || !can_implement_p (optab1
, TYPE_MODE (uvectype
))
3880 || !can_implement_p (optab2
, TYPE_MODE (uvectype
)))
3882 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
3884 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_scalar
);
3885 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_scalar
);
3887 || !can_implement_p (optab1
, TYPE_MODE (uvectype
))
3889 || !can_implement_p (optab2
, TYPE_MODE (uvectype
)))
3893 *type_out
= vectype
;
3895 if (!useless_type_conversion_p (utype
, TREE_TYPE (oprnd0
)))
3897 def
= vect_recog_temp_ssa_var (utype
, NULL
);
3898 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3899 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3903 if (dt
== vect_external_def
&& TREE_CODE (oprnd1
) == SSA_NAME
)
3904 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
3907 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (utype
);
3908 if (dt
!= vect_internal_def
|| TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
3910 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
3912 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
3913 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
3914 && TYPE_PRECISION (TREE_TYPE (rhs1
))
3915 == TYPE_PRECISION (type
))
3919 if (def
== NULL_TREE
)
3921 def
= vect_recog_temp_ssa_var (utype
, NULL
);
3922 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
3923 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3925 stype
= TREE_TYPE (def
);
3927 if (TREE_CODE (def
) == INTEGER_CST
)
3929 if (!tree_fits_uhwi_p (def
)
3930 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
3931 || integer_zerop (def
))
3933 def2
= build_int_cst (stype
,
3934 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
3938 tree vecstype
= get_vectype_for_scalar_type (vinfo
, stype
);
3940 if (vecstype
== NULL_TREE
)
3942 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
3943 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
3947 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
3948 gcc_assert (!new_bb
);
3951 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
3953 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
3954 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (mode
) - 1);
3955 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
3956 gimple_assign_lhs (def_stmt
), mask
);
3960 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
3961 gcc_assert (!new_bb
);
3964 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
3967 var1
= vect_recog_temp_ssa_var (utype
, NULL
);
3968 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
3969 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
3971 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3973 var2
= vect_recog_temp_ssa_var (utype
, NULL
);
3974 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
3975 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
3977 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3979 /* Pattern detected. */
3980 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3982 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3983 var
= vect_recog_temp_ssa_var (utype
, NULL
);
3984 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
3986 if (!useless_type_conversion_p (type
, utype
))
3988 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, uvectype
);
3989 tree result
= vect_recog_temp_ssa_var (type
, NULL
);
3990 pattern_stmt
= gimple_build_assign (result
, NOP_EXPR
, var
);
3992 return pattern_stmt
;
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMT_VINFO: The stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
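
/* For illustration only (hypothetical source): the recognizer matches loops
   such as

     void f (long long *a, int *s, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = a[i] << s[i];   // shift amount narrower than shifted value
     }

   where the shift amount has a different (narrower) type than the shifted
   operand; the pattern gives both operands the same vector element type so
   the target's vector-by-vector shift can be used.  */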
4034 vect_recog_vector_vector_shift_pattern (vec_info
*vinfo
,
4035 stmt_vec_info stmt_vinfo
,
4038 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4039 tree oprnd0
, oprnd1
, lhs
, var
;
4040 gimple
*pattern_stmt
;
4041 enum tree_code rhs_code
;
4043 if (!is_gimple_assign (last_stmt
))
4046 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4058 lhs
= gimple_assign_lhs (last_stmt
);
4059 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4060 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4061 if (TREE_CODE (oprnd0
) != SSA_NAME
4062 || TREE_CODE (oprnd1
) != SSA_NAME
4063 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
4064 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0
))
4065 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
4066 || TYPE_PRECISION (TREE_TYPE (lhs
))
4067 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
4070 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
4074 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (oprnd0
));
4075 if (*type_out
== NULL_TREE
)
4078 tree def
= NULL_TREE
;
4079 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
4080 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
4082 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
4083 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
4084 && TYPE_PRECISION (TREE_TYPE (rhs1
))
4085 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
4087 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
4088 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
4093 = build_low_bits_mask (TREE_TYPE (rhs1
),
4094 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
4095 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4096 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
4097 tree vecstype
= get_vectype_for_scalar_type (vinfo
,
4099 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
4104 if (def
== NULL_TREE
)
4106 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
4107 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
4108 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4111 /* Pattern detected. */
4112 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
4114 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4115 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
4116 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
4118 return pattern_stmt
;
/* Return true iff the target has a vector optab implementing the operation
   CODE on type VECTYPE.  */

static bool
target_has_vecop_for_code (tree_code code, tree vectype)
{
  optab voptab = optab_for_tree_code (code, vectype, optab_vector);
  return voptab
         && can_implement_p (voptab, TYPE_MODE (vectype));
}
/* Verify that the target has optabs of VECTYPE to perform all the steps
   needed by the multiplication-by-immediate synthesis algorithm described by
   ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
   present.  Return true iff the target supports all the steps.  */

static bool
target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
                                tree vectype, bool synth_shift_p)
{
  if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    return false;

  bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
  bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);

  if (var == negate_variant
      && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    return false;

  /* If we must synthesize shifts with additions make sure that vector
     addition is available.  */
  if ((var == add_variant || synth_shift_p) && !supports_vplus)
    return false;

  for (int i = 1; i < alg->ops; i++)
    {
      switch (alg->op[i])
        {
        case alg_shift:
          break;
        case alg_add_t_m2:
        case alg_add_t2_m:
        case alg_add_factor:
          if (!supports_vplus)
            return false;
          break;
        case alg_sub_t_m2:
        case alg_sub_t2_m:
        case alg_sub_factor:
          if (!supports_vminus)
            return false;
          break;
        case alg_unknown:
        case alg_m:
        case alg_zero:
        case alg_impossible:
          return false;
        default:
          gcc_unreachable ();
        }
    }

  return true;
}
/* Synthesize a left shift of OP by AMNT bits using a series of additions and
   putting the final result in DEST.  Append all statements but the last into
   VINFO.  Return the last statement.  */

static gimple *
synth_lshift_by_additions (vec_info *vinfo,
                           tree dest, tree op, HOST_WIDE_INT amnt,
                           stmt_vec_info stmt_info)
{
  HOST_WIDE_INT i;
  tree itype = TREE_TYPE (op);
  tree prev_res = op;
  gcc_assert (amnt >= 0);
  for (i = 0; i < amnt; i++)
    {
      tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
                      : dest;
      gimple *stmt
        = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
      prev_res = tmp_var;
      if (i < amnt - 1)
        append_pattern_def_seq (vinfo, stmt_info, stmt);
      else
        return stmt;
    }
  gcc_unreachable ();
  return NULL;
}
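
/* For illustration only: a left shift by 3 synthesized this way becomes
   three doubling additions, e.g. (hypothetical SSA names)

     t1 = x + x;        // x << 1
     t2 = t1 + t1;      // x << 2
     dest = t2 + t2;    // x << 3

   which only needs vector addition support from the target.  */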
/* Helper for vect_synth_mult_by_constant.  Apply a binary operation
   CODE to operands OP1 and OP2, creating a new temporary SSA var in
   the process if necessary.  Append the resulting assignment statements
   to the sequence in STMT_VINFO.  Return the SSA variable that holds the
   result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
   left shifts using additions.  */
4224 apply_binop_and_append_stmt (vec_info
*vinfo
,
4225 tree_code code
, tree op1
, tree op2
,
4226 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
4228 if (integer_zerop (op2
)
4229 && (code
== LSHIFT_EXPR
4230 || code
== PLUS_EXPR
))
4232 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
4237 tree itype
= TREE_TYPE (op1
);
4238 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
4240 if (code
== LSHIFT_EXPR
4243 stmt
= synth_lshift_by_additions (vinfo
, tmp_var
, op1
,
4244 TREE_INT_CST_LOW (op2
), stmt_vinfo
);
4245 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4249 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
4250 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
   and simple arithmetic operations to be vectorized.  Record the statements
   produced in STMT_VINFO and return the last statement in the sequence or
   NULL if it's not possible to synthesize such a multiplication.
   This function mirrors the behavior of expand_mult_const in expmed.cc but
   works on tree-ssa form.  */

static gimple *
vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
                             stmt_vec_info stmt_vinfo)
{
4265 tree itype
= TREE_TYPE (op
);
4266 machine_mode mode
= TYPE_MODE (itype
);
4267 struct algorithm alg
;
4268 mult_variant variant
;
4269 if (!tree_fits_shwi_p (val
))
4272 /* Multiplication synthesis by shifts, adds and subs can introduce
4273 signed overflow where the original operation didn't. Perform the
4274 operations on an unsigned type and cast back to avoid this.
4275 In the future we may want to relax this for synthesis algorithms
4276 that we can prove do not cause unexpected overflow. */
4277 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
4279 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
4280 tree vectype
= get_vectype_for_scalar_type (vinfo
, multtype
);
4284 /* Targets that don't support vector shifts but support vector additions
4285 can synthesize shifts that way. */
4286 bool synth_shift_p
= !vect_supportable_shift (vinfo
, LSHIFT_EXPR
, multtype
);
4288 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
4289 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4290 The vectorizer's benefit analysis will decide whether it's beneficial
4292 bool possible
= choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype
))
4293 ? TYPE_MODE (vectype
) : mode
,
4294 hwval
, &alg
, &variant
, MAX_COST
);
4298 if (!target_supports_mult_synth_alg (&alg
, variant
, vectype
, synth_shift_p
))
4303 /* Clear out the sequence of statements so we can populate it below. */
4304 gimple
*stmt
= NULL
;
4306 if (cast_to_unsigned_p
)
4308 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
4309 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
4310 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4314 if (alg
.op
[0] == alg_zero
)
4315 accumulator
= build_int_cst (multtype
, 0);
4319 bool needs_fixup
= (variant
== negate_variant
)
4320 || (variant
== add_variant
);
4322 for (int i
= 1; i
< alg
.ops
; i
++)
4324 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
4325 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4326 tree tmp_var
= NULL_TREE
;
4333 = synth_lshift_by_additions (vinfo
, accum_tmp
, accumulator
,
4334 alg
.log
[i
], stmt_vinfo
);
4336 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
4341 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
, shft_log
,
4342 stmt_vinfo
, synth_shift_p
);
4343 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
4347 tmp_var
= apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
,
4348 shft_log
, stmt_vinfo
,
4350 /* In some algorithms the first step involves zeroing the
4351 accumulator. If subtracting from such an accumulator
4352 just emit the negation directly. */
4353 if (integer_zerop (accumulator
))
4354 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
4356 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
4361 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4362 shft_log
, stmt_vinfo
, synth_shift_p
);
4363 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
4367 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4368 shft_log
, stmt_vinfo
, synth_shift_p
);
4369 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
4371 case alg_add_factor
:
4373 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4374 shft_log
, stmt_vinfo
, synth_shift_p
);
4375 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
4378 case alg_sub_factor
:
4380 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4381 shft_log
, stmt_vinfo
, synth_shift_p
);
4382 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
4388 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4389 but rather return it directly. */
4391 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
4392 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4393 accumulator
= accum_tmp
;
4395 if (variant
== negate_variant
)
4397 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4398 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
4399 accumulator
= accum_tmp
;
4400 if (cast_to_unsigned_p
)
4401 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4403 else if (variant
== add_variant
)
4405 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4406 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
4407 accumulator
= accum_tmp
;
4408 if (cast_to_unsigned_p
)
4409 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4411 /* Move back to a signed if needed. */
4412 if (cast_to_unsigned_p
)
4414 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
4415 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
/* Detect multiplication by constant and convert it into a sequence of
   shifts and additions, subtractions, negations.  We reuse the
   choose_mult_variant algorithms from expmed.cc

   Input/Output:

   STMT_VINFO: The stmt from which the pattern search begins,
   i.e. the mult stmt.

   Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace
    the multiplication.  */

static gimple *
vect_recog_mult_pattern (vec_info *vinfo,
                         stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, vectype, itype;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);

  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || !INTEGRAL_TYPE_P (itype)
      || !type_has_mode_precision_p (itype))
    return NULL;

  vectype = get_vectype_for_scalar_type (vinfo, itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized multiplication natively,
     don't attempt to optimize this.  */
  optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
  if (mul_optab != unknown_optab
      && can_implement_p (mul_optab, TYPE_MODE (vectype)))
    return NULL;

  pattern_stmt = vect_synth_mult_by_constant (vinfo,
                                              oprnd0, oprnd1, stmt_vinfo);
  if (!pattern_stmt)
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);

  *type_out = vectype;

  return pattern_stmt;
}
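
/* For illustration only: on a target without a vector multiply, a source
   multiplication by the constant 9 such as

     void scale9 (int *a, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = a[i] * 9;
     }

   is synthesized (via choose_mult_variant) as roughly t = a[i] << 3;
   a[i] = t + a[i]; which only needs vector shift and add support.  */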
extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));

extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
static gimple *
vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
                                     internal_fn fn, tree *type_out,
                                     tree lhs, tree op_0, tree op_1)
{
  tree itype = TREE_TYPE (op_0);
  tree otype = TREE_TYPE (lhs);
  tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
  tree v_otype = get_vectype_for_scalar_type (vinfo, otype);

  if (v_itype != NULL_TREE && v_otype != NULL_TREE
      && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
    {
      gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
      tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);

      gimple_call_set_lhs (call, in_ssa);
      gimple_call_set_nothrow (call, /* nothrow_p */ false);
      gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info)));

      *type_out = v_otype;

      if (types_compatible_p (itype, otype))
        return call;
      else
        {
          append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
          tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);

          return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
        }
    }

  return NULL;
}
/*
 * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
 *   _7 = _4 + _6;
 *   _8 = _4 > _7;
 *   _9 = (long unsigned int) _8;
 *   _10 = -_9;
 *   _12 = _7 | _10;
 *
 * And then simplified to
 *   _12 = .SAT_ADD (_4, _6);
 */

static gimple *
vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                            tree *type_out)
{
  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree ops[2];
  tree lhs = gimple_assign_lhs (last_stmt);

  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
      || gimple_signed_integer_sat_add (lhs, ops, NULL))
    {
      if (TREE_CODE (ops[1]) == INTEGER_CST)
        ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);

      gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
                                                          IFN_SAT_ADD, type_out,
                                                          lhs, ops[0], ops[1]);
      if (stmt)
        {
          vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
          return stmt;
        }
    }

  return NULL;
}
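
/* For illustration only (hypothetical source): the match-and-simplify
   predicates above accept the usual open-coded form of unsigned
   saturating addition, e.g.

     uint16_t sat_add_u16 (uint16_t a, uint16_t b)
     {
       uint16_t sum = a + b;
       return sum < a ? 0xffff : sum;   // branchless: sum | -(sum < a)
     }

   which the vectorizer can then emit as a single .SAT_ADD per vector lane
   when the target provides IFN_SAT_ADD.  */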
/*
 * Try to transform the truncation for the .SAT_SUB pattern, which mostly
 * occurs in the benchmark zip.  Aka:
 *
 *   unsigned int _1, _2;
 *   unsigned short int _4;
 *   _9 = (unsigned short int).SAT_SUB (_1, _2);
 *
 *   if _1 is known to be in the range of unsigned short int.  For example
 *   there is a def _1 = (unsigned short int)_4.  Then we can transform the
 *   truncation to:
 *
 *   _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
 *   _9 = .SAT_SUB (_4, _3);
 *
 *   Then we can generate better vectorized code and avoid the unnecessary
 *   narrowing stmt during vectorization with the below stmt(s).
 *
 *   _3 = .SAT_TRUNC(_2); // SI => HI
 *   _9 = .SAT_SUB (_4, _3);
 */
4595 vect_recog_sat_sub_pattern_transform (vec_info
*vinfo
,
4596 stmt_vec_info stmt_vinfo
,
4597 tree lhs
, tree
*ops
)
4599 tree otype
= TREE_TYPE (lhs
);
4600 tree itype
= TREE_TYPE (ops
[0]);
4601 unsigned itype_prec
= TYPE_PRECISION (itype
);
4602 unsigned otype_prec
= TYPE_PRECISION (otype
);
4604 if (types_compatible_p (otype
, itype
) || otype_prec
>= itype_prec
)
4607 tree v_otype
= get_vectype_for_scalar_type (vinfo
, otype
);
4608 tree v_itype
= get_vectype_for_scalar_type (vinfo
, itype
);
4609 tree_pair v_pair
= tree_pair (v_otype
, v_itype
);
4611 if (v_otype
== NULL_TREE
|| v_itype
== NULL_TREE
4612 || !direct_internal_fn_supported_p (IFN_SAT_TRUNC
, v_pair
,
4616 /* 1. Find the _4 and update ops[0] as above example. */
4617 vect_unpromoted_value unprom
;
4618 tree tmp
= vect_look_through_possible_promotion (vinfo
, ops
[0], &unprom
);
4620 if (tmp
== NULL_TREE
|| TYPE_PRECISION (unprom
.type
) != otype_prec
)
4625 /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example. */
4626 tree trunc_lhs_ssa
= vect_recog_temp_ssa_var (otype
, NULL
);
4627 gcall
*call
= gimple_build_call_internal (IFN_SAT_TRUNC
, 1, ops
[1]);
4629 gimple_call_set_lhs (call
, trunc_lhs_ssa
);
4630 gimple_call_set_nothrow (call
, /* nothrow_p */ false);
4631 append_pattern_def_seq (vinfo
, stmt_vinfo
, call
, v_otype
);
4633 ops
[1] = trunc_lhs_ssa
;
/*
 * Try to detect saturation sub pattern (SAT_SUB), aka below gimple:
 *   ...
 *   _10 = (long unsigned int) _7;
 *   ...
 *
 * And then simplified to
 *   _9 = .SAT_SUB (_1, _2);
 *
 * Or:
 *   x.0_4 = (unsigned char) x_16;
 *   y.1_5 = (unsigned char) y_18;
 *   _6 = x.0_4 - y.1_5;
 *   minus_19 = (int8_t) _6;
 *   ...
 *   _8 = x_16 ^ minus_19;
 *   ...
 *   _24 = (signed char) _23;
 *   _58 = (unsigned char) _24;
 *   ...
 *   _25 = (signed char) _59;
 *   ...
 *   iftmp.2_11 = _41 ? _26 : minus_19;
 *
 * And then simplified to
 *   iftmp.2_11 = .SAT_SUB (x_16, y_18);
 */
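
/* For illustration only (hypothetical source): the simplest unsigned form
   matched here is

     uint8_t sat_sub_u8 (uint8_t a, uint8_t b)
     {
       return (a - b) & -(a >= b);   // 0 when b > a, a - b otherwise
     }

   i.e. subtraction that clamps at zero instead of wrapping, which becomes a
   single .SAT_SUB per vector lane when the target supports IFN_SAT_SUB.  */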
4670 vect_recog_sat_sub_pattern (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
4673 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
4675 if (!is_gimple_assign (last_stmt
))
4679 tree lhs
= gimple_assign_lhs (last_stmt
);
4681 if (gimple_unsigned_integer_sat_sub (lhs
, ops
, NULL
)
4682 || gimple_signed_integer_sat_sub (lhs
, ops
, NULL
))
4684 vect_recog_sat_sub_pattern_transform (vinfo
, stmt_vinfo
, lhs
, ops
);
4685 gimple
*stmt
= vect_recog_build_binary_gimple_stmt (vinfo
, stmt_vinfo
,
4686 IFN_SAT_SUB
, type_out
,
4687 lhs
, ops
[0], ops
[1]);
4690 vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt
);
/*
 * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple:
 *   overflow_5 = x_4(D) > 4294967295;
 *   _1 = (unsigned int) x_4(D);
 *   _2 = (unsigned int) overflow_5;
 *   _3 = -_2;
 *   _6 = _1 | _3;
 *
 * And then simplified to
 *   _6 = .SAT_TRUNC (x_4(D));
 */
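
/* For illustration only (hypothetical source): saturating truncation from
   64 to 32 bits is usually written as

     uint32_t sat_trunc_u64 (uint64_t x)
     {
       return x > 0xffffffffu ? 0xffffffffu : (uint32_t) x;
     }

   which matches the compare/mask/or form above and becomes .SAT_TRUNC when
   the target supports IFN_SAT_TRUNC for the corresponding type pair.  */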
4711 vect_recog_sat_trunc_pattern (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
4714 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
4716 if (!is_gimple_assign (last_stmt
))
4720 tree lhs
= gimple_assign_lhs (last_stmt
);
4721 tree otype
= TREE_TYPE (lhs
);
4723 if ((gimple_unsigned_integer_sat_trunc (lhs
, ops
, NULL
)
4724 || gimple_signed_integer_sat_trunc (lhs
, ops
, NULL
))
4725 && type_has_mode_precision_p (otype
))
4727 tree itype
= TREE_TYPE (ops
[0]);
4728 tree v_itype
= get_vectype_for_scalar_type (vinfo
, itype
);
4729 tree v_otype
= get_vectype_for_scalar_type (vinfo
, otype
);
4730 internal_fn fn
= IFN_SAT_TRUNC
;
4732 if (v_itype
!= NULL_TREE
&& v_otype
!= NULL_TREE
4733 && direct_internal_fn_supported_p (fn
, tree_pair (v_otype
, v_itype
),
4736 gcall
*call
= gimple_build_call_internal (fn
, 1, ops
[0]);
4737 tree out_ssa
= vect_recog_temp_ssa_var (otype
, NULL
);
4739 gimple_call_set_lhs (call
, out_ssa
);
4740 gimple_call_set_nothrow (call
, /* nothrow_p */ false);
4741 gimple_set_location (call
, gimple_location (last_stmt
));
4743 *type_out
= v_otype
;
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMT_VINFO: The stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced by if N is a power
    of two constant and type is signed:
    S3  y_t = b_t < 0 ? N - 1 : 0;
    S2  x_t = b_t + y_t;
    S1' a_t = x_t >> log2 (N);

    S4 is replaced if N is a power of two constant and
    type is signed by (where *_T temporaries have unsigned type):
    S9  y_T = b_t < 0 ? -1U : 0U;
    S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    S7  z_t = (type) z_T;
    S6  w_t = b_t + z_t;
    S5  x_t = w_t & (N - 1);
    S4' a_t = x_t - z_t;

  Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
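
/* For illustration only: the signed power-of-two case above implements
   C's round-toward-zero semantics without a division, e.g. for N == 4

     int div4 (int b)
     {
       int y = b < 0 ? 3 : 0;   // bias negative values so the shift
       return (b + y) >> 2;     // rounds toward zero, matching b / 4
     }

   so only vector compare, add and arithmetic shift are required.  */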
static gimple *
vect_recog_divmod_pattern (vec_info *vinfo,
                           stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  optab optab;
  tree q, cst;
  int prec;

  if (!is_gimple_assign (last_stmt))
    return NULL;
4805 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4808 case TRUNC_DIV_EXPR
:
4809 case EXACT_DIV_EXPR
:
4810 case TRUNC_MOD_EXPR
:
4816 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4817 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4818 itype
= TREE_TYPE (oprnd0
);
4819 if (TREE_CODE (oprnd0
) != SSA_NAME
4820 || TREE_CODE (oprnd1
) != INTEGER_CST
4821 || TREE_CODE (itype
) != INTEGER_TYPE
4822 || !type_has_mode_precision_p (itype
))
4825 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
4826 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
4827 if (vectype
== NULL_TREE
)
4830 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
4832 /* If the target can handle vectorized division or modulo natively,
4833 don't attempt to optimize this, since native division is likely
4834 to give smaller code. */
4835 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
4836 if (optab
!= unknown_optab
4837 && can_implement_p (optab
, TYPE_MODE (vectype
)))
4841 prec
= TYPE_PRECISION (itype
);
4842 if (integer_pow2p (oprnd1
))
4844 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
4847 /* Pattern detected. */
4848 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
4850 *type_out
= vectype
;
4852 /* Check if the target supports this internal function. */
4853 internal_fn ifn
= IFN_DIV_POW2
;
4854 if (direct_internal_fn_supported_p (ifn
, vectype
, OPTIMIZE_FOR_SPEED
))
4856 tree shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
4858 tree var_div
= vect_recog_temp_ssa_var (itype
, NULL
);
4859 gimple
*div_stmt
= gimple_build_call_internal (ifn
, 2, oprnd0
, shift
);
4860 gimple_call_set_lhs (div_stmt
, var_div
);
4862 if (rhs_code
== TRUNC_MOD_EXPR
)
4864 append_pattern_def_seq (vinfo
, stmt_vinfo
, div_stmt
);
4866 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4867 LSHIFT_EXPR
, var_div
, shift
);
4868 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4870 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4872 gimple_assign_lhs (def_stmt
));
4875 pattern_stmt
= div_stmt
;
4876 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
4878 return pattern_stmt
;
4881 cond
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
4882 def_stmt
= gimple_build_assign (cond
, LT_EXPR
, oprnd0
,
4883 build_int_cst (itype
, 0));
4884 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
,
4885 truth_type_for (vectype
), itype
);
4886 if (rhs_code
== TRUNC_DIV_EXPR
4887 || rhs_code
== EXACT_DIV_EXPR
)
4889 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
4892 = gimple_build_assign (var
, COND_EXPR
, cond
,
4893 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
4894 build_int_cst (itype
, 1)),
4895 build_int_cst (itype
, 0));
4896 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4897 var
= vect_recog_temp_ssa_var (itype
, NULL
);
4899 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
4900 gimple_assign_lhs (def_stmt
));
4901 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4903 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
4905 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4906 RSHIFT_EXPR
, var
, shift
);
4911 if (compare_tree_int (oprnd1
, 2) == 0)
4913 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
4914 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
4915 build_int_cst (itype
, 1),
4916 build_int_cst (itype
, 0));
4917 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4922 = build_nonstandard_integer_type (prec
, 1);
4923 tree vecutype
= get_vectype_for_scalar_type (vinfo
, utype
);
4925 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
4926 - tree_log2 (oprnd1
));
4927 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
4929 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
4930 build_int_cst (utype
, -1),
4931 build_int_cst (utype
, 0));
4932 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
4933 var
= vect_recog_temp_ssa_var (utype
, NULL
);
4934 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
4935 gimple_assign_lhs (def_stmt
),
4937 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
4938 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
4940 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
4941 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4944 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4945 PLUS_EXPR
, oprnd0
, signmask
);
4946 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4948 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4949 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
4950 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
4951 build_int_cst (itype
, 1)));
4952 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4955 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4956 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
4960 return pattern_stmt
;
4963 if ((cst
= uniform_integer_cst_p (oprnd1
))
4964 && TYPE_UNSIGNED (itype
)
4965 && rhs_code
== TRUNC_DIV_EXPR
4967 && targetm
.vectorize
.preferred_div_as_shifts_over_mult (vectype
))
4969 /* We can use the relationship:
4971 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4973 to optimize cases where N+1 is a power of 2, and where // (N+1)
4974 is therefore a shift right. When operating in modes that are
4975 multiples of a byte in size, there are two cases:
4977 (1) N(N+3) is not representable, in which case the question
4978 becomes whether the replacement expression overflows.
4979 It is enough to test that x+N+2 does not overflow,
4980 i.e. that x < MAX-(N+1).
4982 (2) N(N+3) is representable, in which case it is the (only)
4983 bound that we need to check.
4985 ??? For now we just handle the case where // (N+1) is a shift
4986 right by half the precision, since some architectures can
4987 optimize the associated addition and shift combinations
4988 into single instructions. */
4990 auto wcst
= wi::to_wide (cst
);
4991 int pow
= wi::exact_log2 (wcst
+ 1);
4992 if (pow
== prec
/ 2)
4994 gimple
*stmt
= SSA_NAME_DEF_STMT (oprnd0
);
4996 gimple_ranger ranger
;
4999 /* Check that no overflow will occur. If we don't have range
5000 information we can't perform the optimization. */
5002 if (ranger
.range_of_expr (r
, oprnd0
, stmt
) && !r
.undefined_p ())
5004 wide_int max
= r
.upper_bound ();
5005 wide_int one
= wi::shwi (1, prec
);
5006 wide_int adder
= wi::add (one
, wi::lshift (one
, pow
));
5007 wi::overflow_type ovf
;
5008 wi::add (max
, adder
, UNSIGNED
, &ovf
);
5009 if (ovf
== wi::OVF_NONE
)
5011 *type_out
= vectype
;
5012 tree tadder
= wide_int_to_tree (itype
, adder
);
5013 tree rshift
= wide_int_to_tree (itype
, pow
);
5015 tree new_lhs1
= vect_recog_temp_ssa_var (itype
, NULL
);
5017 = gimple_build_assign (new_lhs1
, PLUS_EXPR
, oprnd0
, tadder
);
5018 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
5020 tree new_lhs2
= vect_recog_temp_ssa_var (itype
, NULL
);
5021 patt1
= gimple_build_assign (new_lhs2
, RSHIFT_EXPR
, new_lhs1
,
5023 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
5025 tree new_lhs3
= vect_recog_temp_ssa_var (itype
, NULL
);
5026 patt1
= gimple_build_assign (new_lhs3
, PLUS_EXPR
, new_lhs2
,
5028 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
5030 tree new_lhs4
= vect_recog_temp_ssa_var (itype
, NULL
);
5031 pattern_stmt
= gimple_build_assign (new_lhs4
, RSHIFT_EXPR
,
5034 return pattern_stmt
;
5040 if (prec
> HOST_BITS_PER_WIDE_INT
5041 || integer_zerop (oprnd1
))
5044 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
5047 if (TYPE_UNSIGNED (itype
))
5049 unsigned HOST_WIDE_INT mh
, ml
;
5050 int pre_shift
, post_shift
;
5051 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
5052 & GET_MODE_MASK (itype_mode
));
5053 tree t1
, t2
, t3
, t4
;
5055 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
5056 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
5059 /* Find a suitable multiplier and right shift count instead of
5060 directly dividing by D. */
5061 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
);
5063 /* If the suggested multiplier is more than PREC bits, we can do better
5064 for even divisors, using an initial right shift. */
5065 if (mh
!= 0 && (d
& 1) == 0)
5067 pre_shift
= ctz_or_zero (d
);
5068 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
5077 if (post_shift
- 1 >= prec
)
5080 /* t1 = oprnd0 h* ml;
5084 q = t4 >> (post_shift - 1); */
5085 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
5086 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
5087 build_int_cst (itype
, ml
));
5088 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5090 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
5092 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
5093 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5095 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
5097 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
5098 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5100 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
5102 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
5104 if (post_shift
!= 1)
5106 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5108 q
= vect_recog_temp_ssa_var (itype
, NULL
);
5110 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
5111 build_int_cst (itype
, post_shift
- 1));
5116 pattern_stmt
= def_stmt
;
5121 if (pre_shift
>= prec
|| post_shift
>= prec
)
5124 /* t1 = oprnd0 >> pre_shift;
5126 q = t2 >> post_shift; */
5129 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
5131 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
5132 build_int_cst (NULL
, pre_shift
));
5133 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5138 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
5139 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
5140 build_int_cst (itype
, ml
));
5144 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5146 q
= vect_recog_temp_ssa_var (itype
, NULL
);
5148 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
5149 build_int_cst (itype
, post_shift
));
5154 pattern_stmt
= def_stmt
;
5159 unsigned HOST_WIDE_INT ml
;
5161 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
5162 unsigned HOST_WIDE_INT abs_d
;
5164 tree t1
, t2
, t3
, t4
;
5166 /* Give up for -1. */
5170 /* Since d might be INT_MIN, we have to cast to
5171 unsigned HOST_WIDE_INT before negating to avoid
5172 undefined signed overflow. */
5174 ? (unsigned HOST_WIDE_INT
) d
5175 : - (unsigned HOST_WIDE_INT
) d
);
5177 /* n rem d = n rem -d */
5178 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
5181 oprnd1
= build_int_cst (itype
, abs_d
);
5183 if (HOST_BITS_PER_WIDE_INT
>= prec
5184 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
5185 /* This case is not handled correctly below. */
5188 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
);
5189 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
5192 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
5194 if (post_shift
>= prec
)
5197 /* t1 = oprnd0 h* ml; */
5198 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
5199 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
5200 build_int_cst (itype
, ml
));
5204 /* t2 = t1 + oprnd0; */
5205 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5206 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
5207 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
5214 /* t3 = t2 >> post_shift; */
5215 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5216 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
5217 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
5218 build_int_cst (itype
, post_shift
));
5225 get_range_query (cfun
)->range_of_expr (r
, oprnd0
);
5226 if (!r
.varying_p () && !r
.undefined_p ())
5228 if (!wi::neg_p (r
.lower_bound (), TYPE_SIGN (itype
)))
5230 else if (wi::neg_p (r
.upper_bound (), TYPE_SIGN (itype
)))
5234 if (msb
== 0 && d
>= 0)
5238 pattern_stmt
= def_stmt
;
5242 /* t4 = oprnd0 >> (prec - 1);
5243 or if we know from VRP that oprnd0 >= 0
5245 or if we know from VRP that oprnd0 < 0
5247 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5248 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
5250 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
5251 build_int_cst (itype
, msb
));
5253 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
5254 build_int_cst (itype
, prec
- 1));
5255 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5257 /* q = t3 - t4; or q = t4 - t3; */
5258 q
= vect_recog_temp_ssa_var (itype
, NULL
);
5259 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
5264 if (rhs_code
== TRUNC_MOD_EXPR
)
5268 /* We divided. Now finish by:
5271 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
);
5273 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
5274 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
5275 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5277 r
= vect_recog_temp_ssa_var (itype
, NULL
);
5278 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
5281 /* Pattern detected. */
5282 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
5284 *type_out
= vectype
;
5285 return pattern_stmt
;
/* Detects pattern with a modulo operation (S1) where both arguments
   are variables of integral type.
   The statement is replaced by division, multiplication, and subtraction.
   The last statement (S4) is returned.

   Example:
   S1 z_t = a_t % b_t;

   is replaced by
   S2 x_t = a_t / b_t;
   S3 y_t = x_t * b_t;
   S4 z_t = a_t - y_t;  */
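
/* For illustration only: this covers variable divisors, e.g.

     void rem (int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = a[i] % b[i];    // no vector modulo, but div/mul/sub exist
     }

   on targets that provide vector division but no vector modulo.  */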
5302 vect_recog_mod_var_pattern (vec_info
*vinfo
,
5303 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5305 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
5306 tree oprnd0
, oprnd1
, vectype
, itype
;
5307 gimple
*pattern_stmt
, *def_stmt
;
5308 enum tree_code rhs_code
;
5310 if (!is_gimple_assign (last_stmt
))
5313 rhs_code
= gimple_assign_rhs_code (last_stmt
);
5314 if (rhs_code
!= TRUNC_MOD_EXPR
)
5317 oprnd0
= gimple_assign_rhs1 (last_stmt
);
5318 oprnd1
= gimple_assign_rhs2 (last_stmt
);
5319 itype
= TREE_TYPE (oprnd0
);
5320 if (TREE_CODE (oprnd0
) != SSA_NAME
5321 || TREE_CODE (oprnd1
) != SSA_NAME
5322 || TREE_CODE (itype
) != INTEGER_TYPE
)
5325 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
5328 || target_has_vecop_for_code (TRUNC_MOD_EXPR
, vectype
)
5329 || !target_has_vecop_for_code (TRUNC_DIV_EXPR
, vectype
)
5330 || !target_has_vecop_for_code (MULT_EXPR
, vectype
)
5331 || !target_has_vecop_for_code (MINUS_EXPR
, vectype
))
5335 q
= vect_recog_temp_ssa_var (itype
, NULL
);
5336 def_stmt
= gimple_build_assign (q
, TRUNC_DIV_EXPR
, oprnd0
, oprnd1
);
5337 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vectype
);
5339 tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
5340 def_stmt
= gimple_build_assign (tmp
, MULT_EXPR
, q
, oprnd1
);
5341 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vectype
);
5343 r
= vect_recog_temp_ssa_var (itype
, NULL
);
5344 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, tmp
);
5346 /* Pattern detected. */
5347 *type_out
= vectype
;
5348 vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt
);
5350 return pattern_stmt
;
/* Return the proper type for converting bool VAR into
   an integer value or NULL_TREE if no such type exists.
   The type is chosen so that the converted value has the
   same number of elements as VAR's vector type.  */

static tree
integer_type_for_mask (tree var, vec_info *vinfo)
{
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL_TREE;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
    return NULL_TREE;

  return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
}
/* Function vect_recog_gcond_pattern

   Try to find a pattern like the following:

     if (a op b)

   where operator 'op' is not != and convert it to an adjusted boolean pattern

     mask = a op b
     if (mask != 0)

   and set the mask type on MASK.

   Input:

   * STMT_VINFO: The stmt at the end from which the pattern
                 search begins, i.e. cast of a bool to
                 an integer type.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.  */
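
/* For illustration only (hypothetical source): with early-break
   vectorization enabled, a loop exit condition such as

     for (int i = 0; i < n; i++)
       if (a[i] > limit)        // gcond with op '>', rewritten as
         break;                 // mask = a[i] > limit; if (mask != 0)

   is rewritten so the comparison is materialized as a vector mask and the
   branch itself only tests that mask against zero.  */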
5398 vect_recog_gcond_pattern (vec_info
*vinfo
,
5399 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5401 /* Currently we only support this for loop vectorization and when multiple
5403 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5404 if (!loop_vinfo
|| !LOOP_VINFO_EARLY_BREAKS (loop_vinfo
))
5407 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
5409 if (!(cond
= dyn_cast
<gcond
*> (last_stmt
)))
5412 auto lhs
= gimple_cond_lhs (cond
);
5413 auto rhs
= gimple_cond_rhs (cond
);
5414 auto code
= gimple_cond_code (cond
);
5416 tree scalar_type
= TREE_TYPE (lhs
);
5417 if (VECTOR_TYPE_P (scalar_type
))
5420 /* If the input is a boolean then try to figure out the precision that the
5421 vector type should use. We cannot use the scalar precision as this would
5422 later mismatch. This is similar to what recog_bool does. */
5423 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
5425 if (tree stype
= integer_type_for_mask (lhs
, vinfo
))
5426 scalar_type
= stype
;
5429 tree vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
);
5430 if (vectype
== NULL_TREE
)
5433 tree new_lhs
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
5434 gimple
*new_stmt
= gimple_build_assign (new_lhs
, code
, lhs
, rhs
);
5435 append_pattern_def_seq (vinfo
, stmt_vinfo
, new_stmt
, vectype
, scalar_type
);
5437 gimple
*pattern_stmt
5438 = gimple_build_cond (NE_EXPR
, new_lhs
,
5439 build_int_cst (TREE_TYPE (new_lhs
), 0),
5440 NULL_TREE
, NULL_TREE
);
5441 *type_out
= vectype
;
5442 vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt
);
5443 return pattern_stmt
;
/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * STMT_VINFO: The stmt at the end from which the pattern
                 search begins, i.e. cast of a bool to
                 an integer type.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

     Assuming size of TYPE is the same as size of all comparisons
     (otherwise some casts would be added where needed), for the above
     sequence we create related pattern stmts:
     S1'  a_T = x1 CMP1 y1 ? 1 : 0;
     S3'  c_T = x2 CMP2 y2 ? a_T : 0;
     S4'  d_T = x3 CMP3 y3 ? 1 : 0;
     S5'  e_T = c_T | d_T;
     S6'  f_T = e_T;

     Instead of the above S3' we could emit:
     S2'  b_T = x2 CMP2 y2 ? 1 : 0;
     S3'  c_T = a_T | b_T;
     but the above is more efficient.  */
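
/* For illustration only (hypothetical source): if-conversion of

     for (int i = 0; i < n; i++)
       out[i] = (x[i] > 0 && y[i] < z[i]) ? r[i] : s[i];

   produces exactly the chain of comparisons, boolean AND/OR and a final
   COND_EXPR described above; this recognizer rewrites the boolean
   temporaries into integer 0/1 (or mask) values whose width matches the
   vector element size so the whole chain can be vectorized.  */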
5494 vect_recog_bool_pattern (vec_info
*vinfo
,
5495 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5497 gimple
*last_stmt
= stmt_vinfo
->stmt
;
5498 enum tree_code rhs_code
;
5499 tree var
, lhs
, rhs
, vectype
;
5500 gimple
*pattern_stmt
;
5502 if (!is_gimple_assign (last_stmt
))
5505 var
= gimple_assign_rhs1 (last_stmt
);
5506 lhs
= gimple_assign_lhs (last_stmt
);
5507 rhs_code
= gimple_assign_rhs_code (last_stmt
);
5509 if (rhs_code
== VIEW_CONVERT_EXPR
)
5510 var
= TREE_OPERAND (var
, 0);
5512 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
5515 hash_set
<gimple
*> bool_stmts
;
5517 if (CONVERT_EXPR_CODE_P (rhs_code
)
5518 || rhs_code
== VIEW_CONVERT_EXPR
)
5520 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
5521 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
5523 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5525 tree type
= integer_type_for_mask (var
, vinfo
);
5526 tree cst0
, cst1
, tmp
;
5531 /* We may directly use cond with narrowed type to avoid multiple cond
5532 exprs with following result packing and perform single cond with
5533 packed mask instead. In case of widening we better make cond first
5534 and then extract results. */
5535 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
5536 type
= TREE_TYPE (lhs
);
5538 cst0
= build_int_cst (type
, 0);
5539 cst1
= build_int_cst (type
, 1);
5540 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
5541 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
5543 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
5545 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
5546 append_pattern_def_seq (vinfo
, stmt_vinfo
,
5547 pattern_stmt
, new_vectype
);
5549 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5550 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
5553 *type_out
= vectype
;
5554 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
5556 return pattern_stmt
;
5558 else if (rhs_code
== COND_EXPR
5559 && TREE_CODE (var
) == SSA_NAME
)
5561 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5562 if (vectype
== NULL_TREE
)
5565 /* Build a scalar type for the boolean result that when
5566 vectorized matches the vector type of the result in
5567 size and number of elements. */
5569 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
5570 TYPE_VECTOR_SUBPARTS (vectype
));
5573 = build_nonstandard_integer_type (prec
,
5574 TYPE_UNSIGNED (TREE_TYPE (var
)));
5575 if (get_vectype_for_scalar_type (vinfo
, type
) == NULL_TREE
)
5578 enum vect_def_type dt
;
5579 if (integer_type_for_mask (var
, vinfo
))
5581 else if (TREE_CODE (TREE_TYPE (var
)) == BOOLEAN_TYPE
5582 && vect_is_simple_use (var
, vinfo
, &dt
)
5583 && (dt
== vect_external_def
5584 || dt
== vect_constant_def
))
5586 /* If the condition is already a boolean then manually convert it to a
5587 mask of the given integer type but don't set a vectype. */
5588 tree lhs_ivar
= vect_recog_temp_ssa_var (type
, NULL
);
5589 pattern_stmt
= gimple_build_assign (lhs_ivar
, COND_EXPR
, var
,
5590 build_all_ones_cst (type
),
5591 build_zero_cst (type
));
5592 append_inv_pattern_def_seq (vinfo
, pattern_stmt
);
5596 tree lhs_var
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
5597 pattern_stmt
= gimple_build_assign (lhs_var
, NE_EXPR
, var
,
5598 build_zero_cst (TREE_TYPE (var
)));
5600 tree new_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (var
));
5604 new_vectype
= truth_type_for (new_vectype
);
5605 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, new_vectype
,
5608 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5610 = gimple_build_assign (lhs
, COND_EXPR
, lhs_var
,
5611 gimple_assign_rhs2 (last_stmt
),
5612 gimple_assign_rhs3 (last_stmt
));
5613 *type_out
= vectype
;
5614 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
5616 return pattern_stmt
;
5618 else if (rhs_code
== SSA_NAME
5619 && STMT_VINFO_DATA_REF (stmt_vinfo
))
5621 stmt_vec_info pattern_stmt_info
;
5622 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5623 if (!vectype
|| !VECTOR_MODE_P (TYPE_MODE (vectype
)))
5626 tree type
= integer_type_for_mask (var
, vinfo
);
5627 tree cst0
, cst1
, new_vectype
;
5632 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
5633 type
= TREE_TYPE (vectype
);
5635 cst0
= build_int_cst (type
, 0);
5636 cst1
= build_int_cst (type
, 1);
5637 new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
5639 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
5640 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
5641 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, new_vectype
);
5643 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
5644 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
5646 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5647 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
5648 append_pattern_def_seq (vinfo
, stmt_vinfo
, cast_stmt
);
5651 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
5652 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
5653 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
5654 *type_out
= vectype
;
5655 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
5657 return pattern_stmt
;
/* A helper for vect_recog_mask_conversion_pattern.  Build
   conversion of MASK to a type suitable for masking VECTYPE.
   Built statement gets required vectype and is appended to
   a pattern sequence of STMT_VINFO.

   Return converted mask.  */

static tree
build_mask_conversion (vec_info *vinfo,
                       tree mask, tree vectype, stmt_vec_info stmt_vinfo)
{
  gimple *stmt;
  tree masktype, tmp;

  masktype = truth_type_for (vectype);
  tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
  stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
  append_pattern_def_seq (vinfo, stmt_vinfo,
                          stmt, masktype, TREE_TYPE (vectype));

  return tmp;
}
/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

   bool m_1, m_2, m_3;
   int i_4, i_5;
   double d_6, d_7;
   char c_1, c_2, c_3;

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3   m_3 = m_1 & m_2;
   S4   c_1 = m_3 ? c_2 : c_3;

   Will be transformed into:

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3'' m_2' = (_Bool[bitsize=32])m_2
   S3'  m_3' = m_1 & m_2';
   S4'' m_3'' = (_Bool[bitsize=8])m_3'
   S4'  c_1' = m_3'' ? c_2 : c_3;  */
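
/* For illustration only (hypothetical source): mixing comparison widths, e.g.

     for (int i = 0; i < n; i++)
       c[i] = (ival[i] > 0 && dval[i] < 1.0) ? c2[i] : c3[i];

   gives one mask from an int compare and one from a double compare; the
   extra conversions inserted above bring both masks to a common number of
   elements before the AND and the final COND_EXPR.  */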
5713 vect_recog_mask_conversion_pattern (vec_info
*vinfo
,
5714 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5716 gimple
*last_stmt
= stmt_vinfo
->stmt
;
5717 enum tree_code rhs_code
;
5718 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
5719 tree vectype1
, vectype2
;
5720 stmt_vec_info pattern_stmt_info
;
5722 /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
5724 if (is_gimple_call (last_stmt
)
5725 && gimple_call_internal_p (last_stmt
))
5727 gcall
*pattern_stmt
;
5729 internal_fn ifn
= gimple_call_internal_fn (last_stmt
);
5730 int mask_argno
= internal_fn_mask_index (ifn
);
5734 bool store_p
= internal_store_fn_p (ifn
);
5735 bool load_p
= internal_store_fn_p (ifn
);
5738 int rhs_index
= internal_fn_stored_value_index (ifn
);
5739 tree rhs
= gimple_call_arg (last_stmt
, rhs_index
);
5740 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs
));
5744 lhs
= gimple_call_lhs (last_stmt
);
5747 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5753 tree mask_arg
= gimple_call_arg (last_stmt
, mask_argno
);
5754 tree mask_arg_type
= integer_type_for_mask (mask_arg
, vinfo
);
5757 vectype2
= get_mask_type_for_scalar_type (vinfo
, mask_arg_type
);
5760 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
5761 TYPE_VECTOR_SUBPARTS (vectype2
)))
5764 else if (store_p
|| load_p
)
5767 tmp
= build_mask_conversion (vinfo
, mask_arg
, vectype1
, stmt_vinfo
);
5769 auto_vec
<tree
, 8> args
;
5770 unsigned int nargs
= gimple_call_num_args (last_stmt
);
5771 args
.safe_grow (nargs
, true);
5772 for (unsigned int i
= 0; i
< nargs
; ++i
)
5773 args
[i
] = ((int) i
== mask_argno
5775 : gimple_call_arg (last_stmt
, i
));
5776 pattern_stmt
= gimple_build_call_internal_vec (ifn
, args
);
5780 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5781 gimple_call_set_lhs (pattern_stmt
, lhs
);
5784 if (load_p
|| store_p
)
5785 gimple_call_set_nothrow (pattern_stmt
, true);
5787 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
5788 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
5789 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
5791 *type_out
= vectype1
;
5792 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
5794 return pattern_stmt
;
5797 if (!is_gimple_assign (last_stmt
))
5800 gimple
*pattern_stmt
;
5801 lhs
= gimple_assign_lhs (last_stmt
);
5802 rhs1
= gimple_assign_rhs1 (last_stmt
);
5803 rhs_code
= gimple_assign_rhs_code (last_stmt
);
5805 /* Check for cond expression requiring mask conversion. */
5806 if (rhs_code
== COND_EXPR
)
5808 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5810 gcc_assert (! COMPARISON_CLASS_P (rhs1
));
5811 if (TREE_CODE (rhs1
) == SSA_NAME
)
5813 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
5820 vectype2
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
5822 if (!vectype1
|| !vectype2
)
5825 /* Continue if a conversion is needed. Also continue if we have
5826 a comparison whose vector type would normally be different from
5827 VECTYPE2 when considered in isolation. In that case we'll
5828 replace the comparison with an SSA name (so that we can record
5829 its vector type) and behave as though the comparison was an SSA
5830 name from the outset. */
5831 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
5832 TYPE_VECTOR_SUBPARTS (vectype2
)))
5835 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
5836 TYPE_VECTOR_SUBPARTS (vectype2
)))
5837 tmp
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
5841 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5842 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
5843 gimple_assign_rhs2 (last_stmt
),
5844 gimple_assign_rhs3 (last_stmt
));
5846 *type_out
= vectype1
;
5847 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
5849 return pattern_stmt
;
5852 /* Now check for binary boolean operations requiring conversion for
5854 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
5857 if (rhs_code
!= BIT_IOR_EXPR
5858 && rhs_code
!= BIT_XOR_EXPR
5859 && rhs_code
!= BIT_AND_EXPR
5860 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
5863 rhs2
= gimple_assign_rhs2 (last_stmt
);
5865 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
5866 rhs2_type
= integer_type_for_mask (rhs2
, vinfo
);
5868 if (!rhs1_type
|| !rhs2_type
5869 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
5872 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
5874 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
5877 rhs2
= build_mask_conversion (vinfo
, rhs2
, vectype1
, stmt_vinfo
);
5881 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs2_type
);
5884 rhs1
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
5887 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5888 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
5890 *type_out
= vectype1
;
5891 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
5893 return pattern_stmt
;
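/* For illustration only: a scalar source loop of the kind that gives rise
   to the mixed-width masks handled above.  The names are made up for the
   example and do not appear elsewhere in the vectorizer:

     void
     example_mixed_masks (char *r, const char *a, const char *b,
			  const int *i, const int *j,
			  const double *x, const double *y, int n)
     {
       for (int k = 0; k < n; k++)
	 r[k] = (i[k] > j[k] && x[k] < y[k]) ? a[k] : b[k];
     }

   The int comparison produces a mask sized for 32-bit elements and the
   double comparison a mask sized for 64-bit elements, so the AND of the two
   masks and the final selection on chars need the conversions built by
   build_mask_conversion.  */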
/* STMT_INFO is a load or store.  If the load or store is conditional, return
   the boolean condition under which it occurs, otherwise return null.  */

static tree
vect_get_load_store_mask (stmt_vec_info stmt_info)
{
  if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (def_assign));
      return NULL_TREE;
    }

  if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (def_call);
      int mask_index = internal_fn_mask_index (ifn);
      return gimple_call_arg (def_call, mask_index);
    }

  gcc_unreachable ();
}
/* Return MASK if MASK is suitable for masking an operation on vectors
   of type VECTYPE, otherwise convert it into such a form and return
   the result.  Associate any conversion statements with STMT_INFO's
   pattern.  */

static tree
vect_convert_mask_for_vectype (tree mask, tree vectype,
			       stmt_vec_info stmt_info, vec_info *vinfo)
{
  tree mask_type = integer_type_for_mask (mask, vinfo);
  if (mask_type)
    {
      tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
      if (mask_vectype
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
	mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
    }
  return mask;
}
/* Return the equivalent of:

     fold_convert (TYPE, VALUE)

   with the expectation that the operation will be vectorized.
   If new statements are needed, add them as pattern statements
   to STMT_INFO.  */

static tree
vect_add_conversion_to_pattern (vec_info *vinfo,
				tree type, tree value, stmt_vec_info stmt_info)
{
  if (useless_type_conversion_p (type, TREE_TYPE (value)))
    return value;

  tree new_value = vect_recog_temp_ssa_var (type, NULL);
  gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
  append_pattern_def_seq (vinfo, stmt_info, conversion,
			  get_vectype_for_scalar_type (vinfo, type));
  return new_value;
}
/* Try to convert STMT_INFO into a call to a gather load or scatter store
   internal function.  Return the final statement on success and set
   *TYPE_OUT to the vector type being loaded or stored.

   This function only handles gathers and scatters that were recognized
   as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */

static gimple *
vect_recog_gather_scatter_pattern (vec_info *vinfo,
				   stmt_vec_info stmt_info, tree *type_out)
{
  /* Currently we only support this for loop vectorization.  */
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_vinfo)
    return NULL;

  /* Make sure that we're looking at a gather load or scatter store.  */
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return NULL;

  /* Get the boolean that controls whether the load or store happens.
     This is null if the operation is unconditional.  */
  tree mask = vect_get_load_store_mask (stmt_info);

  /* Make sure that the target supports an appropriate internal
     function for the gather/scatter operation.  */
  gather_scatter_info gs_info;
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
      || gs_info.ifn == IFN_LAST)
    return NULL;

  /* Convert the mask to the right form.  */
  tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
						 gs_info.element_type);
  if (mask)
    mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
					  loop_vinfo);
  else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
	   || gs_info.ifn == IFN_MASK_GATHER_LOAD
	   || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
	   || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
    mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);

  /* Get the invariant base and non-invariant offset, converting the
     latter to the same width as the vector elements.  */
  tree base = gs_info.base;
  tree offset_type = TREE_TYPE (gs_info.offset_vectype);
  tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
						gs_info.offset, stmt_info);

  /* Build the new pattern statement.  */
  tree scale = size_int (gs_info.scale);
  gcall *pattern_stmt;

  if (DR_IS_READ (dr))
    {
      tree zero = build_zero_cst (gs_info.element_type);
      if (mask != NULL)
	{
	  int elsval = MASK_LOAD_ELSE_ZERO;

	  tree vec_els
	    = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
	  pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
						     offset, scale, zero, mask,
						     vec_els);
	}
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
						   offset, scale, zero);
      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
      gimple_call_set_lhs (pattern_stmt, load_lhs);
    }
  else
    {
      tree rhs = vect_get_store_rhs (stmt_info);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
						   base, offset, scale, rhs,
						   mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
						   base, offset, scale, rhs);
    }
  gimple_call_set_nothrow (pattern_stmt, true);

  /* Copy across relevant vectorization info and associate DR with the
     new pattern statement instead of the original statement.  */
  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
  loop_vinfo->move_dr (pattern_stmt_info, stmt_info);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  *type_out = vectype;
  vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);

  return pattern_stmt;
}
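/* For illustration only: a scalar gather loop of the kind this pattern
   rewrites (names made up for the example):

     void
     example_gather (double *restrict out, const double *restrict data,
		     const int *restrict idx, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = data[idx[i]];
     }

   On a target that provides an unconditional gather internal function, the
   indexed load becomes a pattern statement of roughly the form

     patt_l = .GATHER_LOAD (data, idx_i, 8, 0.0);

   with the offset widened to the vector element width first if needed.  */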
/* Helper method of vect_recog_cond_store_pattern.  Check whether COND_ARG
   points to a load statement that reads the same data as the store
   described by STORE_VINFO.  */

static bool
vect_cond_store_pattern_same_ref (vec_info *vinfo,
				  stmt_vec_info store_vinfo, tree cond_arg)
{
  stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
  if (!load_stmt_vinfo
      || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
      || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
      || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
			  STMT_VINFO_DATA_REF (load_stmt_vinfo)))
    return false;

  return true;
}
/* Function vect_recog_cond_store_pattern

   Try to find the following pattern:

     x = *_3;
     c = a CMP b;
     y = c ? t_20 : x;
     *_3 = y;

   where the store to *_3 happens on a conditional select of a value loaded
   from the same location.  In such cases we can elide the initial load if
   MASK_STORE is supported and instead only conditionally write out the result.

   The pattern produces for the above:

     c = a CMP b;
     .MASK_STORE (_3, c, t_20)

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with _3 then the search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence.  */

static gimple *
vect_recog_cond_store_pattern (vec_info *vinfo,
			       stmt_vec_info stmt_vinfo, tree *type_out)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_vinfo)
    return NULL;

  gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);

  /* Needs to be a gimple store for which we have DR info.  */
  if (!STMT_VINFO_DATA_REF (stmt_vinfo)
      || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
      || !gimple_store_p (store_stmt))
    return NULL;

  tree st_rhs = gimple_assign_rhs1 (store_stmt);

  if (TREE_CODE (st_rhs) != SSA_NAME)
    return NULL;

  auto cond_vinfo = vinfo->lookup_def (st_rhs);

  /* If the condition isn't part of the loop then bool recog wouldn't have seen
     it and so this transformation may not be valid.  */
  if (!cond_vinfo)
    return NULL;

  cond_vinfo = vect_stmt_to_vectorize (cond_vinfo);
  gassign *cond_stmt = dyn_cast<gassign *> (STMT_VINFO_STMT (cond_vinfo));
  if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
    return NULL;

  /* Check if the else value matches the original loaded one.  */
  bool invert = false;
  tree cmp_ls = gimple_arg (cond_stmt, 0);
  if (TREE_CODE (cmp_ls) != SSA_NAME)
    return NULL;

  tree cond_arg1 = gimple_arg (cond_stmt, 1);
  tree cond_arg2 = gimple_arg (cond_stmt, 2);

  if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
      && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
						      cond_arg1)))
    return NULL;

  vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);

  tree scalar_type = TREE_TYPE (st_rhs);
  if (VECTOR_TYPE_P (scalar_type))
    return NULL;

  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (vectype == NULL_TREE)
    return NULL;

  machine_mode mask_mode;
  machine_mode vecmode = TYPE_MODE (vectype);
  if (!VECTOR_MODE_P (vecmode)
      || targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
      || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
    return NULL;

  tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
  if (may_be_nonaddressable_p (base))
    return NULL;

  /* We need to use the false parameter of the conditional select.  */
  tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
  tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
  gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);

  /* This is a rough estimation to check that there aren't any aliasing stores
     in between the load and store.  It's a bit strict, but for now it's good
     enough.  */
  if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
    return NULL;

  /* If we have to invert the condition, i.e. use the true argument rather than
     the false argument, we have to negate the mask.  */
  if (invert)
    {
      tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);

      /* Invert the mask using ^ 1.  */
      tree itype = TREE_TYPE (cmp_ls);
      gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
					   build_int_cst (itype, 1));

      tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
      append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
      cmp_ls = var;
    }

  if (TREE_CODE (base) != MEM_REF)
    base = build_fold_addr_expr (base);

  tree ptr = build_int_cst (reference_alias_ptr_type (base),
			    get_object_alignment (base));

  /* Convert the mask to the right form.  */
  tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
					     vinfo);

  gcall *call
    = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
				  cond_store_arg);
  gimple_set_location (call, gimple_location (store_stmt));

  /* Copy across relevant vectorization info and associate DR with the
     new pattern statement instead of the original statement.  */
  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
  loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);

  *type_out = vectype;
  return call;
}
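/* For illustration only: a scalar loop of the shape the pattern above
   targets (names made up for the example):

     void
     example_cond_store (int *restrict a, const int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i] > 42 ? b[i] : a[i];
     }

   The "else" arm re-loads a[i], so when IFN_MASK_STORE is not expensive the
   load can be dropped and the store done under the comparison mask.  */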
/* Return true if TYPE is a non-boolean integer type.  These are the types
   that we want to consider for narrowing.  */

static bool
vect_narrowable_type_p (tree type)
{
  return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
}
/* Return true if the operation given by CODE can be truncated to N bits
   when only N bits of the output are needed.  This is only true if bit N+1
   of the inputs has no effect on the low N bits of the result.  */

static bool
vect_truncatable_operation_p (tree_code code)
{
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case NEGATE_EXPR:
    case BIT_NOT_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case COND_EXPR:
      return true;

    default:
      return false;
    }
}
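/* The property above can be checked with a small identity.  For addition,
   for instance, only the low N bits of the inputs influence the low N bits
   of the result:

     ((a & 0xff) + (b & 0xff)) & 0xff  ==  (a + b) & 0xff

   whereas a right shift does not satisfy it, since bits above N move down
   into the low part of the result.  */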
/* Record that STMT_INFO could be changed from operating on TYPE to
   operating on a type with the precision and sign given by PRECISION
   and SIGN respectively.  PRECISION is an arbitrary bit precision;
   it might not be a whole number of bytes.  */

static void
vect_set_operation_type (stmt_vec_info stmt_info, tree type,
			 unsigned int precision, signop sign)
{
  /* Round the precision up to a whole number of bytes.  */
  precision = vect_element_precision (precision);
  if (precision < TYPE_PRECISION (type)
      && (!stmt_info->operation_precision
	  || stmt_info->operation_precision > precision))
    {
      stmt_info->operation_precision = precision;
      stmt_info->operation_sign = sign;
    }
}
/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
   non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
   is an arbitrary bit precision; it might not be a whole number of bytes.  */

static void
vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
			      unsigned int min_input_precision)
{
  /* This operation in isolation only requires the inputs to have
     MIN_INPUT_PRECISION of precision.  However, that doesn't mean
     that MIN_INPUT_PRECISION is a natural precision for the chain
     as a whole.  E.g. consider something like:

       unsigned short *x, *y;
       *y = ((*x & 0xf0) >> 4) | (*y << 4);

     The right shift can be done on unsigned chars, and only requires the
     result of "*x & 0xf0" to be done on unsigned chars.  But taking that
     approach would mean turning a natural chain of single-vector unsigned
     short operations into one that truncates "*x" and then extends
     "(*x & 0xf0) >> 4", with two vectors for each unsigned short
     operation and one vector for each unsigned char operation.
     This would be a significant pessimization.

     Instead only propagate the maximum of this precision and the precision
     required by the users of the result.  This means that we don't pessimize
     the case above but continue to optimize things like:

       unsigned char *y;
       unsigned short *x;
       *y = ((*x & 0xf0) >> 4) | (*y << 4);

     Here we would truncate two vectors of *x to a single vector of
     unsigned chars and use single-vector unsigned char operations for
     everything else, rather than doing two unsigned short copies of
     "(*x & 0xf0) >> 4" and then truncating the result.  */
  min_input_precision = MAX (min_input_precision,
			     stmt_info->min_output_precision);

  if (min_input_precision < TYPE_PRECISION (type)
      && (!stmt_info->min_input_precision
	  || stmt_info->min_input_precision > min_input_precision))
    stmt_info->min_input_precision = min_input_precision;
}
/* Subroutine of vect_determine_min_output_precision.  Return true if
   we can calculate a reduced number of output bits for STMT_INFO,
   whose result is LHS.  */

static bool
vect_determine_min_output_precision_1 (vec_info *vinfo,
				       stmt_vec_info stmt_info, tree lhs)
{
  /* Take the maximum precision required by users of the result.  */
  unsigned int precision = 0;
  imm_use_iterator iter;
  use_operand_p use;
  FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    {
      gimple *use_stmt = USE_STMT (use);
      if (is_gimple_debug (use_stmt))
	continue;
      stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
      if (!use_stmt_info || !use_stmt_info->min_input_precision)
	return false;
      /* The input precision recorded for COND_EXPRs applies only to the
	 "then" and "else" values.  */
      gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
      if (assign
	  && gimple_assign_rhs_code (assign) == COND_EXPR
	  && use->use != gimple_assign_rhs2_ptr (assign)
	  && use->use != gimple_assign_rhs3_ptr (assign))
	return false;
      precision = MAX (precision, use_stmt_info->min_input_precision);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "only the low %d bits of %T are significant\n",
		     precision, lhs);
  stmt_info->min_output_precision = precision;
  return true;
}

/* Calculate min_output_precision for STMT_INFO.  */

static void
vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  /* We're only interested in statements with a narrowable result.  */
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    return;

  if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
    stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
}
/* Use range information to decide whether STMT (described by STMT_INFO)
   could be done in a narrower type.  This is effectively a forward
   propagation, since it uses context-independent information that applies
   to all users of an SSA name.  */

static void
vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  tree type = TREE_TYPE (lhs);
  if (!vect_narrowable_type_p (type))
    return;

  /* First see whether we have any useful range information for the result.  */
  unsigned int precision = TYPE_PRECISION (type);
  signop sign = TYPE_SIGN (type);
  wide_int min_value, max_value;
  if (!vect_get_range_info (lhs, &min_value, &max_value))
    return;

  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);

  if (!vect_truncatable_operation_p (code))
    {
      /* Handle operations that can be computed in type T if all inputs
	 and outputs can be represented in type T.  Also handle left and
	 right shifts, where (in addition) the maximum shift amount must
	 be less than the number of bits in T.  */
      bool is_shift;
      switch (code)
	{
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	  is_shift = true;
	  break;

	case ABS_EXPR:
	case MIN_EXPR:
	case MAX_EXPR:
	case TRUNC_DIV_EXPR:
	case CEIL_DIV_EXPR:
	case FLOOR_DIV_EXPR:
	case ROUND_DIV_EXPR:
	case EXACT_DIV_EXPR:
	  /* Modulus is excluded because it is typically calculated by doing
	     a division, for which minimum signed / -1 isn't representable in
	     the original signed type.  We could take the division range into
	     account instead, if handling modulus ever becomes important.  */
	  is_shift = false;
	  break;

	default:
	  return;
	}
      for (unsigned int i = 1; i < nops; ++i)
	{
	  tree op = gimple_op (stmt, i);
	  wide_int op_min_value, op_max_value;
	  if (TREE_CODE (op) == INTEGER_CST)
	    {
	      unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
	      op_min_value = op_max_value = wi::to_wide (op, op_precision);
	    }
	  else if (TREE_CODE (op) == SSA_NAME)
	    {
	      if (!vect_get_range_info (op, &op_min_value, &op_max_value))
		return;
	    }
	  else
	    return;

	  if (is_shift && i == 2)
	    {
	      /* There needs to be one more bit than the maximum shift amount.

		 If the maximum shift amount is already 1 less than PRECISION
		 then we can't narrow the shift further.  Dealing with that
		 case first ensures that we can safely use an unsigned range
		 below.

		 op_min_value isn't relevant, since shifts by negative amounts
		 are undefined.  */
	      if (wi::geu_p (op_max_value, precision - 1))
		return;
	      unsigned int min_bits = op_max_value.to_uhwi () + 1;

	      /* As explained below, we can convert a signed shift into an
		 unsigned shift if the sign bit is always clear.  At this
		 point we've already processed the ranges of the output and
		 the first input.  */
	      auto op_sign = sign;
	      if (sign == SIGNED && !wi::neg_p (min_value))
		op_sign = UNSIGNED;
	      op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
					     precision, op_sign);
	      op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
					     precision, op_sign);
	    }
	  min_value = wi::min (min_value, op_min_value, sign);
	  max_value = wi::max (max_value, op_max_value, sign);
	}
    }

  /* Try to switch signed types for unsigned types if we can.
     This is better for two reasons.  First, unsigned ops tend
     to be cheaper than signed ops.  Second, it means that we can
     handle things like:

	signed char c;
	int res = (int) c & 0xff00; // range [0x0000, 0xff00]

     as:

	signed char c;
	unsigned short res_1 = (unsigned short) c & 0xff00;
	int res = (int) res_1;

     where the intermediate result res_1 has unsigned rather than
     signed type.  */
  if (sign == SIGNED && !wi::neg_p (min_value))
    sign = UNSIGNED;

  /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
  unsigned int precision1 = wi::min_precision (min_value, sign);
  unsigned int precision2 = wi::min_precision (max_value, sign);
  unsigned int value_precision = MAX (precision1, precision2);
  if (value_precision >= precision)
    return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
		     " without loss of precision: %G",
		     sign == SIGNED ? "signed" : "unsigned",
		     value_precision, (gimple *) stmt);

  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}
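/* For illustration only: in a loop such as (names made up)

     void
     example_narrow (unsigned char *restrict out,
		     const unsigned char *restrict a,
		     const unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = (a[i] + b[i] + 1) >> 1;
     }

   the sums promoted to int are known from their ranges to fit in 9 bits
   (at most 255 + 255 + 1 = 511), so the additions and the shift can be done
   on narrower elements than int, increasing the number of lanes per
   vector.  */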
/* Use information about the users of STMT's result to decide whether
   STMT (described by STMT_INFO) could be done in a narrower type.
   This is effectively a backward propagation.  */

static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
	 the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
	tree shift = gimple_assign_rhs2 (stmt);
	if (TREE_CODE (shift) != INTEGER_CST
	    || !wi::ltu_p (wi::to_widest (shift), precision))
	  return;
	unsigned int const_shift = TREE_INT_CST_LOW (shift);
	if (code == LSHIFT_EXPR)
	  {
	    /* Avoid creating an undefined shift.

	       ??? We could instead use min_output_precision as-is and
	       optimize out-of-range shifts to zero.  However, only
	       degenerate testcases shift away all their useful input data,
	       and it isn't natural to drop input operations in the middle
	       of vectorization.  This sort of thing should really be
	       handled before vectorization.  */
	    operation_precision = MAX (stmt_info->min_output_precision,
				       const_shift + 1);
	    /* We need CONST_SHIFT fewer bits of the input.  */
	    min_input_precision = (MAX (operation_precision, const_shift)
				   - const_shift);
	  }
	else
	  {
	    /* We need CONST_SHIFT extra bits to do the operation.  */
	    operation_precision = (stmt_info->min_output_precision
				   + const_shift);
	    min_input_precision = operation_precision;
	  }
	break;
      }

    default:
      if (vect_truncatable_operation_p (code))
	{
	  /* Input bit N has no effect on output bits N-1 and lower.  */
	  operation_precision = stmt_info->min_output_precision;
	  min_input_precision = operation_precision;
	  break;
	}
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
			 " without affecting users: %G",
			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
			 operation_precision, (gimple *) stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
			       TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}
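/* For illustration only, with a min_output_precision of 8 bits: for
   "y = x >> 3" the operation itself needs 8 + 3 = 11 bits of X (rounded up
   to a whole number of bytes by vect_element_precision), whereas for
   "y = x << 3" the operation needs MAX (8, 3 + 1) = 8 bits and only
   8 - 3 = 5 bits of the input actually matter.  */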
/* Return true if the statement described by STMT_INFO sets a boolean
   SSA_NAME and if we know how to vectorize this kind of statement using
   vector mask types.  */

static bool
possible_vector_mask_operation_p (stmt_vec_info stmt_info)
{
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  tree_code code = ERROR_MARK;
  gassign *assign = NULL;
  gcond *cond = NULL;

  if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
    code = gimple_assign_rhs_code (assign);
  else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
    {
      lhs = gimple_cond_lhs (cond);
      code = gimple_cond_code (cond);
    }

  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return false;

  if (code != ERROR_MARK)
    {
      switch (code)
	{
	CASE_CONVERT:
	case SSA_NAME:
	case BIT_NOT_EXPR:
	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
	case BIT_AND_EXPR:
	  return true;

	default:
	  return TREE_CODE_CLASS (code) == tcc_comparison;
	}
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    return true;
  return false;
}
/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
   a vector mask type instead of a normal vector type.  Record the
   result in STMT_INFO->mask_precision.  */

static void
vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  if (!possible_vector_mask_operation_p (stmt_info))
    return;

  /* If at least one boolean input uses a vector mask type,
     pick the mask type with the narrowest elements.

     ??? This is the traditional behavior.  It should always produce
     the smallest number of operations, but isn't necessarily the
     optimal choice.  For example, if we have:

       a = b & c

     where:

       - the user of a wants it to have a mask type for 16-bit elements (M16)
       - b also uses M16
       - c uses a mask type for 8-bit elements (M8)

     then picking M8 gives:

       - 1 M16->M8 pack for b
       - 1 M8 AND for a
       - 2 M8->M16 unpacks for the user of a

     whereas picking M16 would have given:

       - 2 M8->M16 unpacks for c
       - 2 M16 ANDs for a

     The number of operations are equal, but M16 would have given
     a shorter dependency chain and allowed more ILP.  */
  unsigned int precision = ~0U;
  gimple *stmt = STMT_VINFO_STMT (stmt_info);

  /* If the statement compares two values that shouldn't use vector masks,
     try comparing the values as normal scalars instead.  */
  tree_code code = ERROR_MARK;
  tree op0_type;
  unsigned int nops = -1;
  unsigned int ops_start = 0;

  if (gassign *assign = dyn_cast <gassign *> (stmt))
    {
      code = gimple_assign_rhs_code (assign);
      op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
      nops = gimple_num_ops (assign);
      ops_start = 1;
    }
  else if (gcond *cond = dyn_cast <gcond *> (stmt))
    {
      code = gimple_cond_code (cond);
      op0_type = TREE_TYPE (gimple_cond_lhs (cond));
      nops = 2;
      ops_start = 0;
    }

  if (code != ERROR_MARK)
    {
      for (unsigned int i = ops_start; i < nops; ++i)
	{
	  tree rhs = gimple_op (stmt, i);
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
	    continue;

	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
	  if (!def_stmt_info)
	    /* Don't let external or constant operands influence the choice.
	       We can convert them to whichever vector type we pick.  */
	    continue;

	  if (def_stmt_info->mask_precision)
	    {
	      if (precision > def_stmt_info->mask_precision)
		precision = def_stmt_info->mask_precision;
	    }
	}

      if (precision == ~0U
	  && TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  scalar_mode mode;
	  tree vectype, mask_type;
	  if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
	      && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
	      && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
	      && expand_vec_cmp_expr_p (vectype, mask_type, code))
	    precision = GET_MODE_BITSIZE (mode);
	}
    }
  else
    {
      gphi *phi = as_a <gphi *> (stmt_info->stmt);
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	{
	  tree rhs = gimple_phi_arg_def (phi, i);

	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
	  if (!def_stmt_info)
	    /* Don't let external or constant operands influence the choice.
	       We can convert them to whichever vector type we pick.  */
	    continue;

	  if (def_stmt_info->mask_precision)
	    {
	      if (precision > def_stmt_info->mask_precision)
		precision = def_stmt_info->mask_precision;
	    }
	}
    }

  if (dump_enabled_p ())
    {
      if (precision == ~0U)
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using normal nonmask vectors for %G",
			 stmt_info->stmt);
      else
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using boolean precision %d for %G",
			 precision, stmt_info->stmt);
    }

  stmt_info->mask_precision = precision;
}
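/* For illustration only: in a loop like (names made up)

     void
     example_masks (short *restrict out, const short *restrict a,
		    const signed char *restrict c, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = (a[i] > 0 && c[i] > 0) ? a[i] : 0;
     }

   the comparison on A naturally maps to a mask for 16-bit elements and the
   comparison on C to a mask for 8-bit elements; the code above records the
   narrower (8-bit) precision for the combined condition, as discussed in
   the ??? note.  */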
/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

static void
vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (vinfo, stmt_info);
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
}
/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */

void
vect_determine_precisions (vec_info *vinfo)
{
  basic_block *bbs = vinfo->bbs;
  unsigned int nbbs = vinfo->nbbs;

  DUMP_VECT_SCOPE ("vect_determine_precisions");

  for (unsigned int i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_mask_precision (vinfo, stmt_info);
	}
      for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_mask_precision (vinfo, stmt_info);
	}
    }
  for (unsigned int i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[nbbs - i - 1];
      for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
	{
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_stmt_precisions (vinfo, stmt_info);
	}
      for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_stmt_precisions (vinfo, stmt_info);
	}
    }
}
typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt is
   taken which means usually the more complex one needs to precede the
   less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
  { vect_recog_bit_insert_pattern, "bit_insert" },
  { vect_recog_abd_pattern, "abd" },
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
  { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mod_var_pattern, "modvar" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_sat_add_pattern, "sat_add" },
  { vect_recog_sat_sub_pattern, "sat_sub" },
  { vect_recog_sat_trunc_pattern, "sat_trunc" },
  { vect_recog_gcond_pattern, "gcond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_cond_store_pattern, "cond_store" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
  { vect_recog_widen_abd_pattern, "widen_abd" },
  /* These must come after the double widening ones.  */
};
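/* A minimal sketch of hooking in a new recognizer, assuming a hypothetical
   vect_recog_example_pattern defined elsewhere in this file (the name is
   made up for the example).  The recognizer follows the vect_recog_func_ptr
   signature and its entry would be added to the table above at a position
   consistent with the ordering comments:

     static gimple *
     vect_recog_example_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
				 tree *type_out)
     {
       // Inspect stmt_vinfo->stmt; return NULL if it doesn't match.
       // Otherwise build the replacement statement, append any helper
       // statements with append_pattern_def_seq, set *type_out to the
       // vector type of the result and return the final statement.
       return NULL;
     }

     { vect_recog_example_pattern, "example" },
  */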
/* Mark statements that are involved in a pattern.  */

static void
vect_mark_pattern_stmts (vec_info *vinfo,
			 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
	 sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
	 old and new statements, so that the old one has a valid but
	 unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
		  != orig_pattern_stmt);
    }

  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
	 !gsi_end_p (si); gsi_next (&si))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "extra pattern stmt: %G", gsi_stmt (si));
	stmt_vec_info pattern_stmt_info
	  = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
				    orig_stmt_info, pattern_vectype);
	/* Stmts in the def sequence are not vectorizable cycle or
	   induction defs, instead they should all be vect_internal_def
	   feeding the main pattern stmt which retains this def type.  */
	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (vinfo, pattern_stmt,
			      orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
					       orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (vinfo,
			   pattern_stmt, orig_stmt_info, pattern_vectype);

  /* For any conditionals mark them as vect_condition_def.  */
  if (is_a <gcond *> (pattern_stmt))
    STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info))
      = vect_condition_def;

  /* Transfer reduction path info to the pattern.  */
  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    {
      gimple_match_op op;
      if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
	gcc_unreachable ();
      tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
      /* Search the pattern def sequence and the main pattern stmt.  Note
	 we may have inserted all into a containing pattern def sequence
	 so the following is a bit awkward.  */
      gimple_stmt_iterator si;
      gimple *s;
      if (def_seq)
	{
	  si = gsi_start (def_seq);
	  s = gsi_stmt (si);
	}
      else
	{
	  si = gsi_none ();
	  s = pattern_stmt;
	}
      do
	{
	  bool found = false;
	  if (gimple_extract_op (s, &op))
	    for (unsigned i = 0; i < op.num_ops; ++i)
	      if (op.ops[i] == lookfor)
		{
		  STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
		  lookfor = gimple_get_lhs (s);
		  found = true;
		  break;
		}
	  if (s == pattern_stmt)
	    {
	      if (!found && dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "failed to update reduction index.\n");
	      break;
	    }
	  gsi_next (&si);
	  if (gsi_end_p (si))
	    s = pattern_stmt;
	  else
	    {
	      s = gsi_stmt (si);
	      if (s == pattern_stmt)
		/* Found the end inside a bigger pattern def seq.  */
		si = gsi_none ();
	    }
	}
      while (1);
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */

static void
vect_pattern_recog_1 (vec_info *vinfo,
		      const vect_recog_func &recog_func, stmt_vec_info stmt_info)
{
  gimple *pattern_stmt;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
	   !gsi_end_p (gsi); gsi_next (&gsi))
	vect_pattern_recog_1 (vinfo, recog_func,
			      vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "%s pattern recognized: %G",
		     recog_func.name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

	                          in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	       '---> S6: a_new = ....   -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

	                          in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	       '---> S6: a_new = ....   -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  basic_block *bbs = vinfo->bbs;
  unsigned int nbbs = vinfo->nbbs;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  /* Scan through the stmts in the region, applying the pattern recognition
     functions starting at each stmt visited.  */
  for (unsigned i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));

	  if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (const auto &func_ptr : vect_vect_recog_func_ptrs)
	    vect_pattern_recog_1 (vinfo, func_ptr,
				  stmt_info);
	}
    }

  /* After this no more add_stmt calls are allowed.  */
  vinfo->stmt_vec_info_ro = true;
}
/* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code
   or internal_fn contained in CH, respectively.  */

gimple *
vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
{
  gcc_assert (op0 != NULL_TREE);
  if (ch.is_tree_code ())
    return gimple_build_assign (lhs, (tree_code) ch, op0, op1);

  gcc_assert (ch.is_internal_fn ());
  gimple *stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
					     op1 == NULL_TREE ? 1 : 2,
					     op0, op1);
  gimple_call_set_lhs (stmt, lhs);
  return stmt;
}
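/* For illustration only: a caller that already has a code_helper CH, holding
   either a tree_code such as PLUS_EXPR or an internal function that takes
   one or two operands, could use the helper as follows (the variable names
   are made up for the example):

     tree res = vect_recog_temp_ssa_var (type, NULL);
     gimple *g = vect_gimple_build (res, ch, op0, op1);
     append_pattern_def_seq (vinfo, stmt_vinfo, g, vectype);

   so that the same call site works for both GIMPLE_ASSIGNs and internal
   function calls.  */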