/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2024 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define INCLUDE_MEMORY
#include "coretypes.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "omp-simd-clone.h"
#include "tree-vector-builder.h"
#include "tree-ssa-loop-ivopts.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
/* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
   in the first operand.  Disentangling this is future work; the
   IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  get_range_query (cfun)->range_of_expr (vr, var);
  if (vr.undefined_p ())
    vr.set_varying (TREE_TYPE (var));
  value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
  *min_value = wi::to_wide (vr_min);
  *max_value = wi::to_wide (vr_max);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
					 nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has range [");
	  dump_hex (MSG_NOTE, *min_value);
	  dump_printf (MSG_NOTE, ", ");
	  dump_hex (MSG_NOTE, *max_value);
	  dump_printf (MSG_NOTE, "]\n");
	}
      return true;
    }

  if (dump_enabled_p ())
    {
      dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      dump_printf (MSG_NOTE, " has no range info\n");
    }
  return false;
}
/* Report that we've found an instance of pattern PATTERN in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}
/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
			stmt_vec_info orig_stmt_info, tree vectype)
{
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    {
      gcc_assert (!vectype
		  || is_a <gcond *> (pattern_stmt)
		  || (VECTOR_BOOLEAN_TYPE_P (vectype)
		      == vect_use_mask_type_p (orig_stmt_info)));
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
    }
  return pattern_stmt_info;
}
/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
		       stmt_vec_info orig_stmt_info, tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
}
/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.
   If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
   from which it was derived.  */

static void
append_pattern_def_seq (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE,
			tree scalar_type_for_mask = NULL_TREE)
{
  gcc_assert (!scalar_type_for_mask
	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
      if (scalar_type_for_mask)
	new_stmt_info->mask_precision
	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}
/* Add NEW_STMT to VINFO's invariant pattern definition statements.  These
   statements are not vectorized but are materialized as scalar in the loop
   preheader.  */

static void
append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
{
  gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
}
/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	  if (!bb
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	    e = NULL;
	}
    }
  return e;
}
/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab with the given SUBTYPE.
   ITYPE specifies the type of (some of) the scalar inputs and OTYPE
   specifies the type of the scalar result.

   If CODE allows the inputs and outputs to have different type
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL,
				 enum optab_subtype subtype = optab_default)
{
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, subtype);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}
/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a conversion optab with the given SUBTYPE.
   ITYPE specifies the type of (some of) the scalar inputs and OTYPE
   specifies the type of the scalar result.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_conv_optab_p (vec_info *vinfo, tree otype, tree_code code,
			       tree itype, tree *vecotype_out,
			       tree *vecitype_out = NULL,
			       enum optab_subtype subtype = optab_default)
{
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  if (!vecitype || !vecotype)
    return false;

  if (!directly_supported_p (code, vecotype, vecitype, subtype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}
/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}
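/* Illustrative examples (editorial addition, not part of the original
   source): vect_element_precision (12) == 16, vect_element_precision (3)
   == 8 (BITS_PER_UNIT), while precisions that are already powers of two
   of at least BITS_PER_UNIT, such as 8, 16 or 32, are returned unchanged.  */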
/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return vect_stmt_to_vectorize (def_stmt_info);
  return NULL;
}
/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, vinfo, &dt))
    return false;

  return true;
}
/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}
/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a combination of the two.  For example:

     signed short *ptr = ...;
     signed short C = *ptr;
     unsigned short B = (unsigned short) C;	// sign change
     signed int A = (signed int) B;		// unsigned promotion
     ...possible other uses of A...
     unsigned int OP = (unsigned int) A;	// sign change

   In this case it's possible to go directly from C to OP using:

     OP = (unsigned int) (unsigned short) C;
	  +------------+ +--------------+
	     promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree op_type = TREE_TYPE (op);
  if (!INTEGRAL_TYPE_P (op_type))
    return NULL_TREE;

  tree res = NULL_TREE;
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
	      || (TYPE_UNSIGNED (op_type)
		  && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}
/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}
/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than type, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;

  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}
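/* Illustrative example (editorial addition, not part of the original
   source): if *COMMON_TYPE is unsigned char and NEW_TYPE is signed char,
   neither type can hold all values of the other, so the supertype becomes
   a signed 16-bit type; that supertype is accepted only when TYPE is at
   least 32 bits wide.  */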
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   If SUBTYPE is nonnull, allow the operands to differ in sign but not
   in precision.  SUBTYPE is updated to reflect this.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */
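/* Illustrative example (editorial addition, not part of the original
   source): for
     unsigned char a, b;
     int prod = (int) a * (int) b;
   the statement computing PROD is such a tree with CODE == MULT_EXPR,
   two leaf operands A and B, and *COMMON_TYPE == unsigned char.  */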
static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
		      code_helper widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type,
		      enum optab_subtype *subtype = NULL)
{
  /* Check for an integer operation with the right code.  */
  gimple *stmt = stmt_info->stmt;
  if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
    return 0;

  code_helper rhs_code;
  if (is_gimple_assign (stmt))
    rhs_code = gimple_assign_rhs_code (stmt);
  else if (is_gimple_call (stmt))
    rhs_code = gimple_call_combined_fn (stmt);
  else
    return 0;

  if (rhs_code != code
      && rhs_code != widened_code)
    return 0;

  tree lhs = gimple_get_lhs (stmt);
  tree type = TREE_TYPE (lhs);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_arg (stmt, i);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (rhs_code != code)
	    {
	      /* If rhs_code is widened_code, don't look through further
		 possible promotions, there is a promotion already embedded
		 in the WIDEN_*_EXPR.  */
	      if (TREE_CODE (op) != SSA_NAME
		  || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
		return 0;

	      stmt_vec_info def_stmt_info;
	      gimple *def_stmt;
	      vect_def_type dt;
	      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
				       &def_stmt))
		return 0;
	      this_unprom->set_op (op, dt, NULL);
	    }
	  else if (!vect_look_through_possible_promotion (vinfo, op,
							  this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vect_get_internal_def (vinfo, this_unprom->op);
	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
					   widened_code, shift_p, max_nops,
					   this_unprom, common_type,
					   subtype);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		{
		  if (subtype)
		    {
		      /* See if we can sign extend the smaller type.  */
		      if (TYPE_PRECISION (this_unprom->type)
			  > TYPE_PRECISION (*common_type))
			*common_type = this_unprom->type;
		      *subtype = optab_vector_mixed_sign;
		    }
		  else
		    return 0;
		}
	    }
	}
      next_op += nops;
    }
  return next_op;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   stmt2_info->stmt);
	}

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   (gimple *) new_stmt2);
	}

      return true;
    }
}
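/* Illustrative example (editorial addition, not part of the original
   source): if STMT2_INFO is
     long long y = (long long) c;	// c has type signed char
   and the caller wants to tap an intermediate int value, STMT1 can be
     int tmp = (int) c;
   after which STMT2_INFO is rewritten to take TMP as its first input:
     long long y = (long long) tmp;  */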
/* Look for the following pattern

   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
   HALF_TYPE and UNPROM will be set should the statement be found to
   be a widened operation.
   DIFF_STMT will be set to the MINUS_EXPR
   statement that precedes the ABS_STMT if it is a MINUS_EXPR.  */

static bool
vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
				tree *half_type,
				vect_unpromoted_value unprom[2],
				gassign **diff_stmt)
{
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
  if (code != ABS_EXPR && code != ABSU_EXPR)
    return false;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);

  if (!ANY_INTEGRAL_TYPE_P (abs_type)
      || TYPE_OVERFLOW_WRAPS (abs_type)
      || TYPE_UNSIGNED (abs_type))
    return false;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return false;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return false;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return false;

  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
  if (diff_stmt && diff
      && gimple_assign_rhs_code (diff) == MINUS_EXPR
      && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
    *diff_stmt = diff;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
			    MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
			    false, 2, unprom, half_type))
    return true;

  return false;
}
/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.

   If SUBTYPE then convert the type based on the subtype.  */

static tree
vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype,
		    enum optab_subtype subtype = optab_default)
{
  /* Update the type if the signs differ.  */
  if (subtype == optab_vector_mixed_sign)
    {
      gcc_assert (!TYPE_UNSIGNED (type));
      if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
	{
	  type = unsigned_type_for (type);
	  vectype = unsigned_type_for (vectype);
	}
    }

  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
	 instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
	input = lhs;
      /* If the precision we want is between the source and result
	 precisions of the existing cast, try splitting the cast into
	 two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
	{
	  /* In order to preserve the semantics of the original cast,
	     give the mid-way point the same signedness as the input value.

	     It would be possible to use a signed type here instead if
	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
	     make the sign of the midtype sensitive to the order in
	     which we process the statements, since the signedness of
	     TYPE is the signedness required by just one of possibly
	     many users.  Also, unsigned promotions are usually as cheap
	     as or cheaper than signed ones, so it's better to keep an
	     unsigned promotion.  */
	  tree midtype = build_nonstandard_integer_type
	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);

	  input = vect_recog_temp_ssa_var (midtype, NULL);
	  gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
						   unprom->op);
	  if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
				     vec_midtype))
	    append_pattern_def_seq (vinfo, stmt_info,
				    new_stmt, vec_midtype);
	}

      /* See if we can reuse an existing result.  */
      if (types_compatible_p (type, TREE_TYPE (input)))
	return input;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (vinfo, input))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
  return new_op;
}
/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.

   If SUBTYPE then convert the type based on the subtype.  */

static void
vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
		     tree *result, tree type, vect_unpromoted_value *unprom,
		     tree vectype, enum optab_subtype subtype = optab_default)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
	if (unprom[j].op == unprom[i].op)
	  break;
      if (j < i)
	result[i] = result[j];
      else
	result[i] = vect_convert_input (vinfo, stmt_info,
					type, &unprom[i], vectype, subtype);
    }
}
/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		     gimple *pattern_stmt, tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}
/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.
   Also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (vec_info *vinfo,
				stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    {
      if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
				       code))
	return false;
    }
  else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    std::swap (*op0_out, *op1_out);
  return true;
}
/* match.pd function to match
   (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
   with conditions:
   1) @1, @2, c, d, a, b are all integral type.
   2) There's single_use for both @1 and @2.
   3) a, c have same precision.
   4) c and @1 have different precision.
   5) c, d are the same type or they can differ in sign when convert is
   truncation.

   record a and c and d and @3.  */

extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));

/* Function vect_recog_cond_expr_convert

   Try to find the following pattern:

   TYPE_E op_true = (TYPE_E) A;
   TYPE_E op_false = (TYPE_E) B;

   E = C cmp D ? op_true : op_false;

   where
   TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
   TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
   single_use of op_true and op_false.
   TYPE_AB could differ in sign when (TYPE_E) A is a truncation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with E = C cmp D ? op_true : op_false;

   Output:

   TYPE1 E' = C cmp D ? A : B;
   TYPE3 E = (TYPE3) E';

   There may be an extra nop_convert for A or B to handle different
   signedness.

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   E = (TYPE3) E';
   E' = C cmp D ? A : B; is recorded in pattern definition statements.  */
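/* Illustrative example (editorial addition, not part of the original
   source):
     int A, B, C, D;
     long op_true  = (long) A;
     long op_false = (long) B;
     long E = C < D ? op_true : op_false;
   becomes
     int E' = C < D ? A : B;
     long E = (long) E';
   so the COND_EXPR is done in the narrower type and only its result is
   widened.  */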
static gimple *
vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
				      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
  tree lhs, match[4], temp, type, new_lhs, op2;
  gimple *cond_stmt;
  gimple *pattern_stmt;

  if (!last_stmt)
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);

  /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
     TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
  if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    return NULL;

  vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);

  op2 = match[2];
  type = TREE_TYPE (match[1]);
  if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    {
      op2 = vect_recog_temp_ssa_var (type, NULL);
      gimple *nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
      append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
			      get_vectype_for_scalar_type (vinfo, type));
    }

  temp = vect_recog_temp_ssa_var (type, NULL);
  cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
						 match[1], op2));
  append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
			  get_vectype_for_scalar_type (vinfo, type));
  new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);
  return pattern_stmt;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     sum_0 = phi <init, sum_1>
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
   the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
   'type1a' and 'type1b' can differ.

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (vec_info *vinfo,
			     stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type but the sign
       between X, Y and DPROD can differ.
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  enum optab_subtype subtype = optab_vector;
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type, &subtype))
    return NULL;

  /* If there are two widening operations, make sure they agree on the sign
     of the extension.  The result of an optab_vector_mixed_sign operation
     is signed; otherwise, the result has the same sign as the operands.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && (subtype == optab_vector_mixed_sign
	  ? TYPE_UNSIGNED (unprom_mult.type)
	  : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  /* If the inputs have mixed signs, canonicalize on using the signed
     input type for analysis.  This also helps when emulating mixed-sign
     operations using signed operations.  */
  if (subtype == optab_vector_mixed_sign)
    half_type = signed_type_for (half_type);

  tree half_vectype;
  if (!vect_supportable_conv_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
				      type_out, &half_vectype, subtype))
    {
      /* We can emulate a mixed-sign dot-product using a sequence of
	 signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
      if (subtype != optab_vector_mixed_sign
	  || !vect_supportable_conv_optab_p (vinfo, signed_type_for (type),
					     DOT_PROD_EXPR, half_type,
					     type_out, &half_vectype,
					     subtype))
	return NULL;

      *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
					       *type_out);
    }

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype, subtype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     signed TYPE1 diff, abs_diff;
     sum_0 = phi <init, sum_1>
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	SAD_EXPR <x_t, y_t, sum_0>  */

static gimple *
vect_recog_sad_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree half_type;

  /* Look for the following pattern
	  DAD = ABS_EXPR <DDIFF>;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  vect_unpromoted_value unprom[2];

  if (!abs_stmt)
    {
      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
      if (!abd_stmt
	  || !gimple_call_internal_p (abd_stmt)
	  || gimple_call_num_args (abd_stmt) != 2)
	return NULL;

      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);

      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
	{
	  if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
						     &unprom[0])
	      || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
							&unprom[1]))
	    return NULL;
	}
      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
	{
	  unprom[0].op = abd_oprnd0;
	  unprom[0].type = TREE_TYPE (abd_oprnd0);
	  unprom[1].op = abd_oprnd1;
	  unprom[1].type = TREE_TYPE (abd_oprnd1);
	}
      else
	return NULL;

      half_type = unprom[0].type;
    }
  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
					    unprom, NULL))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
/* Function vect_recog_abd_pattern

   Try to find the following ABsolute Difference (ABD) or
   widening ABD (WIDEN_ABD) pattern:

     TYPE3 x_cast = (TYPE3) x;		  // widening or no-op
     TYPE3 y_cast = (TYPE3) y;		  // widening or no-op
     TYPE3 diff = x_cast - y_cast;
     TYPE4 diff_cast = (TYPE4) diff;	  // widening or no-op
     TYPE5 abs = ABS(U)_EXPR <diff_cast>;

   WIDEN_ABD exists to optimize the case where TYPE4 is at least
   twice as wide as TYPE3.

   * STMT_VINFO: The stmt from which the pattern search begins

   * TYPE_OUT: The type of the output of this pattern

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern, principally:
	out = IFN_ABD (x, y)
	out = IFN_WIDEN_ABD (x, y)
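   Illustrative example (editorial addition, not part of the original
   source):
     unsigned char x, y;
     int diff = (int) x - (int) y;
     int abs  = ABS_EXPR <diff>;
   can be rewritten as
     unsigned char abs' = IFN_ABD (x, y);
     int abs = (int) abs';
   since |x - y| for unsigned char inputs always fits in unsigned char.  */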
static gimple *
vect_recog_abd_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
  if (!last_stmt)
    return NULL;

  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));

  vect_unpromoted_value unprom[2];
  gassign *diff_stmt = NULL;
  tree abd_in_type;
  if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
				       unprom, &diff_stmt))
    {
      /* We cannot try further without having a non-widening MINUS.  */
      if (!diff_stmt)
	return NULL;

      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
      abd_in_type = signed_type_for (out_type);
    }

  tree abd_out_type = abd_in_type;

  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
  if (!vectype_in)
    return NULL;

  internal_fn ifn = IFN_ABD;
  tree vectype_out = vectype_in;

  if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
      && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
    {
      tree mid_type
	= build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
					  TYPE_UNSIGNED (abd_in_type));
      tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);

      code_helper dummy_code;
      int dummy_int;
      auto_vec<tree> dummy_vec;
      if (mid_vectype
	  && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
					     stmt_vinfo, mid_vectype,
					     vectype_in,
					     &dummy_code, &dummy_code,
					     &dummy_int, &dummy_vec))
	{
	  ifn = IFN_VEC_WIDEN_ABD;
	  abd_out_type = mid_type;
	  vectype_out = mid_vectype;
	}
    }

  if (ifn == IFN_ABD
      && !direct_internal_fn_supported_p (ifn, vectype_in,
					  OPTIMIZE_FOR_SPEED))
    return NULL;

  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);

  tree abd_oprnds[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
		       abd_in_type, unprom, vectype_in);

  *type_out = get_vectype_for_scalar_type (vinfo, out_type);

  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
						abd_oprnds[0], abd_oprnds[1]);
  gimple_call_set_lhs (abd_stmt, abd_result);
  gimple_set_location (abd_stmt, gimple_location (last_stmt));

  gimple *stmt = abd_stmt;
  if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
      && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
      && !TYPE_UNSIGNED (abd_out_type))
    {
      tree unsign = unsigned_type_for (abd_out_type);
      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
      vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
    }

  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
}
/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

     HALF_TYPE a_cast = (HALF_TYPE) a;		// possible no-op
     HALF_TYPE b_cast = (HALF_TYPE) b;		// possible no-op
   | RES_TYPE a_extend = (RES_TYPE) a_cast;	// promotion from HALF_TYPE
   | RES_TYPE b_extend = (RES_TYPE) b_cast;	// promotion from HALF_TYPE
   | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

     HALF_TYPE a_cast = (HALF_TYPE) a;		// possible no-op
     HALF_TYPE b_cast = (HALF_TYPE) b;		// possible no-op
   | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
   | RES_TYPE res = (EXT_TYPE) ext;		// possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (vec_info *vinfo,
			     stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, code_helper wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  tree ctype = itype;
  tree vecctype = vecitype;
  if (orig_code == MINUS_EXPR
      && TYPE_UNSIGNED (itype)
      && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    {
      /* Subtraction is special, even if half_type is unsigned and no matter
	 whether type is signed or unsigned, if type is wider than itype,
	 we need to sign-extend from the widening operation result to the
	 result type.
	 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
	 itype unsigned short and type either int or unsigned int.
	 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
	 (unsigned short) 0xffff, but for type int we want the result -1
	 and for type unsigned int 0xffffffff rather than 0xffff.  */
      ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
      vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    }

  code_helper dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !vecctype
      || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (vinfo, last_stmt_info,
		       2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);

  if (vecctype != vecitype)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
					pattern_stmt, vecitype);

  return vect_convert_output (vinfo, last_stmt_info,
			      type, pattern_stmt, vecctype);
}
/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MULT_EXPR, WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}

/* Try to detect addition on widened inputs, converting PLUS_EXPR
   to IFN_VEC_WIDEN_PLUS.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
				      false, "vect_recog_widen_plus_pattern");
}

/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
   to IFN_VEC_WIDEN_MINUS.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
				tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
				      false, "vect_recog_widen_minus_pattern");
}
/* Try to detect abd on widened inputs, converting IFN_ABD
   to IFN_VEC_WIDEN_ABD.  */

static gimple *
vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			      tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
  if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
    return NULL;

  tree last_rhs = gimple_assign_rhs1 (last_stmt);

  tree in_type = TREE_TYPE (last_rhs);
  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
  if (!INTEGRAL_TYPE_P (in_type)
      || !INTEGRAL_TYPE_P (out_type)
      || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
      || !TYPE_UNSIGNED (in_type))
    return NULL;

  vect_unpromoted_value unprom;
  tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
  if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
    return NULL;

  stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
  if (!abd_pattern_vinfo)
    return NULL;

  gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
  if (!abd_stmt
      || !gimple_call_internal_p (abd_stmt)
      || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
    return NULL;

  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);

  code_helper dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
				       vectype_out, vectype_in,
				       &dummy_code, &dummy_code,
				       &dummy_int, &dummy_vec))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);

  *type_out = vectype_out;

  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
  tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
  gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
						      abd_oprnd0, abd_oprnd1);
  gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
  gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
  return widen_abd_stmt;
}
/* Function vect_recog_ctz_ffs_pattern

   Try to find the following pattern:

   B = __builtin_ctz{,l,ll} (A);

   or

   B = __builtin_ffs{,l,ll} (A);

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with B = __builtin_* (A);

   Output:

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern, using clz or popcount builtins.  */
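/* Illustrative example (editorial addition, not part of the original
   source): for A = 8 (binary 1000), (A - 1) & ~A == 0b0111, so
   .POPCOUNT ((A - 1) & ~A) == 3 == .CTZ (8), and for 32-bit A,
   32 - .CLZ ((A - 1) & ~A) == 32 - 29 == 3 as well.  */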
static gimple *
vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			    tree *type_out)
{
  gimple *call_stmt = stmt_vinfo->stmt;
  gimple *pattern_stmt;
  tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
  tree new_var;
  internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
  bool defined_at_zero = true, defined_at_zero_new = false;
  int val = 0, val_new = 0, val_cmp = 0;
  int prec;
  int sub = 0, add = 0;
  location_t loc;

  if (!is_gimple_call (call_stmt))
    return NULL;

  if (gimple_call_num_args (call_stmt) != 1
      && gimple_call_num_args (call_stmt) != 2)
    return NULL;

  rhs_oprnd = gimple_call_arg (call_stmt, 0);
  rhs_type = TREE_TYPE (rhs_oprnd);
  lhs_oprnd = gimple_call_lhs (call_stmt);

  lhs_type = TREE_TYPE (lhs_oprnd);
  if (!INTEGRAL_TYPE_P (lhs_type)
      || !INTEGRAL_TYPE_P (rhs_type)
      || !type_has_mode_precision_p (rhs_type)
      || TREE_CODE (rhs_oprnd) != SSA_NAME)
    return NULL;

  switch (gimple_call_combined_fn (call_stmt))
    {
    CASE_CFN_CTZ:
      ifn = IFN_CTZ;
      if (!gimple_call_internal_p (call_stmt)
	  || gimple_call_num_args (call_stmt) != 2)
	defined_at_zero = false;
      else
	val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
      break;
    CASE_CFN_FFS:
      ifn = IFN_FFS;
      break;
    default:
      return NULL;
    }

  prec = TYPE_PRECISION (rhs_type);
  loc = gimple_location (call_stmt);

  vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
  vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);

  /* Do it only if the backend doesn't have ctz<vector_mode>2 or
     ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
     popcount<vector_mode>2.  */
  if (!vec_rhs_type
      || direct_internal_fn_supported_p (ifn, vec_rhs_type,
					 OPTIMIZE_FOR_SPEED))
    return NULL;

  if (ifn == IFN_FFS
      && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
					 OPTIMIZE_FOR_SPEED))
    {
      ifnnew = IFN_CTZ;
      defined_at_zero_new
	= CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
				     val_new) == 2;
    }
  else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
					   OPTIMIZE_FOR_SPEED))
    {
      ifnnew = IFN_CLZ;
      defined_at_zero_new
	= CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
				     val_new) == 2;
    }
  if ((ifnnew == IFN_LAST
       || (defined_at_zero && !defined_at_zero_new))
      && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
					 OPTIMIZE_FOR_SPEED))
    {
      ifnnew = IFN_POPCOUNT;
      defined_at_zero_new = true;
    }
  if (ifnnew == IFN_LAST)
    return NULL;

  vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);

  if ((ifnnew == IFN_CLZ
       && defined_at_zero
       && defined_at_zero_new
       && val == prec
       && val_new == prec)
      || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
    {
      /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
	 .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
      if (ifnnew == IFN_CLZ)
	sub = prec;
      val_cmp = prec;

      if (!TYPE_UNSIGNED (rhs_type))
	{
	  rhs_type = unsigned_type_for (rhs_type);
	  vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
	  new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
	  pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
				  vec_rhs_type);
	  rhs_oprnd = new_var;
	}

      tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
					  build_int_cst (rhs_type, -1));
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
					  m1, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
    }
  else if (ifnnew == IFN_CLZ)
    {
      /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
	 .FFS (X) = PREC - .CLZ (X & -X).  */
      sub = prec - (ifn == IFN_CTZ);
      val_cmp = sub - val_new;

      tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
					  rhs_oprnd, neg);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
    }
  else if (ifnnew == IFN_POPCOUNT)
    {
      /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
	 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).  */
      sub = prec + (ifn == IFN_FFS);

      tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
);
2022 pattern_stmt
= gimple_build_assign (new_var
, BIT_IOR_EXPR
,
2024 gimple_set_location (pattern_stmt
, loc
);
2025 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
2026 rhs_oprnd
= new_var
;
2028 else if (ifnnew
== IFN_CTZ
)
2030 /* .FFS (X) = .CTZ (X) + 1. */
2035 /* Create B = .IFNNEW (A). */
2036 new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2037 if ((ifnnew
== IFN_CLZ
|| ifnnew
== IFN_CTZ
) && defined_at_zero_new
)
2039 = gimple_build_call_internal (ifnnew
, 2, rhs_oprnd
,
2040 build_int_cst (integer_type_node
,
2043 pattern_stmt
= gimple_build_call_internal (ifnnew
, 1, rhs_oprnd
);
2044 gimple_call_set_lhs (pattern_stmt
, new_var
);
2045 gimple_set_location (pattern_stmt
, loc
);
2046 *type_out
= vec_type
;
2050 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2051 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2052 pattern_stmt
= gimple_build_assign (ret_var
, MINUS_EXPR
,
2053 build_int_cst (lhs_type
, sub
),
2055 gimple_set_location (pattern_stmt
, loc
);
2060 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2061 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2062 pattern_stmt
= gimple_build_assign (ret_var
, PLUS_EXPR
, new_var
,
2063 build_int_cst (lhs_type
, add
));
2064 gimple_set_location (pattern_stmt
, loc
);
2069 && (!defined_at_zero_new
|| val
!= val_cmp
))
2071 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2072 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2073 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
2074 rhs_type
= TREE_TYPE (rhs_oprnd
);
2075 tree cmp
= build2_loc (loc
, NE_EXPR
, boolean_type_node
,
2076 rhs_oprnd
, build_zero_cst (rhs_type
));
2077 pattern_stmt
= gimple_build_assign (ret_var
, COND_EXPR
, cmp
,
2079 build_int_cst (lhs_type
, val
));
2082 if (dump_enabled_p ())
2083 dump_printf_loc (MSG_NOTE
, vect_location
,
2084 "created pattern stmt: %G", pattern_stmt
);
2086 return pattern_stmt
;
/* Function vect_recog_popcount_clz_ctz_ffs_pattern

   Try to find the following pattern:

   temp_in = (UTYPE2)A;

   temp_out = __builtin_popcount{,l,ll} (temp_in);
   B = (TYPE1) temp_out;

   TYPE2 may or may not be equal to TYPE3.
   i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
   i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with B = (TYPE1) temp_out;

   Output:

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   B = .POPCOUNT (A);

   Similarly for clz, ctz and ffs.  */
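/* Illustrative sketch (hypothetical names, not taken from these sources):
   the shape above typically comes from source like

     unsigned short a[N], b[N];
     for (int i = 0; i < N; i++)
       b[i] = __builtin_popcount (a[i]);   // argument promoted for the call

   where the argument is widened to the builtin's argument type and the int
   result is then narrowed back to unsigned short.  The pattern skips the
   intermediate widening and builds B = .POPCOUNT (A) directly on the
   narrower, same-precision type when the target has a matching vector
   optab.  */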
static gimple *
vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
					 stmt_vec_info stmt_vinfo,
					 tree *type_out)
{
2127 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
);
2128 gimple
*call_stmt
, *pattern_stmt
;
2129 tree rhs_oprnd
, rhs_origin
, lhs_oprnd
, lhs_type
, vec_type
, new_var
;
2130 internal_fn ifn
= IFN_LAST
;
2133 /* Find B = (TYPE1) temp_out. */
2136 tree_code code
= gimple_assign_rhs_code (last_stmt
);
2137 if (!CONVERT_EXPR_CODE_P (code
))
2140 lhs_oprnd
= gimple_assign_lhs (last_stmt
);
2141 lhs_type
= TREE_TYPE (lhs_oprnd
);
2142 if (!INTEGRAL_TYPE_P (lhs_type
))
2145 rhs_oprnd
= gimple_assign_rhs1 (last_stmt
);
2146 if (TREE_CODE (rhs_oprnd
) != SSA_NAME
2147 || !has_single_use (rhs_oprnd
))
2149 call_stmt
= SSA_NAME_DEF_STMT (rhs_oprnd
);
2151 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2152 if (!is_gimple_call (call_stmt
))
2154 switch (gimple_call_combined_fn (call_stmt
))
2162 /* Punt if call result is unsigned and defined value at zero
2163 is negative, as the negative value doesn't extend correctly. */
2164 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd
))
2165 && gimple_call_internal_p (call_stmt
)
2166 && CLZ_DEFINED_VALUE_AT_ZERO
2167 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val
) == 2
2173 /* Punt if call result is unsigned and defined value at zero
2174 is negative, as the negative value doesn't extend correctly. */
2175 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd
))
2176 && gimple_call_internal_p (call_stmt
)
2177 && CTZ_DEFINED_VALUE_AT_ZERO
2178 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val
) == 2
2189 if (gimple_call_num_args (call_stmt
) != 1
2190 && gimple_call_num_args (call_stmt
) != 2)
2193 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
2194 vect_unpromoted_value unprom_diff
;
2196 = vect_look_through_possible_promotion (vinfo
, rhs_oprnd
, &unprom_diff
);
2201 /* Input and output of .POPCOUNT should be same-precision integer. */
2202 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (lhs_type
))
2205 /* Also A should be unsigned or same precision as temp_in, otherwise
2206 different builtins/internal functions have different behaviors. */
2207 if (TYPE_PRECISION (unprom_diff
.type
)
2208 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd
)))
2212 /* For popcount require zero extension, which doesn't add any
2213 further bits to the count. */
2214 if (!TYPE_UNSIGNED (unprom_diff
.type
))
2218 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2219 if it is undefined at zero or if it matches also for the
2220 defined value there. */
2221 if (!TYPE_UNSIGNED (unprom_diff
.type
))
2223 if (!type_has_mode_precision_p (lhs_type
)
2224 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd
)))
2226 addend
= (TYPE_PRECISION (TREE_TYPE (rhs_oprnd
))
2227 - TYPE_PRECISION (lhs_type
));
2228 if (gimple_call_internal_p (call_stmt
)
2229 && gimple_call_num_args (call_stmt
) == 2)
2232 val1
= tree_to_shwi (gimple_call_arg (call_stmt
, 1));
2234 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2236 if (d2
!= 2 || val1
!= val2
+ addend
)
2241 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2242 if it is undefined at zero or if it matches also for the
2243 defined value there. */
2244 if (gimple_call_internal_p (call_stmt
)
2245 && gimple_call_num_args (call_stmt
) == 2)
2248 val1
= tree_to_shwi (gimple_call_arg (call_stmt
, 1));
2250 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2252 if (d2
!= 2 || val1
!= val2
)
2257 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2263 vec_type
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
2264 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2269 = direct_internal_fn_supported_p (ifn
, vec_type
, OPTIMIZE_FOR_SPEED
);
2277 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2278 if (direct_internal_fn_supported_p (IFN_CTZ
, vec_type
,
2279 OPTIMIZE_FOR_SPEED
))
2283 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2285 if (direct_internal_fn_supported_p (IFN_CLZ
, vec_type
,
2286 OPTIMIZE_FOR_SPEED
))
2288 if (direct_internal_fn_supported_p (IFN_POPCOUNT
, vec_type
,
2289 OPTIMIZE_FOR_SPEED
))
2296 vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2299 /* Create B = .POPCOUNT (A). */
2300 new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2301 tree arg2
= NULL_TREE
;
2304 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2306 arg2
= build_int_cst (integer_type_node
, val
);
2307 else if (ifn
== IFN_CTZ
2308 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2310 arg2
= build_int_cst (integer_type_node
, val
);
2312 pattern_stmt
= gimple_build_call_internal (ifn
, 2, unprom_diff
.op
, arg2
);
2314 pattern_stmt
= gimple_build_call_internal (ifn
, 1, unprom_diff
.op
);
2315 gimple_call_set_lhs (pattern_stmt
, new_var
);
2316 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
2317 *type_out
= vec_type
;
2319 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_NOTE
, vect_location
,
2321 "created pattern stmt: %G", pattern_stmt
);
2325 gcc_assert (supported
);
2326 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2327 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2328 pattern_stmt
= gimple_build_assign (ret_var
, PLUS_EXPR
, new_var
,
2329 build_int_cst (lhs_type
, addend
));
2331 else if (!supported
)
2333 stmt_vec_info new_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
2334 STMT_VINFO_VECTYPE (new_stmt_info
) = vec_type
;
2336 = vect_recog_ctz_ffs_pattern (vinfo
, new_stmt_info
, type_out
);
2337 if (pattern_stmt
== NULL
)
2339 if (gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info
))
2341 gimple_seq
*pseq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
2342 gimple_seq_add_seq_without_update (pseq
, seq
);
2345 return pattern_stmt
;
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)   */
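/* Illustrative sketch (hypothetical names): the two exponent cases this
   pattern handles in user code are

     double x[N], y[N];
     for (int i = 0; i < N; i++)
       y[i] = pow (x[i], 2.0);     // becomes x[i] * x[i]

     for (int i = 0; i < N; i++)
       y[i] = pow (x[i], 0.5);     // becomes a sqrt when the target
                                   // supports a vector square root

   The constant-base case pow (C, x) is instead rewritten as
   exp (log (C) * x) so that a SIMD clone of exp can be used, as the code
   below explains.  */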
static gimple *
vect_recog_pow_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
2376 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2381 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
2384 switch (gimple_call_combined_fn (last_stmt
))
2394 base
= gimple_call_arg (last_stmt
, 0);
2395 exp
= gimple_call_arg (last_stmt
, 1);
2396 if (TREE_CODE (exp
) != REAL_CST
2397 && TREE_CODE (exp
) != INTEGER_CST
)
2399 if (flag_unsafe_math_optimizations
2400 && TREE_CODE (base
) == REAL_CST
2401 && gimple_call_builtin_p (last_stmt
, BUILT_IN_NORMAL
))
2403 combined_fn log_cfn
;
2404 built_in_function exp_bfn
;
2405 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
2408 log_cfn
= CFN_BUILT_IN_LOG
;
2409 exp_bfn
= BUILT_IN_EXP
;
2412 log_cfn
= CFN_BUILT_IN_LOGF
;
2413 exp_bfn
= BUILT_IN_EXPF
;
2416 log_cfn
= CFN_BUILT_IN_LOGL
;
2417 exp_bfn
= BUILT_IN_EXPL
;
2422 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
2423 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
2424 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2425 does that, but if C is a power of 2, we want to use
2426 exp2 (log2 (C) * x) in the non-vectorized version, but for
2427 vectorization we don't have vectorized exp2. */
2429 && TREE_CODE (logc
) == REAL_CST
2431 && lookup_attribute ("omp declare simd",
2432 DECL_ATTRIBUTES (exp_decl
)))
2434 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
2435 if (node
->simd_clones
== NULL
)
2437 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
2438 || node
->definition
)
2440 expand_simd_clones (node
);
2441 if (node
->simd_clones
== NULL
)
2444 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
2447 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2448 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
2449 append_pattern_def_seq (vinfo
, stmt_vinfo
, g
);
2450 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2451 g
= gimple_build_call (exp_decl
, 1, def
);
2452 gimple_call_set_lhs (g
, res
);
2460 /* We now have a pow or powi builtin function call with a constant
2463 /* Catch squaring. */
2464 if ((tree_fits_shwi_p (exp
)
2465 && tree_to_shwi (exp
) == 2)
2466 || (TREE_CODE (exp
) == REAL_CST
2467 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
2469 if (!vect_supportable_direct_optab_p (vinfo
, TREE_TYPE (base
), MULT_EXPR
,
2470 TREE_TYPE (base
), type_out
))
2473 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2474 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
2478 /* Catch square root. */
2479 if (TREE_CODE (exp
) == REAL_CST
2480 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
2482 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
2484 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
2485 OPTIMIZE_FOR_SPEED
))
2487 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
2488 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
2489 gimple_call_set_lhs (stmt
, var
);
2490 gimple_call_set_nothrow (stmt
, true);
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
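/* Illustrative sketch (hypothetical names): a typical source form of the
   widening-sum reduction is

     unsigned char x[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];            // x[i] implicitly widened before the add

   which this pattern turns into WIDEN_SUM <x_t, sum_0> so that the target
   can accumulate narrow elements directly into a wider accumulator rather
   than unpacking every element first.  */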
static gimple *
vect_recog_widen_sum_pattern (vec_info *vinfo,
			      stmt_vec_info stmt_vinfo, tree *type_out)
{
2540 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2541 tree oprnd0
, oprnd1
;
2543 gimple
*pattern_stmt
;
2546 /* Look for the following pattern
2549 In which DX is at least double the size of X, and sum_1 has been
2550 recognized as a reduction variable.
2553 /* Starting from LAST_STMT, follow the defs of its uses in search
2554 of the above pattern. */
2556 if (!vect_reassociating_reduction_p (vinfo
, stmt_vinfo
, PLUS_EXPR
,
2558 || TREE_CODE (oprnd0
) != SSA_NAME
2559 || !vinfo
->lookup_def (oprnd0
))
2562 type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
2564 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2565 we know that oprnd1 is the reduction variable (defined by a loop-header
2566 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2567 Left to check that oprnd0 is defined by a cast from type 'type' to type
2570 vect_unpromoted_value unprom0
;
2571 if (!vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom0
)
2572 || TYPE_PRECISION (unprom0
.type
) * 2 > TYPE_PRECISION (type
))
2575 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt
);
2577 if (!vect_supportable_direct_optab_p (vinfo
, type
, WIDEN_SUM_EXPR
,
2578 unprom0
.type
, type_out
))
2581 var
= vect_recog_temp_ssa_var (type
, NULL
);
2582 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, unprom0
.op
, oprnd1
);
2584 return pattern_stmt
;
/* Function vect_recog_bitfield_ref_pattern

   Try to find the following pattern:

   bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
   result = (type_out) bf_value;

   or

   if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)

   where type_out is a non-bitfield type, that is to say, its precision matches
   2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with:
   result = (type_out) bf_value;

   or

   if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)

   Output:

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  If the precision of type_out is bigger
   than the precision type of _1 we perform the widening before the shifting,
   since the new precision will be large enough to shift the value and moving
   widening operations up the statement chain enables the generation of
   widening loads.  If we are widening and the operation after the pattern is
   an addition then we mask first and shift later, to enable the generation of
   shifting adds.  In the case of narrowing we will always mask first, shift
   last and then perform a narrowing operation.  This will enable the
   generation of narrowing shifts.

   Widening with mask first, shift later:
   container = (type_out) container;
   masked = container & (((1 << bitsize) - 1) << bitpos);
   result = masked >> bitpos;

   Widening with shift first, mask last:
   container = (type_out) container;
   shifted = container >> bitpos;
   result = shifted & ((1 << bitsize) - 1);

   Narrowing:
   masked = container & (((1 << bitsize) - 1) << bitpos);
   result = masked >> bitpos;
   result = (type_out) result;

   If the bitfield is signed and it's wider than type_out, we need to
   keep the result sign-extended:
   container = (type) container;
   masked = container << (prec - bitsize - bitpos);
   result = (type_out) (masked >> (prec - bitsize));

   Here type is the signed variant of the wider of type_out and the type
   of the container.

   The shifting is always optional depending on whether bitpos != 0.

   When the original bitfield was inside a gcond then a new gcond is also
   generated with the new `result` as the operand to the comparison.  */
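/* Illustrative sketch (hypothetical struct and names): the BIT_FIELD_REF
   form above typically originates from reads of a bit-field, e.g.

     struct s { unsigned int f : 5; unsigned int g : 11; };
     struct s a[N];
     unsigned int r[N];
     for (int i = 0; i < N; i++)
       r[i] = a[i].g;

   Once the bit-field accesses in the loop have been lowered (e.g. by
   if-conversion), the read appears as
   r[i] = (unsigned int) BIT_FIELD_REF (container, 11, 5), which this
   pattern rewrites into the mask-and-shift sequences shown above so that
   it can be vectorized with ordinary bitwise operations.  */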
static gimple *
vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
				 tree *type_out)
{
2661 gimple
*bf_stmt
= NULL
;
2662 tree lhs
= NULL_TREE
;
2663 tree ret_type
= NULL_TREE
;
2664 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
2665 if (gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt
))
2667 tree op
= gimple_cond_lhs (cond_stmt
);
2668 if (TREE_CODE (op
) != SSA_NAME
)
2670 bf_stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (op
));
2671 if (TREE_CODE (gimple_cond_rhs (cond_stmt
)) != INTEGER_CST
)
2674 else if (is_gimple_assign (stmt
)
2675 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt
))
2676 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
2678 gimple
*second_stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
2679 bf_stmt
= dyn_cast
<gassign
*> (second_stmt
);
2680 lhs
= gimple_assign_lhs (stmt
);
2681 ret_type
= TREE_TYPE (lhs
);
2685 || gimple_assign_rhs_code (bf_stmt
) != BIT_FIELD_REF
)
2688 tree bf_ref
= gimple_assign_rhs1 (bf_stmt
);
2689 tree container
= TREE_OPERAND (bf_ref
, 0);
2690 ret_type
= ret_type
? ret_type
: TREE_TYPE (container
);
2692 if (!bit_field_offset (bf_ref
).is_constant ()
2693 || !bit_field_size (bf_ref
).is_constant ()
2694 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container
))))
2697 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref
))
2698 || !INTEGRAL_TYPE_P (TREE_TYPE (container
))
2699 || TYPE_MODE (TREE_TYPE (container
)) == E_BLKmode
)
2702 gimple
*use_stmt
, *pattern_stmt
;
2703 use_operand_p use_p
;
2704 bool shift_first
= true;
2705 tree container_type
= TREE_TYPE (container
);
2706 tree vectype
= get_vectype_for_scalar_type (vinfo
, container_type
);
2708 /* Calculate shift_n before the adjustments for widening loads, otherwise
2709 the container may change and we have to consider offset change for
2710 widening loads on big endianness. The shift_n calculated here can be
2711 independent of widening. */
2712 unsigned HOST_WIDE_INT shift_n
= bit_field_offset (bf_ref
).to_constant ();
2713 unsigned HOST_WIDE_INT mask_width
= bit_field_size (bf_ref
).to_constant ();
2714 unsigned HOST_WIDE_INT prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2715 if (BYTES_BIG_ENDIAN
)
2716 shift_n
= prec
- shift_n
- mask_width
;
2718 bool ref_sext
= (!TYPE_UNSIGNED (TREE_TYPE (bf_ref
)) &&
2719 TYPE_PRECISION (ret_type
) > mask_width
);
2720 bool load_widen
= (TYPE_PRECISION (TREE_TYPE (container
)) <
2721 TYPE_PRECISION (ret_type
));
2723 /* We move the conversion earlier if the loaded type is smaller than the
2724 return type to enable the use of widening loads. And if we need a
2725 sign extension, we need to convert the loaded value early to a signed
2727 if (ref_sext
|| load_widen
)
2729 tree type
= load_widen
? ret_type
: container_type
;
2731 type
= gimple_signed_type (type
);
2732 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
),
2733 NOP_EXPR
, container
);
2734 container
= gimple_get_lhs (pattern_stmt
);
2735 container_type
= TREE_TYPE (container
);
2736 prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2737 vectype
= get_vectype_for_scalar_type (vinfo
, container_type
);
2738 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2740 else if (!useless_type_conversion_p (TREE_TYPE (container
), ret_type
))
2741 /* If we are doing the conversion last then also delay the shift as we may
2742 be able to combine the shift and conversion in certain cases. */
2743 shift_first
= false;
2745 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2746 PLUS_EXPR then do the shift last as some targets can combine the shift and
2747 add into a single instruction. */
2748 if (lhs
&& !is_pattern_stmt_p (stmt_info
)
2749 && single_imm_use (lhs
, &use_p
, &use_stmt
))
2751 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
2752 && gimple_assign_rhs_code (use_stmt
) == PLUS_EXPR
)
2753 shift_first
= false;
2756 /* If we don't have to shift we only generate the mask, so just fix the
2757 code-path to shift_first. */
2762 if (shift_first
&& !ref_sext
)
2764 tree shifted
= container
;
2768 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2769 RSHIFT_EXPR
, container
,
2770 build_int_cst (sizetype
, shift_n
));
2771 shifted
= gimple_assign_lhs (pattern_stmt
);
2772 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2775 tree mask
= wide_int_to_tree (container_type
,
2776 wi::mask (mask_width
, false, prec
));
2779 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2780 BIT_AND_EXPR
, shifted
, mask
);
2781 result
= gimple_assign_lhs (pattern_stmt
);
2785 tree temp
= vect_recog_temp_ssa_var (container_type
);
2788 tree mask
= wide_int_to_tree (container_type
,
2789 wi::shifted_mask (shift_n
,
2792 pattern_stmt
= gimple_build_assign (temp
, BIT_AND_EXPR
,
2797 HOST_WIDE_INT shl
= prec
- shift_n
- mask_width
;
2799 pattern_stmt
= gimple_build_assign (temp
, LSHIFT_EXPR
,
2801 build_int_cst (sizetype
,
2805 tree masked
= gimple_assign_lhs (pattern_stmt
);
2806 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2808 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2809 RSHIFT_EXPR
, masked
,
2810 build_int_cst (sizetype
, shift_n
));
2811 result
= gimple_assign_lhs (pattern_stmt
);
2814 if (!useless_type_conversion_p (TREE_TYPE (result
), ret_type
))
2816 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2818 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type
),
2827 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2828 vectype
= truth_type_for (vectype
);
2830 /* FIXME: This part extracts the boolean value out of the bitfield in the
2831 same way as vect_recog_gcond_pattern does. However because
2832 patterns cannot match the same root twice, when we handle and
2833 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2834 apply anymore. We should really fix it so that we don't need to
2835 duplicate transformations like these. */
2836 tree new_lhs
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
2837 gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt_info
->stmt
);
2838 tree cond_cst
= gimple_cond_rhs (cond_stmt
);
2840 = gimple_build_assign (new_lhs
, gimple_cond_code (cond_stmt
),
2841 gimple_get_lhs (pattern_stmt
),
2842 fold_convert (container_type
, cond_cst
));
2843 append_pattern_def_seq (vinfo
, stmt_info
, new_stmt
, vectype
, container_type
);
2845 = gimple_build_cond (NE_EXPR
, new_lhs
,
2846 build_zero_cst (TREE_TYPE (new_lhs
)),
2847 NULL_TREE
, NULL_TREE
);
2850 *type_out
= STMT_VINFO_VECTYPE (stmt_info
);
2851 vect_pattern_detected ("bitfield_ref pattern", stmt_info
->stmt
);
2853 return pattern_stmt
;
/* Function vect_recog_bit_insert_pattern

   Try to find the following pattern:

   written = BIT_INSERT_EXPR (container, value, bitpos);

   Input:

   * STMT_VINFO: The stmt we want to replace.

   Output:

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   value = (container_type) value;	     // Make sure
   shifted = value << bitpos;		     // Shift value into place
   masked = shifted & (mask << bitpos);	     // Mask off the non-relevant bits in
					     // the 'to-write value'.
   cleared = container & ~(mask << bitpos);  // Clearing the bits we want to
					     // write to from the value we want
					     // to write to.
   written = cleared | masked;		     // Write bits.


   where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
   bits corresponding to the real size of the bitfield value we are writing to.
   The shifting is always optional depending on whether bitpos != 0.  */
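/* Illustrative sketch (hypothetical struct and names): the BIT_INSERT_EXPR
   form above typically comes from writes to a bit-field, e.g.

     struct s { unsigned int f : 5; unsigned int g : 11; };
     struct s a[N];
     for (int i = 0; i < N; i++)
       a[i].g = i;

   After the bit-field store has been lowered to a read-modify-write of the
   containing word, the new value is inserted with BIT_INSERT_EXPR, and
   this pattern open-codes that insert as the shift/mask/or sequence shown
   above so that it can be vectorized with ordinary bitwise operations.  */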
static gimple *
vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
			       tree *type_out)
{
2892 gassign
*bf_stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
2893 if (!bf_stmt
|| gimple_assign_rhs_code (bf_stmt
) != BIT_INSERT_EXPR
)
2896 tree container
= gimple_assign_rhs1 (bf_stmt
);
2897 tree value
= gimple_assign_rhs2 (bf_stmt
);
2898 tree shift
= gimple_assign_rhs3 (bf_stmt
);
2900 tree bf_type
= TREE_TYPE (value
);
2901 tree container_type
= TREE_TYPE (container
);
2903 if (!INTEGRAL_TYPE_P (container_type
)
2904 || !tree_fits_uhwi_p (TYPE_SIZE (container_type
)))
2907 gimple
*pattern_stmt
;
2909 vect_unpromoted_value unprom
;
2910 unprom
.set_op (value
, vect_internal_def
);
2911 value
= vect_convert_input (vinfo
, stmt_info
, container_type
, &unprom
,
2912 get_vectype_for_scalar_type (vinfo
,
2915 unsigned HOST_WIDE_INT mask_width
= TYPE_PRECISION (bf_type
);
2916 unsigned HOST_WIDE_INT prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2917 unsigned HOST_WIDE_INT shift_n
= tree_to_uhwi (shift
);
2918 if (BYTES_BIG_ENDIAN
)
2920 shift_n
= prec
- shift_n
- mask_width
;
2921 shift
= build_int_cst (TREE_TYPE (shift
), shift_n
);
2924 if (!useless_type_conversion_p (TREE_TYPE (value
), container_type
))
2927 gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2929 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2930 value
= gimple_get_lhs (pattern_stmt
);
2933 /* Shift VALUE into place. */
2934 tree shifted
= value
;
2937 gimple_seq stmts
= NULL
;
2939 = gimple_build (&stmts
, LSHIFT_EXPR
, container_type
, value
, shift
);
2940 if (!gimple_seq_empty_p (stmts
))
2941 append_pattern_def_seq (vinfo
, stmt_info
,
2942 gimple_seq_first_stmt (stmts
));
2946 = wide_int_to_tree (container_type
,
2947 wi::shifted_mask (shift_n
, mask_width
, false, prec
));
2949 /* Clear bits we don't want to write back from SHIFTED. */
2950 gimple_seq stmts
= NULL
;
2951 tree masked
= gimple_build (&stmts
, BIT_AND_EXPR
, container_type
, shifted
,
2953 if (!gimple_seq_empty_p (stmts
))
2955 pattern_stmt
= gimple_seq_first_stmt (stmts
);
2956 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2959 /* Mask off the bits in the container that we are to write to. */
2960 mask_t
= wide_int_to_tree (container_type
,
2961 wi::shifted_mask (shift_n
, mask_width
, true, prec
));
2962 tree cleared
= vect_recog_temp_ssa_var (container_type
);
2963 pattern_stmt
= gimple_build_assign (cleared
, BIT_AND_EXPR
, container
, mask_t
);
2964 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2966 /* Write MASKED into CLEARED. */
2968 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2969 BIT_IOR_EXPR
, cleared
, masked
);
2971 *type_out
= STMT_VINFO_VECTYPE (stmt_info
);
2972 vect_pattern_detected ("bit_insert pattern", stmt_info
->stmt
);
2974 return pattern_stmt
;
/* Recognize cases in which an operation is performed in one type WTYPE
   but could be done more efficiently in a narrower type NTYPE.  For example,
   if we have:

     ATYPE a;  // narrower than NTYPE
     BTYPE b;  // narrower than NTYPE
     WTYPE aw = (WTYPE) a;
     WTYPE bw = (WTYPE) b;
     WTYPE res = aw + bw;  // only uses of aw and bw

   then it would be more efficient to do:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   Other situations include things like:

     ATYPE a;  // NTYPE or narrower
     WTYPE aw = (WTYPE) a;
     WTYPE res = aw + b;

   when only "(NTYPE) res" is significant.  In that case it's more efficient
   to truncate "b" and do the operation on NTYPE instead:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;  // truncation
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   All users of "res" should then use "resn" instead, making the final
   statement dead (not marked as relevant).  The final statement is still
   needed to maintain the type correctness of the IR.

   vect_determine_precisions has already determined the minimum
   precision of the operation and the minimum precision required
   by users of the result.  */
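/* Illustrative sketch (hypothetical names): a common C instance of the
   over-widening situation is

     unsigned char a[N], b[N], c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] + b[i];     // arithmetic promoted to int

   The addition is performed in int only because of integer promotion;
   since just 8 meaningful bits reach c[i], the operation can be demoted
   to a narrower type (subject to the precision bookkeeping below) and
   vectorized with more elements per vector.  */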
static gimple *
vect_recog_over_widening_pattern (vec_info *vinfo,
				  stmt_vec_info last_stmt_info, tree *type_out)
{
3021 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3025 /* See whether we have found that this operation can be done on a
3026 narrower type without changing its semantics. */
3027 unsigned int new_precision
= last_stmt_info
->operation_precision
;
3031 tree lhs
= gimple_assign_lhs (last_stmt
);
3032 tree type
= TREE_TYPE (lhs
);
3033 tree_code code
= gimple_assign_rhs_code (last_stmt
);
3035 /* Punt for reductions where we don't handle the type conversions. */
3036 if (STMT_VINFO_DEF_TYPE (last_stmt_info
) == vect_reduction_def
)
3039 /* Keep the first operand of a COND_EXPR as-is: only the other two
3040 operands are interesting. */
3041 unsigned int first_op
= (code
== COND_EXPR
? 2 : 1);
3043 /* Check the operands. */
3044 unsigned int nops
= gimple_num_ops (last_stmt
) - first_op
;
3045 auto_vec
<vect_unpromoted_value
, 3> unprom (nops
);
3046 unprom
.quick_grow_cleared (nops
);
3047 unsigned int min_precision
= 0;
3048 bool single_use_p
= false;
3049 for (unsigned int i
= 0; i
< nops
; ++i
)
3051 tree op
= gimple_op (last_stmt
, first_op
+ i
);
3052 if (TREE_CODE (op
) == INTEGER_CST
)
3053 unprom
[i
].set_op (op
, vect_constant_def
);
3054 else if (TREE_CODE (op
) == SSA_NAME
)
3056 bool op_single_use_p
= true;
3057 if (!vect_look_through_possible_promotion (vinfo
, op
, &unprom
[i
],
3062 (1) N bits of the result are needed;
3063 (2) all inputs are widened from M<N bits; and
3064 (3) one operand OP is a single-use SSA name
3066 we can shift the M->N widening from OP to the output
3067 without changing the number or type of extensions involved.
3068 This then reduces the number of copies of STMT_INFO.
3070 If instead of (3) more than one operand is a single-use SSA name,
3071 shifting the extension to the output is even more of a win.
3075 (1) N bits of the result are needed;
3076 (2) one operand OP2 is widened from M2<N bits;
3077 (3) another operand OP1 is widened from M1<M2 bits; and
3078 (4) both OP1 and OP2 are single-use
3080 the choice is between:
3082 (a) truncating OP2 to M1, doing the operation on M1,
3083 and then widening the result to N
3085 (b) widening OP1 to M2, doing the operation on M2, and then
3086 widening the result to N
3088 Both shift the M2->N widening of the inputs to the output.
3089 (a) additionally shifts the M1->M2 widening to the output;
3090 it requires fewer copies of STMT_INFO but requires an extra
3093 Which is better will depend on the complexity and cost of
3094 STMT_INFO, which is hard to predict at this stage. However,
3095 a clear tie-breaker in favor of (b) is the fact that the
3096 truncation in (a) increases the length of the operation chain.
3098 If instead of (4) only one of OP1 or OP2 is single-use,
3099 (b) is still a win over doing the operation in N bits:
3100 it still shifts the M2->N widening on the single-use operand
3101 to the output and reduces the number of STMT_INFO copies.
3103 If neither operand is single-use then operating on fewer than
3104 N bits might lead to more extensions overall. Whether it does
3105 or not depends on global information about the vectorization
3106 region, and whether that's a good trade-off would again
3107 depend on the complexity and cost of the statements involved,
3108 as well as things like register pressure that are not normally
3109 modelled at this stage. We therefore ignore these cases
3110 and just optimize the clear single-use wins above.
3112 Thus we take the maximum precision of the unpromoted operands
3113 and record whether any operand is single-use. */
3114 if (unprom
[i
].dt
== vect_internal_def
)
3116 min_precision
= MAX (min_precision
,
3117 TYPE_PRECISION (unprom
[i
].type
));
3118 single_use_p
|= op_single_use_p
;
3125 /* Although the operation could be done in operation_precision, we have
3126 to balance that against introducing extra truncations or extensions.
3127 Calculate the minimum precision that can be handled efficiently.
3129 The loop above determined that the operation could be handled
3130 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3131 extension from the inputs to the output without introducing more
3132 instructions, and would reduce the number of instructions required
3133 for STMT_INFO itself.
3135 vect_determine_precisions has also determined that the result only
3136 needs min_output_precision bits. Truncating by a factor of N times
3137 requires a tree of N - 1 instructions, so if TYPE is N times wider
3138 than min_output_precision, doing the operation in TYPE and truncating
3139 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3142 - truncating the input to a unary operation and doing the operation
3143 in the new type requires at most N - 1 + 1 = N instructions per
3146 - doing the same for a binary operation requires at most
3147 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3149 Both unary and binary operations require fewer instructions than
3150 this if the operands were extended from a suitable truncated form.
3151 Thus there is usually nothing to lose by doing operations in
3152 min_output_precision bits, but there can be something to gain. */
3154 min_precision
= last_stmt_info
->min_output_precision
;
3156 min_precision
= MIN (min_precision
, last_stmt_info
->min_output_precision
);
3158 /* Apply the minimum efficient precision we just calculated. */
3159 if (new_precision
< min_precision
)
3160 new_precision
= min_precision
;
3161 new_precision
= vect_element_precision (new_precision
);
3162 if (new_precision
>= TYPE_PRECISION (type
))
3165 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt
);
3167 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
3171 /* We've found a viable pattern. Get the new type of the operation. */
3172 bool unsigned_p
= (last_stmt_info
->operation_sign
== UNSIGNED
);
3173 tree new_type
= build_nonstandard_integer_type (new_precision
, unsigned_p
);
3175 /* If we're truncating an operation, we need to make sure that we
3176 don't introduce new undefined overflow. The codes tested here are
3177 a subset of those accepted by vect_truncatable_operation_p. */
3178 tree op_type
= new_type
;
3179 if (TYPE_OVERFLOW_UNDEFINED (new_type
)
3180 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== MULT_EXPR
))
3181 op_type
= build_nonstandard_integer_type (new_precision
, true);
3183 /* We specifically don't check here whether the target supports the
3184 new operation, since it might be something that a later pattern
3185 wants to rewrite anyway. If targets have a minimum element size
3186 for some optabs, we should pattern-match smaller ops to larger ops
3187 where beneficial. */
3188 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3189 tree op_vectype
= get_vectype_for_scalar_type (vinfo
, op_type
);
3190 if (!new_vectype
|| !op_vectype
)
3193 if (dump_enabled_p ())
3194 dump_printf_loc (MSG_NOTE
, vect_location
, "demoting %T to %T\n",
3197 /* Calculate the rhs operands for an operation on OP_TYPE. */
3199 for (unsigned int i
= 1; i
< first_op
; ++i
)
3200 ops
[i
- 1] = gimple_op (last_stmt
, i
);
3201 vect_convert_inputs (vinfo
, last_stmt_info
, nops
, &ops
[first_op
- 1],
3202 op_type
, &unprom
[0], op_vectype
);
3204 /* Use the operation to produce a result of type OP_TYPE. */
3205 tree new_var
= vect_recog_temp_ssa_var (op_type
, NULL
);
3206 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
,
3207 ops
[0], ops
[1], ops
[2]);
3208 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3210 if (dump_enabled_p ())
3211 dump_printf_loc (MSG_NOTE
, vect_location
,
3212 "created pattern stmt: %G", pattern_stmt
);
3214 /* Convert back to the original signedness, if OP_TYPE is different
3216 if (op_type
!= new_type
)
3217 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, new_type
,
3218 pattern_stmt
, op_vectype
);
3220 /* Promote the result to the original type. */
3221 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, type
,
3222 pattern_stmt
, new_vectype
);
3224 return pattern_stmt
;
/* Recognize the following patterns:

     ATYPE a;  // narrower than TYPE
     BTYPE b;  // narrower than TYPE

   1) Multiply high with scaling
     TYPE res = ((TYPE) a * (TYPE) b) >> c;
     Here, c is bitsize (TYPE) / 2 - 1.

   2) ... or also with rounding
     TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
     Here, d is bitsize (TYPE) / 2 - 2.

   3) Normal multiply high
     TYPE res = ((TYPE) a * (TYPE) b) >> e;
     Here, e is bitsize (TYPE) / 2.

   where only the bottom half of res is used.  */
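/* Illustrative sketch (hypothetical names): pattern 2) above is the usual
   C spelling of a Q15 rounding multiply-high, e.g.

     short a[N], b[N], r[N];
     for (int i = 0; i < N; i++)
       r[i] = ((((int) a[i] * b[i]) >> 14) + 1) >> 1;

   Here TYPE is int, d = 32 / 2 - 2 = 14, and only the low 16 bits of the
   result are stored, so the whole sequence can be replaced by a single
   IFN_MULHRS call on targets that provide it.  */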
static gimple *
vect_recog_mulhs_pattern (vec_info *vinfo,
			  stmt_vec_info last_stmt_info, tree *type_out)
{
3250 /* Check for a right shift. */
3251 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3253 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
)
3256 /* Check that the shift result is wider than the users of the
3257 result need (i.e. that narrowing would be a natural choice). */
3258 tree lhs_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
3259 unsigned int target_precision
3260 = vect_element_precision (last_stmt_info
->min_output_precision
);
3261 if (!INTEGRAL_TYPE_P (lhs_type
)
3262 || target_precision
>= TYPE_PRECISION (lhs_type
))
3265 /* Look through any change in sign on the outer shift input. */
3266 vect_unpromoted_value unprom_rshift_input
;
3267 tree rshift_input
= vect_look_through_possible_promotion
3268 (vinfo
, gimple_assign_rhs1 (last_stmt
), &unprom_rshift_input
);
3270 || TYPE_PRECISION (TREE_TYPE (rshift_input
))
3271 != TYPE_PRECISION (lhs_type
))
3274 /* Get the definition of the shift input. */
3275 stmt_vec_info rshift_input_stmt_info
3276 = vect_get_internal_def (vinfo
, rshift_input
);
3277 if (!rshift_input_stmt_info
)
3279 gassign
*rshift_input_stmt
3280 = dyn_cast
<gassign
*> (rshift_input_stmt_info
->stmt
);
3281 if (!rshift_input_stmt
)
3284 stmt_vec_info mulh_stmt_info
;
3286 bool rounding_p
= false;
3288 /* Check for the presence of the rounding term. */
3289 if (gimple_assign_rhs_code (rshift_input_stmt
) == PLUS_EXPR
)
3291 /* Check that the outer shift was by 1. */
3292 if (!integer_onep (gimple_assign_rhs2 (last_stmt
)))
3295 /* Check that the second operand of the PLUS_EXPR is 1. */
3296 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt
)))
3299 /* Look through any change in sign on the addition input. */
3300 vect_unpromoted_value unprom_plus_input
;
3301 tree plus_input
= vect_look_through_possible_promotion
3302 (vinfo
, gimple_assign_rhs1 (rshift_input_stmt
), &unprom_plus_input
);
3304 || TYPE_PRECISION (TREE_TYPE (plus_input
))
3305 != TYPE_PRECISION (TREE_TYPE (rshift_input
)))
3308 /* Get the definition of the multiply-high-scale part. */
3309 stmt_vec_info plus_input_stmt_info
3310 = vect_get_internal_def (vinfo
, plus_input
);
3311 if (!plus_input_stmt_info
)
3313 gassign
*plus_input_stmt
3314 = dyn_cast
<gassign
*> (plus_input_stmt_info
->stmt
);
3315 if (!plus_input_stmt
3316 || gimple_assign_rhs_code (plus_input_stmt
) != RSHIFT_EXPR
)
3319 /* Look through any change in sign on the scaling input. */
3320 vect_unpromoted_value unprom_scale_input
;
3321 tree scale_input
= vect_look_through_possible_promotion
3322 (vinfo
, gimple_assign_rhs1 (plus_input_stmt
), &unprom_scale_input
);
3324 || TYPE_PRECISION (TREE_TYPE (scale_input
))
3325 != TYPE_PRECISION (TREE_TYPE (plus_input
)))
3328 /* Get the definition of the multiply-high part. */
3329 mulh_stmt_info
= vect_get_internal_def (vinfo
, scale_input
);
3330 if (!mulh_stmt_info
)
3333 /* Get the scaling term. */
3334 scale_term
= gimple_assign_rhs2 (plus_input_stmt
);
3339 mulh_stmt_info
= rshift_input_stmt_info
;
3340 scale_term
= gimple_assign_rhs2 (last_stmt
);
3343 /* Check that the scaling factor is constant. */
3344 if (TREE_CODE (scale_term
) != INTEGER_CST
)
3347 /* Check whether the scaling input term can be seen as two widened
3348 inputs multiplied together. */
3349 vect_unpromoted_value unprom_mult
[2];
3352 = vect_widened_op_tree (vinfo
, mulh_stmt_info
, MULT_EXPR
, WIDEN_MULT_EXPR
,
3353 false, 2, unprom_mult
, &new_type
);
3357 /* Adjust output precision. */
3358 if (TYPE_PRECISION (new_type
) < target_precision
)
3359 new_type
= build_nonstandard_integer_type
3360 (target_precision
, TYPE_UNSIGNED (new_type
));
3362 unsigned mult_precision
= TYPE_PRECISION (new_type
);
3364 /* Check that the scaling factor is expected. Instead of
3365 target_precision, we should use the one that we actually
3366 use for internal function. */
3369 /* Check pattern 2). */
3370 if (wi::to_widest (scale_term
) + mult_precision
+ 2
3371 != TYPE_PRECISION (lhs_type
))
3378 /* Check for pattern 1). */
3379 if (wi::to_widest (scale_term
) + mult_precision
+ 1
3380 == TYPE_PRECISION (lhs_type
))
3382 /* Check for pattern 3). */
3383 else if (wi::to_widest (scale_term
) + mult_precision
3384 == TYPE_PRECISION (lhs_type
))
3390 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt
);
3392 /* Check for target support. */
3393 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3395 || !direct_internal_fn_supported_p
3396 (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
3399 /* The IR requires a valid vector type for the cast result, even though
3400 it's likely to be discarded. */
3401 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
3405 /* Generate the IFN_MULHRS call. */
3406 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
3408 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
3409 unprom_mult
, new_vectype
);
3411 = gimple_build_call_internal (ifn
, 2, new_ops
[0], new_ops
[1]);
3412 gimple_call_set_lhs (mulhrs_stmt
, new_var
);
3413 gimple_set_location (mulhrs_stmt
, gimple_location (last_stmt
));
3415 if (dump_enabled_p ())
3416 dump_printf_loc (MSG_NOTE
, vect_location
,
3417 "created pattern stmt: %G", (gimple
*) mulhrs_stmt
);
3419 return vect_convert_output (vinfo
, last_stmt_info
, lhs_type
,
3420 mulhrs_stmt
, new_vectype
);
/* Recognize the patterns:

	    ATYPE a;  // narrower than TYPE
	    BTYPE b;  // narrower than TYPE
   (1)     TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
   or (2)  TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;

   where only the bottom half of avg is used.  Try to transform them into:

   (1)     NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
   or (2)  NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);

   followed by:

	   TYPE avg = (TYPE) avg';

   where NTYPE is no wider than half of TYPE.  Since only the bottom half
   of avg is used, all or part of the cast of avg' should become redundant.

   If there is no target support available, generate code to distribute rshift
   over plus and add a carry.  */
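/* Illustrative sketch (hypothetical names): the rounding average in

     unsigned char a[N], b[N], r[N];
     for (int i = 0; i < N; i++)
       r[i] = (a[i] + b[i] + 1) >> 1;   // promoted to int, then truncated

   matches form (2) and becomes .AVG_CEIL on targets with such an
   instruction.  Without one, the fallback below computes
   (a >> 1) + (b >> 1) + ((a | b) & 1), which gives the same value without
   needing the wider intermediate sum (the floor variant uses & instead
   of | for the carry).  */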
static gimple *
vect_recog_average_pattern (vec_info *vinfo,
			    stmt_vec_info last_stmt_info, tree *type_out)
{
3449 /* Check for a shift right by one bit. */
3450 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3452 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
3453 || !integer_onep (gimple_assign_rhs2 (last_stmt
)))
3456 /* Check that the shift result is wider than the users of the
3457 result need (i.e. that narrowing would be a natural choice). */
3458 tree lhs
= gimple_assign_lhs (last_stmt
);
3459 tree type
= TREE_TYPE (lhs
);
3460 unsigned int target_precision
3461 = vect_element_precision (last_stmt_info
->min_output_precision
);
3462 if (!INTEGRAL_TYPE_P (type
) || target_precision
>= TYPE_PRECISION (type
))
3465 /* Look through any change in sign on the shift input. */
3466 tree rshift_rhs
= gimple_assign_rhs1 (last_stmt
);
3467 vect_unpromoted_value unprom_plus
;
3468 rshift_rhs
= vect_look_through_possible_promotion (vinfo
, rshift_rhs
,
3471 || TYPE_PRECISION (TREE_TYPE (rshift_rhs
)) != TYPE_PRECISION (type
))
3474 /* Get the definition of the shift input. */
3475 stmt_vec_info plus_stmt_info
= vect_get_internal_def (vinfo
, rshift_rhs
);
3476 if (!plus_stmt_info
)
3479 /* Check whether the shift input can be seen as a tree of additions on
3480 2 or 3 widened inputs.
3482 Note that the pattern should be a win even if the result of one or
3483 more additions is reused elsewhere: if the pattern matches, we'd be
3484 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3485 internal_fn ifn
= IFN_AVG_FLOOR
;
3486 vect_unpromoted_value unprom
[3];
3488 unsigned int nops
= vect_widened_op_tree (vinfo
, plus_stmt_info
, PLUS_EXPR
,
3489 IFN_VEC_WIDEN_PLUS
, false, 3,
3495 /* Check that one operand is 1. */
3497 for (i
= 0; i
< 3; ++i
)
3498 if (integer_onep (unprom
[i
].op
))
3502 /* Throw away the 1 operand and keep the other two. */
3504 unprom
[i
] = unprom
[2];
3508 vect_pattern_detected ("vect_recog_average_pattern", last_stmt
);
3512 (a) the operation can be viewed as:
3514 TYPE widened0 = (TYPE) UNPROM[0];
3515 TYPE widened1 = (TYPE) UNPROM[1];
3516 TYPE tmp1 = widened0 + widened1 {+ 1};
3517 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3519 (b) the first two statements are equivalent to:
3521 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3522 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3524 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3527 (d) all the operations can be performed correctly at twice the width of
3528 NEW_TYPE, due to the nature of the average operation; and
3530 (e) users of the result of the right shift need only TARGET_PRECISION
3531 bits, where TARGET_PRECISION is no more than half of TYPE's
3534 Under these circumstances, the only situation in which NEW_TYPE
3535 could be narrower than TARGET_PRECISION is if widened0, widened1
3536 and an addition result are all used more than once. Thus we can
3537 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3538 as "free", whereas widening the result of the average instruction
3539 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3540 therefore better not to go narrower than TARGET_PRECISION. */
3541 if (TYPE_PRECISION (new_type
) < target_precision
)
3542 new_type
= build_nonstandard_integer_type (target_precision
,
3543 TYPE_UNSIGNED (new_type
));
3545 /* Check for target support. */
3546 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3550 bool fallback_p
= false;
3552 if (direct_internal_fn_supported_p (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
3554 else if (TYPE_UNSIGNED (new_type
)
3555 && optab_for_tree_code (RSHIFT_EXPR
, new_vectype
, optab_scalar
)
3556 && optab_for_tree_code (PLUS_EXPR
, new_vectype
, optab_default
)
3557 && optab_for_tree_code (BIT_IOR_EXPR
, new_vectype
, optab_default
)
3558 && optab_for_tree_code (BIT_AND_EXPR
, new_vectype
, optab_default
))
3563 /* The IR requires a valid vector type for the cast result, even though
3564 it's likely to be discarded. */
3565 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
3569 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
3571 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
3572 unprom
, new_vectype
);
3576 /* As a fallback, generate code for following sequence:
3578 shifted_op0 = new_ops[0] >> 1;
3579 shifted_op1 = new_ops[1] >> 1;
3580 sum_of_shifted = shifted_op0 + shifted_op1;
3581 unmasked_carry = new_ops[0] and/or new_ops[1];
3582 carry = unmasked_carry & 1;
3583 new_var = sum_of_shifted + carry;
3586 tree one_cst
= build_one_cst (new_type
);
3589 tree shifted_op0
= vect_recog_temp_ssa_var (new_type
, NULL
);
3590 g
= gimple_build_assign (shifted_op0
, RSHIFT_EXPR
, new_ops
[0], one_cst
);
3591 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3593 tree shifted_op1
= vect_recog_temp_ssa_var (new_type
, NULL
);
3594 g
= gimple_build_assign (shifted_op1
, RSHIFT_EXPR
, new_ops
[1], one_cst
);
3595 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3597 tree sum_of_shifted
= vect_recog_temp_ssa_var (new_type
, NULL
);
3598 g
= gimple_build_assign (sum_of_shifted
, PLUS_EXPR
,
3599 shifted_op0
, shifted_op1
);
3600 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3602 tree unmasked_carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
3603 tree_code c
= (ifn
== IFN_AVG_CEIL
) ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3604 g
= gimple_build_assign (unmasked_carry
, c
, new_ops
[0], new_ops
[1]);
3605 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3607 tree carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
3608 g
= gimple_build_assign (carry
, BIT_AND_EXPR
, unmasked_carry
, one_cst
);
3609 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3611 g
= gimple_build_assign (new_var
, PLUS_EXPR
, sum_of_shifted
, carry
);
3612 return vect_convert_output (vinfo
, last_stmt_info
, type
, g
, new_vectype
);
3615 /* Generate the IFN_AVG* call. */
3616 gcall
*average_stmt
= gimple_build_call_internal (ifn
, 2, new_ops
[0],
3618 gimple_call_set_lhs (average_stmt
, new_var
);
3619 gimple_set_location (average_stmt
, gimple_location (last_stmt
));
3621 if (dump_enabled_p ())
3622 dump_printf_loc (MSG_NOTE
, vect_location
,
3623 "created pattern stmt: %G", (gimple
*) average_stmt
);
3625 return vect_convert_output (vinfo
, last_stmt_info
,
3626 type
, average_stmt
, new_vectype
);
/* Recognize cases in which the input to a cast is wider than its
   output, and the input is fed by a widening operation.  Fold this
   by removing the unnecessary intermediate widening.  E.g.:

     unsigned char a;
     unsigned int b = (unsigned int) a;
     unsigned short c = (unsigned short) b;

   =>

     unsigned short c = (unsigned short) a;

   Although this is rare in input IR, it is an expected side-effect
   of the over-widening pattern above.

   This is beneficial also for integer-to-float conversions, if the
   widened integer has more bits than the float, and if the unwidened
   input doesn't.  */
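/* Illustrative sketch (hypothetical names) of the integer-to-float case:

     unsigned short a[N];
     float f[N];
     for (int i = 0; i < N; i++)
       f[i] = a[i];            // a[i] widened to int, then converted

   Converting the unwidened unsigned short directly to float yields the
   same value (every 16-bit value is exactly representable), so the
   intermediate extension to int can be dropped.  */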
static gimple *
vect_recog_cast_forwprop_pattern (vec_info *vinfo,
				  stmt_vec_info last_stmt_info, tree *type_out)
{
3652 /* Check for a cast, including an integer-to-float conversion. */
3653 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3656 tree_code code
= gimple_assign_rhs_code (last_stmt
);
3657 if (!CONVERT_EXPR_CODE_P (code
) && code
!= FLOAT_EXPR
)
3660 /* Make sure that the rhs is a scalar with a natural bitsize. */
3661 tree lhs
= gimple_assign_lhs (last_stmt
);
3664 tree lhs_type
= TREE_TYPE (lhs
);
3665 scalar_mode lhs_mode
;
3666 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type
)
3667 || !is_a
<scalar_mode
> (TYPE_MODE (lhs_type
), &lhs_mode
))
3670 /* Check for a narrowing operation (from a vector point of view). */
3671 tree rhs
= gimple_assign_rhs1 (last_stmt
);
3672 tree rhs_type
= TREE_TYPE (rhs
);
3673 if (!INTEGRAL_TYPE_P (rhs_type
)
3674 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type
)
3675 || TYPE_PRECISION (rhs_type
) <= GET_MODE_BITSIZE (lhs_mode
))
3678 /* Try to find an unpromoted input. */
3679 vect_unpromoted_value unprom
;
3680 if (!vect_look_through_possible_promotion (vinfo
, rhs
, &unprom
)
3681 || TYPE_PRECISION (unprom
.type
) >= TYPE_PRECISION (rhs_type
))
3684 /* If the bits above RHS_TYPE matter, make sure that they're the
3685 same when extending from UNPROM as they are when extending from RHS. */
3686 if (!INTEGRAL_TYPE_P (lhs_type
)
3687 && TYPE_SIGN (rhs_type
) != TYPE_SIGN (unprom
.type
))
3690 /* We can get the same result by casting UNPROM directly, to avoid
3691 the unnecessary widening and narrowing. */
3692 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt
);
3694 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
3698 tree new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
3699 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
, unprom
.op
);
3700 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3702 return pattern_stmt
;
/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
   to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_shift_pattern (vec_info *vinfo,
				stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
				      "vect_recog_widen_shift_pattern");
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMT_VINFO: The stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

  where B is the element bitsize of type.

  Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
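/* Illustrative only (assumed user code, not from this file): a source-level
   rotate such as

     unsigned int rotl8 (unsigned int x)
     {
       return (x << 8) | (x >> 24);
     }

   is typically canonicalized by earlier GIMPLE passes into the r<< form
   above; this pattern then re-expands it into the shift/or sequence when
   the target has no vector rotate, or handles the __builtin_bswap16 case
   below via a byte permutation instead.  */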
3745 vect_recog_rotate_pattern (vec_info
*vinfo
,
3746 stmt_vec_info stmt_vinfo
, tree
*type_out
)
3748 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3749 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
3750 gimple
*pattern_stmt
, *def_stmt
;
3751 enum tree_code rhs_code
;
3752 enum vect_def_type dt
;
3753 optab optab1
, optab2
;
3754 edge ext_def
= NULL
;
3755 bool bswap16_p
= false;
3757 if (is_gimple_assign (last_stmt
))
3759 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3769 lhs
= gimple_assign_lhs (last_stmt
);
3770 oprnd0
= gimple_assign_rhs1 (last_stmt
);
3771 type
= TREE_TYPE (oprnd0
);
3772 oprnd1
= gimple_assign_rhs2 (last_stmt
);
3774 else if (gimple_call_builtin_p (last_stmt
, BUILT_IN_BSWAP16
))
3776 /* __builtin_bswap16 (x) is another form of x r>> 8.
The vectorizer has bswap support, but only if the argument isn't
promoted.  */
3779 lhs
= gimple_call_lhs (last_stmt
);
3780 oprnd0
= gimple_call_arg (last_stmt
, 0);
3781 type
= TREE_TYPE (oprnd0
);
3783 || TYPE_PRECISION (TREE_TYPE (lhs
)) != 16
3784 || TYPE_PRECISION (type
) <= 16
3785 || TREE_CODE (oprnd0
) != SSA_NAME
3786 || BITS_PER_UNIT
!= 8)
3789 stmt_vec_info def_stmt_info
;
3790 if (!vect_is_simple_use (oprnd0
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
3793 if (dt
!= vect_internal_def
)
3796 if (gimple_assign_cast_p (def_stmt
))
3798 def
= gimple_assign_rhs1 (def_stmt
);
3799 if (INTEGRAL_TYPE_P (TREE_TYPE (def
))
3800 && TYPE_PRECISION (TREE_TYPE (def
)) == 16)
3804 type
= TREE_TYPE (lhs
);
3805 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3806 if (vectype
== NULL_TREE
)
3809 if (tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype
))
/* The encoding uses one stepped pattern for each byte in the
   16-bit word.  */
3813 vec_perm_builder
elts (TYPE_VECTOR_SUBPARTS (char_vectype
), 2, 3);
3814 for (unsigned i
= 0; i
< 3; ++i
)
3815 for (unsigned j
= 0; j
< 2; ++j
)
3816 elts
.quick_push ((i
+ 1) * 2 - j
- 1);
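/* Illustrative note (added for clarity): for i in [0,3) and j in [0,2) the
   pushed series is 1, 0, 3, 2, 5, 4, which the two stepped patterns extend
   to { 1, 0, 3, 2, 5, 4, 7, 6, ... }, i.e. every pair of adjacent bytes is
   swapped, a per-16-bit-lane byte swap.  */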
3818 vec_perm_indices
indices (elts
, 1,
3819 TYPE_VECTOR_SUBPARTS (char_vectype
));
3820 machine_mode vmode
= TYPE_MODE (char_vectype
);
3821 if (can_vec_perm_const_p (vmode
, vmode
, indices
))
3823 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3824 undo the argument promotion. */
3825 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
3827 def
= vect_recog_temp_ssa_var (type
, NULL
);
3828 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3829 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3833 /* Pattern detected. */
3834 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3836 *type_out
= vectype
;
3838 /* Pattern supported. Create a stmt to be used to replace the
3839 pattern, with the unpromoted argument. */
3840 var
= vect_recog_temp_ssa_var (type
, NULL
);
3841 pattern_stmt
= gimple_build_call (gimple_call_fndecl (last_stmt
),
3843 gimple_call_set_lhs (pattern_stmt
, var
);
3844 gimple_call_set_fntype (as_a
<gcall
*> (pattern_stmt
),
3845 gimple_call_fntype (last_stmt
));
3846 return pattern_stmt
;
3850 oprnd1
= build_int_cst (integer_type_node
, 8);
3851 rhs_code
= LROTATE_EXPR
;
3857 if (TREE_CODE (oprnd0
) != SSA_NAME
3858 || !INTEGRAL_TYPE_P (type
)
3859 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
))
3862 stmt_vec_info def_stmt_info
;
3863 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
3866 if (dt
!= vect_internal_def
3867 && dt
!= vect_constant_def
3868 && dt
!= vect_external_def
)
3871 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3872 if (vectype
== NULL_TREE
)
3875 /* If vector/vector or vector/scalar rotate is supported by the target,
3876 don't do anything here. */
3877 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
3879 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3884 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
3886 def
= vect_recog_temp_ssa_var (type
, NULL
);
3887 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3888 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3892 /* Pattern detected. */
3893 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3895 *type_out
= vectype
;
/* Pattern supported.  Create a stmt to be used to replace the
   pattern.  */
3899 var
= vect_recog_temp_ssa_var (type
, NULL
);
3900 pattern_stmt
= gimple_build_assign (var
, LROTATE_EXPR
, oprnd0
,
3902 return pattern_stmt
;
3907 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
3909 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
3911 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3915 tree utype
= unsigned_type_for (type
);
3916 tree uvectype
= get_vectype_for_scalar_type (vinfo
, utype
);
3920 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3921 don't do anything here either. */
3922 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_vector
);
3923 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_vector
);
3925 || optab_handler (optab1
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
3927 || optab_handler (optab2
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
)
3929 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
3931 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_scalar
);
3932 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_scalar
);
3934 || optab_handler (optab1
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
3936 || optab_handler (optab2
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
)
3940 *type_out
= vectype
;
3942 if (!useless_type_conversion_p (utype
, TREE_TYPE (oprnd0
)))
3944 def
= vect_recog_temp_ssa_var (utype
, NULL
);
3945 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3946 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3950 if (dt
== vect_external_def
&& TREE_CODE (oprnd1
) == SSA_NAME
)
3951 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
3954 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (utype
);
3955 if (dt
!= vect_internal_def
|| TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
3957 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
3959 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
3960 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
3961 && TYPE_PRECISION (TREE_TYPE (rhs1
))
3962 == TYPE_PRECISION (type
))
3966 if (def
== NULL_TREE
)
3968 def
= vect_recog_temp_ssa_var (utype
, NULL
);
3969 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
3970 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3972 stype
= TREE_TYPE (def
);
3974 if (TREE_CODE (def
) == INTEGER_CST
)
3976 if (!tree_fits_uhwi_p (def
)
3977 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
3978 || integer_zerop (def
))
3980 def2
= build_int_cst (stype
,
3981 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
3985 tree vecstype
= get_vectype_for_scalar_type (vinfo
, stype
);
3987 if (vecstype
== NULL_TREE
)
3989 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
3990 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
3994 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
3995 gcc_assert (!new_bb
);
3998 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
4000 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
4001 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (mode
) - 1);
4002 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
4003 gimple_assign_lhs (def_stmt
), mask
);
4007 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
4008 gcc_assert (!new_bb
);
4011 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
4014 var1
= vect_recog_temp_ssa_var (utype
, NULL
);
4015 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
4016 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
4018 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
4020 var2
= vect_recog_temp_ssa_var (utype
, NULL
);
4021 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
4022 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
4024 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
4026 /* Pattern detected. */
4027 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
4029 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4030 var
= vect_recog_temp_ssa_var (utype
, NULL
);
4031 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
4033 if (!useless_type_conversion_p (type
, utype
))
4035 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, uvectype
);
4036 tree result
= vect_recog_temp_ssa_var (type
, NULL
);
4037 pattern_stmt
= gimple_build_assign (result
, NOP_EXPR
, var
);
4039 return pattern_stmt
;
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with a different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases where the shift amount is the result of a conversion:

   type a_t;
   TYPE b_T, c_T, res_T;

   S1 a_t = (type) c_T;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMT_VINFO: The stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has the same type on both operands, in the
    second case just b_T op c_T, in the first case with an added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
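/* Illustrative only (assumed user code, not from this file):

     long long b;
     int a;
     ...
     res = b << a;

   The shift amount 'a' has a narrower type than the shifted value, so the
   pattern emits a cast of 'a' to the type of 'b' (or reuses the original
   wide value when 'a' itself came from such a cast) so that both operands
   of the vector shift have the same type.  */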
4081 vect_recog_vector_vector_shift_pattern (vec_info
*vinfo
,
4082 stmt_vec_info stmt_vinfo
,
4085 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4086 tree oprnd0
, oprnd1
, lhs
, var
;
4087 gimple
*pattern_stmt
;
4088 enum tree_code rhs_code
;
4090 if (!is_gimple_assign (last_stmt
))
4093 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4105 lhs
= gimple_assign_lhs (last_stmt
);
4106 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4107 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4108 if (TREE_CODE (oprnd0
) != SSA_NAME
4109 || TREE_CODE (oprnd1
) != SSA_NAME
4110 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
4111 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0
))
4112 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
4113 || TYPE_PRECISION (TREE_TYPE (lhs
))
4114 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
4117 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
4121 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (oprnd0
));
4122 if (*type_out
== NULL_TREE
)
4125 tree def
= NULL_TREE
;
4126 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
4127 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
4129 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
4130 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
4131 && TYPE_PRECISION (TREE_TYPE (rhs1
))
4132 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
4134 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
4135 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
4140 = build_low_bits_mask (TREE_TYPE (rhs1
),
4141 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
4142 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4143 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
4144 tree vecstype
= get_vectype_for_scalar_type (vinfo
,
4146 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
4151 if (def
== NULL_TREE
)
4153 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
4154 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
4155 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4158 /* Pattern detected. */
4159 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
4161 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4162 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
4163 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
4165 return pattern_stmt
;
4168 /* Return true iff the target has a vector optab implementing the operation
4169 CODE on type VECTYPE. */
4172 target_has_vecop_for_code (tree_code code
, tree vectype
)
4174 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4176 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
4179 /* Verify that the target has optabs of VECTYPE to perform all the steps
4180 needed by the multiplication-by-immediate synthesis algorithm described by
4181 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4182 present. Return true iff the target supports all the steps. */
4185 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
4186 tree vectype
, bool synth_shift_p
)
4188 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
4191 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
4192 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
4194 if (var
== negate_variant
4195 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
4198 /* If we must synthesize shifts with additions make sure that vector
4199 addition is available. */
4200 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
4203 for (int i
= 1; i
< alg
->ops
; i
++)
4211 case alg_add_factor
:
4212 if (!supports_vplus
)
4217 case alg_sub_factor
:
4218 if (!supports_vminus
)
4224 case alg_impossible
:
4234 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
4235 putting the final result in DEST. Append all statements but the last into
4236 VINFO. Return the last statement. */
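/* Sketch of the generated sequence for AMNT == 3 (an assumed example; the
   actual SSA names come from vect_recog_temp_ssa_var):

     t1   = op + op;    // op << 1
     t2   = t1 + t1;    // op << 2
     dest = t2 + t2;    // op << 3

   i.e. AMNT successive doublings using PLUS_EXPR only.  */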
4239 synth_lshift_by_additions (vec_info
*vinfo
,
4240 tree dest
, tree op
, HOST_WIDE_INT amnt
,
4241 stmt_vec_info stmt_info
)
4244 tree itype
= TREE_TYPE (op
);
4246 gcc_assert (amnt
>= 0);
4247 for (i
= 0; i
< amnt
; i
++)
4249 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
4252 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
4255 append_pattern_def_seq (vinfo
, stmt_info
, stmt
);
4263 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4264 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4265 the process if necessary. Append the resulting assignment statements
4266 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4267 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4268 left shifts using additions. */
4271 apply_binop_and_append_stmt (vec_info
*vinfo
,
4272 tree_code code
, tree op1
, tree op2
,
4273 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
4275 if (integer_zerop (op2
)
4276 && (code
== LSHIFT_EXPR
4277 || code
== PLUS_EXPR
))
4279 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
4284 tree itype
= TREE_TYPE (op1
);
4285 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
4287 if (code
== LSHIFT_EXPR
4290 stmt
= synth_lshift_by_additions (vinfo
, tmp_var
, op1
,
4291 TREE_INT_CST_LOW (op2
), stmt_vinfo
);
4292 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4296 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
4297 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4301 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4302 and simple arithmetic operations to be vectorized. Record the statements
4303 produced in STMT_VINFO and return the last statement in the sequence or
4304 NULL if it's not possible to synthesize such a multiplication.
4305 This function mirrors the behavior of expand_mult_const in expmed.cc but
4306 works on tree-ssa form. */
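/* Illustrative decompositions of the kind choose_mult_variant may pick
   (assumed examples; the exact sequence depends on the target costs):

     x * 10:  t1 = x << 2;  t2 = t1 + x;  res = t2 << 1;   // (4x + x) * 2
     x * 7:   t1 = x << 3;  res = t1 - x;                  // 8x - x  */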
4309 vect_synth_mult_by_constant (vec_info
*vinfo
, tree op
, tree val
,
4310 stmt_vec_info stmt_vinfo
)
4312 tree itype
= TREE_TYPE (op
);
4313 machine_mode mode
= TYPE_MODE (itype
);
4314 struct algorithm alg
;
4315 mult_variant variant
;
4316 if (!tree_fits_shwi_p (val
))
4319 /* Multiplication synthesis by shifts, adds and subs can introduce
4320 signed overflow where the original operation didn't. Perform the
4321 operations on an unsigned type and cast back to avoid this.
4322 In the future we may want to relax this for synthesis algorithms
4323 that we can prove do not cause unexpected overflow. */
4324 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
4326 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
4327 tree vectype
= get_vectype_for_scalar_type (vinfo
, multtype
);
4331 /* Targets that don't support vector shifts but support vector additions
4332 can synthesize shifts that way. */
4333 bool synth_shift_p
= !vect_supportable_shift (vinfo
, LSHIFT_EXPR
, multtype
);
4335 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
4336 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
The vectorizer's benefit analysis will decide whether it's beneficial
to do this.  */
4339 bool possible
= choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype
))
4340 ? TYPE_MODE (vectype
) : mode
,
4341 hwval
, &alg
, &variant
, MAX_COST
);
4345 if (!target_supports_mult_synth_alg (&alg
, variant
, vectype
, synth_shift_p
))
4350 /* Clear out the sequence of statements so we can populate it below. */
4351 gimple
*stmt
= NULL
;
4353 if (cast_to_unsigned_p
)
4355 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
4356 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
4357 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4361 if (alg
.op
[0] == alg_zero
)
4362 accumulator
= build_int_cst (multtype
, 0);
4366 bool needs_fixup
= (variant
== negate_variant
)
4367 || (variant
== add_variant
);
4369 for (int i
= 1; i
< alg
.ops
; i
++)
4371 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
4372 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4373 tree tmp_var
= NULL_TREE
;
4380 = synth_lshift_by_additions (vinfo
, accum_tmp
, accumulator
,
4381 alg
.log
[i
], stmt_vinfo
);
4383 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
4388 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
, shft_log
,
4389 stmt_vinfo
, synth_shift_p
);
4390 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
4394 tmp_var
= apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
,
4395 shft_log
, stmt_vinfo
,
4397 /* In some algorithms the first step involves zeroing the
4398 accumulator. If subtracting from such an accumulator
4399 just emit the negation directly. */
4400 if (integer_zerop (accumulator
))
4401 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
4403 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
4408 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4409 shft_log
, stmt_vinfo
, synth_shift_p
);
4410 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
4414 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4415 shft_log
, stmt_vinfo
, synth_shift_p
);
4416 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
4418 case alg_add_factor
:
4420 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4421 shft_log
, stmt_vinfo
, synth_shift_p
);
4422 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
4425 case alg_sub_factor
:
4427 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4428 shft_log
, stmt_vinfo
, synth_shift_p
);
4429 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
4435 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4436 but rather return it directly. */
4438 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
4439 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4440 accumulator
= accum_tmp
;
4442 if (variant
== negate_variant
)
4444 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4445 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
4446 accumulator
= accum_tmp
;
4447 if (cast_to_unsigned_p
)
4448 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4450 else if (variant
== add_variant
)
4452 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4453 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
4454 accumulator
= accum_tmp
;
4455 if (cast_to_unsigned_p
)
4456 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
/* Move back to a signed type if needed.  */
4459 if (cast_to_unsigned_p
)
4461 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
4462 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
/* Detect multiplication by constant and convert it into a sequence of
   shifts and additions, subtractions, negations.  We reuse the
   choose_mult_variant algorithms from expmed.cc.

   Input/Output:

   STMT_VINFO: The stmt from which the pattern search begins,
   i.e. the mult stmt.

   Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace
    the multiplication.  */
4485 vect_recog_mult_pattern (vec_info
*vinfo
,
4486 stmt_vec_info stmt_vinfo
, tree
*type_out
)
4488 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4489 tree oprnd0
, oprnd1
, vectype
, itype
;
4490 gimple
*pattern_stmt
;
4492 if (!is_gimple_assign (last_stmt
))
4495 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
4498 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4499 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4500 itype
= TREE_TYPE (oprnd0
);
4502 if (TREE_CODE (oprnd0
) != SSA_NAME
4503 || TREE_CODE (oprnd1
) != INTEGER_CST
4504 || !INTEGRAL_TYPE_P (itype
)
4505 || !type_has_mode_precision_p (itype
))
4508 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
4509 if (vectype
== NULL_TREE
)
4512 /* If the target can handle vectorized multiplication natively,
4513 don't attempt to optimize this. */
4514 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
4515 if (mul_optab
!= unknown_optab
)
4517 machine_mode vec_mode
= TYPE_MODE (vectype
);
4518 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
4519 if (icode
!= CODE_FOR_nothing
)
4523 pattern_stmt
= vect_synth_mult_by_constant (vinfo
,
4524 oprnd0
, oprnd1
, stmt_vinfo
);
4528 /* Pattern detected. */
4529 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
4531 *type_out
= vectype
;
4533 return pattern_stmt
;
4536 extern bool gimple_unsigned_integer_sat_add (tree
, tree
*, tree (*)(tree
));
4537 extern bool gimple_unsigned_integer_sat_sub (tree
, tree
*, tree (*)(tree
));
4538 extern bool gimple_unsigned_integer_sat_trunc (tree
, tree
*, tree (*)(tree
));
4540 extern bool gimple_signed_integer_sat_add (tree
, tree
*, tree (*)(tree
));
4541 extern bool gimple_signed_integer_sat_sub (tree
, tree
*, tree (*)(tree
));
4544 vect_recog_build_binary_gimple_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
4545 internal_fn fn
, tree
*type_out
,
4546 tree lhs
, tree op_0
, tree op_1
)
4548 tree itype
= TREE_TYPE (op_0
);
4549 tree otype
= TREE_TYPE (lhs
);
4550 tree v_itype
= get_vectype_for_scalar_type (vinfo
, itype
);
4551 tree v_otype
= get_vectype_for_scalar_type (vinfo
, otype
);
4553 if (v_itype
!= NULL_TREE
&& v_otype
!= NULL_TREE
4554 && direct_internal_fn_supported_p (fn
, v_itype
, OPTIMIZE_FOR_BOTH
))
4556 gcall
*call
= gimple_build_call_internal (fn
, 2, op_0
, op_1
);
4557 tree in_ssa
= vect_recog_temp_ssa_var (itype
, NULL
);
4559 gimple_call_set_lhs (call
, in_ssa
);
4560 gimple_call_set_nothrow (call
, /* nothrow_p */ false);
4561 gimple_set_location (call
, gimple_location (STMT_VINFO_STMT (stmt_info
)));
4563 *type_out
= v_otype
;
4565 if (types_compatible_p (itype
, otype
))
4569 append_pattern_def_seq (vinfo
, stmt_info
, call
, v_itype
);
4570 tree out_ssa
= vect_recog_temp_ssa_var (otype
, NULL
);
4572 return gimple_build_assign (out_ssa
, NOP_EXPR
, in_ssa
);
4580 * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
4583 * _9 = (long unsigned int) _8;
 * And then simplified to
 *   _12 = .SAT_ADD (_4, _6);
 */
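/* Illustrative source-level idiom that typically yields the gimple above
   (an assumed example, not from this file):

     unsigned int sat_add (unsigned int a, unsigned int b)
     {
       unsigned int sum = a + b;
       return sum | -(unsigned int) (sum < a);  // all-ones when it overflows
     }
*/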
4592 vect_recog_sat_add_pattern (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
4595 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
4597 if (!is_gimple_assign (last_stmt
))
4601 tree lhs
= gimple_assign_lhs (last_stmt
);
4603 if (gimple_unsigned_integer_sat_add (lhs
, ops
, NULL
)
4604 || gimple_signed_integer_sat_add (lhs
, ops
, NULL
))
4606 if (TREE_CODE (ops
[1]) == INTEGER_CST
)
4607 ops
[1] = fold_convert (TREE_TYPE (ops
[0]), ops
[1]);
4609 gimple
*stmt
= vect_recog_build_binary_gimple_stmt (vinfo
, stmt_vinfo
,
4610 IFN_SAT_ADD
, type_out
,
4611 lhs
, ops
[0], ops
[1]);
4614 vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt
);
 * Try to transform the truncation for the .SAT_SUB pattern, which mostly
 * occurs in the benchmark zip.  Aka:
 *
 *   unsigned int _1;
 *   unsigned int _2;
 *   unsigned short int _4;
 *   _9 = (unsigned short int).SAT_SUB (_1, _2);
 *
 *   if _1 is known to be in the range of unsigned short int.  For example
 *   there is a def _1 = (unsigned short int)_4.  Then we can transform the
 *   truncation to:
 *
 *   _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
 *   _9 = .SAT_SUB (_4, _3);
 *
 *   Then we can generate better vectorized code and avoid the unnecessary
 *   narrowing stmt during vectorization with the below stmt(s):
 *
 *   _3 = .SAT_TRUNC(_2); // SI => HI
 *   _9 = .SAT_SUB (_4, _3);
 */
4645 vect_recog_sat_sub_pattern_transform (vec_info
*vinfo
,
4646 stmt_vec_info stmt_vinfo
,
4647 tree lhs
, tree
*ops
)
4649 tree otype
= TREE_TYPE (lhs
);
4650 tree itype
= TREE_TYPE (ops
[0]);
4651 unsigned itype_prec
= TYPE_PRECISION (itype
);
4652 unsigned otype_prec
= TYPE_PRECISION (otype
);
4654 if (types_compatible_p (otype
, itype
) || otype_prec
>= itype_prec
)
4657 tree v_otype
= get_vectype_for_scalar_type (vinfo
, otype
);
4658 tree v_itype
= get_vectype_for_scalar_type (vinfo
, itype
);
4659 tree_pair v_pair
= tree_pair (v_otype
, v_itype
);
4661 if (v_otype
== NULL_TREE
|| v_itype
== NULL_TREE
4662 || !direct_internal_fn_supported_p (IFN_SAT_TRUNC
, v_pair
,
/* 1. Find the _4 and update ops[0] as in the above example.  */
4667 vect_unpromoted_value unprom
;
4668 tree tmp
= vect_look_through_possible_promotion (vinfo
, ops
[0], &unprom
);
4670 if (tmp
== NULL_TREE
|| TYPE_PRECISION (unprom
.type
) != otype_prec
)
/* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as in the above example.  */
4676 tree trunc_lhs_ssa
= vect_recog_temp_ssa_var (otype
, NULL
);
4677 gcall
*call
= gimple_build_call_internal (IFN_SAT_TRUNC
, 1, ops
[1]);
4679 gimple_call_set_lhs (call
, trunc_lhs_ssa
);
4680 gimple_call_set_nothrow (call
, /* nothrow_p */ false);
4681 append_pattern_def_seq (vinfo
, stmt_vinfo
, call
, v_otype
);
4683 ops
[1] = trunc_lhs_ssa
;
 * Try to detect saturation sub pattern (SAT_SUB), aka below gimple:
4691 * _10 = (long unsigned int) _7;
 * And then simplified to
4695 * _9 = .SAT_SUB (_1, _2);
4698 * x.0_4 = (unsigned char) x_16;
4699 * y.1_5 = (unsigned char) y_18;
4700 * _6 = x.0_4 - y.1_5;
4701 * minus_19 = (int8_t) _6;
4703 * _8 = x_16 ^ minus_19;
4706 * _24 = (signed char) _23;
4707 * _58 = (unsigned char) _24;
4709 * _25 = (signed char) _59;
4713 * iftmp.2_11 = _41 ? _26 : minus_19;
 * And then simplified to
 *   iftmp.2_11 = .SAT_SUB (x_16, y_18);
 */
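/* Illustrative source-level idiom for the unsigned case (an assumed
   example, not from this file):

     unsigned int sat_sub (unsigned int a, unsigned int b)
     {
       return (a - b) & -(unsigned int) (a >= b);  // 0 when it would wrap
     }
*/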
4720 vect_recog_sat_sub_pattern (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
4723 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
4725 if (!is_gimple_assign (last_stmt
))
4729 tree lhs
= gimple_assign_lhs (last_stmt
);
4731 if (gimple_unsigned_integer_sat_sub (lhs
, ops
, NULL
)
4732 || gimple_signed_integer_sat_sub (lhs
, ops
, NULL
))
4734 vect_recog_sat_sub_pattern_transform (vinfo
, stmt_vinfo
, lhs
, ops
);
4735 gimple
*stmt
= vect_recog_build_binary_gimple_stmt (vinfo
, stmt_vinfo
,
4736 IFN_SAT_SUB
, type_out
,
4737 lhs
, ops
[0], ops
[1]);
4740 vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt
);
4749 * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple:
4750 * overflow_5 = x_4(D) > 4294967295;
4751 * _1 = (unsigned int) x_4(D);
4752 * _2 = (unsigned int) overflow_5;
 * And then simplified to
 *   _6 = .SAT_TRUNC (x_4(D));
 */
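/* Illustrative source-level equivalent (an assumed example, not from this
   file), truncating 64 bits to 32 bits with saturation:

     unsigned int sat_trunc (unsigned long long x)
     {
       return x > 0xffffffffULL ? 0xffffffffU : (unsigned int) x;
     }
*/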
4761 vect_recog_sat_trunc_pattern (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
4764 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
4766 if (!is_gimple_assign (last_stmt
))
4770 tree lhs
= gimple_assign_lhs (last_stmt
);
4771 tree otype
= TREE_TYPE (lhs
);
4773 if (gimple_unsigned_integer_sat_trunc (lhs
, ops
, NULL
)
4774 && type_has_mode_precision_p (otype
))
4776 tree itype
= TREE_TYPE (ops
[0]);
4777 tree v_itype
= get_vectype_for_scalar_type (vinfo
, itype
);
4778 tree v_otype
= get_vectype_for_scalar_type (vinfo
, otype
);
4779 internal_fn fn
= IFN_SAT_TRUNC
;
4781 if (v_itype
!= NULL_TREE
&& v_otype
!= NULL_TREE
4782 && direct_internal_fn_supported_p (fn
, tree_pair (v_otype
, v_itype
),
4785 gcall
*call
= gimple_build_call_internal (fn
, 1, ops
[0]);
4786 tree out_ssa
= vect_recog_temp_ssa_var (otype
, NULL
);
4788 gimple_call_set_lhs (call
, out_ssa
);
4789 gimple_call_set_nothrow (call
, /* nothrow_p */ false);
4790 gimple_set_location (call
, gimple_location (last_stmt
));
4792 *type_out
= v_otype
;
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMT_VINFO: The stmt from which the pattern search begins,
    i.e. the division stmt.  If N is a power of two constant and the
    type is signed, S1 is replaced by:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

    S4 is replaced, if N is a power of two constant and the type is
    signed, by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
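/* Worked instance of the power-of-two case above (assumed example with
   N == 8 and 32-bit signed int):

     a = b / 8;

   becomes, branch-free:

     y = b < 0 ? 7 : 0;   // S3
     x = b + y;           // S2
     a = x >> 3;          // S1'

   e.g. b == -9 gives y == 7, x == -2, a == -1, matching truncating
   division.  */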
4840 vect_recog_divmod_pattern (vec_info
*vinfo
,
4841 stmt_vec_info stmt_vinfo
, tree
*type_out
)
4843 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4844 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
4845 gimple
*pattern_stmt
, *def_stmt
;
4846 enum tree_code rhs_code
;
4851 if (!is_gimple_assign (last_stmt
))
4854 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4857 case TRUNC_DIV_EXPR
:
4858 case EXACT_DIV_EXPR
:
4859 case TRUNC_MOD_EXPR
:
4865 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4866 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4867 itype
= TREE_TYPE (oprnd0
);
4868 if (TREE_CODE (oprnd0
) != SSA_NAME
4869 || TREE_CODE (oprnd1
) != INTEGER_CST
4870 || TREE_CODE (itype
) != INTEGER_TYPE
4871 || !type_has_mode_precision_p (itype
))
4874 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
4875 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
4876 if (vectype
== NULL_TREE
)
4879 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
4881 /* If the target can handle vectorized division or modulo natively,
4882 don't attempt to optimize this, since native division is likely
4883 to give smaller code. */
4884 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
4885 if (optab
!= unknown_optab
)
4887 machine_mode vec_mode
= TYPE_MODE (vectype
);
4888 int icode
= (int) optab_handler (optab
, vec_mode
);
4889 if (icode
!= CODE_FOR_nothing
)
4894 prec
= TYPE_PRECISION (itype
);
4895 if (integer_pow2p (oprnd1
))
4897 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
4900 /* Pattern detected. */
4901 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
4903 *type_out
= vectype
;
4905 /* Check if the target supports this internal function. */
4906 internal_fn ifn
= IFN_DIV_POW2
;
4907 if (direct_internal_fn_supported_p (ifn
, vectype
, OPTIMIZE_FOR_SPEED
))
4909 tree shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
4911 tree var_div
= vect_recog_temp_ssa_var (itype
, NULL
);
4912 gimple
*div_stmt
= gimple_build_call_internal (ifn
, 2, oprnd0
, shift
);
4913 gimple_call_set_lhs (div_stmt
, var_div
);
4915 if (rhs_code
== TRUNC_MOD_EXPR
)
4917 append_pattern_def_seq (vinfo
, stmt_vinfo
, div_stmt
);
4919 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4920 LSHIFT_EXPR
, var_div
, shift
);
4921 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4923 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4925 gimple_assign_lhs (def_stmt
));
4928 pattern_stmt
= div_stmt
;
4929 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
4931 return pattern_stmt
;
4934 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
4935 build_int_cst (itype
, 0));
4936 if (rhs_code
== TRUNC_DIV_EXPR
4937 || rhs_code
== EXACT_DIV_EXPR
)
4939 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
4942 = gimple_build_assign (var
, COND_EXPR
, cond
,
4943 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
4944 build_int_cst (itype
, 1)),
4945 build_int_cst (itype
, 0));
4946 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4947 var
= vect_recog_temp_ssa_var (itype
, NULL
);
4949 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
4950 gimple_assign_lhs (def_stmt
));
4951 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4953 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
4955 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4956 RSHIFT_EXPR
, var
, shift
);
4961 if (compare_tree_int (oprnd1
, 2) == 0)
4963 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
4964 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
4965 build_int_cst (itype
, 1),
4966 build_int_cst (itype
, 0));
4967 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4972 = build_nonstandard_integer_type (prec
, 1);
4973 tree vecutype
= get_vectype_for_scalar_type (vinfo
, utype
);
4975 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
4976 - tree_log2 (oprnd1
));
4977 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
4979 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
4980 build_int_cst (utype
, -1),
4981 build_int_cst (utype
, 0));
4982 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
4983 var
= vect_recog_temp_ssa_var (utype
, NULL
);
4984 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
4985 gimple_assign_lhs (def_stmt
),
4987 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
4988 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
4990 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
4991 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4994 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4995 PLUS_EXPR
, oprnd0
, signmask
);
4996 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4998 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4999 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
5000 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
5001 build_int_cst (itype
, 1)));
5002 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5005 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5006 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
5010 return pattern_stmt
;
5013 if ((cst
= uniform_integer_cst_p (oprnd1
))
5014 && TYPE_UNSIGNED (itype
)
5015 && rhs_code
== TRUNC_DIV_EXPR
5017 && targetm
.vectorize
.preferred_div_as_shifts_over_mult (vectype
))
5019 /* We can use the relationship:
5021 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
5023 to optimize cases where N+1 is a power of 2, and where // (N+1)
5024 is therefore a shift right. When operating in modes that are
5025 multiples of a byte in size, there are two cases:
5027 (1) N(N+3) is not representable, in which case the question
5028 becomes whether the replacement expression overflows.
5029 It is enough to test that x+N+2 does not overflow,
5030 i.e. that x < MAX-(N+1).
5032 (2) N(N+3) is representable, in which case it is the (only)
5033 bound that we need to check.
5035 ??? For now we just handle the case where // (N+1) is a shift
5036 right by half the precision, since some architectures can
5037 optimize the associated addition and shift combinations
5038 into single instructions. */
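/* Numeric illustration (assumed example): with 16-bit x and N == 255,
   so N + 1 == 256 and both divisions become shifts by 8:

     x / 255  ==  (((x + 257) >> 8) + x) >> 8      for 0 <= x < 255 * 258

   e.g. x == 510:  ((767 >> 8) + 510) >> 8  ==  (2 + 510) >> 8  ==  2.  */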
5040 auto wcst
= wi::to_wide (cst
);
5041 int pow
= wi::exact_log2 (wcst
+ 1);
5042 if (pow
== prec
/ 2)
5044 gimple
*stmt
= SSA_NAME_DEF_STMT (oprnd0
);
5046 gimple_ranger ranger
;
5049 /* Check that no overflow will occur. If we don't have range
5050 information we can't perform the optimization. */
5052 if (ranger
.range_of_expr (r
, oprnd0
, stmt
) && !r
.undefined_p ())
5054 wide_int max
= r
.upper_bound ();
5055 wide_int one
= wi::shwi (1, prec
);
5056 wide_int adder
= wi::add (one
, wi::lshift (one
, pow
));
5057 wi::overflow_type ovf
;
5058 wi::add (max
, adder
, UNSIGNED
, &ovf
);
5059 if (ovf
== wi::OVF_NONE
)
5061 *type_out
= vectype
;
5062 tree tadder
= wide_int_to_tree (itype
, adder
);
5063 tree rshift
= wide_int_to_tree (itype
, pow
);
5065 tree new_lhs1
= vect_recog_temp_ssa_var (itype
, NULL
);
5067 = gimple_build_assign (new_lhs1
, PLUS_EXPR
, oprnd0
, tadder
);
5068 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
5070 tree new_lhs2
= vect_recog_temp_ssa_var (itype
, NULL
);
5071 patt1
= gimple_build_assign (new_lhs2
, RSHIFT_EXPR
, new_lhs1
,
5073 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
5075 tree new_lhs3
= vect_recog_temp_ssa_var (itype
, NULL
);
5076 patt1
= gimple_build_assign (new_lhs3
, PLUS_EXPR
, new_lhs2
,
5078 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
5080 tree new_lhs4
= vect_recog_temp_ssa_var (itype
, NULL
);
5081 pattern_stmt
= gimple_build_assign (new_lhs4
, RSHIFT_EXPR
,
5084 return pattern_stmt
;
5090 if (prec
> HOST_BITS_PER_WIDE_INT
5091 || integer_zerop (oprnd1
))
5094 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
5097 if (TYPE_UNSIGNED (itype
))
5099 unsigned HOST_WIDE_INT mh
, ml
;
5100 int pre_shift
, post_shift
;
5101 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
5102 & GET_MODE_MASK (itype_mode
));
5103 tree t1
, t2
, t3
, t4
;
5105 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
5106 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
5109 /* Find a suitable multiplier and right shift count instead of
5110 directly dividing by D. */
5111 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
);
5113 /* If the suggested multiplier is more than PREC bits, we can do better
5114 for even divisors, using an initial right shift. */
5115 if (mh
!= 0 && (d
& 1) == 0)
5117 pre_shift
= ctz_or_zero (d
);
5118 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
5127 if (post_shift
- 1 >= prec
)
/* t1 = oprnd0 h* ml;
   t2 = oprnd0 - t1;
   t3 = t2 >> 1;
   t4 = t1 + t3;
   q = t4 >> (post_shift - 1);  */
5135 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
5136 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
5137 build_int_cst (itype
, ml
));
5138 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5140 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
5142 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
5143 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5145 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
5147 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
5148 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5150 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
5152 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
5154 if (post_shift
!= 1)
5156 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5158 q
= vect_recog_temp_ssa_var (itype
, NULL
);
5160 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
5161 build_int_cst (itype
, post_shift
- 1));
5166 pattern_stmt
= def_stmt
;
5171 if (pre_shift
>= prec
|| post_shift
>= prec
)
/* t1 = oprnd0 >> pre_shift;
   t2 = t1 h* ml;
   q = t2 >> post_shift;  */
5179 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
5181 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
5182 build_int_cst (NULL
, pre_shift
));
5183 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5188 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
5189 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
5190 build_int_cst (itype
, ml
));
5194 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5196 q
= vect_recog_temp_ssa_var (itype
, NULL
);
5198 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
5199 build_int_cst (itype
, post_shift
));
5204 pattern_stmt
= def_stmt
;
5209 unsigned HOST_WIDE_INT ml
;
5211 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
5212 unsigned HOST_WIDE_INT abs_d
;
5214 tree t1
, t2
, t3
, t4
;
5216 /* Give up for -1. */
5220 /* Since d might be INT_MIN, we have to cast to
5221 unsigned HOST_WIDE_INT before negating to avoid
5222 undefined signed overflow. */
5224 ? (unsigned HOST_WIDE_INT
) d
5225 : - (unsigned HOST_WIDE_INT
) d
);
5227 /* n rem d = n rem -d */
5228 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
5231 oprnd1
= build_int_cst (itype
, abs_d
);
5233 if (HOST_BITS_PER_WIDE_INT
>= prec
5234 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
5235 /* This case is not handled correctly below. */
5238 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
);
5239 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
5242 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
5244 if (post_shift
>= prec
)
5247 /* t1 = oprnd0 h* ml; */
5248 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
5249 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
5250 build_int_cst (itype
, ml
));
5254 /* t2 = t1 + oprnd0; */
5255 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5256 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
5257 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
5264 /* t3 = t2 >> post_shift; */
5265 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5266 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
5267 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
5268 build_int_cst (itype
, post_shift
));
5275 get_range_query (cfun
)->range_of_expr (r
, oprnd0
);
5276 if (!r
.varying_p () && !r
.undefined_p ())
5278 if (!wi::neg_p (r
.lower_bound (), TYPE_SIGN (itype
)))
5280 else if (wi::neg_p (r
.upper_bound (), TYPE_SIGN (itype
)))
5284 if (msb
== 0 && d
>= 0)
5288 pattern_stmt
= def_stmt
;
/* t4 = oprnd0 >> (prec - 1);
   or if we know from VRP that oprnd0 >= 0
   t4 = 0;
   or if we know from VRP that oprnd0 < 0
   t4 = -1;  */
5297 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5298 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
5300 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
5301 build_int_cst (itype
, msb
));
5303 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
5304 build_int_cst (itype
, prec
- 1));
5305 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5307 /* q = t3 - t4; or q = t4 - t3; */
5308 q
= vect_recog_temp_ssa_var (itype
, NULL
);
5309 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
5314 if (rhs_code
== TRUNC_MOD_EXPR
)
/* We divided.  Now finish by:
   t1 = q * oprnd1;
   r = oprnd0 - t1;  */
5321 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
);
5323 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
5324 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
5325 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5327 r
= vect_recog_temp_ssa_var (itype
, NULL
);
5328 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
5331 /* Pattern detected. */
5332 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
5334 *type_out
= vectype
;
5335 return pattern_stmt
;
/* Detects pattern with a modulo operation (S1) where both arguments
   are variables of integral type.
   The statement is replaced by division, multiplication, and subtraction.
   The last statement (S4) is returned.
   Example:
   S1 z_t = a_t % b_t;

   is replaced by
   S2 x_t = a_t / b_t;
   S3 y_t = x_t * b_t;
   S4 z_t = a_t - y_t;  */
5352 vect_recog_mod_var_pattern (vec_info
*vinfo
,
5353 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5355 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
5356 tree oprnd0
, oprnd1
, vectype
, itype
;
5357 gimple
*pattern_stmt
, *def_stmt
;
5358 enum tree_code rhs_code
;
5360 if (!is_gimple_assign (last_stmt
))
5363 rhs_code
= gimple_assign_rhs_code (last_stmt
);
5364 if (rhs_code
!= TRUNC_MOD_EXPR
)
5367 oprnd0
= gimple_assign_rhs1 (last_stmt
);
5368 oprnd1
= gimple_assign_rhs2 (last_stmt
);
5369 itype
= TREE_TYPE (oprnd0
);
5370 if (TREE_CODE (oprnd0
) != SSA_NAME
5371 || TREE_CODE (oprnd1
) != SSA_NAME
5372 || TREE_CODE (itype
) != INTEGER_TYPE
)
5375 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
5378 || target_has_vecop_for_code (TRUNC_MOD_EXPR
, vectype
)
5379 || !target_has_vecop_for_code (TRUNC_DIV_EXPR
, vectype
)
5380 || !target_has_vecop_for_code (MULT_EXPR
, vectype
)
5381 || !target_has_vecop_for_code (MINUS_EXPR
, vectype
))
5385 q
= vect_recog_temp_ssa_var (itype
, NULL
);
5386 def_stmt
= gimple_build_assign (q
, TRUNC_DIV_EXPR
, oprnd0
, oprnd1
);
5387 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vectype
);
5389 tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
5390 def_stmt
= gimple_build_assign (tmp
, MULT_EXPR
, q
, oprnd1
);
5391 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vectype
);
5393 r
= vect_recog_temp_ssa_var (itype
, NULL
);
5394 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, tmp
);
5396 /* Pattern detected. */
5397 *type_out
= vectype
;
5398 vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt
);
5400 return pattern_stmt
;
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has a different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
     Additionally a def_stmt is added.

     a_it = x_t CMP y_t ? b_it : c_it;
     a_T = (TYPE) a_it;  */
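/* Illustrative input (an assumed example, not from this file):

     short x, y;
     int a;
     ...
     a = x < y ? 17 : 42;

   The comparison is done on 16-bit values, so the COND_EXPR is first
   evaluated in a 16-bit integer type and only then widened to int, letting
   the comparison and the selection use the same vector shape.  */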
5432 vect_recog_mixed_size_cond_pattern (vec_info
*vinfo
,
5433 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5435 gimple
*last_stmt
= stmt_vinfo
->stmt
;
5436 tree cond_expr
, then_clause
, else_clause
;
5437 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
5438 gimple
*pattern_stmt
, *def_stmt
;
5439 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
5440 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
5442 tree comp_scalar_type
;
5444 if (!is_gimple_assign (last_stmt
)
5445 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
5446 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
5449 cond_expr
= gimple_assign_rhs1 (last_stmt
);
5450 then_clause
= gimple_assign_rhs2 (last_stmt
);
5451 else_clause
= gimple_assign_rhs3 (last_stmt
);
5453 if (!COMPARISON_CLASS_P (cond_expr
))
5456 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
5457 comp_vectype
= get_vectype_for_scalar_type (vinfo
, comp_scalar_type
);
5458 if (comp_vectype
== NULL_TREE
)
5461 type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
5462 if (types_compatible_p (type
, comp_scalar_type
)
5463 || ((TREE_CODE (then_clause
) != INTEGER_CST
5464 || TREE_CODE (else_clause
) != INTEGER_CST
)
5465 && !INTEGRAL_TYPE_P (comp_scalar_type
))
5466 || !INTEGRAL_TYPE_P (type
))
5469 if ((TREE_CODE (then_clause
) != INTEGER_CST
5470 && !type_conversion_p (vinfo
, then_clause
, false,
5471 &orig_type0
, &def_stmt0
, &promotion
))
5472 || (TREE_CODE (else_clause
) != INTEGER_CST
5473 && !type_conversion_p (vinfo
, else_clause
, false,
5474 &orig_type1
, &def_stmt1
, &promotion
)))
5477 if (orig_type0
&& orig_type1
5478 && !types_compatible_p (orig_type0
, orig_type1
))
5483 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
5485 then_clause
= gimple_assign_rhs1 (def_stmt0
);
5491 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
5493 else_clause
= gimple_assign_rhs1 (def_stmt1
);
5498 HOST_WIDE_INT cmp_mode_size
5499 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
5501 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
5502 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
5505 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
5506 if (vectype
== NULL_TREE
)
5509 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
5512 if (itype
== NULL_TREE
)
5513 itype
= build_nonstandard_integer_type (cmp_mode_size
,
5514 TYPE_UNSIGNED (type
));
5516 if (itype
== NULL_TREE
5517 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
5520 vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
5521 if (vecitype
== NULL_TREE
)
5524 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
5527 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
5529 if ((TREE_CODE (then_clause
) == INTEGER_CST
5530 && !int_fits_type_p (then_clause
, itype
))
5531 || (TREE_CODE (else_clause
) == INTEGER_CST
5532 && !int_fits_type_p (else_clause
, itype
)))
5536 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5537 COND_EXPR
, unshare_expr (cond_expr
),
5538 fold_convert (itype
, then_clause
),
5539 fold_convert (itype
, else_clause
));
5540 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
5541 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
5543 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecitype
);
5544 *type_out
= vectype
;
5546 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
5548 return pattern_stmt
;
5552 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5553 true if bool VAR can and should be optimized that way. Assume it shouldn't
5554 in case it's a result of a comparison which can be directly vectorized into
a vector comparison.  Fills in STMTS with all stmts visited during the
walk.  */
5559 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
5562 enum tree_code rhs_code
;
5564 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
5568 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
5572 if (stmts
.contains (def_stmt
))
5575 rhs1
= gimple_assign_rhs1 (def_stmt
);
5576 rhs_code
= gimple_assign_rhs_code (def_stmt
);
5580 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
5585 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
5587 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
5592 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
5599 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
5600 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
5605 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
5607 tree vecitype
, comp_vectype
;
5609 /* If the comparison can throw, then is_gimple_condexpr will be
5610 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5611 if (stmt_could_throw_p (cfun
, def_stmt
))
5614 comp_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
));
5615 if (comp_vectype
== NULL_TREE
)
5618 tree mask_type
= get_mask_type_for_scalar_type (vinfo
,
5621 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
5624 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
5626 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
5628 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
5629 vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
5630 if (vecitype
== NULL_TREE
)
5634 vecitype
= comp_vectype
;
5635 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
5643 bool res
= stmts
.add (def_stmt
);
5644 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5651 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
5652 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
5653 pattern sequence. */
5656 adjust_bool_pattern_cast (vec_info
*vinfo
,
5657 tree type
, tree var
, stmt_vec_info stmt_info
)
5659 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
5661 append_pattern_def_seq (vinfo
, stmt_info
, cast_stmt
,
5662 get_vectype_for_scalar_type (vinfo
, type
));
5663 return gimple_assign_lhs (cast_stmt
);
5666 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5667 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5668 type, OUT_TYPE is the desired final integer type of the whole pattern.
5669 STMT_INFO is the info of the pattern root and is where pattern stmts should
5670 be associated with. DEFS is a map of pattern defs. */
5673 adjust_bool_pattern (vec_info
*vinfo
, tree var
, tree out_type
,
5674 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
5676 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
5677 enum tree_code rhs_code
, def_rhs_code
;
5678 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
5680 gimple
*pattern_stmt
, *def_stmt
;
5681 tree trueval
= NULL_TREE
;
5683 rhs1
= gimple_assign_rhs1 (stmt
);
5684 rhs2
= gimple_assign_rhs2 (stmt
);
5685 rhs_code
= gimple_assign_rhs_code (stmt
);
5686 loc
= gimple_location (stmt
);
5691 irhs1
= *defs
.get (rhs1
);
5692 itype
= TREE_TYPE (irhs1
);
5694 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5699 irhs1
= *defs
.get (rhs1
);
5700 itype
= TREE_TYPE (irhs1
);
5702 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5703 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
/* Try to optimize x = y & (a < b ? 1 : 0); into
   x = (a < b ? y : 0);

   E.g. for:
     bool a_b, b_b, c_b;
     TYPE d_T;

     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_T = (TYPE) c_b;

   we would normally emit:

     S1'  a_T = x1 CMP1 y1 ? 1 : 0;
     S2'  b_T = x2 CMP2 y2 ? 1 : 0;
     S3'  c_T = a_T & b_T;
     S4'  d_T = c_T;

   but we can save one stmt by using the
   result of one of the COND_EXPRs in the other COND_EXPR and leave
   the BIT_AND_EXPR stmt out:

     S1'  a_T = x1 CMP1 y1 ? 1 : 0;
     S3'  c_T = x2 CMP2 y2 ? a_T : 0;
     S4'  d_T = c_T;

   At least when VEC_COND_EXPR is implemented using masks
   cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
   computes the comparison masks and ands it, in one case with
   an all-ones vector, in the other case with a vector register.
   Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
   often more expensive.  */
5740 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
5741 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
5742 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
5744 irhs1
= *defs
.get (rhs1
);
5745 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
5746 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
5747 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
5749 rhs_code
= def_rhs_code
;
5751 rhs2
= gimple_assign_rhs2 (def_stmt
);
5756 irhs2
= *defs
.get (rhs2
);
5759 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
5760 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
5761 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
5763 irhs2
= *defs
.get (rhs2
);
5764 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
5765 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
5766 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
5768 rhs_code
= def_rhs_code
;
5770 rhs2
= gimple_assign_rhs2 (def_stmt
);
5775 irhs1
= *defs
.get (rhs1
);
5781 irhs1
= *defs
.get (rhs1
);
5782 irhs2
= *defs
.get (rhs2
);
5784 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
5785 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
5787 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
5788 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
5789 int out_prec
= TYPE_PRECISION (out_type
);
5790 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
5791 irhs2
= adjust_bool_pattern_cast (vinfo
, TREE_TYPE (irhs1
), irhs2
,
5793 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
5794 irhs1
= adjust_bool_pattern_cast (vinfo
, TREE_TYPE (irhs2
), irhs1
,
5798 irhs1
= adjust_bool_pattern_cast (vinfo
,
5799 out_type
, irhs1
, stmt_info
);
5800 irhs2
= adjust_bool_pattern_cast (vinfo
,
5801 out_type
, irhs2
, stmt_info
);
5804 itype
= TREE_TYPE (irhs1
);
5806 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5807 rhs_code
, irhs1
, irhs2
);
5812 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
5813 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
5814 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
5815 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
5816 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
5818 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
5820 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
5823 itype
= TREE_TYPE (rhs1
);
5824 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
5825 if (trueval
== NULL_TREE
)
5826 trueval
= build_int_cst (itype
, 1);
5828 gcc_checking_assert (useless_type_conversion_p (itype
,
5829 TREE_TYPE (trueval
)));
5831 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5832 COND_EXPR
, cond_expr
, trueval
,
5833 build_int_cst (itype
, 0));
  gimple_set_location (pattern_stmt, loc);
  append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
			  get_vectype_for_scalar_type (vinfo, itype));
  defs.put (var, gimple_assign_lhs (pattern_stmt));
}
/* Comparison function to qsort a vector of gimple stmts after UID.  */

static int
sort_after_uid (const void *p1, const void *p2)
{
  const gimple *stmt1 = *(const gimple * const *)p1;
  const gimple *stmt2 = *(const gimple * const *)p2;
  return gimple_uid (stmt1) - gimple_uid (stmt2);
}
/* Create pattern stmts for all stmts participating in the bool pattern
   specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
   OUT_TYPE.  Return the def of the pattern root.  */

static tree
adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
		   tree out_type, stmt_vec_info stmt_info)
{
  /* Gather original stmts in the bool pattern in their order of appearance
     in the IL.  */
  auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
  for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
       i != bool_stmt_set.end (); ++i)
    bool_stmts.quick_push (*i);
  bool_stmts.qsort (sort_after_uid);

  /* Now process them in that order, producing pattern stmts.  */
  hash_map <tree, tree> defs;
  for (unsigned i = 0; i < bool_stmts.length (); ++i)
    adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
			 out_type, stmt_info, defs);

  /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
  gimple *pattern_stmt
    = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  return gimple_assign_lhs (pattern_stmt);
}
/* Return the proper type for converting bool VAR into
   an integer value or NULL_TREE if no such type exists.
   The type is chosen so that the converted value has the
   same number of elements as VAR's vector type.  */

static tree
integer_type_for_mask (tree var, vec_info *vinfo)
{
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL_TREE;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
    return NULL_TREE;

  return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
}
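
/* For illustration (hypothetical values, not taken from real IL): if VAR is
   a boolean SSA name whose internal definition carries a mask precision of
   32 -- say it was produced by comparing two 32-bit ints -- then

     tree t = integer_type_for_mask (var, vinfo);

   yields a 32-bit unsigned integer type, so that a vector of the converted
   values has the same number of elements as VAR's mask vector.  */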
5899 /* Function vect_recog_gcond_pattern
5901 Try to find pattern like following:
5905 where operator 'op' is not != and convert it to an adjusted boolean pattern
5910 and set the mask type on MASK.
5914 * STMT_VINFO: The stmt at the end from which the pattern
5915 search begins, i.e. cast of a bool to
5920 * TYPE_OUT: The type of the output of this pattern.
5922 * Return value: A new stmt that will be used to replace the pattern. */
5925 vect_recog_gcond_pattern (vec_info
*vinfo
,
5926 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5928 /* Currently we only support this for loop vectorization and when multiple
5930 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5931 if (!loop_vinfo
|| !LOOP_VINFO_EARLY_BREAKS (loop_vinfo
))
5934 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
5936 if (!(cond
= dyn_cast
<gcond
*> (last_stmt
)))
5939 auto lhs
= gimple_cond_lhs (cond
);
5940 auto rhs
= gimple_cond_rhs (cond
);
5941 auto code
= gimple_cond_code (cond
);
5943 tree scalar_type
= TREE_TYPE (lhs
);
5944 if (VECTOR_TYPE_P (scalar_type
))
5949 && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
5952 tree vecitype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5953 if (vecitype
== NULL_TREE
)
5956 tree vectype
= truth_type_for (vecitype
);
5958 tree new_lhs
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
5959 gimple
*new_stmt
= gimple_build_assign (new_lhs
, code
, lhs
, rhs
);
5960 append_pattern_def_seq (vinfo
, stmt_vinfo
, new_stmt
, vectype
, scalar_type
);
5962 gimple
*pattern_stmt
5963 = gimple_build_cond (NE_EXPR
, new_lhs
,
5964 build_int_cst (TREE_TYPE (new_lhs
), 0),
5965 NULL_TREE
, NULL_TREE
);
5966 *type_out
= vectype
;
5967 vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt
);
5968 return pattern_stmt
;
5971 /* Function vect_recog_bool_pattern
5973 Try to find pattern like following:
5975 bool a_b, b_b, c_b, d_b, e_b;
5978 S1 a_b = x1 CMP1 y1;
5979 S2 b_b = x2 CMP2 y2;
5981 S4 d_b = x3 CMP3 y3;
5983 S6 f_T = (TYPE) e_b;
5985 where type 'TYPE' is an integral type. Or a similar pattern
5988 S6 f_Y = e_b ? r_Y : s_Y;
5990 as results from if-conversion of a complex condition.
5994 * STMT_VINFO: The stmt at the end from which the pattern
5995 search begins, i.e. cast of a bool to
6000 * TYPE_OUT: The type of the output of this pattern.
6002 * Return value: A new stmt that will be used to replace the pattern.
   Assuming size of TYPE is the same as size of all comparisons
   (otherwise some casts would be added where needed), for the above
   sequence we create the following related pattern stmts:
6007 S1' a_T = x1 CMP1 y1 ? 1 : 0;
6008 S3' c_T = x2 CMP2 y2 ? a_T : 0;
6009 S4' d_T = x3 CMP3 y3 ? 1 : 0;
6010 S5' e_T = c_T | d_T;
6013 Instead of the above S3' we could emit:
6014 S2' b_T = x2 CMP2 y2 ? 1 : 0;
6015 S3' c_T = a_T | b_T;
6016 but the above is more efficient. */
6019 vect_recog_bool_pattern (vec_info
*vinfo
,
6020 stmt_vec_info stmt_vinfo
, tree
*type_out
)
6022 gimple
*last_stmt
= stmt_vinfo
->stmt
;
6023 enum tree_code rhs_code
;
6024 tree var
, lhs
, rhs
, vectype
;
6025 gimple
*pattern_stmt
;
6027 if (!is_gimple_assign (last_stmt
))
6030 var
= gimple_assign_rhs1 (last_stmt
);
6031 lhs
= gimple_assign_lhs (last_stmt
);
6032 rhs_code
= gimple_assign_rhs_code (last_stmt
);
6034 if (rhs_code
== VIEW_CONVERT_EXPR
)
6035 var
= TREE_OPERAND (var
, 0);
6037 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
6040 hash_set
<gimple
*> bool_stmts
;
6042 if (CONVERT_EXPR_CODE_P (rhs_code
)
6043 || rhs_code
== VIEW_CONVERT_EXPR
)
6045 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
6046 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
6048 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
6050 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
6052 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
,
6053 TREE_TYPE (lhs
), stmt_vinfo
);
6054 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6055 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
6056 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
6059 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
6063 tree type
= integer_type_for_mask (var
, vinfo
);
6064 tree cst0
, cst1
, tmp
;
	  /* We may directly use cond with narrowed type to avoid
	     multiple cond exprs with following result packing and
	     perform single cond with packed mask instead.  In case
	     of widening we better make cond first and then extract
	     results.  */
6074 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
6075 type
= TREE_TYPE (lhs
);
6077 cst0
= build_int_cst (type
, 0);
6078 cst1
= build_int_cst (type
, 1);
6079 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
6080 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
6082 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
6084 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
6085 append_pattern_def_seq (vinfo
, stmt_vinfo
,
6086 pattern_stmt
, new_vectype
);
6088 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6089 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
6093 *type_out
= vectype
;
6094 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
6096 return pattern_stmt
;
6098 else if (rhs_code
== COND_EXPR
6099 && TREE_CODE (var
) == SSA_NAME
)
6101 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
6102 if (vectype
== NULL_TREE
)
6105 /* Build a scalar type for the boolean result that when
6106 vectorized matches the vector type of the result in
6107 size and number of elements. */
6109 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
6110 TYPE_VECTOR_SUBPARTS (vectype
));
6113 = build_nonstandard_integer_type (prec
,
6114 TYPE_UNSIGNED (TREE_TYPE (var
)));
6115 if (get_vectype_for_scalar_type (vinfo
, type
) == NULL_TREE
)
6118 enum vect_def_type dt
;
6119 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
6120 var
= adjust_bool_stmts (vinfo
, bool_stmts
, type
, stmt_vinfo
);
6121 else if (integer_type_for_mask (var
, vinfo
))
6123 else if (TREE_CODE (TREE_TYPE (var
)) == BOOLEAN_TYPE
6124 && vect_is_simple_use (var
, vinfo
, &dt
)
6125 && (dt
== vect_external_def
6126 || dt
== vect_constant_def
))
6128 /* If the condition is already a boolean then manually convert it to a
6129 mask of the given integer type but don't set a vectype. */
6130 tree lhs_ivar
= vect_recog_temp_ssa_var (type
, NULL
);
6131 pattern_stmt
= gimple_build_assign (lhs_ivar
, COND_EXPR
, var
,
6132 build_all_ones_cst (type
),
6133 build_zero_cst (type
));
6134 append_inv_pattern_def_seq (vinfo
, pattern_stmt
);
6138 tree lhs_var
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
6139 pattern_stmt
= gimple_build_assign (lhs_var
, NE_EXPR
, var
,
6140 build_zero_cst (TREE_TYPE (var
)));
6142 tree new_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (var
));
6146 new_vectype
= truth_type_for (new_vectype
);
6147 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, new_vectype
,
6150 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6152 = gimple_build_assign (lhs
, COND_EXPR
, lhs_var
,
6153 gimple_assign_rhs2 (last_stmt
),
6154 gimple_assign_rhs3 (last_stmt
));
6155 *type_out
= vectype
;
6156 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
6158 return pattern_stmt
;
6160 else if (rhs_code
== SSA_NAME
6161 && STMT_VINFO_DATA_REF (stmt_vinfo
))
6163 stmt_vec_info pattern_stmt_info
;
6164 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
6165 if (!vectype
|| !VECTOR_MODE_P (TYPE_MODE (vectype
)))
6168 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
6169 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
,
6170 TREE_TYPE (vectype
), stmt_vinfo
);
6173 tree type
= integer_type_for_mask (var
, vinfo
);
6174 tree cst0
, cst1
, new_vectype
;
6179 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
6180 type
= TREE_TYPE (vectype
);
6182 cst0
= build_int_cst (type
, 0);
6183 cst1
= build_int_cst (type
, 1);
6184 new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
6186 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
6187 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
6188 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, new_vectype
);
6191 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
6192 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
6194 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6195 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
6196 append_pattern_def_seq (vinfo
, stmt_vinfo
, cast_stmt
);
6199 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
6200 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
6201 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
6202 *type_out
= vectype
;
6203 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
6205 return pattern_stmt
;
/* A helper for vect_recog_mask_conversion_pattern.  Build
   conversion of MASK to a type suitable for masking VECTYPE.
   Built statement gets required vectype and is appended to
   a pattern sequence of STMT_VINFO.

   Return converted mask.  */

static tree
build_mask_conversion (vec_info *vinfo,
		       tree mask, tree vectype, stmt_vec_info stmt_vinfo)
{
  gimple *stmt;
  tree masktype, tmp;

  masktype = truth_type_for (vectype);
  tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
  stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
  append_pattern_def_seq (vinfo, stmt_vinfo,
			  stmt, masktype, TREE_TYPE (vectype));

  return tmp;
}
6236 /* Function vect_recog_mask_conversion_pattern
6238 Try to find statements which require boolean type
   conversion.  Additional conversion statements are
6240 added to handle such cases. For example:
6250 S4 c_1 = m_3 ? c_2 : c_3;
6252 Will be transformed into:
6256 S3'' m_2' = (_Bool[bitsize=32])m_2
6257 S3' m_3' = m_1 & m_2';
6258 S4'' m_3'' = (_Bool[bitsize=8])m_3'
6259 S4' c_1' = m_3'' ? c_2 : c_3; */
6262 vect_recog_mask_conversion_pattern (vec_info
*vinfo
,
6263 stmt_vec_info stmt_vinfo
, tree
*type_out
)
6265 gimple
*last_stmt
= stmt_vinfo
->stmt
;
6266 enum tree_code rhs_code
;
6267 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
6268 tree vectype1
, vectype2
;
6269 stmt_vec_info pattern_stmt_info
;
6270 tree rhs1_op0
= NULL_TREE
, rhs1_op1
= NULL_TREE
;
6271 tree rhs1_op0_type
= NULL_TREE
, rhs1_op1_type
= NULL_TREE
;
6273 /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
6275 if (is_gimple_call (last_stmt
)
6276 && gimple_call_internal_p (last_stmt
))
6278 gcall
*pattern_stmt
;
6280 internal_fn ifn
= gimple_call_internal_fn (last_stmt
);
6281 int mask_argno
= internal_fn_mask_index (ifn
);
6285 bool store_p
= internal_store_fn_p (ifn
);
6286 bool load_p
= internal_store_fn_p (ifn
);
6289 int rhs_index
= internal_fn_stored_value_index (ifn
);
6290 tree rhs
= gimple_call_arg (last_stmt
, rhs_index
);
6291 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs
));
6295 lhs
= gimple_call_lhs (last_stmt
);
6298 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
6304 tree mask_arg
= gimple_call_arg (last_stmt
, mask_argno
);
6305 tree mask_arg_type
= integer_type_for_mask (mask_arg
, vinfo
);
6308 vectype2
= get_mask_type_for_scalar_type (vinfo
, mask_arg_type
);
6311 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
6312 TYPE_VECTOR_SUBPARTS (vectype2
)))
6315 else if (store_p
|| load_p
)
6318 tmp
= build_mask_conversion (vinfo
, mask_arg
, vectype1
, stmt_vinfo
);
6320 auto_vec
<tree
, 8> args
;
6321 unsigned int nargs
= gimple_call_num_args (last_stmt
);
6322 args
.safe_grow (nargs
, true);
6323 for (unsigned int i
= 0; i
< nargs
; ++i
)
6324 args
[i
] = ((int) i
== mask_argno
6326 : gimple_call_arg (last_stmt
, i
));
6327 pattern_stmt
= gimple_build_call_internal_vec (ifn
, args
);
6331 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6332 gimple_call_set_lhs (pattern_stmt
, lhs
);
6335 if (load_p
|| store_p
)
6336 gimple_call_set_nothrow (pattern_stmt
, true);
6338 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
6339 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
6340 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
6342 *type_out
= vectype1
;
6343 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
6345 return pattern_stmt
;
6348 if (!is_gimple_assign (last_stmt
))
6351 gimple
*pattern_stmt
;
6352 lhs
= gimple_assign_lhs (last_stmt
);
6353 rhs1
= gimple_assign_rhs1 (last_stmt
);
6354 rhs_code
= gimple_assign_rhs_code (last_stmt
);
6356 /* Check for cond expression requiring mask conversion. */
6357 if (rhs_code
== COND_EXPR
)
6359 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
6361 if (TREE_CODE (rhs1
) == SSA_NAME
)
6363 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
6367 else if (COMPARISON_CLASS_P (rhs1
))
6369 /* Check whether we're comparing scalar booleans and (if so)
6370 whether a better mask type exists than the mask associated
6371 with boolean-sized elements. This avoids unnecessary packs
6372 and unpacks if the booleans are set from comparisons of
6373 wider types. E.g. in:
	       int x1, x2, x3, x4, y1, y2;
6377 bool b1 = (x1 == x2);
6378 bool b2 = (x3 == x4);
6379 ... = b1 == b2 ? y1 : y2;
6381 it is better for b1 and b2 to use the mask type associated
	     with int elements rather than bool (byte) elements.  */
6383 rhs1_op0
= TREE_OPERAND (rhs1
, 0);
6384 rhs1_op1
= TREE_OPERAND (rhs1
, 1);
6385 if (!rhs1_op0
|| !rhs1_op1
)
6387 rhs1_op0_type
= integer_type_for_mask (rhs1_op0
, vinfo
);
6388 rhs1_op1_type
= integer_type_for_mask (rhs1_op1
, vinfo
);
6391 rhs1_type
= TREE_TYPE (rhs1_op0
);
6392 else if (!rhs1_op1_type
)
6393 rhs1_type
= TREE_TYPE (rhs1_op1
);
6394 else if (TYPE_PRECISION (rhs1_op0_type
)
6395 != TYPE_PRECISION (rhs1_op1_type
))
6397 int tmp0
= (int) TYPE_PRECISION (rhs1_op0_type
)
6398 - (int) TYPE_PRECISION (TREE_TYPE (lhs
));
6399 int tmp1
= (int) TYPE_PRECISION (rhs1_op1_type
)
6400 - (int) TYPE_PRECISION (TREE_TYPE (lhs
));
6401 if ((tmp0
> 0 && tmp1
> 0) || (tmp0
< 0 && tmp1
< 0))
6403 if (abs (tmp0
) > abs (tmp1
))
6404 rhs1_type
= rhs1_op1_type
;
6406 rhs1_type
= rhs1_op0_type
;
6409 rhs1_type
= build_nonstandard_integer_type
6410 (TYPE_PRECISION (TREE_TYPE (lhs
)), 1);
6413 rhs1_type
= rhs1_op0_type
;
6418 vectype2
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
6420 if (!vectype1
|| !vectype2
)
6423 /* Continue if a conversion is needed. Also continue if we have
6424 a comparison whose vector type would normally be different from
6425 VECTYPE2 when considered in isolation. In that case we'll
6426 replace the comparison with an SSA name (so that we can record
6427 its vector type) and behave as though the comparison was an SSA
6428 name from the outset. */
6429 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
6430 TYPE_VECTOR_SUBPARTS (vectype2
))
6435 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
6436 in place, we can handle it in vectorizable_condition. This avoids
6437 unnecessary promotion stmts and increased vectorization factor. */
6438 if (COMPARISON_CLASS_P (rhs1
)
6439 && INTEGRAL_TYPE_P (rhs1_type
)
6440 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
6441 TYPE_VECTOR_SUBPARTS (vectype2
)))
6443 enum vect_def_type dt
;
6444 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), vinfo
, &dt
)
6445 && dt
== vect_external_def
6446 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), vinfo
, &dt
)
6447 && (dt
== vect_external_def
6448 || dt
== vect_constant_def
))
6450 tree wide_scalar_type
= build_nonstandard_integer_type
6451 (vector_element_bits (vectype1
), TYPE_UNSIGNED (rhs1_type
));
6452 tree vectype3
= get_vectype_for_scalar_type (vinfo
,
6454 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
6459 /* If rhs1 is a comparison we need to move it into a
6460 separate statement. */
6461 if (TREE_CODE (rhs1
) != SSA_NAME
)
6463 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
6465 && TYPE_PRECISION (rhs1_op0_type
) != TYPE_PRECISION (rhs1_type
))
6466 rhs1_op0
= build_mask_conversion (vinfo
, rhs1_op0
,
6467 vectype2
, stmt_vinfo
);
6469 && TYPE_PRECISION (rhs1_op1_type
) != TYPE_PRECISION (rhs1_type
))
6470 rhs1_op1
= build_mask_conversion (vinfo
, rhs1_op1
,
6471 vectype2
, stmt_vinfo
);
6472 pattern_stmt
= gimple_build_assign (tmp
, TREE_CODE (rhs1
),
6473 rhs1_op0
, rhs1_op1
);
6475 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vectype2
,
6479 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
6480 TYPE_VECTOR_SUBPARTS (vectype2
)))
6481 tmp
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
6485 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6486 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
6487 gimple_assign_rhs2 (last_stmt
),
6488 gimple_assign_rhs3 (last_stmt
));
6490 *type_out
= vectype1
;
6491 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
6493 return pattern_stmt
;
6496 /* Now check for binary boolean operations requiring conversion for
6498 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
6501 if (rhs_code
!= BIT_IOR_EXPR
6502 && rhs_code
!= BIT_XOR_EXPR
6503 && rhs_code
!= BIT_AND_EXPR
6504 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
6507 rhs2
= gimple_assign_rhs2 (last_stmt
);
6509 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
6510 rhs2_type
= integer_type_for_mask (rhs2
, vinfo
);
6512 if (!rhs1_type
|| !rhs2_type
6513 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
6516 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
6518 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
6521 rhs2
= build_mask_conversion (vinfo
, rhs2
, vectype1
, stmt_vinfo
);
6525 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs2_type
);
6528 rhs1
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
6531 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6532 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
6534 *type_out
= vectype1
;
6535 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
6537 return pattern_stmt
;
/* STMT_INFO is a load or store.  If the load or store is conditional, return
   the boolean condition under which it occurs, otherwise return null.  */

static tree
vect_get_load_store_mask (stmt_vec_info stmt_info)
{
  if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (def_assign));
      return NULL_TREE;
    }

  if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (def_call);
      int mask_index = internal_fn_mask_index (ifn);
      return gimple_call_arg (def_call, mask_index);
    }

  gcc_unreachable ();
}
/* Return MASK if MASK is suitable for masking an operation on vectors
   of type VECTYPE, otherwise convert it into such a form and return
   the result.  Associate any conversion statements with STMT_INFO's
   pattern.  */

static tree
vect_convert_mask_for_vectype (tree mask, tree vectype,
			       stmt_vec_info stmt_info, vec_info *vinfo)
{
  tree mask_type = integer_type_for_mask (mask, vinfo);
  if (mask_type)
    {
      tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
      if (mask_vectype
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
	mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
    }
  return mask;
}
/* Return the equivalent of:

     fold_convert (TYPE, VALUE)

   with the expectation that the operation will be vectorized.
   If new statements are needed, add them as pattern statements
   to STMT_INFO.  */

static tree
vect_add_conversion_to_pattern (vec_info *vinfo,
				tree type, tree value, stmt_vec_info stmt_info)
{
  if (useless_type_conversion_p (type, TREE_TYPE (value)))
    return value;

  tree new_value = vect_recog_temp_ssa_var (type, NULL);
  gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
  append_pattern_def_seq (vinfo, stmt_info, conversion,
			  get_vectype_for_scalar_type (vinfo, type));
  return new_value;
}
6605 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6606 internal function. Return the final statement on success and set
6607 *TYPE_OUT to the vector type being loaded or stored.
6609 This function only handles gathers and scatters that were recognized
6610 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
6613 vect_recog_gather_scatter_pattern (vec_info
*vinfo
,
6614 stmt_vec_info stmt_info
, tree
*type_out
)
6616 /* Currently we only support this for loop vectorization. */
6617 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6621 /* Make sure that we're looking at a gather load or scatter store. */
6622 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
6623 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6626 /* Get the boolean that controls whether the load or store happens.
6627 This is null if the operation is unconditional. */
6628 tree mask
= vect_get_load_store_mask (stmt_info
);
6630 /* Make sure that the target supports an appropriate internal
6631 function for the gather/scatter operation. */
6632 gather_scatter_info gs_info
;
6633 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, &gs_info
)
6634 || gs_info
.ifn
== IFN_LAST
)
6637 /* Convert the mask to the right form. */
6638 tree gs_vectype
= get_vectype_for_scalar_type (loop_vinfo
,
6639 gs_info
.element_type
);
6641 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, stmt_info
,
6643 else if (gs_info
.ifn
== IFN_MASK_SCATTER_STORE
6644 || gs_info
.ifn
== IFN_MASK_GATHER_LOAD
6645 || gs_info
.ifn
== IFN_MASK_LEN_SCATTER_STORE
6646 || gs_info
.ifn
== IFN_MASK_LEN_GATHER_LOAD
)
6647 mask
= build_int_cst (TREE_TYPE (truth_type_for (gs_vectype
)), -1);
6649 /* Get the invariant base and non-invariant offset, converting the
6650 latter to the same width as the vector elements. */
6651 tree base
= gs_info
.base
;
6652 tree offset_type
= TREE_TYPE (gs_info
.offset_vectype
);
6653 tree offset
= vect_add_conversion_to_pattern (vinfo
, offset_type
,
6654 gs_info
.offset
, stmt_info
);
6656 /* Build the new pattern statement. */
6657 tree scale
= size_int (gs_info
.scale
);
6658 gcall
*pattern_stmt
;
6659 if (DR_IS_READ (dr
))
6661 tree zero
= build_zero_cst (gs_info
.element_type
);
6663 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 5, base
,
6664 offset
, scale
, zero
, mask
);
6666 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
6667 offset
, scale
, zero
);
6668 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
6669 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
6673 tree rhs
= vect_get_store_rhs (stmt_info
);
6675 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 5,
6676 base
, offset
, scale
, rhs
,
6679 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4,
6680 base
, offset
, scale
, rhs
);
6682 gimple_call_set_nothrow (pattern_stmt
, true);
6684 /* Copy across relevant vectorization info and associate DR with the
6685 new pattern statement instead of the original statement. */
6686 stmt_vec_info pattern_stmt_info
= loop_vinfo
->add_stmt (pattern_stmt
);
6687 loop_vinfo
->move_dr (pattern_stmt_info
, stmt_info
);
6689 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6690 *type_out
= vectype
;
6691 vect_pattern_detected ("gather/scatter pattern", stmt_info
->stmt
);
6693 return pattern_stmt
;
/* Helper method of vect_recog_cond_store_pattern, checks to see if COND_ARG
   points to a load statement that reads the same data as that of
   STORE_VINFO.  */

static bool
vect_cond_store_pattern_same_ref (vec_info *vinfo,
				  stmt_vec_info store_vinfo, tree cond_arg)
{
  stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
  if (!load_stmt_vinfo
      || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
      || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
      || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
			  STMT_VINFO_DATA_REF (load_stmt_vinfo)))
    return false;

  return true;
}
6715 /* Function vect_recog_cond_store_pattern
6717 Try to find the following pattern:
6724 where the store of _3 happens on a conditional select on a value loaded
6725 from the same location. In such case we can elide the initial load if
6726 MASK_STORE is supported and instead only conditionally write out the result.
6728 The pattern produces for the above:
6731 .MASK_STORE (_3, c, t_20)
6735 * STMT_VINFO: The stmt from which the pattern search begins. In the
6736 example, when this function is called with _3 then the search begins.
6740 * TYPE_OUT: The type of the output of this pattern.
6742 * Return value: A new stmt that will be used to replace the sequence. */
6745 vect_recog_cond_store_pattern (vec_info
*vinfo
,
6746 stmt_vec_info stmt_vinfo
, tree
*type_out
)
6748 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6752 gimple
*store_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
  /* Needs to be a gimple store for which we have DR info.  */
6755 if (!STMT_VINFO_DATA_REF (stmt_vinfo
)
6756 || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo
))
6757 || !gimple_store_p (store_stmt
))
6760 tree st_rhs
= gimple_assign_rhs1 (store_stmt
);
6762 if (TREE_CODE (st_rhs
) != SSA_NAME
)
6765 auto cond_vinfo
= vinfo
->lookup_def (st_rhs
);
6767 /* If the condition isn't part of the loop then bool recog wouldn't have seen
6768 it and so this transformation may not be valid. */
6772 cond_vinfo
= vect_stmt_to_vectorize (cond_vinfo
);
6773 gassign
*cond_stmt
= dyn_cast
<gassign
*> (STMT_VINFO_STMT (cond_vinfo
));
6774 if (!cond_stmt
|| gimple_assign_rhs_code (cond_stmt
) != COND_EXPR
)
6777 /* Check if the else value matches the original loaded one. */
6778 bool invert
= false;
6779 tree cmp_ls
= gimple_arg (cond_stmt
, 0);
6780 if (TREE_CODE (cmp_ls
) != SSA_NAME
)
6783 tree cond_arg1
= gimple_arg (cond_stmt
, 1);
6784 tree cond_arg2
= gimple_arg (cond_stmt
, 2);
6786 if (!vect_cond_store_pattern_same_ref (vinfo
, stmt_vinfo
, cond_arg2
)
6787 && !(invert
= vect_cond_store_pattern_same_ref (vinfo
, stmt_vinfo
,
6791 vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt
);
6793 tree scalar_type
= TREE_TYPE (st_rhs
);
6794 if (VECTOR_TYPE_P (scalar_type
))
6797 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
6798 if (vectype
== NULL_TREE
)
6801 machine_mode mask_mode
;
6802 machine_mode vecmode
= TYPE_MODE (vectype
);
6803 if (!VECTOR_MODE_P (vecmode
)
6804 || targetm
.vectorize
.conditional_operation_is_expensive (IFN_MASK_STORE
)
6805 || !targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
6806 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, false))
6809 tree base
= DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo
));
6810 if (may_be_nonaddressable_p (base
))
6813 /* We need to use the false parameter of the conditional select. */
6814 tree cond_store_arg
= invert
? cond_arg2
: cond_arg1
;
6815 tree cond_load_arg
= invert
? cond_arg1
: cond_arg2
;
6816 gimple
*load_stmt
= SSA_NAME_DEF_STMT (cond_load_arg
);
  /* This is a rough estimation to check that there aren't any aliasing stores
     in between the load and store.  It's a bit strict, but for now it's good
     enough.  */
6821 if (gimple_vuse (load_stmt
) != gimple_vuse (store_stmt
))
6824 /* If we have to invert the condition, i.e. use the true argument rather than
6825 the false argument, we have to negate the mask. */
6828 tree var
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
6830 /* Invert the mask using ^ 1. */
6831 tree itype
= TREE_TYPE (cmp_ls
);
6832 gassign
*conv
= gimple_build_assign (var
, BIT_XOR_EXPR
, cmp_ls
,
6833 build_int_cst (itype
, 1));
6835 tree mask_vec_type
= get_mask_type_for_scalar_type (vinfo
, itype
);
6836 append_pattern_def_seq (vinfo
, stmt_vinfo
, conv
, mask_vec_type
, itype
);
6840 if (TREE_CODE (base
) != MEM_REF
)
6841 base
= build_fold_addr_expr (base
);
6843 tree ptr
= build_int_cst (reference_alias_ptr_type (base
),
6844 get_object_alignment (base
));
6846 /* Convert the mask to the right form. */
6847 tree mask
= vect_convert_mask_for_vectype (cmp_ls
, vectype
, stmt_vinfo
,
6851 = gimple_build_call_internal (IFN_MASK_STORE
, 4, base
, ptr
, mask
,
6853 gimple_set_location (call
, gimple_location (store_stmt
));
6855 /* Copy across relevant vectorization info and associate DR with the
6856 new pattern statement instead of the original statement. */
6857 stmt_vec_info pattern_stmt_info
= loop_vinfo
->add_stmt (call
);
6858 loop_vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
6860 *type_out
= vectype
;
/* Return true if TYPE is a non-boolean integer type.  These are the types
   that we want to consider for narrowing.  */

static bool
vect_narrowable_type_p (tree type)
{
  return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
}
/* Return true if the operation given by CODE can be truncated to N bits
   when only N bits of the output are needed.  This is only true if bit N+1
   of the inputs has no effect on the low N bits of the result.  */

static bool
vect_truncatable_operation_p (tree_code code)
{
  switch (code)
    {
    case NEGATE_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case BIT_NOT_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case COND_EXPR:
      return true;

    default:
      return false;
    }
}
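
/* Two illustrative data points for the property above (hypothetical values):
   addition is truncatable because
     ((a + b) & 0xff) == (((a & 0xff) + (b & 0xff)) & 0xff)
   holds for all a and b, so bits above bit 7 of the inputs never affect the
   low 8 bits of the result.  Division is not truncatable: 256 / 2 == 128,
   but (256 & 0xff) / 2 == 0, so clearing the high input bits changes the
   low output bits.  */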
/* Record that STMT_INFO could be changed from operating on TYPE to
   operating on a type with the precision and sign given by PRECISION
   and SIGN respectively.  PRECISION is an arbitrary bit precision;
   it might not be a whole number of bytes.  */

static void
vect_set_operation_type (stmt_vec_info stmt_info, tree type,
			 unsigned int precision, signop sign)
{
  /* Round the precision up to a whole number of bytes.  */
  precision = vect_element_precision (precision);
  if (precision < TYPE_PRECISION (type)
      && (!stmt_info->operation_precision
	  || stmt_info->operation_precision > precision))
    {
      stmt_info->operation_precision = precision;
      stmt_info->operation_sign = sign;
    }
}
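
/* A sketch of the intended effect (hypothetical numbers): if a 32-bit
   operation is found to need only 12 significant bits, the precision is
   first rounded up by vect_element_precision, so the recorded
   operation_precision becomes 16 rather than 12, and later analysis can
   then use a 16-bit vector element type for the statement.  */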
/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
   non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
   is an arbitrary bit precision; it might not be a whole number of bytes.  */

static void
vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
			      unsigned int min_input_precision)
{
  /* This operation in isolation only requires the inputs to have
     MIN_INPUT_PRECISION of precision.  However, that doesn't mean
     that MIN_INPUT_PRECISION is a natural precision for the chain
     as a whole.  E.g. consider something like:

	 unsigned short *x, *y;
	 *y = ((*x & 0xf0) >> 4) | (*y << 4);

     The right shift can be done on unsigned chars, and only requires the
     result of "*x & 0xf0" to be done on unsigned chars.  But taking that
     approach would mean turning a natural chain of single-vector unsigned
     short operations into one that truncates "*x" and then extends
     "(*x & 0xf0) >> 4", with two vectors for each unsigned short
     operation and one vector for each unsigned char operation.
     This would be a significant pessimization.

     Instead only propagate the maximum of this precision and the precision
     required by the users of the result.  This means that we don't pessimize
     the case above but continue to optimize things like:

	 unsigned char *y;
	 unsigned short *x;
	 *y = ((*x & 0xf0) >> 4) | (*y << 4);

     Here we would truncate two vectors of *x to a single vector of
     unsigned chars and use single-vector unsigned char operations for
     everything else, rather than doing two unsigned short copies of
     "(*x & 0xf0) >> 4" and then truncating the result.  */
  min_input_precision = MAX (min_input_precision,
			     stmt_info->min_output_precision);

  if (min_input_precision < TYPE_PRECISION (type)
      && (!stmt_info->min_input_precision
	  || stmt_info->min_input_precision > min_input_precision))
    stmt_info->min_input_precision = min_input_precision;
}
/* Subroutine of vect_determine_min_output_precision.  Return true if
   we can calculate a reduced number of output bits for STMT_INFO,
   whose result is LHS.  */

static bool
vect_determine_min_output_precision_1 (vec_info *vinfo,
				       stmt_vec_info stmt_info, tree lhs)
{
  /* Take the maximum precision required by users of the result.  */
  unsigned int precision = 0;
  imm_use_iterator iter;
  use_operand_p use;
  FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    {
      gimple *use_stmt = USE_STMT (use);
      if (is_gimple_debug (use_stmt))
	continue;
      stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
      if (!use_stmt_info || !use_stmt_info->min_input_precision)
	return false;
      /* The input precision recorded for COND_EXPRs applies only to the
	 "then" and "else" values.  */
      gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
      if (assign
	  && gimple_assign_rhs_code (assign) == COND_EXPR
	  && use->use != gimple_assign_rhs2_ptr (assign)
	  && use->use != gimple_assign_rhs3_ptr (assign))
	return false;
      precision = MAX (precision, use_stmt_info->min_input_precision);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "only the low %d bits of %T are significant\n",
		     precision, lhs);
  stmt_info->min_output_precision = precision;
  return true;
}
/* Calculate min_output_precision for STMT_INFO.  */

static void
vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  /* We're only interested in statements with a narrowable result.  */
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    return;

  if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
    stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
}
7018 /* Use range information to decide whether STMT (described by STMT_INFO)
7019 could be done in a narrower type. This is effectively a forward
7020 propagation, since it uses context-independent information that applies
7021 to all users of an SSA name. */
7024 vect_determine_precisions_from_range (stmt_vec_info stmt_info
, gassign
*stmt
)
7026 tree lhs
= gimple_assign_lhs (stmt
);
7027 if (!lhs
|| TREE_CODE (lhs
) != SSA_NAME
)
7030 tree type
= TREE_TYPE (lhs
);
7031 if (!vect_narrowable_type_p (type
))
7034 /* First see whether we have any useful range information for the result. */
7035 unsigned int precision
= TYPE_PRECISION (type
);
7036 signop sign
= TYPE_SIGN (type
);
7037 wide_int min_value
, max_value
;
7038 if (!vect_get_range_info (lhs
, &min_value
, &max_value
))
7041 tree_code code
= gimple_assign_rhs_code (stmt
);
7042 unsigned int nops
= gimple_num_ops (stmt
);
7044 if (!vect_truncatable_operation_p (code
))
7046 /* Handle operations that can be computed in type T if all inputs
7047 and outputs can be represented in type T. Also handle left and
7048 right shifts, where (in addition) the maximum shift amount must
7049 be less than the number of bits in T. */
7061 case TRUNC_DIV_EXPR
:
7063 case FLOOR_DIV_EXPR
:
7064 case ROUND_DIV_EXPR
:
7065 case EXACT_DIV_EXPR
:
7066 /* Modulus is excluded because it is typically calculated by doing
7067 a division, for which minimum signed / -1 isn't representable in
7068 the original signed type. We could take the division range into
7069 account instead, if handling modulus ever becomes important. */
7076 for (unsigned int i
= 1; i
< nops
; ++i
)
7078 tree op
= gimple_op (stmt
, i
);
7079 wide_int op_min_value
, op_max_value
;
7080 if (TREE_CODE (op
) == INTEGER_CST
)
7082 unsigned int op_precision
= TYPE_PRECISION (TREE_TYPE (op
));
7083 op_min_value
= op_max_value
= wi::to_wide (op
, op_precision
);
7085 else if (TREE_CODE (op
) == SSA_NAME
)
7087 if (!vect_get_range_info (op
, &op_min_value
, &op_max_value
))
7093 if (is_shift
&& i
== 2)
7095 /* There needs to be one more bit than the maximum shift amount.
7097 If the maximum shift amount is already 1 less than PRECISION
7098 then we can't narrow the shift further. Dealing with that
7099 case first ensures that we can safely use an unsigned range
7102 op_min_value isn't relevant, since shifts by negative amounts
7104 if (wi::geu_p (op_max_value
, precision
- 1))
7106 unsigned int min_bits
= op_max_value
.to_uhwi () + 1;
7108 /* As explained below, we can convert a signed shift into an
7109 unsigned shift if the sign bit is always clear. At this
7110 point we've already processed the ranges of the output and
7112 auto op_sign
= sign
;
7113 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
7115 op_min_value
= wide_int::from (wi::min_value (min_bits
, op_sign
),
7116 precision
, op_sign
);
7117 op_max_value
= wide_int::from (wi::max_value (min_bits
, op_sign
),
7118 precision
, op_sign
);
7120 min_value
= wi::min (min_value
, op_min_value
, sign
);
7121 max_value
= wi::max (max_value
, op_max_value
, sign
);
7125 /* Try to switch signed types for unsigned types if we can.
7126 This is better for two reasons. First, unsigned ops tend
7127 to be cheaper than signed ops. Second, it means that we can
7131 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
7136 unsigned short res_1 = (unsigned short) c & 0xff00;
7137 int res = (int) res_1;
7139 where the intermediate result res_1 has unsigned rather than
7141 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
7144 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
7145 unsigned int precision1
= wi::min_precision (min_value
, sign
);
7146 unsigned int precision2
= wi::min_precision (max_value
, sign
);
7147 unsigned int value_precision
= MAX (precision1
, precision2
);
7148 if (value_precision
>= precision
)
7151 if (dump_enabled_p ())
7152 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
7153 " without loss of precision: %G",
7154 sign
== SIGNED
? "signed" : "unsigned",
7155 value_precision
, (gimple
*) stmt
);
  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}
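
/* A worked example of the range-based narrowing above (hypothetical source,
   not taken from a testcase):

     unsigned char *a, *b;
     int c = (int) a[i] + (int) b[i];

   Here the result range is [0, 510], which needs only 9 bits, so the
   addition can be recorded as computable in an unsigned 16-bit operation
   even though it is written in 32-bit arithmetic.  */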
7161 /* Use information about the users of STMT's result to decide whether
7162 STMT (described by STMT_INFO) could be done in a narrower type.
7163 This is effectively a backward propagation. */
7166 vect_determine_precisions_from_users (stmt_vec_info stmt_info
, gassign
*stmt
)
7168 tree_code code
= gimple_assign_rhs_code (stmt
);
7169 unsigned int opno
= (code
== COND_EXPR
? 2 : 1);
7170 tree type
= TREE_TYPE (gimple_op (stmt
, opno
));
7171 if (!vect_narrowable_type_p (type
))
7174 unsigned int precision
= TYPE_PRECISION (type
);
7175 unsigned int operation_precision
, min_input_precision
;
7179 /* Only the bits that contribute to the output matter. Don't change
7180 the precision of the operation itself. */
7181 operation_precision
= precision
;
7182 min_input_precision
= stmt_info
->min_output_precision
;
7188 tree shift
= gimple_assign_rhs2 (stmt
);
7189 if (TREE_CODE (shift
) != INTEGER_CST
7190 || !wi::ltu_p (wi::to_widest (shift
), precision
))
7192 unsigned int const_shift
= TREE_INT_CST_LOW (shift
);
7193 if (code
== LSHIFT_EXPR
)
7195 /* Avoid creating an undefined shift.
7197 ??? We could instead use min_output_precision as-is and
7198 optimize out-of-range shifts to zero. However, only
7199 degenerate testcases shift away all their useful input data,
7200 and it isn't natural to drop input operations in the middle
7201 of vectorization. This sort of thing should really be
7202 handled before vectorization. */
7203 operation_precision
= MAX (stmt_info
->min_output_precision
,
7205 /* We need CONST_SHIFT fewer bits of the input. */
7206 min_input_precision
= (MAX (operation_precision
, const_shift
)
7211 /* We need CONST_SHIFT extra bits to do the operation. */
7212 operation_precision
= (stmt_info
->min_output_precision
7214 min_input_precision
= operation_precision
;
7220 if (vect_truncatable_operation_p (code
))
7222 /* Input bit N has no effect on output bits N-1 and lower. */
7223 operation_precision
= stmt_info
->min_output_precision
;
7224 min_input_precision
= operation_precision
;
7230 if (operation_precision
< precision
)
7232 if (dump_enabled_p ())
7233 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
7234 " without affecting users: %G",
7235 TYPE_UNSIGNED (type
) ? "unsigned" : "signed",
7236 operation_precision
, (gimple
*) stmt
);
7237 vect_set_operation_type (stmt_info
, type
, operation_precision
,
7240 vect_set_min_input_precision (stmt_info
, type
, min_input_precision
);
7243 /* Return true if the statement described by STMT_INFO sets a boolean
7244 SSA_NAME and if we know how to vectorize this kind of statement using
7245 vector mask types. */
7248 possible_vector_mask_operation_p (stmt_vec_info stmt_info
)
7250 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
7251 tree_code code
= ERROR_MARK
;
7252 gassign
*assign
= NULL
;
7255 if ((assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
)))
7256 code
= gimple_assign_rhs_code (assign
);
7257 else if ((cond
= dyn_cast
<gcond
*> (stmt_info
->stmt
)))
7259 lhs
= gimple_cond_lhs (cond
);
7260 code
= gimple_cond_code (cond
);
7264 || TREE_CODE (lhs
) != SSA_NAME
7265 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
7268 if (code
!= ERROR_MARK
)
7281 return TREE_CODE_CLASS (code
) == tcc_comparison
;
7284 else if (is_a
<gphi
*> (stmt_info
->stmt
))
7289 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
7290 a vector mask type instead of a normal vector type. Record the
7291 result in STMT_INFO->mask_precision. */
7294 vect_determine_mask_precision (vec_info
*vinfo
, stmt_vec_info stmt_info
)
7296 if (!possible_vector_mask_operation_p (stmt_info
))
7299 /* If at least one boolean input uses a vector mask type,
7300 pick the mask type with the narrowest elements.
7302 ??? This is the traditional behavior. It should always produce
7303 the smallest number of operations, but isn't necessarily the
7304 optimal choice. For example, if we have:
7310 - the user of a wants it to have a mask type for 16-bit elements (M16)
7312 - c uses a mask type for 8-bit elements (M8)
7314 then picking M8 gives:
7316 - 1 M16->M8 pack for b
7318 - 2 M8->M16 unpacks for the user of a
7320 whereas picking M16 would have given:
7322 - 2 M8->M16 unpacks for c
7325 The number of operations are equal, but M16 would have given
7326 a shorter dependency chain and allowed more ILP. */
7327 unsigned int precision
= ~0U;
7328 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7330 /* If the statement compares two values that shouldn't use vector masks,
7331 try comparing the values as normal scalars instead. */
7332 tree_code code
= ERROR_MARK
;
7334 unsigned int nops
= -1;
7335 unsigned int ops_start
= 0;
7337 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt
))
7339 code
= gimple_assign_rhs_code (assign
);
7340 op0_type
= TREE_TYPE (gimple_assign_rhs1 (assign
));
7341 nops
= gimple_num_ops (assign
);
7344 else if (gcond
*cond
= dyn_cast
<gcond
*> (stmt
))
7346 code
= gimple_cond_code (cond
);
7347 op0_type
= TREE_TYPE (gimple_cond_lhs (cond
));
7352 if (code
!= ERROR_MARK
)
7354 for (unsigned int i
= ops_start
; i
< nops
; ++i
)
7356 tree rhs
= gimple_op (stmt
, i
);
7357 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs
)))
7360 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (rhs
);
7362 /* Don't let external or constant operands influence the choice.
7363 We can convert them to whichever vector type we pick. */
7366 if (def_stmt_info
->mask_precision
)
7368 if (precision
> def_stmt_info
->mask_precision
)
7369 precision
= def_stmt_info
->mask_precision
;
7373 if (precision
== ~0U
7374 && TREE_CODE_CLASS (code
) == tcc_comparison
)
7377 tree vectype
, mask_type
;
7378 if (is_a
<scalar_mode
> (TYPE_MODE (op0_type
), &mode
)
7379 && (vectype
= get_vectype_for_scalar_type (vinfo
, op0_type
))
7380 && (mask_type
= get_mask_type_for_scalar_type (vinfo
, op0_type
))
7381 && expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
7382 precision
= GET_MODE_BITSIZE (mode
);
7387 gphi
*phi
= as_a
<gphi
*> (stmt_info
->stmt
);
7388 for (unsigned i
= 0; i
< gimple_phi_num_args (phi
); ++i
)
7390 tree rhs
= gimple_phi_arg_def (phi
, i
);
7392 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (rhs
);
7394 /* Don't let external or constant operands influence the choice.
7395 We can convert them to whichever vector type we pick. */
7398 if (def_stmt_info
->mask_precision
)
7400 if (precision
> def_stmt_info
->mask_precision
)
7401 precision
= def_stmt_info
->mask_precision
;
7406 if (dump_enabled_p ())
7408 if (precision
== ~0U)
7409 dump_printf_loc (MSG_NOTE
, vect_location
,
7410 "using normal nonmask vectors for %G",
7413 dump_printf_loc (MSG_NOTE
, vect_location
,
7414 "using boolean precision %d for %G",
7415 precision
, stmt_info
->stmt
);
7418 stmt_info
->mask_precision
= precision
;
/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

static void
vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (vinfo, stmt_info);
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
}
/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */

static void
vect_determine_precisions (vec_info *vinfo)
{
  basic_block *bbs = vinfo->bbs;
  unsigned int nbbs = vinfo->nbbs;

  DUMP_VECT_SCOPE ("vect_determine_precisions");

  for (unsigned int i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_mask_precision (vinfo, stmt_info);
	}
      for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_mask_precision (vinfo, stmt_info);
	}
    }
  for (unsigned int i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[nbbs - i - 1];
      for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
	{
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_stmt_precisions (vinfo, stmt_info);
	}
      for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_stmt_precisions (vinfo, stmt_info);
	}
    }
}
typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt is
   taken which means usually the more complex one needs to precede the
   less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
  { vect_recog_bit_insert_pattern, "bit_insert" },
  { vect_recog_abd_pattern, "abd" },
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
  { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mod_var_pattern, "modvar" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_sat_add_pattern, "sat_add" },
  { vect_recog_sat_sub_pattern, "sat_sub" },
  { vect_recog_sat_trunc_pattern, "sat_trunc" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_gcond_pattern, "gcond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_cond_store_pattern, "cond_store" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
  { vect_recog_widen_abd_pattern, "widen_abd" },
  /* These must come after the double widening ones.  */
};
7538 /* Mark statements that are involved in a pattern. */
7541 vect_mark_pattern_stmts (vec_info
*vinfo
,
7542 stmt_vec_info orig_stmt_info
, gimple
*pattern_stmt
,
7543 tree pattern_vectype
)
7545 stmt_vec_info orig_stmt_info_saved
= orig_stmt_info
;
7546 gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
7548 gimple
*orig_pattern_stmt
= NULL
;
7549 if (is_pattern_stmt_p (orig_stmt_info
))
7551 /* We're replacing a statement in an existing pattern definition
7553 orig_pattern_stmt
= orig_stmt_info
->stmt
;
7554 if (dump_enabled_p ())
7555 dump_printf_loc (MSG_NOTE
, vect_location
,
7556 "replacing earlier pattern %G", orig_pattern_stmt
);
7558 /* To keep the book-keeping simple, just swap the lhs of the
7559 old and new statements, so that the old one has a valid but
7561 tree old_lhs
= gimple_get_lhs (orig_pattern_stmt
);
7562 gimple_set_lhs (orig_pattern_stmt
, gimple_get_lhs (pattern_stmt
));
7563 gimple_set_lhs (pattern_stmt
, old_lhs
);
7565 if (dump_enabled_p ())
7566 dump_printf_loc (MSG_NOTE
, vect_location
, "with %G", pattern_stmt
);
7568 /* Switch to the statement that ORIG replaces. */
7569 orig_stmt_info
= STMT_VINFO_RELATED_STMT (orig_stmt_info
);
7571 /* We shouldn't be replacing the main pattern statement. */
7572 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
)->stmt
7573 != orig_pattern_stmt
);
7577 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
7578 !gsi_end_p (si
); gsi_next (&si
))
7580 if (dump_enabled_p ())
7581 dump_printf_loc (MSG_NOTE
, vect_location
,
7582 "extra pattern stmt: %G", gsi_stmt (si
));
7583 stmt_vec_info pattern_stmt_info
7584 = vect_init_pattern_stmt (vinfo
, gsi_stmt (si
),
7585 orig_stmt_info
, pattern_vectype
);
7586 /* Stmts in the def sequence are not vectorizable cycle or
7587 induction defs, instead they should all be vect_internal_def
7588 feeding the main pattern stmt which retains this def type. */
7589 STMT_VINFO_DEF_TYPE (pattern_stmt_info
) = vect_internal_def
;
7592 if (orig_pattern_stmt
)
7594 vect_init_pattern_stmt (vinfo
, pattern_stmt
,
7595 orig_stmt_info
, pattern_vectype
);
7597 /* Insert all the new pattern statements before the original one. */
7598 gimple_seq
*orig_def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
7599 gimple_stmt_iterator gsi
= gsi_for_stmt (orig_pattern_stmt
,
7601 gsi_insert_seq_before_without_update (&gsi
, def_seq
, GSI_SAME_STMT
);
7602 gsi_insert_before_without_update (&gsi
, pattern_stmt
, GSI_SAME_STMT
);
7604 /* Remove the pattern statement that this new pattern replaces. */
7605 gsi_remove (&gsi
, false);
7608 vect_set_pattern_stmt (vinfo
,
7609 pattern_stmt
, orig_stmt_info
, pattern_vectype
);
7611 /* For any conditionals mark them as vect_condition_def. */
7612 if (is_a
<gcond
*> (pattern_stmt
))
7613 STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info
)) = vect_condition_def
;
7615 /* Transfer reduction path info to the pattern. */
7616 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved
) != -1)
7619 if (!gimple_extract_op (orig_stmt_info_saved
->stmt
, &op
))
7621 tree lookfor
= op
.ops
[STMT_VINFO_REDUC_IDX (orig_stmt_info
)];
7622 /* Search the pattern def sequence and the main pattern stmt. Note
7623 we may have inserted all into a containing pattern def sequence
7624 so the following is a bit awkward. */
7625 gimple_stmt_iterator si
;
7629 si
= gsi_start (def_seq
);
7641 if (gimple_extract_op (s
, &op
))
7642 for (unsigned i
= 0; i
< op
.num_ops
; ++i
)
7643 if (op
.ops
[i
] == lookfor
)
7645 STMT_VINFO_REDUC_IDX (vinfo
->lookup_stmt (s
)) = i
;
7646 lookfor
= gimple_get_lhs (s
);
7650 if (s
== pattern_stmt
)
7652 if (!found
&& dump_enabled_p ())
7653 dump_printf_loc (MSG_NOTE
, vect_location
,
7654 "failed to update reduction index.\n");
7662 if (s
== pattern_stmt
)
7663 /* Found the end inside a bigger pattern def seq. */
7672 /* Function vect_pattern_recog_1
7675 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
7676 computation pattern.
7677 STMT_INFO: A stmt from which the pattern search should start.
7679 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
7680 a sequence of statements that has the same functionality and can be
7681 used to replace STMT_INFO. It returns the last statement in the sequence
7682 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7683 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7684 statement, having first checked that the target supports the new operation
7687 This function also does some bookkeeping, as explained in the documentation
7688 for vect_recog_pattern. */
7691 vect_pattern_recog_1 (vec_info
*vinfo
,
7692 const vect_recog_func
&recog_func
, stmt_vec_info stmt_info
)
7694 gimple
*pattern_stmt
;
7695 tree pattern_vectype
;
7697 /* If this statement has already been replaced with pattern statements,
7698 leave the original statement alone, since the first match wins.
7699 Instead try to match against the definition statements that feed
7700 the main pattern statement. */
7701 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7703 gimple_stmt_iterator gsi
;
7704 for (gsi
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
7705 !gsi_end_p (gsi
); gsi_next (&gsi
))
7706 vect_pattern_recog_1 (vinfo
, recog_func
,
7707 vinfo
->lookup_stmt (gsi_stmt (gsi
)));
7711 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
7712 pattern_stmt
= recog_func
.fn (vinfo
, stmt_info
, &pattern_vectype
);
7715 /* Clear any half-formed pattern definition sequence. */
7716 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "%s pattern recognized: %G",
		     recog_func.name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
}
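
/* As an illustrative sketch only, not part of the vectorizer: a minimal
   recognizer obeying the contract described above could look like the
   hypothetical function below.  It "recognizes" a plain conversion and
   re-emits it as the pattern statement; real recognizers such as
   vect_recog_widen_mult_pattern additionally build pattern def sequences
   and check that the target supports the replacement operation.

     static gimple *
     vect_recog_noop_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			      tree *type_out)
     {
       gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt);
       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
	 return NULL;
       tree type = TREE_TYPE (gimple_assign_lhs (assign));
       *type_out = get_vectype_for_scalar_type (vinfo, type);
       if (!*type_out)
	 return NULL;
       tree new_lhs = vect_recog_temp_ssa_var (type, NULL);
       gimple *pattern_stmt
	 = gimple_build_assign (new_lhs, NOP_EXPR,
				gimple_assign_rhs1 (assign));
       vect_pattern_detected ("vect_recog_noop_pattern", assign);
       return pattern_stmt;
     }

   (Hypothetical example for documentation; such a recognizer is never
   registered in vect_vect_recog_func_ptrs.)  */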
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	 '---> S6: a_new = ....         -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	 '---> S6: a_new = ....         -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	VS6: va_new = ....
	VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (vec_info *vinfo)
{
  basic_block *bbs = vinfo->bbs;
  unsigned int nbbs = vinfo->nbbs;

  vect_determine_precisions (vinfo);
  DUMP_VECT_SCOPE ("vect_pattern_recog");

  /* Scan through the stmts in the region, applying the pattern recognition
     functions starting at each stmt visited.  */
  for (unsigned i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));

	  if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (const auto &func_ptr : vect_vect_recog_func_ptrs)
	    vect_pattern_recog_1 (vinfo, func_ptr, stmt_info);
	}
    }

  /* After this no more add_stmt calls are allowed.  */
  vinfo->stmt_vec_info_ro = true;
}
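
/* As a source-level illustration, hedged and not tied to any particular
   recognizer in this file: in a loop such as

       short *a, *b;
       int s = 0;
       for (int i = 0; i < n; ++i)
	 s += a[i] * b[i];

   the promotions of a[i] and b[i] to int followed by the int multiplication
   are typically matched by the widening-multiply recognizer, so the later
   analysis and transformation phases work on the single widening pattern
   statement instead of the original promote-and-multiply statements.  */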
/* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code,
   or internal_fn contained in ch, respectively.  */
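
/* Illustrative usage only; the SSA names below are hypothetical:

       gimple *g1 = vect_gimple_build (lhs_1, PLUS_EXPR, op_a, op_b);
       gimple *g2 = vect_gimple_build (lhs_2, as_combined_fn (IFN_SQRT),
				       op_a, NULL_TREE);

   g1 is the assignment lhs_1 = op_a + op_b, while g2 is the internal
   call lhs_2 = .SQRT (op_a).  */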
gimple *
vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
{
  gcc_assert (op0 != NULL_TREE);
  if (ch.is_tree_code ())
    return gimple_build_assign (lhs, (tree_code) ch, op0, op1);

  gcc_assert (ch.is_internal_fn ());
  gimple *stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
					     op1 == NULL_TREE ? 1 : 2,
					     op0, op1);
  gimple_call_set_lhs (stmt, lhs);