gcc/tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #define INCLUDE_MEMORY
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "gimple-iterator.h"
30 #include "gimple-fold.h"
31 #include "ssa.h"
32 #include "expmed.h"
33 #include "optabs-tree.h"
34 #include "insn-config.h"
35 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "tree-eh.h"
39 #include "gimplify.h"
40 #include "gimple-iterator.h"
41 #include "gimple-fold.h"
42 #include "gimplify-me.h"
43 #include "cfgloop.h"
44 #include "tree-vectorizer.h"
45 #include "dumpfile.h"
46 #include "builtins.h"
47 #include "internal-fn.h"
48 #include "case-cfn-macros.h"
49 #include "fold-const-call.h"
50 #include "attribs.h"
51 #include "cgraph.h"
52 #include "omp-simd-clone.h"
53 #include "predict.h"
54 #include "tree-vector-builder.h"
55 #include "tree-ssa-loop-ivopts.h"
56 #include "vec-perm-indices.h"
57 #include "gimple-range.h"
58 #include "alias.h"
61 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
62 in the first operand. Disentangling this is future work; the
63 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
66 /* Return true if we have a useful VR_RANGE range for VAR, storing it
67 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
69 bool
70 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
72 int_range_max vr;
73 tree vr_min, vr_max;
74 get_range_query (cfun)->range_of_expr (vr, var);
75 if (vr.undefined_p ())
76 vr.set_varying (TREE_TYPE (var));
77 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
78 *min_value = wi::to_wide (vr_min);
79 *max_value = wi::to_wide (vr_max);
80 wide_int nonzero = get_nonzero_bits (var);
81 signop sgn = TYPE_SIGN (TREE_TYPE (var));
82 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
83 nonzero, sgn) == VR_RANGE)
85 if (dump_enabled_p ())
87 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
88 dump_printf (MSG_NOTE, " has range [");
89 dump_hex (MSG_NOTE, *min_value);
90 dump_printf (MSG_NOTE, ", ");
91 dump_hex (MSG_NOTE, *max_value);
92 dump_printf (MSG_NOTE, "]\n");
94 return true;
96 else
98 if (dump_enabled_p ())
100 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
101 dump_printf (MSG_NOTE, " has no range info\n");
103 return false;
107 /* Report that we've found an instance of pattern PATTERN in
108 statement STMT. */
110 static void
111 vect_pattern_detected (const char *name, gimple *stmt)
113 if (dump_enabled_p ())
114 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
117 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
118 return the pattern statement's stmt_vec_info. Set its vector type to
119 VECTYPE if it doesn't have one already. */
121 static stmt_vec_info
122 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
123 stmt_vec_info orig_stmt_info, tree vectype)
125 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
126 if (pattern_stmt_info == NULL)
127 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
128 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
130 pattern_stmt_info->pattern_stmt_p = true;
131 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
132 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
133 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
134 STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
135 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
137 gcc_assert (!vectype
138 || is_a <gcond *> (pattern_stmt)
139 || (VECTOR_BOOLEAN_TYPE_P (vectype)
140 == vect_use_mask_type_p (orig_stmt_info)));
141 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
142 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
144 return pattern_stmt_info;
147 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
148 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
149 have one already. */
151 static void
152 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
153 stmt_vec_info orig_stmt_info, tree vectype)
155 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
156 STMT_VINFO_RELATED_STMT (orig_stmt_info)
157 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
160 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
161 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
162 be different from the vector type of the final pattern statement.
163 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
164 from which it was derived. */
166 static inline void
167 append_pattern_def_seq (vec_info *vinfo,
168 stmt_vec_info stmt_info, gimple *new_stmt,
169 tree vectype = NULL_TREE,
170 tree scalar_type_for_mask = NULL_TREE)
172 gcc_assert (!scalar_type_for_mask
173 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
174 if (vectype)
176 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
177 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
178 if (scalar_type_for_mask)
179 new_stmt_info->mask_precision
180 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
182 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
183 new_stmt);
187 /* Add NEW_STMT to VINFO's invariant pattern definition statements. These
188 statements are not vectorized but are materialized as scalar in the loop
189 preheader. */
191 static inline void
192 append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
194 gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
197 /* The caller wants to perform new operations on vect_external variable
198 VAR, so that the result of the operations would also be vect_external.
199 Return the edge on which the operations can be performed, if one exists.
200 Return null if the operations should instead be treated as part of
201 the pattern that needs them. */
203 static edge
204 vect_get_external_def_edge (vec_info *vinfo, tree var)
206 edge e = NULL;
207 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
209 e = loop_preheader_edge (loop_vinfo->loop);
210 if (!SSA_NAME_IS_DEFAULT_DEF (var))
212 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
213 if (bb == NULL
214 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
215 e = NULL;
218 return e;
221 /* Return true if the target supports a vector version of CODE,
222 where CODE is known to map to a direct optab with the given SUBTYPE.
223 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
224 specifies the type of the scalar result.
226 If CODE allows the inputs and outputs to have different types
227 (such as for WIDEN_SUM_EXPR), it is the input mode rather
228 than the output mode that determines the appropriate target pattern.
229 Operand 0 of the target pattern then specifies the mode that the output
230 must have.
232 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
233 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
234 is nonnull. */
236 static bool
237 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
238 tree itype, tree *vecotype_out,
239 tree *vecitype_out = NULL,
240 enum optab_subtype subtype = optab_default)
242 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
243 if (!vecitype)
244 return false;
246 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
247 if (!vecotype)
248 return false;
250 optab optab = optab_for_tree_code (code, vecitype, subtype);
251 if (!optab)
252 return false;
254 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
255 if (icode == CODE_FOR_nothing
256 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
257 return false;
259 *vecotype_out = vecotype;
260 if (vecitype_out)
261 *vecitype_out = vecitype;
262 return true;
265 /* Return true if the target supports a vector version of CODE,
266 where CODE is known to map to a conversion optab with the given SUBTYPE.
267 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
268 specifies the type of the scalar result.
270 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
271 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
272 is nonnull. */
274 static bool
275 vect_supportable_conv_optab_p (vec_info *vinfo, tree otype, tree_code code,
276 tree itype, tree *vecotype_out,
277 tree *vecitype_out = NULL,
278 enum optab_subtype subtype = optab_default)
280 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
281 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
282 if (!vecitype || !vecotype)
283 return false;
285 if (!directly_supported_p (code, vecotype, vecitype, subtype))
286 return false;
288 *vecotype_out = vecotype;
289 if (vecitype_out)
290 *vecitype_out = vecitype;
291 return true;
294 /* Round bit precision PRECISION up to a full element. */
296 static unsigned int
297 vect_element_precision (unsigned int precision)
299 precision = 1 << ceil_log2 (precision);
300 return MAX (precision, BITS_PER_UNIT);
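/* Worked example: a required precision of 17 bits rounds up to
   1 << ceil_log2 (17) == 32, while a precision of 3 bits becomes 4 and is
   then raised to BITS_PER_UNIT.  */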
303 /* If OP is defined by a statement that's being considered for vectorization,
304 return information about that statement, otherwise return NULL. */
306 static stmt_vec_info
307 vect_get_internal_def (vec_info *vinfo, tree op)
309 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
310 if (def_stmt_info
311 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
312 return vect_stmt_to_vectorize (def_stmt_info);
313 return NULL;
316 /* Check whether NAME, an ssa-name used in STMT_VINFO,
317 is a result of a type promotion, such that:
318 DEF_STMT: NAME = NOP (name0)
319 If CHECK_SIGN is TRUE, check that either both types are signed or both are
320 unsigned. */
322 static bool
323 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
324 tree *orig_type, gimple **def_stmt, bool *promotion)
326 tree type = TREE_TYPE (name);
327 tree oprnd0;
328 enum vect_def_type dt;
330 stmt_vec_info def_stmt_info;
331 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
332 return false;
334 if (dt != vect_internal_def
335 && dt != vect_external_def && dt != vect_constant_def)
336 return false;
338 if (!*def_stmt)
339 return false;
341 if (!is_gimple_assign (*def_stmt))
342 return false;
344 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
345 return false;
347 oprnd0 = gimple_assign_rhs1 (*def_stmt);
349 *orig_type = TREE_TYPE (oprnd0);
350 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
351 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
352 return false;
354 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
355 *promotion = true;
356 else
357 *promotion = false;
359 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
360 return false;
362 return true;
365 /* Holds information about an input operand after some sign changes
366 and type promotions have been peeled away. */
367 class vect_unpromoted_value {
368 public:
369 vect_unpromoted_value ();
371 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
373 /* The value obtained after peeling away zero or more casts. */
374 tree op;
376 /* The type of OP. */
377 tree type;
379 /* The definition type of OP. */
380 vect_def_type dt;
382 /* If OP is the result of peeling at least one cast, and if the cast
383 of OP itself is a vectorizable statement, CASTER identifies that
384 statement, otherwise it is null. */
385 stmt_vec_info caster;
388 inline vect_unpromoted_value::vect_unpromoted_value ()
389 : op (NULL_TREE),
390 type (NULL_TREE),
391 dt (vect_uninitialized_def),
392 caster (NULL)
396 /* Set the operand to OP_IN, its definition type to DT_IN, and the
397 statement that casts it to CASTER_IN. */
399 inline void
400 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
401 stmt_vec_info caster_in)
403 op = op_in;
404 type = TREE_TYPE (op);
405 dt = dt_in;
406 caster = caster_in;
409 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
410 to reach some vectorizable inner operand OP', continuing as long as it
411 is possible to convert OP' back to OP using a possible sign change
412 followed by a possible promotion P. Return this OP', or null if OP is
413 not a vectorizable SSA name. If there is a promotion P, describe its
414 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
415 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
416 have more than one user.
418 A successful return means that it is possible to go from OP' to OP
419 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
420 whereas the cast from UNPROM to OP might be a promotion, a sign
421 change, or a nop.
423 E.g. say we have:
425 signed short *ptr = ...;
426 signed short C = *ptr;
427 unsigned short B = (unsigned short) C; // sign change
428 signed int A = (signed int) B; // unsigned promotion
429 ...possible other uses of A...
430 unsigned int OP = (unsigned int) A; // sign change
432 In this case it's possible to go directly from C to OP using:
434 OP = (unsigned int) (unsigned short) C;
435 +------------+ +--------------+
436 promotion sign change
438 so OP' would be C. The input to the promotion is B, so UNPROM
439 would describe B. */
441 static tree
442 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
443 vect_unpromoted_value *unprom,
444 bool *single_use_p = NULL)
446 tree op_type = TREE_TYPE (op);
447 if (!INTEGRAL_TYPE_P (op_type))
448 return NULL_TREE;
450 tree res = NULL_TREE;
451 unsigned int orig_precision = TYPE_PRECISION (op_type);
452 unsigned int min_precision = orig_precision;
453 stmt_vec_info caster = NULL;
454 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
456 /* See whether OP is simple enough to vectorize. */
457 stmt_vec_info def_stmt_info;
458 gimple *def_stmt;
459 vect_def_type dt;
460 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
461 break;
463 /* If OP is the input of a demotion, skip over it to see whether
464 OP is itself the result of a promotion. If so, the combined
465 effect of the promotion and the demotion might fit the required
466 pattern, otherwise neither operation fits.
468 This copes with cases such as the result of an arithmetic
469 operation being truncated before being stored, and where that
470 arithmetic operation has been recognized as an over-widened one. */
471 if (TYPE_PRECISION (op_type) <= min_precision)
473 /* Use OP as the UNPROM described above if we haven't yet
474 found a promotion, or if using the new input preserves the
475 sign of the previous promotion. */
476 if (!res
477 || TYPE_PRECISION (unprom->type) == orig_precision
478 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
479 || (TYPE_UNSIGNED (op_type)
480 && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
482 unprom->set_op (op, dt, caster);
483 min_precision = TYPE_PRECISION (op_type);
485 /* Stop if we've already seen a promotion and if this
486 conversion does more than change the sign. */
487 else if (TYPE_PRECISION (op_type)
488 != TYPE_PRECISION (unprom->type))
489 break;
491 /* The sequence now extends to OP. */
492 res = op;
495 /* See whether OP is defined by a cast. Record it as CASTER if
496 the cast is potentially vectorizable. */
497 if (!def_stmt)
498 break;
499 caster = def_stmt_info;
501 /* Ignore pattern statements, since we don't link uses for them. */
502 if (caster
503 && single_use_p
504 && !STMT_VINFO_RELATED_STMT (caster)
505 && !has_single_use (res))
506 *single_use_p = false;
508 gassign *assign = dyn_cast <gassign *> (def_stmt);
509 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
510 break;
512 /* Continue with the input to the cast. */
513 op = gimple_assign_rhs1 (def_stmt);
514 op_type = TREE_TYPE (op);
516 return res;
519 /* OP is an integer operand to an operation that returns TYPE, and we
520 want to treat the operation as a widening one. So far we can treat
521 it as widening from *COMMON_TYPE.
523 Return true if OP is suitable for such a widening operation,
524 either widening from *COMMON_TYPE or from some supertype of it.
525 Update *COMMON_TYPE to the supertype in the latter case.
527 SHIFT_P is true if OP is a shift amount. */
529 static bool
530 vect_joust_widened_integer (tree type, bool shift_p, tree op,
531 tree *common_type)
533 /* Calculate the minimum precision required by OP, without changing
534 the sign of either operand. */
535 unsigned int precision;
536 if (shift_p)
538 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
539 return false;
540 precision = TREE_INT_CST_LOW (op);
542 else
544 precision = wi::min_precision (wi::to_widest (op),
545 TYPE_SIGN (*common_type));
546 if (precision * 2 > TYPE_PRECISION (type))
547 return false;
550 /* If OP requires a wider type, switch to that type. The checks
551 above ensure that this is still narrower than the result. */
552 precision = vect_element_precision (precision);
553 if (TYPE_PRECISION (*common_type) < precision)
554 *common_type = build_nonstandard_integer_type
555 (precision, TYPE_UNSIGNED (*common_type));
556 return true;
559 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
560 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
562 static bool
563 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
565 if (types_compatible_p (*common_type, new_type))
566 return true;
568 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
569 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
570 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
571 return true;
573 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
574 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
575 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
577 *common_type = new_type;
578 return true;
581 /* We have mismatched signs, with the signed type being
582 no wider than the unsigned type. In this case we need
583 a wider signed type. */
584 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
585 TYPE_PRECISION (new_type));
586 precision *= 2;
588 if (precision * 2 > TYPE_PRECISION (type))
589 return false;
591 *common_type = build_nonstandard_integer_type (precision, false);
592 return true;
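/* Worked example of the mismatched-signs case above: joining
   *COMMON_TYPE == unsigned char with NEW_TYPE == signed char yields a
   signed 16-bit common type, and that is accepted only when the result
   TYPE is at least 32 bits wide, because of the "precision * 2" check.  */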
595 /* Check whether STMT_INFO can be viewed as a tree of integer operations
596 in which each node either performs CODE or WIDENED_CODE, and where
597 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
598 specifies the maximum number of leaf operands. SHIFT_P says whether
599 CODE and WIDENED_CODE are some sort of shift.
601 If STMT_INFO is such a tree, return the number of leaf operands
602 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
603 to a type that (a) is narrower than the result of STMT_INFO and
604 (b) can hold all leaf operand values.
606 If SUBTYPE is nonnull, allow the operands to differ in sign but
607 not in precision. SUBTYPE is updated to reflect
608 this.
610 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
611 exists. */
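/* Illustrative sketch (hypothetical operands, assuming char inputs and an
   int result): with CODE == PLUS_EXPR and MAX_NOPS == 4, a statement
   computing

     (int) a + (int) b + (int) c + (int) d

   is such a tree; the inner additions are followed through
   vect_get_internal_def and the four chars become the leaf operands,
   with char as the common type.  */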
613 static unsigned int
614 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
615 code_helper widened_code, bool shift_p,
616 unsigned int max_nops,
617 vect_unpromoted_value *unprom, tree *common_type,
618 enum optab_subtype *subtype = NULL)
620 /* Check for an integer operation with the right code. */
621 gimple* stmt = stmt_info->stmt;
622 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
623 return 0;
625 code_helper rhs_code;
626 if (is_gimple_assign (stmt))
627 rhs_code = gimple_assign_rhs_code (stmt);
628 else if (is_gimple_call (stmt))
629 rhs_code = gimple_call_combined_fn (stmt);
630 else
631 return 0;
633 if (rhs_code != code
634 && rhs_code != widened_code)
635 return 0;
637 tree lhs = gimple_get_lhs (stmt);
638 tree type = TREE_TYPE (lhs);
639 if (!INTEGRAL_TYPE_P (type))
640 return 0;
642 /* Assume that both operands will be leaf operands. */
643 max_nops -= 2;
645 /* Check the operands. */
646 unsigned int next_op = 0;
647 for (unsigned int i = 0; i < 2; ++i)
649 vect_unpromoted_value *this_unprom = &unprom[next_op];
650 unsigned int nops = 1;
651 tree op = gimple_arg (stmt, i);
652 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
654 /* We already have a common type from earlier operands.
655 Update it to account for OP. */
656 this_unprom->set_op (op, vect_constant_def);
657 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
658 return 0;
660 else
662 /* Only allow shifts by constants. */
663 if (shift_p && i == 1)
664 return 0;
666 if (rhs_code != code)
668 /* If rhs_code is widened_code, don't look through further
669 possible promotions, there is a promotion already embedded
670 in the WIDEN_*_EXPR. */
671 if (TREE_CODE (op) != SSA_NAME
672 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
673 return 0;
675 stmt_vec_info def_stmt_info;
676 gimple *def_stmt;
677 vect_def_type dt;
678 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
679 &def_stmt))
680 return 0;
681 this_unprom->set_op (op, dt, NULL);
683 else if (!vect_look_through_possible_promotion (vinfo, op,
684 this_unprom))
685 return 0;
687 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
689 /* The operand isn't widened. If STMT_INFO has the code
690 for an unwidened operation, recursively check whether
691 this operand is a node of the tree. */
692 if (rhs_code != code
693 || max_nops == 0
694 || this_unprom->dt != vect_internal_def)
695 return 0;
697 /* Give back the leaf slot allocated above now that we're
698 not treating this as a leaf operand. */
699 max_nops += 1;
701 /* Recursively process the definition of the operand. */
702 stmt_vec_info def_stmt_info
703 = vect_get_internal_def (vinfo, this_unprom->op);
705 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
706 widened_code, shift_p, max_nops,
707 this_unprom, common_type,
708 subtype);
709 if (nops == 0)
710 return 0;
712 max_nops -= nops;
714 else
716 /* Make sure that the operand is narrower than the result. */
717 if (TYPE_PRECISION (this_unprom->type) * 2
718 > TYPE_PRECISION (type))
719 return 0;
721 /* Update COMMON_TYPE for the new operand. */
722 if (i == 0)
723 *common_type = this_unprom->type;
724 else if (!vect_joust_widened_type (type, this_unprom->type,
725 common_type))
727 if (subtype)
729 /* See if we can sign extend the smaller type. */
730 if (TYPE_PRECISION (this_unprom->type)
731 > TYPE_PRECISION (*common_type))
732 *common_type = this_unprom->type;
733 *subtype = optab_vector_mixed_sign;
735 else
736 return 0;
740 next_op += nops;
742 return next_op;
745 /* Helper to return a new temporary of TYPE for pattern statement STMT. If STMT
746 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
748 static tree
749 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
751 return make_temp_ssa_name (type, stmt, "patt");
754 /* STMT2_INFO describes a type conversion that could be split into STMT1
755 followed by a version of STMT2_INFO that takes NEW_RHS as its first
756 input. Try to do this using pattern statements, returning true on
757 success. */
759 static bool
760 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
761 gimple *stmt1, tree vectype)
763 if (is_pattern_stmt_p (stmt2_info))
765 /* STMT2_INFO is part of a pattern. Get the statement to which
766 the pattern is attached. */
767 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
768 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
770 if (dump_enabled_p ())
771 dump_printf_loc (MSG_NOTE, vect_location,
772 "Splitting pattern statement: %G", stmt2_info->stmt);
774 /* Since STMT2_INFO is a pattern statement, we can change it
775 in-situ without worrying about changing the code for the
776 containing block. */
777 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
779 if (dump_enabled_p ())
781 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
782 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
783 stmt2_info->stmt);
786 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
787 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
788 /* STMT2_INFO is the actual pattern statement. Add STMT1
789 to the end of the definition sequence. */
790 gimple_seq_add_stmt_without_update (def_seq, stmt1);
791 else
793 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
794 before it. */
795 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
796 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
798 return true;
800 else
802 /* STMT2_INFO doesn't yet have a pattern. Try to create a
803 two-statement pattern now. */
804 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
805 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
806 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
807 if (!lhs_vectype)
808 return false;
810 if (dump_enabled_p ())
811 dump_printf_loc (MSG_NOTE, vect_location,
812 "Splitting statement: %G", stmt2_info->stmt);
814 /* Add STMT1 as a singleton pattern definition sequence. */
815 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
816 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
817 gimple_seq_add_stmt_without_update (def_seq, stmt1);
819 /* Build the second of the two pattern statements. */
820 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
821 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
822 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
824 if (dump_enabled_p ())
826 dump_printf_loc (MSG_NOTE, vect_location,
827 "into pattern statements: %G", stmt1);
828 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
829 (gimple *) new_stmt2);
832 return true;
836 /* Look for the following pattern
837 X = x[i]
838 Y = y[i]
839 DIFF = X - Y
840 DAD = ABS_EXPR<DIFF>
842 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
843 HALF_TYPE and UNPROM will be set should the statement be found to
844 be a widened operation.
845 DIFF_STMT will be set to the MINUS_EXPR
846 statement that precedes the ABS_STMT if it is a MINUS_EXPR. */
848 static bool
849 vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
850 tree *half_type,
851 vect_unpromoted_value unprom[2],
852 gassign **diff_stmt)
854 if (!abs_stmt)
855 return false;
857 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
858 inside the loop (in case we are analyzing an outer-loop). */
859 enum tree_code code = gimple_assign_rhs_code (abs_stmt);
860 if (code != ABS_EXPR && code != ABSU_EXPR)
861 return false;
863 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
864 tree abs_type = TREE_TYPE (abs_oprnd);
865 if (!abs_oprnd)
866 return false;
867 if (!ANY_INTEGRAL_TYPE_P (abs_type)
868 || TYPE_OVERFLOW_WRAPS (abs_type)
869 || TYPE_UNSIGNED (abs_type))
870 return false;
872 /* Peel off conversions from the ABS input. This can involve sign
873 changes (e.g. from an unsigned subtraction to a signed ABS input)
874 or signed promotion, but it can't include unsigned promotion.
875 (Note that ABS of an unsigned promotion should have been folded
876 away before now anyway.) */
877 vect_unpromoted_value unprom_diff;
878 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
879 &unprom_diff);
880 if (!abs_oprnd)
881 return false;
882 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
883 && TYPE_UNSIGNED (unprom_diff.type))
884 return false;
886 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
887 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
888 if (!diff_stmt_vinfo)
889 return false;
891 gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
892 if (diff_stmt && diff
893 && gimple_assign_rhs_code (diff) == MINUS_EXPR
894 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
895 *diff_stmt = diff;
897 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
898 inside the loop (in case we are analyzing an outer-loop). */
899 if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
900 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
901 false, 2, unprom, half_type))
902 return true;
904 return false;
907 /* Convert UNPROM to TYPE and return the result, adding new statements
908 to STMT_INFO's pattern definition statements if no better way is
909 available. VECTYPE is the vector form of TYPE.
911 If SUBTYPE then convert the type based on the subtype. */
913 static tree
914 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
915 vect_unpromoted_value *unprom, tree vectype,
916 enum optab_subtype subtype = optab_default)
918 /* Update the type if the signs differ. */
919 if (subtype == optab_vector_mixed_sign)
921 gcc_assert (!TYPE_UNSIGNED (type));
922 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
924 type = unsigned_type_for (type);
925 vectype = unsigned_type_for (vectype);
929 /* Check for a no-op conversion. */
930 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
931 return unprom->op;
933 /* Allow the caller to create constant vect_unpromoted_values. */
934 if (TREE_CODE (unprom->op) == INTEGER_CST)
935 return wide_int_to_tree (type, wi::to_widest (unprom->op));
937 tree input = unprom->op;
938 if (unprom->caster)
940 tree lhs = gimple_get_lhs (unprom->caster->stmt);
941 tree lhs_type = TREE_TYPE (lhs);
943 /* If the result of the existing cast is the right width, use it
944 instead of the source of the cast. */
945 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
946 input = lhs;
947 /* If the precision we want is between the source and result
948 precisions of the existing cast, try splitting the cast into
949 two and tapping into a mid-way point. */
950 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
951 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
953 /* In order to preserve the semantics of the original cast,
954 give the mid-way point the same signedness as the input value.
956 It would be possible to use a signed type here instead if
957 TYPE is signed and UNPROM->TYPE is unsigned, but that would
958 make the sign of the midtype sensitive to the order in
959 which we process the statements, since the signedness of
960 TYPE is the signedness required by just one of possibly
961 many users. Also, unsigned promotions are usually as cheap
962 as or cheaper than signed ones, so it's better to keep an
963 unsigned promotion. */
964 tree midtype = build_nonstandard_integer_type
965 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
966 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
967 if (vec_midtype)
969 input = vect_recog_temp_ssa_var (midtype, NULL);
970 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
971 unprom->op);
972 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
973 vec_midtype))
974 append_pattern_def_seq (vinfo, stmt_info,
975 new_stmt, vec_midtype);
979 /* See if we can reuse an existing result. */
980 if (types_compatible_p (type, TREE_TYPE (input)))
981 return input;
984 /* We need a new conversion statement. */
985 tree new_op = vect_recog_temp_ssa_var (type, NULL);
986 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
988 /* If OP is an external value, see if we can insert the new statement
989 on an incoming edge. */
990 if (input == unprom->op && unprom->dt == vect_external_def)
991 if (edge e = vect_get_external_def_edge (vinfo, input))
993 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
994 gcc_assert (!new_bb);
995 return new_op;
998 /* As a (common) last resort, add the statement to the pattern itself. */
999 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
1000 return new_op;
1003 /* Invoke vect_convert_input for N elements of UNPROM and store the
1004 result in the corresponding elements of RESULT.
1006 If SUBTYPE then convert the type based on the subtype. */
1008 static void
1009 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
1010 tree *result, tree type, vect_unpromoted_value *unprom,
1011 tree vectype, enum optab_subtype subtype = optab_default)
1013 for (unsigned int i = 0; i < n; ++i)
1015 unsigned int j;
1016 for (j = 0; j < i; ++j)
1017 if (unprom[j].op == unprom[i].op)
1018 break;
1020 if (j < i)
1021 result[i] = result[j];
1022 else
1023 result[i] = vect_convert_input (vinfo, stmt_info,
1024 type, &unprom[i], vectype, subtype);
1028 /* The caller has created a (possibly empty) sequence of pattern definition
1029 statements followed by a single statement PATTERN_STMT. Cast the result
1030 of this final statement to TYPE. If a new statement is needed, add
1031 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
1032 and return the new statement, otherwise return PATTERN_STMT as-is.
1033 VECITYPE is the vector form of PATTERN_STMT's result type. */
1035 static gimple *
1036 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
1037 gimple *pattern_stmt, tree vecitype)
1039 tree lhs = gimple_get_lhs (pattern_stmt);
1040 if (!types_compatible_p (type, TREE_TYPE (lhs)))
1042 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
1043 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
1044 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
1046 return pattern_stmt;
1049 /* Return true if STMT_VINFO describes a reduction for which reassociation
1050 is allowed. If STMT_INFO is part of a group, assume that it's part of
1051 a reduction chain and optimistically assume that all statements
1052 except the last allow reassociation.
1053 Also require it to have code CODE and to be a reduction
1054 in the outermost loop. When returning true, store the operands in
1055 *OP0_OUT and *OP1_OUT. */
1057 static bool
1058 vect_reassociating_reduction_p (vec_info *vinfo,
1059 stmt_vec_info stmt_info, tree_code code,
1060 tree *op0_out, tree *op1_out)
1062 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1063 if (!loop_info)
1064 return false;
1066 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1067 if (!assign || gimple_assign_rhs_code (assign) != code)
1068 return false;
1070 /* We don't allow changing the order of the computation in the inner-loop
1071 when doing outer-loop vectorization. */
1072 class loop *loop = LOOP_VINFO_LOOP (loop_info);
1073 if (loop && nested_in_vect_loop_p (loop, stmt_info))
1074 return false;
1076 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1078 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1079 code))
1080 return false;
1082 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1083 return false;
1085 *op0_out = gimple_assign_rhs1 (assign);
1086 *op1_out = gimple_assign_rhs2 (assign);
1087 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1088 std::swap (*op0_out, *op1_out);
1089 return true;
1092 /* match.pd function to match
1093 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1094 with conditions:
1095 1) @1, @2, c, d, a, b are all integral type.
1096 2) There's single_use for both @1 and @2.
1097 3) a, c have same precision.
1098 4) c and @1 have different precision.
1099 5) c, d are the same type or they can differ in sign when convert is
1100 truncation.
1102 record a and c and d and @3. */
1104 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1106 /* Function vect_recog_cond_expr_convert
1108 Try to find the following pattern:
1110 TYPE_AB A,B;
1111 TYPE_CD C,D;
1112 TYPE_E E;
1113 TYPE_E op_true = (TYPE_E) A;
1114 TYPE_E op_false = (TYPE_E) B;
1116 E = C cmp D ? op_true : op_false;
1118 where
1119 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1120 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1121 single_use of op_true and op_false.
1122 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1124 Input:
1126 * STMT_VINFO: The stmt from which the pattern search begins.
1127 here it starts with E = c cmp D ? op_true : op_false;
1129 Output:
1131 TYPE1 E' = C cmp D ? A : B;
1132 TYPE3 E = (TYPE3) E';
1134 There may be an extra nop_convert for A or B to handle different signedness.
1136 * TYPE_OUT: The vector type of the output of this pattern.
1138 * Return value: A new stmt that will be used to replace the sequence of
1139 stmts that constitute the pattern. In this case it will be:
1140 E = (TYPE3)E';
1141 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
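/* A minimal illustrative sketch, assuming 32-bit int and 64-bit long:

     int a[N], b[N], c[N], d[N];
     long e[N];
     for (int i = 0; i < N; i++)
       e[i] = c[i] > d[i] ? (long) a[i] : (long) b[i];

   The selection is then performed in the 32-bit type and a single
   widening conversion of the selected value follows it.  */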
1143 static gimple *
1144 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1145 stmt_vec_info stmt_vinfo, tree *type_out)
1147 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1148 tree lhs, match[4], temp, type, new_lhs, op2;
1149 gimple *cond_stmt;
1150 gimple *pattern_stmt;
1152 if (!last_stmt)
1153 return NULL;
1155 lhs = gimple_assign_lhs (last_stmt);
1157 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1158 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1159 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1160 return NULL;
1162 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1164 op2 = match[2];
1165 type = TREE_TYPE (match[1]);
1166 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1168 op2 = vect_recog_temp_ssa_var (type, NULL);
1169 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1170 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1171 get_vectype_for_scalar_type (vinfo, type));
1174 temp = vect_recog_temp_ssa_var (type, NULL);
1175 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1176 match[1], op2));
1177 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1178 get_vectype_for_scalar_type (vinfo, type));
1179 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1180 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1181 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1183 if (dump_enabled_p ())
1184 dump_printf_loc (MSG_NOTE, vect_location,
1185 "created pattern stmt: %G", pattern_stmt);
1186 return pattern_stmt;
1189 /* Function vect_recog_dot_prod_pattern
1191 Try to find the following pattern:
1193 type1a x_t
1194 type1b y_t;
1195 TYPE1 prod;
1196 TYPE2 sum = init;
1197 loop:
1198 sum_0 = phi <init, sum_1>
1199 S1 x_t = ...
1200 S2 y_t = ...
1201 S3 x_T = (TYPE1) x_t;
1202 S4 y_T = (TYPE1) y_t;
1203 S5 prod = x_T * y_T;
1204 [S6 prod = (TYPE2) prod; #optional]
1205 S7 sum_1 = prod + sum_0;
1207 where 'TYPE1' is exactly double the size of types 'type1a' and 'type1b',
1208 the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
1209 and the signs of 'type1a' and 'type1b' can differ.
1211 Input:
1213 * STMT_VINFO: The stmt from which the pattern search begins. In the
1214 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1215 will be detected.
1217 Output:
1219 * TYPE_OUT: The type of the output of this pattern.
1221 * Return value: A new stmt that will be used to replace the sequence of
1222 stmts that constitute the pattern. In this case it will be:
1223 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1225 Note: The dot-prod idiom is a widening reduction pattern that is
1226 vectorized without preserving all the intermediate results. It
1227 produces only N/2 (widened) results (by summing up pairs of
1228 intermediate results) rather than all N results. Therefore, we
1229 cannot allow this pattern when we want to get all the results and in
1230 the correct order (as is the case when this computation is in an
1231 inner-loop nested in an outer-loop that is being vectorized). */
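/* A minimal illustrative sketch of a source loop this pattern is meant to
   catch, assuming 8-bit inputs and a 32-bit accumulator:

     signed char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i] * y[i];

   The multiplication is recognized as a widening multiply of the char
   inputs, and the reduction becomes DOT_PROD_EXPR <x_t, y_t, sum_0> when
   the target supports it.  */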
1233 static gimple *
1234 vect_recog_dot_prod_pattern (vec_info *vinfo,
1235 stmt_vec_info stmt_vinfo, tree *type_out)
1237 tree oprnd0, oprnd1;
1238 gimple *last_stmt = stmt_vinfo->stmt;
1239 tree type, half_type;
1240 gimple *pattern_stmt;
1241 tree var;
1243 /* Look for the following pattern
1244 DX = (TYPE1) X;
1245 DY = (TYPE1) Y;
1246 DPROD = DX * DY;
1247 DDPROD = (TYPE2) DPROD;
1248 sum_1 = DDPROD + sum_0;
1249 In which
1250 - DX is double the size of X
1251 - DY is double the size of Y
1252 - DX, DY, DPROD all have the same type but the sign
1253 between X, Y and DPROD can differ.
1254 - sum is the same size as DPROD or bigger
1255 - sum has been recognized as a reduction variable.
1257 This is equivalent to:
1258 DPROD = X w* Y; #widen mult
1259 sum_1 = DPROD w+ sum_0; #widen summation
1261 DPROD = X w* Y; #widen mult
1262 sum_1 = DPROD + sum_0; #summation
1265 /* Starting from LAST_STMT, follow the defs of its uses in search
1266 of the above pattern. */
1268 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1269 &oprnd0, &oprnd1))
1270 return NULL;
1272 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1274 vect_unpromoted_value unprom_mult;
1275 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1277 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1278 we know that oprnd1 is the reduction variable (defined by a loop-header
1279 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1280 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1281 if (!oprnd0)
1282 return NULL;
1284 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1285 if (!mult_vinfo)
1286 return NULL;
1288 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1289 inside the loop (in case we are analyzing an outer-loop). */
1290 vect_unpromoted_value unprom0[2];
1291 enum optab_subtype subtype = optab_vector;
1292 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1293 false, 2, unprom0, &half_type, &subtype))
1294 return NULL;
1296 /* If there are two widening operations, make sure they agree on the sign
1297 of the extension. The result of an optab_vector_mixed_sign operation
1298 is signed; otherwise, the result has the same sign as the operands. */
1299 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1300 && (subtype == optab_vector_mixed_sign
1301 ? TYPE_UNSIGNED (unprom_mult.type)
1302 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1303 return NULL;
1305 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1307 /* If the inputs have mixed signs, canonicalize on using the signed
1308 input type for analysis. This also helps when emulating mixed-sign
1309 operations using signed operations. */
1310 if (subtype == optab_vector_mixed_sign)
1311 half_type = signed_type_for (half_type);
1313 tree half_vectype;
1314 if (!vect_supportable_conv_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1315 type_out, &half_vectype, subtype))
1317 /* We can emulate a mixed-sign dot-product using a sequence of
1318 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1319 if (subtype != optab_vector_mixed_sign
1320 || !vect_supportable_conv_optab_p (vinfo, signed_type_for (type),
1321 DOT_PROD_EXPR, half_type,
1322 type_out, &half_vectype,
1323 optab_vector))
1324 return NULL;
1326 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1327 *type_out);
1330 /* Get the inputs in the appropriate types. */
1331 tree mult_oprnd[2];
1332 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1333 unprom0, half_vectype, subtype);
1335 var = vect_recog_temp_ssa_var (type, NULL);
1336 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1337 mult_oprnd[0], mult_oprnd[1], oprnd1);
1339 return pattern_stmt;
1343 /* Function vect_recog_sad_pattern
1345 Try to find the following Sum of Absolute Difference (SAD) pattern:
1347 type x_t, y_t;
1348 signed TYPE1 diff, abs_diff;
1349 TYPE2 sum = init;
1350 loop:
1351 sum_0 = phi <init, sum_1>
1352 S1 x_t = ...
1353 S2 y_t = ...
1354 S3 x_T = (TYPE1) x_t;
1355 S4 y_T = (TYPE1) y_t;
1356 S5 diff = x_T - y_T;
1357 S6 abs_diff = ABS_EXPR <diff>;
1358 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1359 S8 sum_1 = abs_diff + sum_0;
1361 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1362 same size as 'TYPE1' or bigger. This is a special case of a reduction
1363 computation.
1365 Input:
1367 * STMT_VINFO: The stmt from which the pattern search begins. In the
1368 example, when this function is called with S8, the pattern
1369 {S3,S4,S5,S6,S7,S8} will be detected.
1371 Output:
1373 * TYPE_OUT: The type of the output of this pattern.
1375 * Return value: A new stmt that will be used to replace the sequence of
1376 stmts that constitute the pattern. In this case it will be:
1377 SAD_EXPR <x_t, y_t, sum_0> */
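/* A minimal illustrative sketch of the classic SAD loop, assuming
   unsigned char pixels and an int accumulator:

     unsigned char x[N], y[N];
     int sad = 0;
     for (int i = 0; i < N; i++)
       sad += __builtin_abs (x[i] - y[i]);

   which becomes SAD_EXPR <x_t, y_t, sad_0> on targets that provide a
   sum-of-absolute-differences instruction.  */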
1380 static gimple *
1381 vect_recog_sad_pattern (vec_info *vinfo,
1382 stmt_vec_info stmt_vinfo, tree *type_out)
1384 gimple *last_stmt = stmt_vinfo->stmt;
1385 tree half_type;
1387 /* Look for the following pattern
1388 DX = (TYPE1) X;
1389 DY = (TYPE1) Y;
1390 DDIFF = DX - DY;
1391 DAD = ABS_EXPR <DDIFF>;
1392 DAD = (TYPE2) DAD; #optional
1393 sum_1 = DAD + sum_0;
1394 In which
1395 - DX is at least double the size of X
1396 - DY is at least double the size of Y
1397 - DX, DY, DDIFF, DAD all have the same type
1398 - sum is the same size as DAD or bigger
1399 - sum has been recognized as a reduction variable.
1401 This is equivalent to:
1402 DDIFF = X w- Y; #widen sub
1403 DAD = ABS_EXPR <DDIFF>;
1404 sum_1 = DAD w+ sum_0; #widen summation
1406 DDIFF = X w- Y; #widen sub
1407 DAD = ABS_EXPR <DDIFF>;
1408 sum_1 = DAD + sum_0; #summation
1411 /* Starting from LAST_STMT, follow the defs of its uses in search
1412 of the above pattern. */
1414 tree plus_oprnd0, plus_oprnd1;
1415 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1416 &plus_oprnd0, &plus_oprnd1))
1417 return NULL;
1419 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1421 /* Any non-truncating sequence of conversions is OK here, since
1422 with a successful match, the result of the ABS(U) is known to fit
1423 within the nonnegative range of the result type. (It cannot be the
1424 negative of the minimum signed value due to the range of the widening
1425 MINUS_EXPR.) */
1426 vect_unpromoted_value unprom_abs;
1427 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1428 &unprom_abs);
1430 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1431 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1432 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1433 Then check that plus_oprnd0 is defined by an abs_expr. */
1435 if (!plus_oprnd0)
1436 return NULL;
1438 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1439 if (!abs_stmt_vinfo)
1440 return NULL;
1442 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1443 inside the loop (in case we are analyzing an outer-loop). */
1444 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1445 vect_unpromoted_value unprom[2];
1447 if (!abs_stmt)
1449 gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1450 if (!abd_stmt
1451 || !gimple_call_internal_p (abd_stmt)
1452 || gimple_call_num_args (abd_stmt) != 2)
1453 return NULL;
1455 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1456 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1458 if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
1460 if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
1461 &unprom[0])
1462 || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
1463 &unprom[1]))
1464 return NULL;
1466 else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1468 unprom[0].op = abd_oprnd0;
1469 unprom[0].type = TREE_TYPE (abd_oprnd0);
1470 unprom[1].op = abd_oprnd1;
1471 unprom[1].type = TREE_TYPE (abd_oprnd1);
1473 else
1474 return NULL;
1476 half_type = unprom[0].type;
1478 else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1479 unprom, NULL))
1480 return NULL;
1482 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1484 tree half_vectype;
1485 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1486 type_out, &half_vectype))
1487 return NULL;
1489 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1490 tree sad_oprnd[2];
1491 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1492 unprom, half_vectype);
1494 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1495 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1496 sad_oprnd[1], plus_oprnd1);
1498 return pattern_stmt;
1501 /* Function vect_recog_abd_pattern
1503 Try to find the following ABsolute Difference (ABD) or
1504 widening ABD (WIDEN_ABD) pattern:
1506 TYPE1 x;
1507 TYPE2 y;
1508 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1509 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1510 TYPE3 diff = x_cast - y_cast;
1511 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1512 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1514 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1515 twice as wide as TYPE3.
1517 Input:
1519 * STMT_VINFO: The stmt from which the pattern search begins
1521 Output:
1523 * TYPE_OUT: The type of the output of this pattern
1525 * Return value: A new stmt that will be used to replace the sequence of
1526 stmts that constitute the pattern, principally:
1527 out = IFN_ABD (x, y)
1528 out = IFN_WIDEN_ABD (x, y) */
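/* A minimal illustrative sketch, assuming unsigned char inputs and
   outputs:

     unsigned char x[N], y[N], out[N];
     for (int i = 0; i < N; i++)
       out[i] = __builtin_abs (x[i] - y[i]);

   The subtraction happens after promotion to int, so the ABS of the
   difference can be rewritten as out = IFN_ABD (x, y) on targets with an
   absolute-difference instruction.  */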
1531 static gimple *
1532 vect_recog_abd_pattern (vec_info *vinfo,
1533 stmt_vec_info stmt_vinfo, tree *type_out)
1535 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1536 if (!last_stmt)
1537 return NULL;
1539 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1541 vect_unpromoted_value unprom[2];
1542 gassign *diff_stmt = NULL;
1543 tree abd_in_type;
1544 if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
1545 unprom, &diff_stmt))
1547 /* We cannot try further without having a non-widening MINUS. */
1548 if (!diff_stmt)
1549 return NULL;
1551 unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1552 unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1553 abd_in_type = signed_type_for (out_type);
1556 tree abd_out_type = abd_in_type;
1558 tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1559 if (!vectype_in)
1560 return NULL;
1562 internal_fn ifn = IFN_ABD;
1563 tree vectype_out = vectype_in;
1565 if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1566 && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1568 tree mid_type
1569 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1570 TYPE_UNSIGNED (abd_in_type));
1571 tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1573 code_helper dummy_code;
1574 int dummy_int;
1575 auto_vec<tree> dummy_vec;
1576 if (mid_vectype
1577 && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1578 stmt_vinfo, mid_vectype,
1579 vectype_in,
1580 &dummy_code, &dummy_code,
1581 &dummy_int, &dummy_vec))
1583 ifn = IFN_VEC_WIDEN_ABD;
1584 abd_out_type = mid_type;
1585 vectype_out = mid_vectype;
1589 if (ifn == IFN_ABD
1590 && !direct_internal_fn_supported_p (ifn, vectype_in,
1591 OPTIMIZE_FOR_SPEED))
1592 return NULL;
1594 vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1596 tree abd_oprnds[2];
1597 vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1598 abd_in_type, unprom, vectype_in);
1600 *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1602 tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1603 gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1604 abd_oprnds[0], abd_oprnds[1]);
1605 gimple_call_set_lhs (abd_stmt, abd_result);
1606 gimple_set_location (abd_stmt, gimple_location (last_stmt));
1608 gimple *stmt = abd_stmt;
1609 if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1610 && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1611 && !TYPE_UNSIGNED (abd_out_type))
1613 tree unsign = unsigned_type_for (abd_out_type);
1614 stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
1615 vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
1618 return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1621 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1622 so that it can be treated as though it had the form:
1624 A_TYPE a;
1625 B_TYPE b;
1626 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1627 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1628 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1629 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1630 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1632 Try to replace the pattern with:
1634 A_TYPE a;
1635 B_TYPE b;
1636 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1637 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1638 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1639 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1641 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1643 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1644 name of the pattern being matched, for dump purposes. */
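/* A minimal illustrative sketch of the widening-multiply case, assuming
   16-bit short and 32-bit int:

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   Here HALF_TYPE is short and RES_TYPE is int, and the promote-then-multiply
   sequence is replaced by a single WIDEN_MULT_EXPR on the short vectors.  */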
1646 static gimple *
1647 vect_recog_widen_op_pattern (vec_info *vinfo,
1648 stmt_vec_info last_stmt_info, tree *type_out,
1649 tree_code orig_code, code_helper wide_code,
1650 bool shift_p, const char *name)
1652 gimple *last_stmt = last_stmt_info->stmt;
1654 vect_unpromoted_value unprom[2];
1655 tree half_type;
1656 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1657 shift_p, 2, unprom, &half_type))
1659 return NULL;
1661 /* Pattern detected. */
1662 vect_pattern_detected (name, last_stmt);
1664 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1665 tree itype = type;
1666 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1667 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1668 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1669 TYPE_UNSIGNED (half_type));
1671 /* Check target support */
1672 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1673 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1674 tree ctype = itype;
1675 tree vecctype = vecitype;
1676 if (orig_code == MINUS_EXPR
1677 && TYPE_UNSIGNED (itype)
1678 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1680 /* Subtraction is special, even if half_type is unsigned and no matter
1681 whether type is signed or unsigned, if type is wider than itype,
1682 we need to sign-extend from the widening operation result to the
1683 result type.
1684 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1685 itype unsigned short and type either int or unsigned int.
1686 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1687 (unsigned short) 0xffff, but for type int we want the result -1
1688 and for type unsigned int 0xffffffff rather than 0xffff. */
1689 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1690 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1693 code_helper dummy_code;
1694 int dummy_int;
1695 auto_vec<tree> dummy_vec;
1696 if (!vectype
1697 || !vecitype
1698 || !vecctype
1699 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1700 vecitype, vectype,
1701 &dummy_code, &dummy_code,
1702 &dummy_int, &dummy_vec))
1703 return NULL;
1705 *type_out = get_vectype_for_scalar_type (vinfo, type);
1706 if (!*type_out)
1707 return NULL;
1709 tree oprnd[2];
1710 vect_convert_inputs (vinfo, last_stmt_info,
1711 2, oprnd, half_type, unprom, vectype);
1713 tree var = vect_recog_temp_ssa_var (itype, NULL);
1714 gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1716 if (vecctype != vecitype)
1717 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1718 pattern_stmt, vecitype);
1720 return vect_convert_output (vinfo, last_stmt_info,
1721 type, pattern_stmt, vecctype);
1724 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1725 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1727 static gimple *
1728 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1729 tree *type_out)
1731 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1732 MULT_EXPR, WIDEN_MULT_EXPR, false,
1733 "vect_recog_widen_mult_pattern");
1736 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1737 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1739 static gimple *
1740 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1741 tree *type_out)
1743 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1744 PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1745 false, "vect_recog_widen_plus_pattern");
1748 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1749 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1750 static gimple *
1751 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1752 tree *type_out)
1754 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1755 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1756 false, "vect_recog_widen_minus_pattern");
1759 /* Try to detect abd on widened inputs, converting IFN_ABD
1760 to IFN_VEC_WIDEN_ABD. */
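/* An illustrative sketch (not taken from the sources) of source code that
   can end up here, assuming the ABD pattern has already recognized the
   conditional absolute difference as .ABD; the names are made up:

     void
     widen_abd (unsigned short *restrict out, const unsigned char *restrict a,
                const unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
     }

   The unsigned char absolute difference is zero-extended to unsigned short,
   which IFN_VEC_WIDEN_ABD expresses in a single operation.  */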
1761 static gimple *
1762 vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1763 tree *type_out)
1765 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1766 if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1767 return NULL;
1769 tree last_rhs = gimple_assign_rhs1 (last_stmt);
1771 tree in_type = TREE_TYPE (last_rhs);
1772 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1773 if (!INTEGRAL_TYPE_P (in_type)
1774 || !INTEGRAL_TYPE_P (out_type)
1775 || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1776 || !TYPE_UNSIGNED (in_type))
1777 return NULL;
1779 vect_unpromoted_value unprom;
1780 tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1781 if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1782 return NULL;
1784 stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1785 if (!abd_pattern_vinfo)
1786 return NULL;
1788 gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1789 if (!abd_stmt
1790 || !gimple_call_internal_p (abd_stmt)
1791 || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1792 return NULL;
1794 tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1795 tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1797 code_helper dummy_code;
1798 int dummy_int;
1799 auto_vec<tree> dummy_vec;
1800 if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1801 vectype_out, vectype_in,
1802 &dummy_code, &dummy_code,
1803 &dummy_int, &dummy_vec))
1804 return NULL;
1806 vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1808 *type_out = vectype_out;
1810 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1811 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1812 tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1813 gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1814 abd_oprnd0, abd_oprnd1);
1815 gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1816 gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1817 return widen_abd_stmt;
1820 /* Function vect_recog_ctz_ffs_pattern
1822 Try to find the following pattern:
1824 TYPE1 A;
1825 TYPE1 B;
1827 B = __builtin_ctz{,l,ll} (A);
1831 B = __builtin_ffs{,l,ll} (A);
1833 Input:
1835 * STMT_VINFO: The stmt from which the pattern search begins.
1836 here it starts with B = __builtin_* (A);
1838 Output:
1840 * TYPE_OUT: The vector type of the output of this pattern.
1842 * Return value: A new stmt that will be used to replace the sequence of
1843 stmts that constitute the pattern, using clz or popcount builtins. */
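/* An illustrative sketch (not taken from the sources): a loop of this shape
   on a target that only has vector clz or popcount can still be vectorized
   via the identities used below, e.g. .CTZ (X) = PREC - .CLZ ((X - 1) & ~X);
   the names are made up:

     void
     ctz_loop (int *restrict out, const unsigned int *restrict in, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = __builtin_ctz (in[i]);   // each in[i] assumed non-zero
     }
*/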
1845 static gimple *
1846 vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1847 tree *type_out)
1849 gimple *call_stmt = stmt_vinfo->stmt;
1850 gimple *pattern_stmt;
1851 tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1852 tree new_var;
1853 internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1854 bool defined_at_zero = true, defined_at_zero_new = false;
1855 int val = 0, val_new = 0, val_cmp = 0;
1856 int prec;
1857 int sub = 0, add = 0;
1858 location_t loc;
1860 if (!is_gimple_call (call_stmt))
1861 return NULL;
1863 if (gimple_call_num_args (call_stmt) != 1
1864 && gimple_call_num_args (call_stmt) != 2)
1865 return NULL;
1867 rhs_oprnd = gimple_call_arg (call_stmt, 0);
1868 rhs_type = TREE_TYPE (rhs_oprnd);
1869 lhs_oprnd = gimple_call_lhs (call_stmt);
1870 if (!lhs_oprnd)
1871 return NULL;
1872 lhs_type = TREE_TYPE (lhs_oprnd);
1873 if (!INTEGRAL_TYPE_P (lhs_type)
1874 || !INTEGRAL_TYPE_P (rhs_type)
1875 || !type_has_mode_precision_p (rhs_type)
1876 || TREE_CODE (rhs_oprnd) != SSA_NAME)
1877 return NULL;
1879 switch (gimple_call_combined_fn (call_stmt))
1881 CASE_CFN_CTZ:
1882 ifn = IFN_CTZ;
1883 if (!gimple_call_internal_p (call_stmt)
1884 || gimple_call_num_args (call_stmt) != 2)
1885 defined_at_zero = false;
1886 else
1887 val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
1888 break;
1889 CASE_CFN_FFS:
1890 ifn = IFN_FFS;
1891 break;
1892 default:
1893 return NULL;
1896 prec = TYPE_PRECISION (rhs_type);
1897 loc = gimple_location (call_stmt);
1899 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1900 if (!vec_type)
1901 return NULL;
1903 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1904 if (!vec_rhs_type)
1905 return NULL;
1907 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1908 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1909 popcount<vector_mode>2. */
1910 if (!vec_type
1911 || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1912 OPTIMIZE_FOR_SPEED))
1913 return NULL;
1915 if (ifn == IFN_FFS
1916 && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1917 OPTIMIZE_FOR_SPEED))
1919 ifnnew = IFN_CTZ;
1920 defined_at_zero_new
1921 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1922 val_new) == 2;
1924 else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1925 OPTIMIZE_FOR_SPEED))
1927 ifnnew = IFN_CLZ;
1928 defined_at_zero_new
1929 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1930 val_new) == 2;
1932 if ((ifnnew == IFN_LAST
1933 || (defined_at_zero && !defined_at_zero_new))
1934 && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1935 OPTIMIZE_FOR_SPEED))
1937 ifnnew = IFN_POPCOUNT;
1938 defined_at_zero_new = true;
1939 val_new = prec;
1941 if (ifnnew == IFN_LAST)
1942 return NULL;
1944 vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
1946 val_cmp = val_new;
1947 if ((ifnnew == IFN_CLZ
1948 && defined_at_zero
1949 && defined_at_zero_new
1950 && val == prec
1951 && val_new == prec)
1952 || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1954 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1955 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1956 if (ifnnew == IFN_CLZ)
1957 sub = prec;
1958 val_cmp = prec;
1960 if (!TYPE_UNSIGNED (rhs_type))
1962 rhs_type = unsigned_type_for (rhs_type);
1963 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1964 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1965 pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1966 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1967 vec_rhs_type);
1968 rhs_oprnd = new_var;
1971 tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1972 pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1973 build_int_cst (rhs_type, -1));
1974 gimple_set_location (pattern_stmt, loc);
1975 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1977 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1978 pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1979 gimple_set_location (pattern_stmt, loc);
1980 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1981 rhs_oprnd = new_var;
1983 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1984 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1985 m1, rhs_oprnd);
1986 gimple_set_location (pattern_stmt, loc);
1987 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1988 rhs_oprnd = new_var;
1990 else if (ifnnew == IFN_CLZ)
1992 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1993 .FFS (X) = PREC - .CLZ (X & -X). */
1994 sub = prec - (ifn == IFN_CTZ);
1995 val_cmp = sub - val_new;
1997 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1998 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1999 gimple_set_location (pattern_stmt, loc);
2000 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
2002 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
2003 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
2004 rhs_oprnd, neg);
2005 gimple_set_location (pattern_stmt, loc);
2006 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
2007 rhs_oprnd = new_var;
2009 else if (ifnnew == IFN_POPCOUNT)
2011 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
2012 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
2013 sub = prec + (ifn == IFN_FFS);
2014 val_cmp = sub;
2016 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
2017 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
2018 gimple_set_location (pattern_stmt, loc);
2019 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
2021 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
2022 pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
2023 rhs_oprnd, neg);
2024 gimple_set_location (pattern_stmt, loc);
2025 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
2026 rhs_oprnd = new_var;
2028 else if (ifnnew == IFN_CTZ)
2030 /* .FFS (X) = .CTZ (X) + 1. */
2031 add = 1;
2032 val_cmp++;
2035 /* Create B = .IFNNEW (A). */
2036 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2037 if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
2038 pattern_stmt
2039 = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
2040 build_int_cst (integer_type_node,
2041 val_new));
2042 else
2043 pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2044 gimple_call_set_lhs (pattern_stmt, new_var);
2045 gimple_set_location (pattern_stmt, loc);
2046 *type_out = vec_type;
2048 if (sub)
2050 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2051 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2052 pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2053 build_int_cst (lhs_type, sub),
2054 new_var);
2055 gimple_set_location (pattern_stmt, loc);
2056 new_var = ret_var;
2058 else if (add)
2060 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2061 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2062 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2063 build_int_cst (lhs_type, add));
2064 gimple_set_location (pattern_stmt, loc);
2065 new_var = ret_var;
2068 if (defined_at_zero
2069 && (!defined_at_zero_new || val != val_cmp))
2071 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2072 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2073 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2074 rhs_type = TREE_TYPE (rhs_oprnd);
2075 tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
2076 rhs_oprnd, build_zero_cst (rhs_type));
2077 pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2078 new_var,
2079 build_int_cst (lhs_type, val));
2082 if (dump_enabled_p ())
2083 dump_printf_loc (MSG_NOTE, vect_location,
2084 "created pattern stmt: %G", pattern_stmt);
2086 return pattern_stmt;
2089 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2091 Try to find the following pattern:
2093 UTYPE1 A;
2094 TYPE1 B;
2095 UTYPE2 temp_in;
2096 TYPE3 temp_out;
2097 temp_in = (UTYPE2)A;
2099 temp_out = __builtin_popcount{,l,ll} (temp_in);
2100 B = (TYPE1) temp_out;
2102 UTYPE2 may or may not be equal to TYPE3,
2103 e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount
2104 but not equal to TYPE3 for __builtin_popcountll.
2106 Input:
2108 * STMT_VINFO: The stmt from which the pattern search begins.
2109 here it starts with B = (TYPE1) temp_out;
2111 Output:
2113 * TYPE_OUT: The vector type of the output of this pattern.
2115 * Return value: A new stmt that will be used to replace the sequence of
2116 stmts that constitute the pattern. In this case it will be:
2117 B = .POPCOUNT (A);
2119 Similarly for clz, ctz and ffs.
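/* An illustrative sketch (not taken from the sources): a typical source
   shape for this pattern, where the 32-bit argument is zero-extended for
   the 64-bit builtin and the result truncated back; the names are made up:

     void
     popcount_loop (int *restrict out, const unsigned int *restrict in, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = __builtin_popcountll (in[i]);
     }

   The zero extension adds no set bits, so each element can instead be
   computed with a 32-bit .POPCOUNT.  */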
2122 static gimple *
2123 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2124 stmt_vec_info stmt_vinfo,
2125 tree *type_out)
2127 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2128 gimple *call_stmt, *pattern_stmt;
2129 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2130 internal_fn ifn = IFN_LAST;
2131 int addend = 0;
2133 /* Find B = (TYPE1) temp_out. */
2134 if (!last_stmt)
2135 return NULL;
2136 tree_code code = gimple_assign_rhs_code (last_stmt);
2137 if (!CONVERT_EXPR_CODE_P (code))
2138 return NULL;
2140 lhs_oprnd = gimple_assign_lhs (last_stmt);
2141 lhs_type = TREE_TYPE (lhs_oprnd);
2142 if (!INTEGRAL_TYPE_P (lhs_type))
2143 return NULL;
2145 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2146 if (TREE_CODE (rhs_oprnd) != SSA_NAME
2147 || !has_single_use (rhs_oprnd))
2148 return NULL;
2149 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2151 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2152 if (!is_gimple_call (call_stmt))
2153 return NULL;
2154 switch (gimple_call_combined_fn (call_stmt))
2156 int val;
2157 CASE_CFN_POPCOUNT:
2158 ifn = IFN_POPCOUNT;
2159 break;
2160 CASE_CFN_CLZ:
2161 ifn = IFN_CLZ;
2162 /* Punt if call result is unsigned and defined value at zero
2163 is negative, as the negative value doesn't extend correctly. */
2164 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2165 && gimple_call_internal_p (call_stmt)
2166 && CLZ_DEFINED_VALUE_AT_ZERO
2167 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2168 && val < 0)
2169 return NULL;
2170 break;
2171 CASE_CFN_CTZ:
2172 ifn = IFN_CTZ;
2173 /* Punt if call result is unsigned and defined value at zero
2174 is negative, as the negative value doesn't extend correctly. */
2175 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2176 && gimple_call_internal_p (call_stmt)
2177 && CTZ_DEFINED_VALUE_AT_ZERO
2178 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2179 && val < 0)
2180 return NULL;
2181 break;
2182 CASE_CFN_FFS:
2183 ifn = IFN_FFS;
2184 break;
2185 default:
2186 return NULL;
2189 if (gimple_call_num_args (call_stmt) != 1
2190 && gimple_call_num_args (call_stmt) != 2)
2191 return NULL;
2193 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2194 vect_unpromoted_value unprom_diff;
2195 rhs_origin
2196 = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2198 if (!rhs_origin)
2199 return NULL;
2201 /* Input and output of .POPCOUNT should be integers of the same precision. */
2202 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2203 return NULL;
2205 /* Also A should be unsigned or same precision as temp_in, otherwise
2206 different builtins/internal functions have different behaviors. */
2207 if (TYPE_PRECISION (unprom_diff.type)
2208 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2209 switch (ifn)
2211 case IFN_POPCOUNT:
2212 /* For popcount require zero extension, which doesn't add any
2213 further bits to the count. */
2214 if (!TYPE_UNSIGNED (unprom_diff.type))
2215 return NULL;
2216 break;
2217 case IFN_CLZ:
2218 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2219 if it is undefined at zero or if it matches also for the
2220 defined value there. */
2221 if (!TYPE_UNSIGNED (unprom_diff.type))
2222 return NULL;
2223 if (!type_has_mode_precision_p (lhs_type)
2224 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2225 return NULL;
2226 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2227 - TYPE_PRECISION (lhs_type));
2228 if (gimple_call_internal_p (call_stmt)
2229 && gimple_call_num_args (call_stmt) == 2)
2231 int val1, val2;
2232 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2233 int d2
2234 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2235 val2);
2236 if (d2 != 2 || val1 != val2 + addend)
2237 return NULL;
2239 break;
2240 case IFN_CTZ:
2241 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2242 if it is undefined at zero or if it matches also for the
2243 defined value there. */
2244 if (gimple_call_internal_p (call_stmt)
2245 && gimple_call_num_args (call_stmt) == 2)
2247 int val1, val2;
2248 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2249 int d2
2250 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2251 val2);
2252 if (d2 != 2 || val1 != val2)
2253 return NULL;
2255 break;
2256 case IFN_FFS:
2257 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2258 break;
2259 default:
2260 gcc_unreachable ();
2263 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2264 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2265 if (!vec_type)
2266 return NULL;
2268 bool supported
2269 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2270 if (!supported)
2271 switch (ifn)
2273 case IFN_POPCOUNT:
2274 case IFN_CLZ:
2275 return NULL;
2276 case IFN_FFS:
2277 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2278 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2279 OPTIMIZE_FOR_SPEED))
2280 break;
2281 /* FALLTHRU */
2282 case IFN_CTZ:
2283 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2284 clz or popcount. */
2285 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2286 OPTIMIZE_FOR_SPEED))
2287 break;
2288 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2289 OPTIMIZE_FOR_SPEED))
2290 break;
2291 return NULL;
2292 default:
2293 gcc_unreachable ();
2296 vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2297 call_stmt);
2299 /* Create B = .POPCOUNT (A). */
2300 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2301 tree arg2 = NULL_TREE;
2302 int val;
2303 if (ifn == IFN_CLZ
2304 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2305 val) == 2)
2306 arg2 = build_int_cst (integer_type_node, val);
2307 else if (ifn == IFN_CTZ
2308 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2309 val) == 2)
2310 arg2 = build_int_cst (integer_type_node, val);
2311 if (arg2)
2312 pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
2313 else
2314 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2315 gimple_call_set_lhs (pattern_stmt, new_var);
2316 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2317 *type_out = vec_type;
2319 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_NOTE, vect_location,
2321 "created pattern stmt: %G", pattern_stmt);
2323 if (addend)
2325 gcc_assert (supported);
2326 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2327 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2328 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2329 build_int_cst (lhs_type, addend));
2331 else if (!supported)
2333 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2334 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2335 pattern_stmt
2336 = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2337 if (pattern_stmt == NULL)
2338 return NULL;
2339 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2341 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2342 gimple_seq_add_seq_without_update (pseq, seq);
2345 return pattern_stmt;
2348 /* Function vect_recog_pow_pattern
2350 Try to find the following pattern:
2352 x = POW (y, N);
2354 with POW being one of pow, powf, powi, powif and N being
2355 either 2 or 0.5.
2357 Input:
2359 * STMT_VINFO: The stmt from which the pattern search begins.
2361 Output:
2363 * TYPE_OUT: The type of the output of this pattern.
2365 * Return value: A new stmt that will be used to replace the sequence of
2366 stmts that constitute the pattern. In this case it will be:
2367 x = x * x
2369 x = sqrt (x)
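/* An illustrative sketch (not taken from the sources): loops of the shape
   this pattern rewrites, assuming the math-optimization flags permit the
   usual pow-to-sqrt folding; the names are made up:

     void
     pow_loops (double *restrict out, const double *restrict in, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = __builtin_pow (in[i], 2.0);   // becomes in[i] * in[i]
       for (int i = 0; i < n; i++)
         out[i] = __builtin_pow (in[i], 0.5);   // becomes .SQRT (in[i])
     }
*/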
2372 static gimple *
2373 vect_recog_pow_pattern (vec_info *vinfo,
2374 stmt_vec_info stmt_vinfo, tree *type_out)
2376 gimple *last_stmt = stmt_vinfo->stmt;
2377 tree base, exp;
2378 gimple *stmt;
2379 tree var;
2381 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2382 return NULL;
2384 switch (gimple_call_combined_fn (last_stmt))
2386 CASE_CFN_POW:
2387 CASE_CFN_POWI:
2388 break;
2390 default:
2391 return NULL;
2394 base = gimple_call_arg (last_stmt, 0);
2395 exp = gimple_call_arg (last_stmt, 1);
2396 if (TREE_CODE (exp) != REAL_CST
2397 && TREE_CODE (exp) != INTEGER_CST)
2399 if (flag_unsafe_math_optimizations
2400 && TREE_CODE (base) == REAL_CST
2401 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2403 combined_fn log_cfn;
2404 built_in_function exp_bfn;
2405 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2407 case BUILT_IN_POW:
2408 log_cfn = CFN_BUILT_IN_LOG;
2409 exp_bfn = BUILT_IN_EXP;
2410 break;
2411 case BUILT_IN_POWF:
2412 log_cfn = CFN_BUILT_IN_LOGF;
2413 exp_bfn = BUILT_IN_EXPF;
2414 break;
2415 case BUILT_IN_POWL:
2416 log_cfn = CFN_BUILT_IN_LOGL;
2417 exp_bfn = BUILT_IN_EXPL;
2418 break;
2419 default:
2420 return NULL;
2422 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2423 tree exp_decl = builtin_decl_implicit (exp_bfn);
2424 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2425 does that, but if C is a power of 2, we want to use
2426 exp2 (log2 (C) * x) in the non-vectorized version, but for
2427 vectorization we don't have vectorized exp2. */
2428 if (logc
2429 && TREE_CODE (logc) == REAL_CST
2430 && exp_decl
2431 && lookup_attribute ("omp declare simd",
2432 DECL_ATTRIBUTES (exp_decl)))
2434 cgraph_node *node = cgraph_node::get_create (exp_decl);
2435 if (node->simd_clones == NULL)
2437 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2438 || node->definition)
2439 return NULL;
2440 expand_simd_clones (node);
2441 if (node->simd_clones == NULL)
2442 return NULL;
2444 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2445 if (!*type_out)
2446 return NULL;
2447 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2448 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2449 append_pattern_def_seq (vinfo, stmt_vinfo, g);
2450 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2451 g = gimple_build_call (exp_decl, 1, def);
2452 gimple_call_set_lhs (g, res);
2453 return g;
2457 return NULL;
2460 /* We now have a pow or powi builtin function call with a constant
2461 exponent. */
2463 /* Catch squaring. */
2464 if ((tree_fits_shwi_p (exp)
2465 && tree_to_shwi (exp) == 2)
2466 || (TREE_CODE (exp) == REAL_CST
2467 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2469 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2470 TREE_TYPE (base), type_out))
2471 return NULL;
2473 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2474 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2475 return stmt;
2478 /* Catch square root. */
2479 if (TREE_CODE (exp) == REAL_CST
2480 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2482 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2483 if (*type_out
2484 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2485 OPTIMIZE_FOR_SPEED))
2487 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2488 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2489 gimple_call_set_lhs (stmt, var);
2490 gimple_call_set_nothrow (stmt, true);
2491 return stmt;
2495 return NULL;
2499 /* Function vect_recog_widen_sum_pattern
2501 Try to find the following pattern:
2503 type x_t;
2504 TYPE x_T, sum = init;
2505 loop:
2506 sum_0 = phi <init, sum_1>
2507 S1 x_t = *p;
2508 S2 x_T = (TYPE) x_t;
2509 S3 sum_1 = x_T + sum_0;
2511 where type 'TYPE' is at least double the size of type 'type', i.e. we're
2512 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2513 a special case of a reduction computation.
2515 Input:
2517 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2518 when this function is called with S3, the pattern {S2,S3} will be detected.
2520 Output:
2522 * TYPE_OUT: The type of the output of this pattern.
2524 * Return value: A new stmt that will be used to replace the sequence of
2525 stmts that constitute the pattern. In this case it will be:
2526 WIDEN_SUM <x_t, sum_0>
2528 Note: The widening-sum idiom is a widening reduction pattern that is
2529 vectorized without preserving all the intermediate results. It
2530 produces only N/2 (widened) results (by summing up pairs of
2531 intermediate results) rather than all N results. Therefore, we
2532 cannot allow this pattern when we want to get all the results and in
2533 the correct order (as is the case when this computation is in an
2534 inner-loop nested in an outer-loop that is being vectorized). */
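/* An illustrative sketch (not taken from the sources): a reduction of the
   documented shape with 'type' = unsigned char and 'TYPE' = int; the names
   are made up:

     int
     widen_sum (const unsigned char *restrict p, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += p[i];   // S2/S3: sum_1 = (int) p[i] + sum_0
       return sum;
     }

   Each vector step can then accumulate pairs of widened elements with
   WIDEN_SUM rather than keeping every intermediate int value.  */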
2536 static gimple *
2537 vect_recog_widen_sum_pattern (vec_info *vinfo,
2538 stmt_vec_info stmt_vinfo, tree *type_out)
2540 gimple *last_stmt = stmt_vinfo->stmt;
2541 tree oprnd0, oprnd1;
2542 tree type;
2543 gimple *pattern_stmt;
2544 tree var;
2546 /* Look for the following pattern
2547 DX = (TYPE) X;
2548 sum_1 = DX + sum_0;
2549 In which DX is at least double the size of X, and sum_1 has been
2550 recognized as a reduction variable.
2553 /* Starting from LAST_STMT, follow the defs of its uses in search
2554 of the above pattern. */
2556 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2557 &oprnd0, &oprnd1)
2558 || TREE_CODE (oprnd0) != SSA_NAME
2559 || !vinfo->lookup_def (oprnd0))
2560 return NULL;
2562 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2564 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2565 we know that oprnd1 is the reduction variable (defined by a loop-header
2566 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2567 Left to check that oprnd0 is defined by a cast from type 'type' to type
2568 'TYPE'. */
2570 vect_unpromoted_value unprom0;
2571 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2572 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2573 return NULL;
2575 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2577 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2578 unprom0.type, type_out))
2579 return NULL;
2581 var = vect_recog_temp_ssa_var (type, NULL);
2582 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2584 return pattern_stmt;
2587 /* Function vect_recog_bitfield_ref_pattern
2589 Try to find the following pattern:
2591 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2592 result = (type_out) bf_value;
2596 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2598 where type_out is a non-bitfield type, that is to say, its precision matches
2599 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2601 Input:
2603 * STMT_VINFO: The stmt from which the pattern search begins.
2604 here it starts with:
2605 result = (type_out) bf_value;
2609 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2611 Output:
2613 * TYPE_OUT: The vector type of the output of this pattern.
2615 * Return value: A new stmt that will be used to replace the sequence of
2616 stmts that constitute the pattern. If the precision of type_out is bigger
2617 than the precision type of _1 we perform the widening before the shifting,
2618 since the new precision will be large enough to shift the value and moving
2619 widening operations up the statement chain enables the generation of
2620 widening loads. If we are widening and the operation after the pattern is
2621 an addition then we mask first and shift later, to enable the generation of
2622 shifting adds. In the case of narrowing we will always mask first, shift
2623 last and then perform a narrowing operation. This will enable the
2624 generation of narrowing shifts.
2626 Widening with mask first, shift later:
2627 container = (type_out) container;
2628 masked = container & (((1 << bitsize) - 1) << bitpos);
2629 result = masked >> bitpos;
2631 Widening with shift first, mask last:
2632 container = (type_out) container;
2633 shifted = container >> bitpos;
2634 result = shifted & ((1 << bitsize) - 1);
2636 Narrowing:
2637 masked = container & (((1 << bitsize) - 1) << bitpos);
2638 result = masked >> bitpos;
2639 result = (type_out) result;
2641 If the bitfield is signed and it's wider than type_out, we need to
2642 keep the result sign-extended:
2643 container = (type) container;
2644 masked = container << (prec - bitsize - bitpos);
2645 result = (type_out) (masked >> (prec - bitsize));
2647 Here type is the signed variant of the wider of type_out and the type
2648 of container.
2650 The shifting is always optional depending on whether bitpos != 0.
2652 When the original bitfield access was inside a gcond, a new gcond is also
2653 generated with the new `result` as the operand to the comparison.
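/* An illustrative sketch (not taken from the sources): reading a bitfield
   member in a loop produces the BIT_FIELD_REF plus conversion shape
   described above; the struct and names are made up:

     struct item { unsigned int tag : 5; unsigned int val : 27; };

     void
     read_tags (int *restrict out, const struct item *restrict in, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = in[i].tag;   // lowered to a shift and mask of the container
     }
*/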
2657 static gimple *
2658 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2659 tree *type_out)
2661 gimple *bf_stmt = NULL;
2662 tree lhs = NULL_TREE;
2663 tree ret_type = NULL_TREE;
2664 gimple *stmt = STMT_VINFO_STMT (stmt_info);
2665 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
2667 tree op = gimple_cond_lhs (cond_stmt);
2668 if (TREE_CODE (op) != SSA_NAME)
2669 return NULL;
2670 bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2671 if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2672 return NULL;
2674 else if (is_gimple_assign (stmt)
2675 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2676 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2678 gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2679 bf_stmt = dyn_cast <gassign *> (second_stmt);
2680 lhs = gimple_assign_lhs (stmt);
2681 ret_type = TREE_TYPE (lhs);
2684 if (!bf_stmt
2685 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2686 return NULL;
2688 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2689 tree container = TREE_OPERAND (bf_ref, 0);
2690 ret_type = ret_type ? ret_type : TREE_TYPE (container);
2692 if (!bit_field_offset (bf_ref).is_constant ()
2693 || !bit_field_size (bf_ref).is_constant ()
2694 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2695 return NULL;
2697 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2698 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2699 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2700 return NULL;
2702 gimple *use_stmt, *pattern_stmt;
2703 use_operand_p use_p;
2704 bool shift_first = true;
2705 tree container_type = TREE_TYPE (container);
2706 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2708 /* Calculate shift_n before the adjustments for widening loads, otherwise
2709 the container may change and we have to consider offset change for
2710 widening loads on big endianness. The shift_n calculated here can be
2711 independent of widening. */
2712 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2713 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2714 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2715 if (BYTES_BIG_ENDIAN)
2716 shift_n = prec - shift_n - mask_width;
2718 bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
2719 TYPE_PRECISION (ret_type) > mask_width);
2720 bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
2721 TYPE_PRECISION (ret_type));
2723 /* We move the conversion earlier if the loaded type is smaller than the
2724 return type to enable the use of widening loads. And if we need a
2725 sign extension, we need to convert the loaded value early to a signed
2726 type as well. */
2727 if (ref_sext || load_widen)
2729 tree type = load_widen ? ret_type : container_type;
2730 if (ref_sext)
2731 type = gimple_signed_type (type);
2732 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2733 NOP_EXPR, container);
2734 container = gimple_get_lhs (pattern_stmt);
2735 container_type = TREE_TYPE (container);
2736 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2737 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2738 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2740 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2741 /* If we are doing the conversion last then also delay the shift as we may
2742 be able to combine the shift and conversion in certain cases. */
2743 shift_first = false;
2745 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2746 PLUS_EXPR then do the shift last as some targets can combine the shift and
2747 add into a single instruction. */
2748 if (lhs && !is_pattern_stmt_p (stmt_info)
2749 && single_imm_use (lhs, &use_p, &use_stmt))
2751 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2752 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2753 shift_first = false;
2756 /* If we don't have to shift we only generate the mask, so just fix the
2757 code-path to shift_first. */
2758 if (shift_n == 0)
2759 shift_first = true;
2761 tree result;
2762 if (shift_first && !ref_sext)
2764 tree shifted = container;
2765 if (shift_n)
2767 pattern_stmt
2768 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2769 RSHIFT_EXPR, container,
2770 build_int_cst (sizetype, shift_n));
2771 shifted = gimple_assign_lhs (pattern_stmt);
2772 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2775 tree mask = wide_int_to_tree (container_type,
2776 wi::mask (mask_width, false, prec));
2778 pattern_stmt
2779 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2780 BIT_AND_EXPR, shifted, mask);
2781 result = gimple_assign_lhs (pattern_stmt);
2783 else
2785 tree temp = vect_recog_temp_ssa_var (container_type);
2786 if (!ref_sext)
2788 tree mask = wide_int_to_tree (container_type,
2789 wi::shifted_mask (shift_n,
2790 mask_width,
2791 false, prec));
2792 pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2793 container, mask);
2795 else
2797 HOST_WIDE_INT shl = prec - shift_n - mask_width;
2798 shift_n += shl;
2799 pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2800 container,
2801 build_int_cst (sizetype,
2802 shl));
2805 tree masked = gimple_assign_lhs (pattern_stmt);
2806 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2807 pattern_stmt
2808 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2809 RSHIFT_EXPR, masked,
2810 build_int_cst (sizetype, shift_n));
2811 result = gimple_assign_lhs (pattern_stmt);
2814 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2816 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2817 pattern_stmt
2818 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2819 NOP_EXPR, result);
2822 if (!lhs)
2824 if (!vectype)
2825 return NULL;
2827 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2828 vectype = truth_type_for (vectype);
2830 /* FIXME: This part extracts the boolean value out of the bitfield in the
2831 same way as vect_recog_gcond_pattern does. However because
2832 patterns cannot match the same root twice, when we handle and
2833 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2834 apply anymore. We should really fix it so that we don't need to
2835 duplicate transformations like these. */
2836 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2837 gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
2838 tree cond_cst = gimple_cond_rhs (cond_stmt);
2839 gimple *new_stmt
2840 = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
2841 gimple_get_lhs (pattern_stmt),
2842 fold_convert (container_type, cond_cst));
2843 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
2844 pattern_stmt
2845 = gimple_build_cond (NE_EXPR, new_lhs,
2846 build_zero_cst (TREE_TYPE (new_lhs)),
2847 NULL_TREE, NULL_TREE);
2850 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2851 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2853 return pattern_stmt;
2856 /* Function vect_recog_bit_insert_pattern
2858 Try to find the following pattern:
2860 written = BIT_INSERT_EXPR (container, value, bitpos);
2862 Input:
2864 * STMT_VINFO: The stmt we want to replace.
2866 Output:
2868 * TYPE_OUT: The vector type of the output of this pattern.
2870 * Return value: A new stmt that will be used to replace the sequence of
2871 stmts that constitute the pattern. In this case it will be:
2872 value = (container_type) value; // Make sure value has the container type
2873 shifted = value << bitpos; // Shift value into place
2874 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2875 // the 'to-write value'.
2876 cleared = container & ~(mask << bitpos); // Clearing the bits we want to
2877 // write to from the value we want
2878 // to write to.
2879 written = cleared | masked; // Write bits.
2882 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2883 bits corresponding to the real size of the bitfield value we are writing to.
2884 The shifting is always optional depending on whether bitpos != 0.
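/* An illustrative sketch (not taken from the sources): writing a bitfield
   member in a loop produces the BIT_INSERT_EXPR shape handled here; the
   struct and names are made up:

     struct item { unsigned int tag : 5; unsigned int val : 27; };

     void
     write_tags (struct item *restrict out, const int *restrict in, int n)
     {
       for (int i = 0; i < n; i++)
         out[i].tag = in[i];   // lowered to shift, mask and bitwise or
     }
*/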
2888 static gimple *
2889 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2890 tree *type_out)
2892 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2893 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2894 return NULL;
2896 tree container = gimple_assign_rhs1 (bf_stmt);
2897 tree value = gimple_assign_rhs2 (bf_stmt);
2898 tree shift = gimple_assign_rhs3 (bf_stmt);
2900 tree bf_type = TREE_TYPE (value);
2901 tree container_type = TREE_TYPE (container);
2903 if (!INTEGRAL_TYPE_P (container_type)
2904 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2905 return NULL;
2907 gimple *pattern_stmt;
2909 vect_unpromoted_value unprom;
2910 unprom.set_op (value, vect_internal_def);
2911 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2912 get_vectype_for_scalar_type (vinfo,
2913 container_type));
2915 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2916 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2917 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2918 if (BYTES_BIG_ENDIAN)
2920 shift_n = prec - shift_n - mask_width;
2921 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2924 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2926 pattern_stmt =
2927 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2928 NOP_EXPR, value);
2929 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2930 value = gimple_get_lhs (pattern_stmt);
2933 /* Shift VALUE into place. */
2934 tree shifted = value;
2935 if (shift_n)
2937 gimple_seq stmts = NULL;
2938 shifted
2939 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2940 if (!gimple_seq_empty_p (stmts))
2941 append_pattern_def_seq (vinfo, stmt_info,
2942 gimple_seq_first_stmt (stmts));
2945 tree mask_t
2946 = wide_int_to_tree (container_type,
2947 wi::shifted_mask (shift_n, mask_width, false, prec));
2949 /* Clear bits we don't want to write back from SHIFTED. */
2950 gimple_seq stmts = NULL;
2951 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2952 mask_t);
2953 if (!gimple_seq_empty_p (stmts))
2955 pattern_stmt = gimple_seq_first_stmt (stmts);
2956 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2959 /* Mask off the bits in the container that we are to write to. */
2960 mask_t = wide_int_to_tree (container_type,
2961 wi::shifted_mask (shift_n, mask_width, true, prec));
2962 tree cleared = vect_recog_temp_ssa_var (container_type);
2963 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2964 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2966 /* Write MASKED into CLEARED. */
2967 pattern_stmt
2968 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2969 BIT_IOR_EXPR, cleared, masked);
2971 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2972 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2974 return pattern_stmt;
2978 /* Recognize cases in which an operation is performed in one type WTYPE
2979 but could be done more efficiently in a narrower type NTYPE. For example,
2980 if we have:
2982 ATYPE a; // narrower than NTYPE
2983 BTYPE b; // narrower than NTYPE
2984 WTYPE aw = (WTYPE) a;
2985 WTYPE bw = (WTYPE) b;
2986 WTYPE res = aw + bw; // only uses of aw and bw
2988 then it would be more efficient to do:
2990 NTYPE an = (NTYPE) a;
2991 NTYPE bn = (NTYPE) b;
2992 NTYPE resn = an + bn;
2993 WTYPE res = (WTYPE) resn;
2995 Other situations include things like:
2997 ATYPE a; // NTYPE or narrower
2998 WTYPE aw = (WTYPE) a;
2999 WTYPE res = aw + b;
3001 when only "(NTYPE) res" is significant. In that case it's more efficient
3002 to truncate "b" and do the operation on NTYPE instead:
3004 NTYPE an = (NTYPE) a;
3005 NTYPE bn = (NTYPE) b; // truncation
3006 NTYPE resn = an + bn;
3007 WTYPE res = (WTYPE) resn;
3009 All users of "res" should then use "resn" instead, making the final
3010 statement dead (not marked as relevant). The final statement is still
3011 needed to maintain the type correctness of the IR.
3013 vect_determine_precisions has already determined the minimum
3014 precision of the operation and the minimum precision required
3015 by users of the result. */
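/* An illustrative sketch (not taken from the sources): because of C integer
   promotion the addition below is performed in int, yet only the low 8 bits
   of each result survive, so the operation can be demoted to a narrower
   type; the names are made up:

     void
     narrow_add (unsigned char *restrict out, const unsigned char *restrict a,
                 const unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = a[i] + b[i];   // int addition, result truncated to 8 bits
     }
*/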
3017 static gimple *
3018 vect_recog_over_widening_pattern (vec_info *vinfo,
3019 stmt_vec_info last_stmt_info, tree *type_out)
3021 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3022 if (!last_stmt)
3023 return NULL;
3025 /* See whether we have found that this operation can be done on a
3026 narrower type without changing its semantics. */
3027 unsigned int new_precision = last_stmt_info->operation_precision;
3028 if (!new_precision)
3029 return NULL;
3031 tree lhs = gimple_assign_lhs (last_stmt);
3032 tree type = TREE_TYPE (lhs);
3033 tree_code code = gimple_assign_rhs_code (last_stmt);
3035 /* Punt for reductions where we don't handle the type conversions. */
3036 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
3037 return NULL;
3039 /* Keep the first operand of a COND_EXPR as-is: only the other two
3040 operands are interesting. */
3041 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
3043 /* Check the operands. */
3044 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
3045 auto_vec <vect_unpromoted_value, 3> unprom (nops);
3046 unprom.quick_grow_cleared (nops);
3047 unsigned int min_precision = 0;
3048 bool single_use_p = false;
3049 for (unsigned int i = 0; i < nops; ++i)
3051 tree op = gimple_op (last_stmt, first_op + i);
3052 if (TREE_CODE (op) == INTEGER_CST)
3053 unprom[i].set_op (op, vect_constant_def);
3054 else if (TREE_CODE (op) == SSA_NAME)
3056 bool op_single_use_p = true;
3057 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
3058 &op_single_use_p))
3059 return NULL;
3060 /* If:
3062 (1) N bits of the result are needed;
3063 (2) all inputs are widened from M<N bits; and
3064 (3) one operand OP is a single-use SSA name
3066 we can shift the M->N widening from OP to the output
3067 without changing the number or type of extensions involved.
3068 This then reduces the number of copies of STMT_INFO.
3070 If instead of (3) more than one operand is a single-use SSA name,
3071 shifting the extension to the output is even more of a win.
3073 If instead:
3075 (1) N bits of the result are needed;
3076 (2) one operand OP2 is widened from M2<N bits;
3077 (3) another operand OP1 is widened from M1<M2 bits; and
3078 (4) both OP1 and OP2 are single-use
3080 the choice is between:
3082 (a) truncating OP2 to M1, doing the operation on M1,
3083 and then widening the result to N
3085 (b) widening OP1 to M2, doing the operation on M2, and then
3086 widening the result to N
3088 Both shift the M2->N widening of the inputs to the output.
3089 (a) additionally shifts the M1->M2 widening to the output;
3090 it requires fewer copies of STMT_INFO but requires an extra
3091 M2->M1 truncation.
3093 Which is better will depend on the complexity and cost of
3094 STMT_INFO, which is hard to predict at this stage. However,
3095 a clear tie-breaker in favor of (b) is the fact that the
3096 truncation in (a) increases the length of the operation chain.
3098 If instead of (4) only one of OP1 or OP2 is single-use,
3099 (b) is still a win over doing the operation in N bits:
3100 it still shifts the M2->N widening on the single-use operand
3101 to the output and reduces the number of STMT_INFO copies.
3103 If neither operand is single-use then operating on fewer than
3104 N bits might lead to more extensions overall. Whether it does
3105 or not depends on global information about the vectorization
3106 region, and whether that's a good trade-off would again
3107 depend on the complexity and cost of the statements involved,
3108 as well as things like register pressure that are not normally
3109 modelled at this stage. We therefore ignore these cases
3110 and just optimize the clear single-use wins above.
3112 Thus we take the maximum precision of the unpromoted operands
3113 and record whether any operand is single-use. */
3114 if (unprom[i].dt == vect_internal_def)
3116 min_precision = MAX (min_precision,
3117 TYPE_PRECISION (unprom[i].type));
3118 single_use_p |= op_single_use_p;
3121 else
3122 return NULL;
3125 /* Although the operation could be done in operation_precision, we have
3126 to balance that against introducing extra truncations or extensions.
3127 Calculate the minimum precision that can be handled efficiently.
3129 The loop above determined that the operation could be handled
3130 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3131 extension from the inputs to the output without introducing more
3132 instructions, and would reduce the number of instructions required
3133 for STMT_INFO itself.
3135 vect_determine_precisions has also determined that the result only
3136 needs min_output_precision bits. Truncating by a factor of N times
3137 requires a tree of N - 1 instructions, so if TYPE is N times wider
3138 than min_output_precision, doing the operation in TYPE and truncating
3139 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3140 In contrast:
3142 - truncating the input to a unary operation and doing the operation
3143 in the new type requires at most N - 1 + 1 = N instructions per
3144 output vector
3146 - doing the same for a binary operation requires at most
3147 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3149 Both unary and binary operations require fewer instructions than
3150 this if the operands were extended from a suitable truncated form.
3151 Thus there is usually nothing to lose by doing operations in
3152 min_output_precision bits, but there can be something to gain. */
3153 if (!single_use_p)
3154 min_precision = last_stmt_info->min_output_precision;
3155 else
3156 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3158 /* Apply the minimum efficient precision we just calculated. */
3159 if (new_precision < min_precision)
3160 new_precision = min_precision;
3161 new_precision = vect_element_precision (new_precision);
3162 if (new_precision >= TYPE_PRECISION (type))
3163 return NULL;
3165 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3167 *type_out = get_vectype_for_scalar_type (vinfo, type);
3168 if (!*type_out)
3169 return NULL;
3171 /* We've found a viable pattern. Get the new type of the operation. */
3172 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3173 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3175 /* If we're truncating an operation, we need to make sure that we
3176 don't introduce new undefined overflow. The codes tested here are
3177 a subset of those accepted by vect_truncatable_operation_p. */
3178 tree op_type = new_type;
3179 if (TYPE_OVERFLOW_UNDEFINED (new_type)
3180 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3181 op_type = build_nonstandard_integer_type (new_precision, true);
3183 /* We specifically don't check here whether the target supports the
3184 new operation, since it might be something that a later pattern
3185 wants to rewrite anyway. If targets have a minimum element size
3186 for some optabs, we should pattern-match smaller ops to larger ops
3187 where beneficial. */
3188 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3189 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3190 if (!new_vectype || !op_vectype)
3191 return NULL;
3193 if (dump_enabled_p ())
3194 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3195 type, new_type);
3197 /* Calculate the rhs operands for an operation on OP_TYPE. */
3198 tree ops[3] = {};
3199 for (unsigned int i = 1; i < first_op; ++i)
3200 ops[i - 1] = gimple_op (last_stmt, i);
3201 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3202 op_type, &unprom[0], op_vectype);
3204 /* Use the operation to produce a result of type OP_TYPE. */
3205 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3206 gimple *pattern_stmt = gimple_build_assign (new_var, code,
3207 ops[0], ops[1], ops[2]);
3208 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3210 if (dump_enabled_p ())
3211 dump_printf_loc (MSG_NOTE, vect_location,
3212 "created pattern stmt: %G", pattern_stmt);
3214 /* Convert back to the original signedness, if OP_TYPE is different
3215 from NEW_TYPE. */
3216 if (op_type != new_type)
3217 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3218 pattern_stmt, op_vectype);
3220 /* Promote the result to the original type. */
3221 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3222 pattern_stmt, new_vectype);
3224 return pattern_stmt;
3227 /* Recognize the following patterns:
3229 ATYPE a; // narrower than TYPE
3230 BTYPE b; // narrower than TYPE
3232 1) Multiply high with scaling
3233 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3234 Here, c is bitsize (TYPE) / 2 - 1.
3236 2) ... or also with rounding
3237 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
3238 Here, d is bitsize (TYPE) / 2 - 2.
3240 3) Normal multiply high
3241 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3242 Here, e is bitsize (TYPE) / 2.
3244 where only the bottom half of res is used. */
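/* An illustrative sketch (not taken from the sources): a Q15 fixed-point
   multiply of rounding shape 2), with TYPE = int and ATYPE = BTYPE = short,
   so d = 32 / 2 - 2 = 14; the names are made up:

     void
     q15_mul (short *restrict out, const short *restrict a,
              const short *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = ((((int) a[i] * b[i]) >> 14) + 1) >> 1;
     }

   Only the low 16 bits of each 32-bit result are kept, so every element
   can become a single .MULHRS on 16-bit lanes.  */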
3246 static gimple *
3247 vect_recog_mulhs_pattern (vec_info *vinfo,
3248 stmt_vec_info last_stmt_info, tree *type_out)
3250 /* Check for a right shift. */
3251 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3252 if (!last_stmt
3253 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3254 return NULL;
3256 /* Check that the shift result is wider than the users of the
3257 result need (i.e. that narrowing would be a natural choice). */
3258 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3259 unsigned int target_precision
3260 = vect_element_precision (last_stmt_info->min_output_precision);
3261 if (!INTEGRAL_TYPE_P (lhs_type)
3262 || target_precision >= TYPE_PRECISION (lhs_type))
3263 return NULL;
3265 /* Look through any change in sign on the outer shift input. */
3266 vect_unpromoted_value unprom_rshift_input;
3267 tree rshift_input = vect_look_through_possible_promotion
3268 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3269 if (!rshift_input
3270 || TYPE_PRECISION (TREE_TYPE (rshift_input))
3271 != TYPE_PRECISION (lhs_type))
3272 return NULL;
3274 /* Get the definition of the shift input. */
3275 stmt_vec_info rshift_input_stmt_info
3276 = vect_get_internal_def (vinfo, rshift_input);
3277 if (!rshift_input_stmt_info)
3278 return NULL;
3279 gassign *rshift_input_stmt
3280 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3281 if (!rshift_input_stmt)
3282 return NULL;
3284 stmt_vec_info mulh_stmt_info;
3285 tree scale_term;
3286 bool rounding_p = false;
3288 /* Check for the presence of the rounding term. */
3289 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3291 /* Check that the outer shift was by 1. */
3292 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3293 return NULL;
3295 /* Check that the second operand of the PLUS_EXPR is 1. */
3296 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3297 return NULL;
3299 /* Look through any change in sign on the addition input. */
3300 vect_unpromoted_value unprom_plus_input;
3301 tree plus_input = vect_look_through_possible_promotion
3302 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3303 if (!plus_input
3304 || TYPE_PRECISION (TREE_TYPE (plus_input))
3305 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3306 return NULL;
3308 /* Get the definition of the multiply-high-scale part. */
3309 stmt_vec_info plus_input_stmt_info
3310 = vect_get_internal_def (vinfo, plus_input);
3311 if (!plus_input_stmt_info)
3312 return NULL;
3313 gassign *plus_input_stmt
3314 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3315 if (!plus_input_stmt
3316 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3317 return NULL;
3319 /* Look through any change in sign on the scaling input. */
3320 vect_unpromoted_value unprom_scale_input;
3321 tree scale_input = vect_look_through_possible_promotion
3322 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3323 if (!scale_input
3324 || TYPE_PRECISION (TREE_TYPE (scale_input))
3325 != TYPE_PRECISION (TREE_TYPE (plus_input)))
3326 return NULL;
3328 /* Get the definition of the multiply-high part. */
3329 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3330 if (!mulh_stmt_info)
3331 return NULL;
3333 /* Get the scaling term. */
3334 scale_term = gimple_assign_rhs2 (plus_input_stmt);
3335 rounding_p = true;
3337 else
3339 mulh_stmt_info = rshift_input_stmt_info;
3340 scale_term = gimple_assign_rhs2 (last_stmt);
3343 /* Check that the scaling factor is constant. */
3344 if (TREE_CODE (scale_term) != INTEGER_CST)
3345 return NULL;
3347 /* Check whether the scaling input term can be seen as two widened
3348 inputs multiplied together. */
3349 vect_unpromoted_value unprom_mult[2];
3350 tree new_type;
3351 unsigned int nops
3352 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3353 false, 2, unprom_mult, &new_type);
3354 if (nops != 2)
3355 return NULL;
3357 /* Adjust output precision. */
3358 if (TYPE_PRECISION (new_type) < target_precision)
3359 new_type = build_nonstandard_integer_type
3360 (target_precision, TYPE_UNSIGNED (new_type));
3362 unsigned mult_precision = TYPE_PRECISION (new_type);
3363 internal_fn ifn;
3364 /* Check that the scaling factor is expected. Instead of
3365 target_precision, we should use the one that we actually
3366 use for the internal function. */
3367 if (rounding_p)
3369 /* Check pattern 2). */
3370 if (wi::to_widest (scale_term) + mult_precision + 2
3371 != TYPE_PRECISION (lhs_type))
3372 return NULL;
3374 ifn = IFN_MULHRS;
3376 else
3378 /* Check for pattern 1). */
3379 if (wi::to_widest (scale_term) + mult_precision + 1
3380 == TYPE_PRECISION (lhs_type))
3381 ifn = IFN_MULHS;
3382 /* Check for pattern 3). */
3383 else if (wi::to_widest (scale_term) + mult_precision
3384 == TYPE_PRECISION (lhs_type))
3385 ifn = IFN_MULH;
3386 else
3387 return NULL;
3390 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3392 /* Check for target support. */
3393 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3394 if (!new_vectype
3395 || !direct_internal_fn_supported_p
3396 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3397 return NULL;
3399 /* The IR requires a valid vector type for the cast result, even though
3400 it's likely to be discarded. */
3401 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3402 if (!*type_out)
3403 return NULL;
3405 /* Generate the IFN_MULHRS call. */
3406 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3407 tree new_ops[2];
3408 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3409 unprom_mult, new_vectype);
3410 gcall *mulhrs_stmt
3411 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3412 gimple_call_set_lhs (mulhrs_stmt, new_var);
3413 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3415 if (dump_enabled_p ())
3416 dump_printf_loc (MSG_NOTE, vect_location,
3417 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3419 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3420 mulhrs_stmt, new_vectype);
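/* An illustration of the three scale-term checks above, assuming int16_t
   inputs (mult_precision == 16) and a 32-bit shift result
   (TYPE_PRECISION (lhs_type) == 32); the variables are purely illustrative:

     prod = (int32_t) a * (int32_t) b;
     (int16_t) (prod >> 16)              -> .MULH   (a, b)  (16 + 16     == 32)
     (int16_t) (prod >> 15)              -> .MULHS  (a, b)  (15 + 16 + 1 == 32)
     (int16_t) (((prod >> 14) + 1) >> 1) -> .MULHRS (a, b)  (14 + 16 + 2 == 32)  */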
3423 /* Recognize the patterns:
3425 ATYPE a; // narrower than TYPE
3426 BTYPE b; // narrower than TYPE
3427 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3428 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3430 where only the bottom half of avg is used. Try to transform them into:
3432 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3433 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3435 followed by:
3437 TYPE avg = (TYPE) avg';
3439 where NTYPE is no wider than half of TYPE. Since only the bottom half
3440 of avg is used, all or part of the cast of avg' should become redundant.
3442 If there is no target support available, generate code to distribute rshift
3443 over plus and add a carry. */
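/* A concrete instance of pattern (2), with purely illustrative types:

     unsigned char a, b, c;
     ...
     c = (a + b + 1) >> 1;   // done in int, but only the low 8 bits
                             // of the shift result are used

   can become

     c = .AVG_CEIL (a, b);   // performed directly on unsigned char

   halving the vector element width needed for the computation.  */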
3445 static gimple *
3446 vect_recog_average_pattern (vec_info *vinfo,
3447 stmt_vec_info last_stmt_info, tree *type_out)
3449 /* Check for a shift right by one bit. */
3450 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3451 if (!last_stmt
3452 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3453 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3454 return NULL;
3456 /* Check that the shift result is wider than the users of the
3457 result need (i.e. that narrowing would be a natural choice). */
3458 tree lhs = gimple_assign_lhs (last_stmt);
3459 tree type = TREE_TYPE (lhs);
3460 unsigned int target_precision
3461 = vect_element_precision (last_stmt_info->min_output_precision);
3462 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3463 return NULL;
3465 /* Look through any change in sign on the shift input. */
3466 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3467 vect_unpromoted_value unprom_plus;
3468 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3469 &unprom_plus);
3470 if (!rshift_rhs
3471 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3472 return NULL;
3474 /* Get the definition of the shift input. */
3475 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3476 if (!plus_stmt_info)
3477 return NULL;
3479 /* Check whether the shift input can be seen as a tree of additions on
3480 2 or 3 widened inputs.
3482 Note that the pattern should be a win even if the result of one or
3483 more additions is reused elsewhere: if the pattern matches, we'd be
3484 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3485 internal_fn ifn = IFN_AVG_FLOOR;
3486 vect_unpromoted_value unprom[3];
3487 tree new_type;
3488 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3489 IFN_VEC_WIDEN_PLUS, false, 3,
3490 unprom, &new_type);
3491 if (nops == 0)
3492 return NULL;
3493 if (nops == 3)
3495 /* Check that one operand is 1. */
3496 unsigned int i;
3497 for (i = 0; i < 3; ++i)
3498 if (integer_onep (unprom[i].op))
3499 break;
3500 if (i == 3)
3501 return NULL;
3502 /* Throw away the 1 operand and keep the other two. */
3503 if (i < 2)
3504 unprom[i] = unprom[2];
3505 ifn = IFN_AVG_CEIL;
3508 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3510 /* We know that:
3512 (a) the operation can be viewed as:
3514 TYPE widened0 = (TYPE) UNPROM[0];
3515 TYPE widened1 = (TYPE) UNPROM[1];
3516 TYPE tmp1 = widened0 + widened1 {+ 1};
3517 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3519 (b) the first two statements are equivalent to:
3521 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3522 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3524 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3525 where sensible;
3527 (d) all the operations can be performed correctly at twice the width of
3528 NEW_TYPE, due to the nature of the average operation; and
3530 (e) users of the result of the right shift need only TARGET_PRECISION
3531 bits, where TARGET_PRECISION is no more than half of TYPE's
3532 precision.
3534 Under these circumstances, the only situation in which NEW_TYPE
3535 could be narrower than TARGET_PRECISION is if widened0, widened1
3536 and an addition result are all used more than once. Thus we can
3537 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3538 as "free", whereas widening the result of the average instruction
3539 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3540 therefore better not to go narrower than TARGET_PRECISION. */
3541 if (TYPE_PRECISION (new_type) < target_precision)
3542 new_type = build_nonstandard_integer_type (target_precision,
3543 TYPE_UNSIGNED (new_type));
3545 /* Check for target support. */
3546 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3547 if (!new_vectype)
3548 return NULL;
3550 bool fallback_p = false;
3552 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3554 else if (TYPE_UNSIGNED (new_type)
3555 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3556 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3557 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3558 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3559 fallback_p = true;
3560 else
3561 return NULL;
3563 /* The IR requires a valid vector type for the cast result, even though
3564 it's likely to be discarded. */
3565 *type_out = get_vectype_for_scalar_type (vinfo, type);
3566 if (!*type_out)
3567 return NULL;
3569 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3570 tree new_ops[2];
3571 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3572 unprom, new_vectype);
3574 if (fallback_p)
3576 /* As a fallback, generate code for the following sequence:
3578 shifted_op0 = new_ops[0] >> 1;
3579 shifted_op1 = new_ops[1] >> 1;
3580 sum_of_shifted = shifted_op0 + shifted_op1;
3581 unmasked_carry = new_ops[0] and/or new_ops[1];
3582 carry = unmasked_carry & 1;
3583 new_var = sum_of_shifted + carry;
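     This relies on the identities (shown here with illustrative values)

       (a + b) >> 1     == (a >> 1) + (b >> 1) + ((a & b) & 1)
       (a + b + 1) >> 1 == (a >> 1) + (b >> 1) + ((a | b) & 1)

     e.g. for a = 5, b = 6 the floor form gives 2 + 3 + 0 == 5 and the
     ceil form gives 2 + 3 + 1 == 6, and no intermediate value exceeds
     the range of NEW_TYPE.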
3586 tree one_cst = build_one_cst (new_type);
3587 gassign *g;
3589 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3590 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3591 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3593 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3594 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3595 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3597 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3598 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3599 shifted_op0, shifted_op1);
3600 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3602 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3603 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3604 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3605 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3607 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3608 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3609 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3611 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3612 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3615 /* Generate the IFN_AVG* call. */
3616 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3617 new_ops[1]);
3618 gimple_call_set_lhs (average_stmt, new_var);
3619 gimple_set_location (average_stmt, gimple_location (last_stmt));
3621 if (dump_enabled_p ())
3622 dump_printf_loc (MSG_NOTE, vect_location,
3623 "created pattern stmt: %G", (gimple *) average_stmt);
3625 return vect_convert_output (vinfo, last_stmt_info,
3626 type, average_stmt, new_vectype);
3629 /* Recognize cases in which the input to a cast is wider than its
3630 output, and the input is fed by a widening operation. Fold this
3631 by removing the unnecessary intermediate widening. E.g.:
3633 unsigned char a;
3634 unsigned int b = (unsigned int) a;
3635 unsigned short c = (unsigned short) b;
3637 -->
3639 unsigned short c = (unsigned short) a;
3641 Although this is rare in input IR, it is an expected side-effect
3642 of the over-widening pattern above.
3644 This is beneficial also for integer-to-float conversions, if the
3645 widened integer has more bits than the float, and if the unwidened
3646 input doesn't. */
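/* A further illustrative example for the float case:

     unsigned short a;
     unsigned long long b = a;
     float f = (float) b;

   can use float f = (float) a; directly: the value is the same either
   way, the 64-bit intermediate is wider than float, and the original
   16-bit input is not.  */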
3648 static gimple *
3649 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3650 stmt_vec_info last_stmt_info, tree *type_out)
3652 /* Check for a cast, including an integer-to-float conversion. */
3653 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3654 if (!last_stmt)
3655 return NULL;
3656 tree_code code = gimple_assign_rhs_code (last_stmt);
3657 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3658 return NULL;
3660 /* Make sure that the rhs is a scalar with a natural bitsize. */
3661 tree lhs = gimple_assign_lhs (last_stmt);
3662 if (!lhs)
3663 return NULL;
3664 tree lhs_type = TREE_TYPE (lhs);
3665 scalar_mode lhs_mode;
3666 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3667 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3668 return NULL;
3670 /* Check for a narrowing operation (from a vector point of view). */
3671 tree rhs = gimple_assign_rhs1 (last_stmt);
3672 tree rhs_type = TREE_TYPE (rhs);
3673 if (!INTEGRAL_TYPE_P (rhs_type)
3674 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3675 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3676 return NULL;
3678 /* Try to find an unpromoted input. */
3679 vect_unpromoted_value unprom;
3680 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3681 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3682 return NULL;
3684 /* If the bits above RHS_TYPE matter, make sure that they're the
3685 same when extending from UNPROM as they are when extending from RHS. */
3686 if (!INTEGRAL_TYPE_P (lhs_type)
3687 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3688 return NULL;
3690 /* We can get the same result by casting UNPROM directly, to avoid
3691 the unnecessary widening and narrowing. */
3692 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3694 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3695 if (!*type_out)
3696 return NULL;
3698 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3699 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3700 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3702 return pattern_stmt;
3705 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3706 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3708 static gimple *
3709 vect_recog_widen_shift_pattern (vec_info *vinfo,
3710 stmt_vec_info last_stmt_info, tree *type_out)
3712 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3713 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3714 "vect_recog_widen_shift_pattern");
3717 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
3719 type a_t, b_t, c_t;
3721 S0 a_t = b_t r<< c_t;
3723 Input/Output:
3725 * STMT_VINFO: The stmt from which the pattern search begins,
3726 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3727 with a sequence:
3729 S1 d_t = -c_t;
3730 S2 e_t = d_t & (B - 1);
3731 S3 f_t = b_t << c_t;
3732 S4 g_t = b_t >> e_t;
3733 S0 a_t = f_t | g_t;
3735 where B is element bitsize of type.
3737 Output:
3739 * TYPE_OUT: The type of the output of this pattern.
3741 * Return value: A new stmt that will be used to replace the rotate
3742 S0 stmt. */
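/* For example, for a 32-bit unsigned b_t (illustrative values) the
   replacement sequence instantiates as

     d_t = -c_t;
     e_t = d_t & 31;
     a_t = (b_t << c_t) | (b_t >> e_t);

   e.g. b_t = 0x80000001, c_t = 4 gives 0x10 | 0x8 == 0x18, the rotate
   result.  */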
3744 static gimple *
3745 vect_recog_rotate_pattern (vec_info *vinfo,
3746 stmt_vec_info stmt_vinfo, tree *type_out)
3748 gimple *last_stmt = stmt_vinfo->stmt;
3749 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3750 gimple *pattern_stmt, *def_stmt;
3751 enum tree_code rhs_code;
3752 enum vect_def_type dt;
3753 optab optab1, optab2;
3754 edge ext_def = NULL;
3755 bool bswap16_p = false;
3757 if (is_gimple_assign (last_stmt))
3759 rhs_code = gimple_assign_rhs_code (last_stmt);
3760 switch (rhs_code)
3762 case LROTATE_EXPR:
3763 case RROTATE_EXPR:
3764 break;
3765 default:
3766 return NULL;
3769 lhs = gimple_assign_lhs (last_stmt);
3770 oprnd0 = gimple_assign_rhs1 (last_stmt);
3771 type = TREE_TYPE (oprnd0);
3772 oprnd1 = gimple_assign_rhs2 (last_stmt);
3774 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3776 /* __builtin_bswap16 (x) is another form of x r>> 8.
3777 The vectorizer has bswap support, but only if the argument isn't
3778 promoted. */
3779 lhs = gimple_call_lhs (last_stmt);
3780 oprnd0 = gimple_call_arg (last_stmt, 0);
3781 type = TREE_TYPE (oprnd0);
3782 if (!lhs
3783 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3784 || TYPE_PRECISION (type) <= 16
3785 || TREE_CODE (oprnd0) != SSA_NAME
3786 || BITS_PER_UNIT != 8)
3787 return NULL;
3789 stmt_vec_info def_stmt_info;
3790 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3791 return NULL;
3793 if (dt != vect_internal_def)
3794 return NULL;
3796 if (gimple_assign_cast_p (def_stmt))
3798 def = gimple_assign_rhs1 (def_stmt);
3799 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3800 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3801 oprnd0 = def;
3804 type = TREE_TYPE (lhs);
3805 vectype = get_vectype_for_scalar_type (vinfo, type);
3806 if (vectype == NULL_TREE)
3807 return NULL;
3809 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3811 /* The encoding uses one stepped pattern for each byte in the
3812 16-bit word. */
3813 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3814 for (unsigned i = 0; i < 3; ++i)
3815 for (unsigned j = 0; j < 2; ++j)
3816 elts.quick_push ((i + 1) * 2 - j - 1);
3818 vec_perm_indices indices (elts, 1,
3819 TYPE_VECTOR_SUBPARTS (char_vectype));
3820 machine_mode vmode = TYPE_MODE (char_vectype);
3821 if (can_vec_perm_const_p (vmode, vmode, indices))
3823 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3824 undo the argument promotion. */
3825 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3827 def = vect_recog_temp_ssa_var (type, NULL);
3828 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3829 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3830 oprnd0 = def;
3833 /* Pattern detected. */
3834 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3836 *type_out = vectype;
3838 /* Pattern supported. Create a stmt to be used to replace the
3839 pattern, with the unpromoted argument. */
3840 var = vect_recog_temp_ssa_var (type, NULL);
3841 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3842 1, oprnd0);
3843 gimple_call_set_lhs (pattern_stmt, var);
3844 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3845 gimple_call_fntype (last_stmt));
3846 return pattern_stmt;
3850 oprnd1 = build_int_cst (integer_type_node, 8);
3851 rhs_code = LROTATE_EXPR;
3852 bswap16_p = true;
3854 else
3855 return NULL;
3857 if (TREE_CODE (oprnd0) != SSA_NAME
3858 || !INTEGRAL_TYPE_P (type)
3859 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3860 return NULL;
3862 stmt_vec_info def_stmt_info;
3863 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3864 return NULL;
3866 if (dt != vect_internal_def
3867 && dt != vect_constant_def
3868 && dt != vect_external_def)
3869 return NULL;
3871 vectype = get_vectype_for_scalar_type (vinfo, type);
3872 if (vectype == NULL_TREE)
3873 return NULL;
3875 /* If vector/vector or vector/scalar rotate is supported by the target,
3876 don't do anything here. */
3877 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3878 if (optab1
3879 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3881 use_rotate:
3882 if (bswap16_p)
3884 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3886 def = vect_recog_temp_ssa_var (type, NULL);
3887 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3888 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3889 oprnd0 = def;
3892 /* Pattern detected. */
3893 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3895 *type_out = vectype;
3897 /* Pattern supported. Create a stmt to be used to replace the
3898 pattern. */
3899 var = vect_recog_temp_ssa_var (type, NULL);
3900 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3901 oprnd1);
3902 return pattern_stmt;
3904 return NULL;
3907 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3909 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3910 if (optab2
3911 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3912 goto use_rotate;
3915 tree utype = unsigned_type_for (type);
3916 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3917 if (!uvectype)
3918 return NULL;
3920 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3921 don't do anything here either. */
3922 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3923 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3924 if (!optab1
3925 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3926 || !optab2
3927 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3929 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3930 return NULL;
3931 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3932 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3933 if (!optab1
3934 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3935 || !optab2
3936 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3937 return NULL;
3940 *type_out = vectype;
3942 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3944 def = vect_recog_temp_ssa_var (utype, NULL);
3945 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3946 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3947 oprnd0 = def;
3950 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3951 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3953 def = NULL_TREE;
3954 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3955 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3956 def = oprnd1;
3957 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3959 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3960 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3961 && TYPE_PRECISION (TREE_TYPE (rhs1))
3962 == TYPE_PRECISION (type))
3963 def = rhs1;
3966 if (def == NULL_TREE)
3968 def = vect_recog_temp_ssa_var (utype, NULL);
3969 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3970 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3972 stype = TREE_TYPE (def);
3974 if (TREE_CODE (def) == INTEGER_CST)
3976 if (!tree_fits_uhwi_p (def)
3977 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3978 || integer_zerop (def))
3979 return NULL;
3980 def2 = build_int_cst (stype,
3981 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3983 else
3985 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3987 if (vecstype == NULL_TREE)
3988 return NULL;
3989 def2 = vect_recog_temp_ssa_var (stype, NULL);
3990 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3991 if (ext_def)
3993 basic_block new_bb
3994 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3995 gcc_assert (!new_bb);
3997 else
3998 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4000 def2 = vect_recog_temp_ssa_var (stype, NULL);
4001 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
4002 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
4003 gimple_assign_lhs (def_stmt), mask);
4004 if (ext_def)
4006 basic_block new_bb
4007 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
4008 gcc_assert (!new_bb);
4010 else
4011 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4014 var1 = vect_recog_temp_ssa_var (utype, NULL);
4015 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
4016 ? LSHIFT_EXPR : RSHIFT_EXPR,
4017 oprnd0, def);
4018 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
4020 var2 = vect_recog_temp_ssa_var (utype, NULL);
4021 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
4022 ? RSHIFT_EXPR : LSHIFT_EXPR,
4023 oprnd0, def2);
4024 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
4026 /* Pattern detected. */
4027 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
4029 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4030 var = vect_recog_temp_ssa_var (utype, NULL);
4031 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
4033 if (!useless_type_conversion_p (type, utype))
4035 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
4036 tree result = vect_recog_temp_ssa_var (type, NULL);
4037 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
4039 return pattern_stmt;
4042 /* Detect a vector by vector shift pattern that wouldn't otherwise be
4043 vectorized:
4045 type a_t;
4046 TYPE b_T, res_T;
4048 S1 a_t = ;
4049 S2 b_T = ;
4050 S3 res_T = b_T op a_t;
4052 where type 'TYPE' is a type with a different size than 'type',
4053 and op is <<, >> or rotate.
4055 Also detect cases:
4057 type a_t;
4058 TYPE b_T, c_T, res_T;
4060 S0 c_T = ;
4061 S1 a_t = (type) c_T;
4062 S2 b_T = ;
4063 S3 res_T = b_T op a_t;
4065 Input/Output:
4067 * STMT_VINFO: The stmt from which the pattern search begins,
4068 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4069 with a shift/rotate which has same type on both operands, in the
4070 second case just b_T op c_T, in the first case with added cast
4071 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4073 Output:
4075 * TYPE_OUT: The type of the output of this pattern.
4077 * Return value: A new stmt that will be used to replace the shift/rotate
4078 S3 stmt. */
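/* A concrete instance with illustrative types:

     int amount;
     long long x, res;
     ...
     res = x << amount;

   Here the shift amount would be vectorized as a vector of ints while x
   needs a vector of long longs, so a conversion of the amount (or its
   unconverted definition, in the second case above) is used to give both
   shift operands the same vector type.  */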
4080 static gimple *
4081 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4082 stmt_vec_info stmt_vinfo,
4083 tree *type_out)
4085 gimple *last_stmt = stmt_vinfo->stmt;
4086 tree oprnd0, oprnd1, lhs, var;
4087 gimple *pattern_stmt;
4088 enum tree_code rhs_code;
4090 if (!is_gimple_assign (last_stmt))
4091 return NULL;
4093 rhs_code = gimple_assign_rhs_code (last_stmt);
4094 switch (rhs_code)
4096 case LSHIFT_EXPR:
4097 case RSHIFT_EXPR:
4098 case LROTATE_EXPR:
4099 case RROTATE_EXPR:
4100 break;
4101 default:
4102 return NULL;
4105 lhs = gimple_assign_lhs (last_stmt);
4106 oprnd0 = gimple_assign_rhs1 (last_stmt);
4107 oprnd1 = gimple_assign_rhs2 (last_stmt);
4108 if (TREE_CODE (oprnd0) != SSA_NAME
4109 || TREE_CODE (oprnd1) != SSA_NAME
4110 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4111 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4112 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4113 || TYPE_PRECISION (TREE_TYPE (lhs))
4114 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4115 return NULL;
4117 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
4118 if (!def_vinfo)
4119 return NULL;
4121 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4122 if (*type_out == NULL_TREE)
4123 return NULL;
4125 tree def = NULL_TREE;
4126 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4127 if (def_stmt && gimple_assign_cast_p (def_stmt))
4129 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4130 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4131 && TYPE_PRECISION (TREE_TYPE (rhs1))
4132 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4134 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4135 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4136 def = rhs1;
4137 else
4139 tree mask
4140 = build_low_bits_mask (TREE_TYPE (rhs1),
4141 TYPE_PRECISION (TREE_TYPE (oprnd1)));
4142 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4143 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4144 tree vecstype = get_vectype_for_scalar_type (vinfo,
4145 TREE_TYPE (rhs1));
4146 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4151 if (def == NULL_TREE)
4153 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4154 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4155 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4158 /* Pattern detected. */
4159 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4161 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4162 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4163 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4165 return pattern_stmt;
4168 /* Return true iff the target has a vector optab implementing the operation
4169 CODE on type VECTYPE. */
4171 static bool
4172 target_has_vecop_for_code (tree_code code, tree vectype)
4174 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4175 return voptab
4176 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
4179 /* Verify that the target has optabs of VECTYPE to perform all the steps
4180 needed by the multiplication-by-immediate synthesis algorithm described by
4181 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4182 present. Return true iff the target supports all the steps. */
4184 static bool
4185 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4186 tree vectype, bool synth_shift_p)
4188 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4189 return false;
4191 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
4192 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4194 if (var == negate_variant
4195 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4196 return false;
4198 /* If we must synthesize shifts with additions make sure that vector
4199 addition is available. */
4200 if ((var == add_variant || synth_shift_p) && !supports_vplus)
4201 return false;
4203 for (int i = 1; i < alg->ops; i++)
4205 switch (alg->op[i])
4207 case alg_shift:
4208 break;
4209 case alg_add_t_m2:
4210 case alg_add_t2_m:
4211 case alg_add_factor:
4212 if (!supports_vplus)
4213 return false;
4214 break;
4215 case alg_sub_t_m2:
4216 case alg_sub_t2_m:
4217 case alg_sub_factor:
4218 if (!supports_vminus)
4219 return false;
4220 break;
4221 case alg_unknown:
4222 case alg_m:
4223 case alg_zero:
4224 case alg_impossible:
4225 return false;
4226 default:
4227 gcc_unreachable ();
4231 return true;
4234 /* Synthesize a left shift of OP by AMNT bits using a series of additions,
4235 putting the final result in DEST. Append all statements but the last to
4236 the pattern definition sequence of STMT_INFO. Return the last statement. */
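/* For instance, a shift by 3 expands to three doublings (illustrative):

     t1 = op + op;      // op << 1
     t2 = t1 + t1;      // op << 2
     dest = t2 + t2;    // op << 3, the returned statement  */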
4238 static gimple *
4239 synth_lshift_by_additions (vec_info *vinfo,
4240 tree dest, tree op, HOST_WIDE_INT amnt,
4241 stmt_vec_info stmt_info)
4243 HOST_WIDE_INT i;
4244 tree itype = TREE_TYPE (op);
4245 tree prev_res = op;
4246 gcc_assert (amnt >= 0);
4247 for (i = 0; i < amnt; i++)
4249 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
4250 : dest;
4251 gimple *stmt
4252 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4253 prev_res = tmp_var;
4254 if (i < amnt - 1)
4255 append_pattern_def_seq (vinfo, stmt_info, stmt);
4256 else
4257 return stmt;
4259 gcc_unreachable ();
4260 return NULL;
4263 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4264 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4265 the process if necessary. Append the resulting assignment statements
4266 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4267 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4268 left shifts using additions. */
4270 static tree
4271 apply_binop_and_append_stmt (vec_info *vinfo,
4272 tree_code code, tree op1, tree op2,
4273 stmt_vec_info stmt_vinfo, bool synth_shift_p)
4275 if (integer_zerop (op2)
4276 && (code == LSHIFT_EXPR
4277 || code == PLUS_EXPR))
4279 gcc_assert (TREE_CODE (op1) == SSA_NAME);
4280 return op1;
4283 gimple *stmt;
4284 tree itype = TREE_TYPE (op1);
4285 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
4287 if (code == LSHIFT_EXPR
4288 && synth_shift_p)
4290 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
4291 TREE_INT_CST_LOW (op2), stmt_vinfo);
4292 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4293 return tmp_var;
4296 stmt = gimple_build_assign (tmp_var, code, op1, op2);
4297 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4298 return tmp_var;
4301 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4302 and simple arithmetic operations to be vectorized. Record the statements
4303 produced in STMT_VINFO and return the last statement in the sequence or
4304 NULL if it's not possible to synthesize such a multiplication.
4305 This function mirrors the behavior of expand_mult_const in expmed.cc but
4306 works on tree-ssa form. */
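/* For example, a multiplication by 9 might be synthesized as (illustrative;
   the exact sequence depends on what choose_mult_variant selects):

     t1 = op << 3;
     result = t1 + op;

   with the shift itself expanded to additions on targets without vector
   shifts.  */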
4308 static gimple *
4309 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4310 stmt_vec_info stmt_vinfo)
4312 tree itype = TREE_TYPE (op);
4313 machine_mode mode = TYPE_MODE (itype);
4314 struct algorithm alg;
4315 mult_variant variant;
4316 if (!tree_fits_shwi_p (val))
4317 return NULL;
4319 /* Multiplication synthesis by shifts, adds and subs can introduce
4320 signed overflow where the original operation didn't. Perform the
4321 operations on an unsigned type and cast back to avoid this.
4322 In the future we may want to relax this for synthesis algorithms
4323 that we can prove do not cause unexpected overflow. */
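/* For instance, x * 7 synthesized as (x << 3) - x can overflow the
   intermediate shift for values of x where x * 7 itself is still
   representable (illustrative; the exact sequence chosen may differ).  */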
4324 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4326 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4327 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4328 if (!vectype)
4329 return NULL;
4331 /* Targets that don't support vector shifts but support vector additions
4332 can synthesize shifts that way. */
4333 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4335 HOST_WIDE_INT hwval = tree_to_shwi (val);
4336 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4337 The vectorizer's benefit analysis will decide whether it's beneficial
4338 to do this. */
4339 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4340 ? TYPE_MODE (vectype) : mode,
4341 hwval, &alg, &variant, MAX_COST);
4342 if (!possible)
4343 return NULL;
4345 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
4346 return NULL;
4348 tree accumulator;
4350 /* Clear out the sequence of statements so we can populate it below. */
4351 gimple *stmt = NULL;
4353 if (cast_to_unsigned_p)
4355 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4356 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4357 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4358 op = tmp_op;
4361 if (alg.op[0] == alg_zero)
4362 accumulator = build_int_cst (multtype, 0);
4363 else
4364 accumulator = op;
4366 bool needs_fixup = (variant == negate_variant)
4367 || (variant == add_variant);
4369 for (int i = 1; i < alg.ops; i++)
4371 tree shft_log = build_int_cst (multtype, alg.log[i]);
4372 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4373 tree tmp_var = NULL_TREE;
4375 switch (alg.op[i])
4377 case alg_shift:
4378 if (synth_shift_p)
4379 stmt
4380 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4381 alg.log[i], stmt_vinfo);
4382 else
4383 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4384 shft_log);
4385 break;
4386 case alg_add_t_m2:
4387 tmp_var
4388 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4389 stmt_vinfo, synth_shift_p);
4390 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4391 tmp_var);
4392 break;
4393 case alg_sub_t_m2:
4394 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4395 shft_log, stmt_vinfo,
4396 synth_shift_p);
4397 /* In some algorithms the first step involves zeroing the
4398 accumulator. If subtracting from such an accumulator,
4399 just emit the negation directly. */
4400 if (integer_zerop (accumulator))
4401 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4402 else
4403 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4404 tmp_var);
4405 break;
4406 case alg_add_t2_m:
4407 tmp_var
4408 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4409 shft_log, stmt_vinfo, synth_shift_p);
4410 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4411 break;
4412 case alg_sub_t2_m:
4413 tmp_var
4414 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4415 shft_log, stmt_vinfo, synth_shift_p);
4416 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4417 break;
4418 case alg_add_factor:
4419 tmp_var
4420 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4421 shft_log, stmt_vinfo, synth_shift_p);
4422 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4423 tmp_var);
4424 break;
4425 case alg_sub_factor:
4426 tmp_var
4427 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4428 shft_log, stmt_vinfo, synth_shift_p);
4429 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4430 accumulator);
4431 break;
4432 default:
4433 gcc_unreachable ();
4435 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4436 but rather return it directly. */
4438 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4439 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4440 accumulator = accum_tmp;
4442 if (variant == negate_variant)
4444 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4445 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4446 accumulator = accum_tmp;
4447 if (cast_to_unsigned_p)
4448 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4450 else if (variant == add_variant)
4452 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4453 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4454 accumulator = accum_tmp;
4455 if (cast_to_unsigned_p)
4456 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4458 /* Move back to a signed type if needed. */
4459 if (cast_to_unsigned_p)
4461 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4462 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4465 return stmt;
4468 /* Detect multiplication by constant and convert it into a sequence of
4469 shifts, additions, subtractions and negations. We reuse the
4470 choose_mult_variant algorithms from expmed.cc.
4472 Input/Output:
4474 STMT_VINFO: The stmt from which the pattern search begins,
4475 i.e. the mult stmt.
4477 Output:
4479 * TYPE_OUT: The type of the output of this pattern.
4481 * Return value: A new stmt that will be used to replace
4482 the multiplication. */
4484 static gimple *
4485 vect_recog_mult_pattern (vec_info *vinfo,
4486 stmt_vec_info stmt_vinfo, tree *type_out)
4488 gimple *last_stmt = stmt_vinfo->stmt;
4489 tree oprnd0, oprnd1, vectype, itype;
4490 gimple *pattern_stmt;
4492 if (!is_gimple_assign (last_stmt))
4493 return NULL;
4495 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4496 return NULL;
4498 oprnd0 = gimple_assign_rhs1 (last_stmt);
4499 oprnd1 = gimple_assign_rhs2 (last_stmt);
4500 itype = TREE_TYPE (oprnd0);
4502 if (TREE_CODE (oprnd0) != SSA_NAME
4503 || TREE_CODE (oprnd1) != INTEGER_CST
4504 || !INTEGRAL_TYPE_P (itype)
4505 || !type_has_mode_precision_p (itype))
4506 return NULL;
4508 vectype = get_vectype_for_scalar_type (vinfo, itype);
4509 if (vectype == NULL_TREE)
4510 return NULL;
4512 /* If the target can handle vectorized multiplication natively,
4513 don't attempt to optimize this. */
4514 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4515 if (mul_optab != unknown_optab)
4517 machine_mode vec_mode = TYPE_MODE (vectype);
4518 int icode = (int) optab_handler (mul_optab, vec_mode);
4519 if (icode != CODE_FOR_nothing)
4520 return NULL;
4523 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4524 oprnd0, oprnd1, stmt_vinfo);
4525 if (!pattern_stmt)
4526 return NULL;
4528 /* Pattern detected. */
4529 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4531 *type_out = vectype;
4533 return pattern_stmt;
4536 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
4537 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
4538 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
4540 extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
4541 extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
4543 static gimple *
4544 vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
4545 internal_fn fn, tree *type_out,
4546 tree lhs, tree op_0, tree op_1)
4548 tree itype = TREE_TYPE (op_0);
4549 tree otype = TREE_TYPE (lhs);
4550 tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4551 tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4553 if (v_itype != NULL_TREE && v_otype != NULL_TREE
4554 && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
4556 gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4557 tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);
4559 gimple_call_set_lhs (call, in_ssa);
4560 gimple_call_set_nothrow (call, /* nothrow_p */ false);
4561 gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info)));
4563 *type_out = v_otype;
4565 if (types_compatible_p (itype, otype))
4566 return call;
4567 else
4569 append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
4570 tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
4572 return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
4576 return NULL;
4580 * Try to detect the saturation add pattern (SAT_ADD), i.e. the gimple below:
4581 * _7 = _4 + _6;
4582 * _8 = _4 > _7;
4583 * _9 = (long unsigned int) _8;
4584 * _10 = -_9;
4585 * _12 = _7 | _10;
4587 * And then simplified to
4588 * _12 = .SAT_ADD (_4, _6);
4591 static gimple *
4592 vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4593 tree *type_out)
4595 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4597 if (!is_gimple_assign (last_stmt))
4598 return NULL;
4600 tree ops[2];
4601 tree lhs = gimple_assign_lhs (last_stmt);
4603 if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
4604 || gimple_signed_integer_sat_add (lhs, ops, NULL))
4606 if (TREE_CODE (ops[1]) == INTEGER_CST)
4607 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4609 gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
4610 IFN_SAT_ADD, type_out,
4611 lhs, ops[0], ops[1]);
4612 if (stmt)
4614 vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
4615 return stmt;
4619 return NULL;
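/* The unsigned variant above typically comes from source such as
   (illustrative):

     uint64_t sat_add (uint64_t x, uint64_t y)
     {
       uint64_t sum = x + y;
       return sum | -(uint64_t) (sum < x);   // all-ones on overflow
     }

   which collapses to a single .SAT_ADD when the target supports it.  */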
4623 * Try to transform the truncation for the .SAT_SUB pattern, which mostly
4624 * occurs in the zip benchmark. I.e.:
4626 * unsigned int _1;
4627 * unsigned int _2;
4628 * unsigned short int _4;
4629 * _9 = (unsigned short int).SAT_SUB (_1, _2);
4631 * if _1 is known to be in the range of unsigned short int. For example
4632 * there is a def _1 = (unsigned short int)_4. Then we can transform the
4633 * truncation to:
4635 * _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
4636 * _9 = .SAT_SUB (_4, _3);
4638 * Then we can generate better vectorized code and avoid the unnecessary
4639 * narrowing stmt during vectorization, using the stmt(s) below.
4641 * _3 = .SAT_TRUNC(_2); // SI => HI
4642 * _9 = .SAT_SUB (_4, _3);
4644 static void
4645 vect_recog_sat_sub_pattern_transform (vec_info *vinfo,
4646 stmt_vec_info stmt_vinfo,
4647 tree lhs, tree *ops)
4649 tree otype = TREE_TYPE (lhs);
4650 tree itype = TREE_TYPE (ops[0]);
4651 unsigned itype_prec = TYPE_PRECISION (itype);
4652 unsigned otype_prec = TYPE_PRECISION (otype);
4654 if (types_compatible_p (otype, itype) || otype_prec >= itype_prec)
4655 return;
4657 tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4658 tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4659 tree_pair v_pair = tree_pair (v_otype, v_itype);
4661 if (v_otype == NULL_TREE || v_itype == NULL_TREE
4662 || !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair,
4663 OPTIMIZE_FOR_BOTH))
4664 return;
4666 /* 1. Find the _4 and update ops[0] as above example. */
4667 vect_unpromoted_value unprom;
4668 tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom);
4670 if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec)
4671 return;
4673 ops[0] = tmp;
4675 /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example. */
4676 tree trunc_lhs_ssa = vect_recog_temp_ssa_var (otype, NULL);
4677 gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[1]);
4679 gimple_call_set_lhs (call, trunc_lhs_ssa);
4680 gimple_call_set_nothrow (call, /* nothrow_p */ false);
4681 append_pattern_def_seq (vinfo, stmt_vinfo, call, v_otype);
4683 ops[1] = trunc_lhs_ssa;
4687 * Try to detect the saturation sub pattern (SAT_SUB), i.e. the gimple below:
4688 * Unsigned:
4689 * _7 = _1 >= _2;
4690 * _8 = _1 - _2;
4691 * _10 = (long unsigned int) _7;
4692 * _9 = _8 * _10;
4694 * And then simplified to
4695 * _9 = .SAT_SUB (_1, _2);
4697 * Signed:
4698 * x.0_4 = (unsigned char) x_16;
4699 * y.1_5 = (unsigned char) y_18;
4700 * _6 = x.0_4 - y.1_5;
4701 * minus_19 = (int8_t) _6;
4702 * _7 = x_16 ^ y_18;
4703 * _8 = x_16 ^ minus_19;
4704 * _44 = _7 < 0;
4705 * _23 = x_16 < 0;
4706 * _24 = (signed char) _23;
4707 * _58 = (unsigned char) _24;
4708 * _59 = -_58;
4709 * _25 = (signed char) _59;
4710 * _26 = _25 ^ 127;
4711 * _42 = _8 < 0;
4712 * _41 = _42 & _44;
4713 * iftmp.2_11 = _41 ? _26 : minus_19;
4715 * And then simplified to
4716 * iftmp.2_11 = .SAT_SUB (x_16, y_18);
4719 static gimple *
4720 vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4721 tree *type_out)
4723 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4725 if (!is_gimple_assign (last_stmt))
4726 return NULL;
4728 tree ops[2];
4729 tree lhs = gimple_assign_lhs (last_stmt);
4731 if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)
4732 || gimple_signed_integer_sat_sub (lhs, ops, NULL))
4734 vect_recog_sat_sub_pattern_transform (vinfo, stmt_vinfo, lhs, ops);
4735 gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
4736 IFN_SAT_SUB, type_out,
4737 lhs, ops[0], ops[1]);
4738 if (stmt)
4740 vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt);
4741 return stmt;
4745 return NULL;
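/* The unsigned variant above corresponds to source such as (illustrative):

     uint64_t sat_sub (uint64_t x, uint64_t y)
     {
       return (x - y) * (x >= y);   // 0 when y > x
     }  */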
4749 * Try to detect the saturation truncation pattern (SAT_TRUNC), i.e. the gimple below:
4750 * overflow_5 = x_4(D) > 4294967295;
4751 * _1 = (unsigned int) x_4(D);
4752 * _2 = (unsigned int) overflow_5;
4753 * _3 = -_2;
4754 * _6 = _1 | _3;
4756 * And then simplified to
4757 * _6 = .SAT_TRUNC (x_4(D));
4760 static gimple *
4761 vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4762 tree *type_out)
4764 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4766 if (!is_gimple_assign (last_stmt))
4767 return NULL;
4769 tree ops[1];
4770 tree lhs = gimple_assign_lhs (last_stmt);
4771 tree otype = TREE_TYPE (lhs);
4773 if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
4774 && type_has_mode_precision_p (otype))
4776 tree itype = TREE_TYPE (ops[0]);
4777 tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4778 tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4779 internal_fn fn = IFN_SAT_TRUNC;
4781 if (v_itype != NULL_TREE && v_otype != NULL_TREE
4782 && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
4783 OPTIMIZE_FOR_BOTH))
4785 gcall *call = gimple_build_call_internal (fn, 1, ops[0]);
4786 tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
4788 gimple_call_set_lhs (call, out_ssa);
4789 gimple_call_set_nothrow (call, /* nothrow_p */ false);
4790 gimple_set_location (call, gimple_location (last_stmt));
4792 *type_out = v_otype;
4794 return call;
4798 return NULL;
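/* The gimple above typically comes from source such as (illustrative):

     uint32_t sat_trunc (uint64_t x)
     {
       return (uint32_t) x | -(uint32_t) (x > 0xffffffff);
     }

   i.e. the result saturates to UINT32_MAX whenever x does not fit.  */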
4801 /* Detect a signed division by a constant that wouldn't
4802 otherwise be vectorized:
4804 type a_t, b_t;
4806 S1 a_t = b_t / N;
4808 where type 'type' is an integral type and N is a constant.
4810 Similarly handle modulo by a constant:
4812 S4 a_t = b_t % N;
4814 Input/Output:
4816 * STMT_VINFO: The stmt from which the pattern search begins,
4817 i.e. the division stmt. S1 is replaced by if N is a power
4818 of two constant and type is signed:
4819 S3 y_t = b_t < 0 ? N - 1 : 0;
4820 S2 x_t = b_t + y_t;
4821 S1' a_t = x_t >> log2 (N);
4823 S4 is replaced if N is a power of two constant and
4824 type is signed by (where *_T temporaries have unsigned type):
4825 S9 y_T = b_t < 0 ? -1U : 0U;
4826 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4827 S7 z_t = (type) z_T;
4828 S6 w_t = b_t + z_t;
4829 S5 x_t = w_t & (N - 1);
4830 S4' a_t = x_t - z_t;
4832 Output:
4834 * TYPE_OUT: The type of the output of this pattern.
4836 * Return value: A new stmt that will be used to replace the division
4837 S1 or modulo S4 stmt. */
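/* A worked instance for N == 4 and 32-bit int (illustrative values):

     b_t == -5:  S3/S2/S1' give y_t = 3, x_t = -2, a_t = -2 >> 2 == -1,
                 matching -5 / 4 in C.
     b_t == -5:  S9..S4'   give z_t = 3, w_t = -2, x_t = -2 & 3 == 2,
                 a_t = 2 - 3 == -1, matching -5 % 4 in C.  */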
4839 static gimple *
4840 vect_recog_divmod_pattern (vec_info *vinfo,
4841 stmt_vec_info stmt_vinfo, tree *type_out)
4843 gimple *last_stmt = stmt_vinfo->stmt;
4844 tree oprnd0, oprnd1, vectype, itype, cond;
4845 gimple *pattern_stmt, *def_stmt;
4846 enum tree_code rhs_code;
4847 optab optab;
4848 tree q, cst;
4849 int prec;
4851 if (!is_gimple_assign (last_stmt))
4852 return NULL;
4854 rhs_code = gimple_assign_rhs_code (last_stmt);
4855 switch (rhs_code)
4857 case TRUNC_DIV_EXPR:
4858 case EXACT_DIV_EXPR:
4859 case TRUNC_MOD_EXPR:
4860 break;
4861 default:
4862 return NULL;
4865 oprnd0 = gimple_assign_rhs1 (last_stmt);
4866 oprnd1 = gimple_assign_rhs2 (last_stmt);
4867 itype = TREE_TYPE (oprnd0);
4868 if (TREE_CODE (oprnd0) != SSA_NAME
4869 || TREE_CODE (oprnd1) != INTEGER_CST
4870 || TREE_CODE (itype) != INTEGER_TYPE
4871 || !type_has_mode_precision_p (itype))
4872 return NULL;
4874 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4875 vectype = get_vectype_for_scalar_type (vinfo, itype);
4876 if (vectype == NULL_TREE)
4877 return NULL;
4879 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
4881 /* If the target can handle vectorized division or modulo natively,
4882 don't attempt to optimize this, since native division is likely
4883 to give smaller code. */
4884 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4885 if (optab != unknown_optab)
4887 machine_mode vec_mode = TYPE_MODE (vectype);
4888 int icode = (int) optab_handler (optab, vec_mode);
4889 if (icode != CODE_FOR_nothing)
4890 return NULL;
4894 prec = TYPE_PRECISION (itype);
4895 if (integer_pow2p (oprnd1))
4897 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4898 return NULL;
4900 /* Pattern detected. */
4901 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4903 *type_out = vectype;
4905 /* Check if the target supports this internal function. */
4906 internal_fn ifn = IFN_DIV_POW2;
4907 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4909 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4911 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
4912 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4913 gimple_call_set_lhs (div_stmt, var_div);
4915 if (rhs_code == TRUNC_MOD_EXPR)
4917 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
4918 def_stmt
4919 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4920 LSHIFT_EXPR, var_div, shift);
4921 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4922 pattern_stmt
4923 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4924 MINUS_EXPR, oprnd0,
4925 gimple_assign_lhs (def_stmt));
4927 else
4928 pattern_stmt = div_stmt;
4929 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
4931 return pattern_stmt;
4934 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4935 build_int_cst (itype, 0));
4936 if (rhs_code == TRUNC_DIV_EXPR
4937 || rhs_code == EXACT_DIV_EXPR)
4939 tree var = vect_recog_temp_ssa_var (itype, NULL);
4940 tree shift;
4941 def_stmt
4942 = gimple_build_assign (var, COND_EXPR, cond,
4943 fold_build2 (MINUS_EXPR, itype, oprnd1,
4944 build_int_cst (itype, 1)),
4945 build_int_cst (itype, 0));
4946 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4947 var = vect_recog_temp_ssa_var (itype, NULL);
4948 def_stmt
4949 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4950 gimple_assign_lhs (def_stmt));
4951 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4953 shift = build_int_cst (itype, tree_log2 (oprnd1));
4954 pattern_stmt
4955 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4956 RSHIFT_EXPR, var, shift);
4958 else
4960 tree signmask;
4961 if (compare_tree_int (oprnd1, 2) == 0)
4963 signmask = vect_recog_temp_ssa_var (itype, NULL);
4964 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4965 build_int_cst (itype, 1),
4966 build_int_cst (itype, 0));
4967 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4969 else
4971 tree utype
4972 = build_nonstandard_integer_type (prec, 1);
4973 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4974 tree shift
4975 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
4976 - tree_log2 (oprnd1));
4977 tree var = vect_recog_temp_ssa_var (utype, NULL);
4979 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4980 build_int_cst (utype, -1),
4981 build_int_cst (utype, 0));
4982 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4983 var = vect_recog_temp_ssa_var (utype, NULL);
4984 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4985 gimple_assign_lhs (def_stmt),
4986 shift);
4987 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4988 signmask = vect_recog_temp_ssa_var (itype, NULL);
4989 def_stmt
4990 = gimple_build_assign (signmask, NOP_EXPR, var);
4991 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4993 def_stmt
4994 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4995 PLUS_EXPR, oprnd0, signmask);
4996 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4997 def_stmt
4998 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4999 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
5000 fold_build2 (MINUS_EXPR, itype, oprnd1,
5001 build_int_cst (itype, 1)));
5002 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5004 pattern_stmt
5005 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5006 MINUS_EXPR, gimple_assign_lhs (def_stmt),
5007 signmask);
5010 return pattern_stmt;
5013 if ((cst = uniform_integer_cst_p (oprnd1))
5014 && TYPE_UNSIGNED (itype)
5015 && rhs_code == TRUNC_DIV_EXPR
5016 && vectype
5017 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
5019 /* We can use the relationship:
5021 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
5023 to optimize cases where N+1 is a power of 2, and where // (N+1)
5024 is therefore a shift right. When operating in modes that are
5025 multiples of a byte in size, there are two cases:
5027 (1) N(N+3) is not representable, in which case the question
5028 becomes whether the replacement expression overflows.
5029 It is enough to test that x+N+2 does not overflow,
5030 i.e. that x < MAX-(N+1).
5032 (2) N(N+3) is representable, in which case it is the (only)
5033 bound that we need to check.
5035 ??? For now we just handle the case where // (N+1) is a shift
5036 right by half the precision, since some architectures can
5037 optimize the associated addition and shift combinations
5038 into single instructions. */
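/* A worked instance (illustrative): for 8-bit unsigned division by
   N == 15 we have N + 1 == 16 == 1 << 4, so pow == prec / 2 == 4.
   For x == 200:

     (200 + 15 + 2) >> 4 == 13,  (13 + 200) >> 4 == 13 == 200 / 15

   and the overflow check succeeds because 200 + 17 still fits in
   eight bits.  */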
5040 auto wcst = wi::to_wide (cst);
5041 int pow = wi::exact_log2 (wcst + 1);
5042 if (pow == prec / 2)
5044 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
5046 gimple_ranger ranger;
5047 int_range_max r;
5049 /* Check that no overflow will occur. If we don't have range
5050 information we can't perform the optimization. */
5052 if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
5054 wide_int max = r.upper_bound ();
5055 wide_int one = wi::shwi (1, prec);
5056 wide_int adder = wi::add (one, wi::lshift (one, pow));
5057 wi::overflow_type ovf;
5058 wi::add (max, adder, UNSIGNED, &ovf);
5059 if (ovf == wi::OVF_NONE)
5061 *type_out = vectype;
5062 tree tadder = wide_int_to_tree (itype, adder);
5063 tree rshift = wide_int_to_tree (itype, pow);
5065 tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
5066 gassign *patt1
5067 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
5068 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5070 tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
5071 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
5072 rshift);
5073 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5075 tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
5076 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
5077 oprnd0);
5078 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5080 tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
5081 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
5082 new_lhs3, rshift);
5084 return pattern_stmt;
5090 if (prec > HOST_BITS_PER_WIDE_INT
5091 || integer_zerop (oprnd1))
5092 return NULL;
5094 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
5095 return NULL;
5097 if (TYPE_UNSIGNED (itype))
5099 unsigned HOST_WIDE_INT mh, ml;
5100 int pre_shift, post_shift;
5101 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
5102 & GET_MODE_MASK (itype_mode));
5103 tree t1, t2, t3, t4;
5105 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
5106 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
5107 return NULL;
5109 /* Find a suitable multiplier and right shift count instead of
5110 directly dividing by D. */
5111 mh = choose_multiplier (d, prec, prec, &ml, &post_shift);
5113 /* If the suggested multiplier is more than PREC bits, we can do better
5114 for even divisors, using an initial right shift. */
5115 if (mh != 0 && (d & 1) == 0)
5117 pre_shift = ctz_or_zero (d);
5118 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
5119 &ml, &post_shift);
5120 gcc_assert (!mh);
5122 else
5123 pre_shift = 0;
5125 if (mh != 0)
5127 if (post_shift - 1 >= prec)
5128 return NULL;
5130 /* t1 = oprnd0 h* ml;
5131 t2 = oprnd0 - t1;
5132 t3 = t2 >> 1;
5133 t4 = t1 + t3;
5134 q = t4 >> (post_shift - 1); */
5135 t1 = vect_recog_temp_ssa_var (itype, NULL);
5136 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
5137 build_int_cst (itype, ml));
5138 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5140 t2 = vect_recog_temp_ssa_var (itype, NULL);
5141 def_stmt
5142 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
5143 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5145 t3 = vect_recog_temp_ssa_var (itype, NULL);
5146 def_stmt
5147 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
5148 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5150 t4 = vect_recog_temp_ssa_var (itype, NULL);
5151 def_stmt
5152 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
5154 if (post_shift != 1)
5156 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5158 q = vect_recog_temp_ssa_var (itype, NULL);
5159 pattern_stmt
5160 = gimple_build_assign (q, RSHIFT_EXPR, t4,
5161 build_int_cst (itype, post_shift - 1));
5163 else
5165 q = t4;
5166 pattern_stmt = def_stmt;
5169 else
5171 if (pre_shift >= prec || post_shift >= prec)
5172 return NULL;
5174 /* t1 = oprnd0 >> pre_shift;
5175 t2 = t1 h* ml;
5176 q = t2 >> post_shift; */
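/* E.g. unsigned 8-bit division by 3 can use ml == 171 with
     pre_shift == 0 and post_shift == 1 (illustrative constants, not
     necessarily the exact ones choose_multiplier returns):

       q = ((x * 171) >> 8) >> 1 == x / 3 for every 8-bit x.  */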
5177 if (pre_shift)
5179 t1 = vect_recog_temp_ssa_var (itype, NULL);
5180 def_stmt
5181 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
5182 build_int_cst (NULL, pre_shift));
5183 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5185 else
5186 t1 = oprnd0;
5188 t2 = vect_recog_temp_ssa_var (itype, NULL);
5189 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
5190 build_int_cst (itype, ml));
5192 if (post_shift)
5194 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5196 q = vect_recog_temp_ssa_var (itype, NULL);
5197 def_stmt
5198 = gimple_build_assign (q, RSHIFT_EXPR, t2,
5199 build_int_cst (itype, post_shift));
5201 else
5202 q = t2;
5204 pattern_stmt = def_stmt;
5207 else
5209 unsigned HOST_WIDE_INT ml;
5210 int post_shift;
5211 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
5212 unsigned HOST_WIDE_INT abs_d;
5213 bool add = false;
5214 tree t1, t2, t3, t4;
5216 /* Give up for -1. */
5217 if (d == -1)
5218 return NULL;
5220 /* Since d might be INT_MIN, we have to cast to
5221 unsigned HOST_WIDE_INT before negating to avoid
5222 undefined signed overflow. */
5223 abs_d = (d >= 0
5224 ? (unsigned HOST_WIDE_INT) d
5225 : - (unsigned HOST_WIDE_INT) d);
5227 /* n rem d = n rem -d */
5228 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
5230 d = abs_d;
5231 oprnd1 = build_int_cst (itype, abs_d);
5233 if (HOST_BITS_PER_WIDE_INT >= prec
5234 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
5235 /* This case is not handled correctly below. */
5236 return NULL;
5238 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift);
5239 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
5241 add = true;
5242 ml |= HOST_WIDE_INT_M1U << (prec - 1);
5244 if (post_shift >= prec)
5245 return NULL;
5247 /* t1 = oprnd0 h* ml; */
5248 t1 = vect_recog_temp_ssa_var (itype, NULL);
5249 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
5250 build_int_cst (itype, ml));
5252 if (add)
5254 /* t2 = t1 + oprnd0; */
5255 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5256 t2 = vect_recog_temp_ssa_var (itype, NULL);
5257 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
5259 else
5260 t2 = t1;
5262 if (post_shift)
5264 /* t3 = t2 >> post_shift; */
5265 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5266 t3 = vect_recog_temp_ssa_var (itype, NULL);
5267 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
5268 build_int_cst (itype, post_shift));
5270 else
5271 t3 = t2;
5273 int msb = 1;
5274 int_range_max r;
5275 get_range_query (cfun)->range_of_expr (r, oprnd0);
5276 if (!r.varying_p () && !r.undefined_p ())
5278 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
5279 msb = 0;
5280 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
5281 msb = -1;
5284 if (msb == 0 && d >= 0)
5286 /* q = t3; */
5287 q = t3;
5288 pattern_stmt = def_stmt;
5290 else
5292 /* t4 = oprnd0 >> (prec - 1);
5293 or if we know from VRP that oprnd0 >= 0
5294 t4 = 0;
5295 or if we know from VRP that oprnd0 < 0
5296 t4 = -1; */
5297 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5298 t4 = vect_recog_temp_ssa_var (itype, NULL);
5299 if (msb != 1)
5300 def_stmt = gimple_build_assign (t4, INTEGER_CST,
5301 build_int_cst (itype, msb));
5302 else
5303 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
5304 build_int_cst (itype, prec - 1));
5305 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5307 /* q = t3 - t4; or q = t4 - t3; */
5308 q = vect_recog_temp_ssa_var (itype, NULL);
5309 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
5310 d < 0 ? t3 : t4);
5314 if (rhs_code == TRUNC_MOD_EXPR)
5316 tree r, t1;
5318 /* We divided. Now finish by:
5319 t1 = q * oprnd1;
5320 r = oprnd0 - t1; */
5321 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5323 t1 = vect_recog_temp_ssa_var (itype, NULL);
5324 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5325 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5327 r = vect_recog_temp_ssa_var (itype, NULL);
5328 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5331 /* Pattern detected. */
5332 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5334 *type_out = vectype;
5335 return pattern_stmt;
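/* As a concrete illustration of the unsigned path above when the
   suggested multiplier does not fit in PREC bits: for prec == 32 and
   d == 7, choose_multiplier is expected to return ml == 0x24924925 and
   post_shift == 3 (the usual magic constants for dividing by 7), so the
   sequence built above corresponds to the scalar sketch

     unsigned int
     udiv7 (unsigned int n)
     {
       unsigned int t1 = ((unsigned long long) n * 0x24924925u) >> 32;
       unsigned int t2 = n - t1;
       unsigned int t3 = t2 >> 1;
       unsigned int t4 = t1 + t3;
       return t4 >> 2;	// q = t4 >> (post_shift - 1)
     }

   e.g. udiv7 (100) == 14 and udiv7 (0xffffffffu) == 0x24924924.  */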
5338 /* Detects pattern with a modulo operation (S1) where both arguments
5339 are variables of integral type.
5340 The statement is replaced by division, multiplication, and subtraction.
5341 The last statement (S4) is returned.
5343 Example:
5344 S1 c_t = a_t % b_t;
5346 is replaced by
5347 S2 x_t = a_t / b_t;
5348 S3 y_t = x_t * b_t;
5349 S4 z_t = a_t - y_t; */
5351 static gimple *
5352 vect_recog_mod_var_pattern (vec_info *vinfo,
5353 stmt_vec_info stmt_vinfo, tree *type_out)
5355 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5356 tree oprnd0, oprnd1, vectype, itype;
5357 gimple *pattern_stmt, *def_stmt;
5358 enum tree_code rhs_code;
5360 if (!is_gimple_assign (last_stmt))
5361 return NULL;
5363 rhs_code = gimple_assign_rhs_code (last_stmt);
5364 if (rhs_code != TRUNC_MOD_EXPR)
5365 return NULL;
5367 oprnd0 = gimple_assign_rhs1 (last_stmt);
5368 oprnd1 = gimple_assign_rhs2 (last_stmt);
5369 itype = TREE_TYPE (oprnd0);
5370 if (TREE_CODE (oprnd0) != SSA_NAME
5371 || TREE_CODE (oprnd1) != SSA_NAME
5372 || TREE_CODE (itype) != INTEGER_TYPE)
5373 return NULL;
5375 vectype = get_vectype_for_scalar_type (vinfo, itype);
5377 if (!vectype
5378 || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
5379 || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
5380 || !target_has_vecop_for_code (MULT_EXPR, vectype)
5381 || !target_has_vecop_for_code (MINUS_EXPR, vectype))
5382 return NULL;
5384 tree q, tmp, r;
5385 q = vect_recog_temp_ssa_var (itype, NULL);
5386 def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
5387 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
5389 tmp = vect_recog_temp_ssa_var (itype, NULL);
5390 def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
5391 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
5393 r = vect_recog_temp_ssa_var (itype, NULL);
5394 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
5396 /* Pattern detected. */
5397 *type_out = vectype;
5398 vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
5400 return pattern_stmt;
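/* In scalar terms the replacement built above is simply

     int
     mod_var (int a, int b)
     {
       int q = a / b;	// S2
       int t = q * b;	// S3
       return a - t;	// S4, equal to a % b whenever a / b is defined
     }

   so only vector division, multiplication and subtraction are needed.  */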
5403 /* Function vect_recog_mixed_size_cond_pattern
5405 Try to find the following pattern:
5407 type x_t, y_t;
5408 TYPE a_T, b_T, c_T;
5409 loop:
5410 S1 a_T = x_t CMP y_t ? b_T : c_T;
5412 where type 'TYPE' is an integral type which has different size
5413 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5414 than 'type', the constants need to fit into an integer type
5415 with the same width as 'type') or results of conversion from 'type'.
5417 Input:
5419 * STMT_VINFO: The stmt from which the pattern search begins.
5421 Output:
5423 * TYPE_OUT: The type of the output of this pattern.
5425 * Return value: A new stmt that will be used to replace the pattern.
5426 Additionally a def_stmt is added.
5428 a_it = x_t CMP y_t ? b_it : c_it;
5429 a_T = (TYPE) a_it; */
5431 static gimple *
5432 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5433 stmt_vec_info stmt_vinfo, tree *type_out)
5435 gimple *last_stmt = stmt_vinfo->stmt;
5436 tree cond_expr, then_clause, else_clause;
5437 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5438 gimple *pattern_stmt, *def_stmt;
5439 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5440 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5441 bool promotion;
5442 tree comp_scalar_type;
5444 if (!is_gimple_assign (last_stmt)
5445 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
5446 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5447 return NULL;
5449 cond_expr = gimple_assign_rhs1 (last_stmt);
5450 then_clause = gimple_assign_rhs2 (last_stmt);
5451 else_clause = gimple_assign_rhs3 (last_stmt);
5453 if (!COMPARISON_CLASS_P (cond_expr))
5454 return NULL;
5456 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5457 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5458 if (comp_vectype == NULL_TREE)
5459 return NULL;
5461 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5462 if (types_compatible_p (type, comp_scalar_type)
5463 || ((TREE_CODE (then_clause) != INTEGER_CST
5464 || TREE_CODE (else_clause) != INTEGER_CST)
5465 && !INTEGRAL_TYPE_P (comp_scalar_type))
5466 || !INTEGRAL_TYPE_P (type))
5467 return NULL;
5469 if ((TREE_CODE (then_clause) != INTEGER_CST
5470 && !type_conversion_p (vinfo, then_clause, false,
5471 &orig_type0, &def_stmt0, &promotion))
5472 || (TREE_CODE (else_clause) != INTEGER_CST
5473 && !type_conversion_p (vinfo, else_clause, false,
5474 &orig_type1, &def_stmt1, &promotion)))
5475 return NULL;
5477 if (orig_type0 && orig_type1
5478 && !types_compatible_p (orig_type0, orig_type1))
5479 return NULL;
5481 if (orig_type0)
5483 if (!types_compatible_p (orig_type0, comp_scalar_type))
5484 return NULL;
5485 then_clause = gimple_assign_rhs1 (def_stmt0);
5486 itype = orig_type0;
5489 if (orig_type1)
5491 if (!types_compatible_p (orig_type1, comp_scalar_type))
5492 return NULL;
5493 else_clause = gimple_assign_rhs1 (def_stmt1);
5494 itype = orig_type1;
5498 HOST_WIDE_INT cmp_mode_size
5499 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5501 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5502 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
5503 return NULL;
5505 vectype = get_vectype_for_scalar_type (vinfo, type);
5506 if (vectype == NULL_TREE)
5507 return NULL;
5509 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5510 return NULL;
5512 if (itype == NULL_TREE)
5513 itype = build_nonstandard_integer_type (cmp_mode_size,
5514 TYPE_UNSIGNED (type));
5516 if (itype == NULL_TREE
5517 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5518 return NULL;
5520 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5521 if (vecitype == NULL_TREE)
5522 return NULL;
5524 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5525 return NULL;
5527 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
5529 if ((TREE_CODE (then_clause) == INTEGER_CST
5530 && !int_fits_type_p (then_clause, itype))
5531 || (TREE_CODE (else_clause) == INTEGER_CST
5532 && !int_fits_type_p (else_clause, itype)))
5533 return NULL;
5536 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5537 COND_EXPR, unshare_expr (cond_expr),
5538 fold_convert (itype, then_clause),
5539 fold_convert (itype, else_clause));
5540 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5541 NOP_EXPR, gimple_assign_lhs (def_stmt));
5543 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
5544 *type_out = vectype;
5546 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
5548 return pattern_stmt;
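/* For instance, with a 16-bit 'type' and a 32-bit 'TYPE':

     short x_t, y_t;
     int a_T;
     a_T = x_t < y_t ? 5 : -1;

   is rewritten as

     short a_it = x_t < y_t ? 5 : -1;
     a_T = (int) a_it;

   so the comparison and the select operate on 16-bit vector elements
   and only the final conversion widens to 32 bits.  */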
5552 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5553 true if bool VAR can and should be optimized that way. Assume it shouldn't
5554 in case it's a result of a comparison which can be directly vectorized into
5555 a vector comparison. Fills in STMTS with all stmts visited during the
5556 walk. */
5558 static bool
5559 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5561 tree rhs1;
5562 enum tree_code rhs_code;
5564 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5565 if (!def_stmt_info)
5566 return false;
5568 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
5569 if (!def_stmt)
5570 return false;
5572 if (stmts.contains (def_stmt))
5573 return true;
5575 rhs1 = gimple_assign_rhs1 (def_stmt);
5576 rhs_code = gimple_assign_rhs_code (def_stmt);
5577 switch (rhs_code)
5579 case SSA_NAME:
5580 if (! check_bool_pattern (rhs1, vinfo, stmts))
5581 return false;
5582 break;
5584 CASE_CONVERT:
5585 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5586 return false;
5587 if (! check_bool_pattern (rhs1, vinfo, stmts))
5588 return false;
5589 break;
5591 case BIT_NOT_EXPR:
5592 if (! check_bool_pattern (rhs1, vinfo, stmts))
5593 return false;
5594 break;
5596 case BIT_AND_EXPR:
5597 case BIT_IOR_EXPR:
5598 case BIT_XOR_EXPR:
5599 if (! check_bool_pattern (rhs1, vinfo, stmts)
5600 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
5601 return false;
5602 break;
5604 default:
5605 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5607 tree vecitype, comp_vectype;
5609 /* If the comparison can throw, then is_gimple_condexpr will be
5610 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5611 if (stmt_could_throw_p (cfun, def_stmt))
5612 return false;
5614 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5615 if (comp_vectype == NULL_TREE)
5616 return false;
5618 tree mask_type = get_mask_type_for_scalar_type (vinfo,
5619 TREE_TYPE (rhs1));
5620 if (mask_type
5621 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5622 return false;
5624 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5626 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5627 tree itype
5628 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5629 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5630 if (vecitype == NULL_TREE)
5631 return false;
5633 else
5634 vecitype = comp_vectype;
5635 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5636 return false;
5638 else
5639 return false;
5640 break;
5643 bool res = stmts.add (def_stmt);
5644 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5645 gcc_assert (!res);
5647 return true;
5651 /* Helper function of adjust_bool_pattern.  Cast VAR to TYPE,
5652 appending the cast statement to STMT_INFO's pattern definition
5653 sequence, and return the result of the cast. */
5655 static tree
5656 adjust_bool_pattern_cast (vec_info *vinfo,
5657 tree type, tree var, stmt_vec_info stmt_info)
5659 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5660 NOP_EXPR, var);
5661 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
5662 get_vectype_for_scalar_type (vinfo, type));
5663 return gimple_assign_lhs (cast_stmt);
5666 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5667 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5668 type, OUT_TYPE is the desired final integer type of the whole pattern.
5669 STMT_INFO is the info of the pattern root and is where pattern stmts should
5670 be associated with. DEFS is a map of pattern defs. */
5672 static void
5673 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5674 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5676 gimple *stmt = SSA_NAME_DEF_STMT (var);
5677 enum tree_code rhs_code, def_rhs_code;
5678 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5679 location_t loc;
5680 gimple *pattern_stmt, *def_stmt;
5681 tree trueval = NULL_TREE;
5683 rhs1 = gimple_assign_rhs1 (stmt);
5684 rhs2 = gimple_assign_rhs2 (stmt);
5685 rhs_code = gimple_assign_rhs_code (stmt);
5686 loc = gimple_location (stmt);
5687 switch (rhs_code)
5689 case SSA_NAME:
5690 CASE_CONVERT:
5691 irhs1 = *defs.get (rhs1);
5692 itype = TREE_TYPE (irhs1);
5693 pattern_stmt
5694 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5695 SSA_NAME, irhs1);
5696 break;
5698 case BIT_NOT_EXPR:
5699 irhs1 = *defs.get (rhs1);
5700 itype = TREE_TYPE (irhs1);
5701 pattern_stmt
5702 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5703 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5704 break;
5706 case BIT_AND_EXPR:
5707 /* Try to optimize x = y & (a < b ? 1 : 0); into
5708 x = (a < b ? y : 0);
5710 E.g. for:
5711 bool a_b, b_b, c_b;
5712 TYPE d_T;
5714 S1 a_b = x1 CMP1 y1;
5715 S2 b_b = x2 CMP2 y2;
5716 S3 c_b = a_b & b_b;
5717 S4 d_T = (TYPE) c_b;
5719 we would normally emit:
5721 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5722 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5723 S3' c_T = a_T & b_T;
5724 S4' d_T = c_T;
5726 but we can save one stmt by using the
5727 result of one of the COND_EXPRs in the other COND_EXPR and leave
5728 BIT_AND_EXPR stmt out:
5730 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5731 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5732 S4' d_T = c_T;
5734 At least when VEC_COND_EXPR is implemented using masks
5735 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
5736 computes the comparison masks and ands it, in one case with
5737 all ones vector, in the other case with a vector register.
5738 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5739 often more expensive. */
5740 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5741 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5742 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5744 irhs1 = *defs.get (rhs1);
5745 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5746 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5747 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5749 rhs_code = def_rhs_code;
5750 rhs1 = def_rhs1;
5751 rhs2 = gimple_assign_rhs2 (def_stmt);
5752 trueval = irhs1;
5753 goto do_compare;
5755 else
5756 irhs2 = *defs.get (rhs2);
5757 goto and_ior_xor;
5759 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5760 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5761 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5763 irhs2 = *defs.get (rhs2);
5764 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5765 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5766 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5768 rhs_code = def_rhs_code;
5769 rhs1 = def_rhs1;
5770 rhs2 = gimple_assign_rhs2 (def_stmt);
5771 trueval = irhs2;
5772 goto do_compare;
5774 else
5775 irhs1 = *defs.get (rhs1);
5776 goto and_ior_xor;
5778 /* FALLTHRU */
5779 case BIT_IOR_EXPR:
5780 case BIT_XOR_EXPR:
5781 irhs1 = *defs.get (rhs1);
5782 irhs2 = *defs.get (rhs2);
5783 and_ior_xor:
5784 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5785 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5787 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5788 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5789 int out_prec = TYPE_PRECISION (out_type);
5790 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
5791 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
5792 stmt_info);
5793 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
5794 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
5795 stmt_info);
5796 else
5798 irhs1 = adjust_bool_pattern_cast (vinfo,
5799 out_type, irhs1, stmt_info);
5800 irhs2 = adjust_bool_pattern_cast (vinfo,
5801 out_type, irhs2, stmt_info);
5804 itype = TREE_TYPE (irhs1);
5805 pattern_stmt
5806 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5807 rhs_code, irhs1, irhs2);
5808 break;
5810 default:
5811 do_compare:
5812 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5813 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5814 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5815 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5816 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5818 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5819 itype
5820 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5822 else
5823 itype = TREE_TYPE (rhs1);
5824 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
5825 if (trueval == NULL_TREE)
5826 trueval = build_int_cst (itype, 1);
5827 else
5828 gcc_checking_assert (useless_type_conversion_p (itype,
5829 TREE_TYPE (trueval)));
5830 pattern_stmt
5831 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5832 COND_EXPR, cond_expr, trueval,
5833 build_int_cst (itype, 0));
5834 break;
5837 gimple_set_location (pattern_stmt, loc);
5838 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
5839 get_vectype_for_scalar_type (vinfo, itype));
5840 defs.put (var, gimple_assign_lhs (pattern_stmt));
5843 /* Comparison function to qsort a vector of gimple stmts after UID. */
5845 static int
5846 sort_after_uid (const void *p1, const void *p2)
5848 const gimple *stmt1 = *(const gimple * const *)p1;
5849 const gimple *stmt2 = *(const gimple * const *)p2;
5850 return gimple_uid (stmt1) - gimple_uid (stmt2);
5853 /* Create pattern stmts for all stmts participating in the bool pattern
5854 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5855 OUT_TYPE. Return the def of the pattern root. */
5857 static tree
5858 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5859 tree out_type, stmt_vec_info stmt_info)
5861 /* Gather original stmts in the bool pattern in their order of appearance
5862 in the IL. */
5863 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5864 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5865 i != bool_stmt_set.end (); ++i)
5866 bool_stmts.quick_push (*i);
5867 bool_stmts.qsort (sort_after_uid);
5869 /* Now process them in that order, producing pattern stmts. */
5870 hash_map <tree, tree> defs;
5871 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5872 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
5873 out_type, stmt_info, defs);
5875 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5876 gimple *pattern_stmt
5877 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5878 return gimple_assign_lhs (pattern_stmt);
5881 /* Return the proper type for converting bool VAR into
5882 an integer value or NULL_TREE if no such type exists.
5883 The type is chosen so that the converted value has the
5884 same number of elements as VAR's vector type. */
5886 static tree
5887 integer_type_for_mask (tree var, vec_info *vinfo)
5889 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5890 return NULL_TREE;
5892 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5893 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
5894 return NULL_TREE;
5896 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5899 /* Function vect_recog_gcond_pattern
5901 Try to find a pattern like the following:
5903 if (a op b)
5905 where operator 'op' is not != and convert it to an adjusted boolean pattern
5907 mask = a op b
5908 if (mask != 0)
5910 and set the mask type on MASK.
5912 Input:
5914 * STMT_VINFO: The stmt from which the pattern
5915 search begins, i.e. the GIMPLE_COND of a
5916 loop exit.
5918 Output:
5920 * TYPE_OUT: The type of the output of this pattern.
5922 * Return value: A new stmt that will be used to replace the pattern. */
5924 static gimple *
5925 vect_recog_gcond_pattern (vec_info *vinfo,
5926 stmt_vec_info stmt_vinfo, tree *type_out)
5928 /* Currently we only support this for loop vectorization and only when
5929 the loop has multiple exits (early breaks). */
5930 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5931 if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
5932 return NULL;
5934 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5935 gcond* cond = NULL;
5936 if (!(cond = dyn_cast <gcond *> (last_stmt)))
5937 return NULL;
5939 auto lhs = gimple_cond_lhs (cond);
5940 auto rhs = gimple_cond_rhs (cond);
5941 auto code = gimple_cond_code (cond);
5943 tree scalar_type = TREE_TYPE (lhs);
5944 if (VECTOR_TYPE_P (scalar_type))
5945 return NULL;
5947 if (code == NE_EXPR
5948 && zerop (rhs)
5949 && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
5950 return NULL;
5952 tree vecitype = get_vectype_for_scalar_type (vinfo, scalar_type);
5953 if (vecitype == NULL_TREE)
5954 return NULL;
5956 tree vectype = truth_type_for (vecitype);
5958 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5959 gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
5960 append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
5962 gimple *pattern_stmt
5963 = gimple_build_cond (NE_EXPR, new_lhs,
5964 build_int_cst (TREE_TYPE (new_lhs), 0),
5965 NULL_TREE, NULL_TREE);
5966 *type_out = vectype;
5967 vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
5968 return pattern_stmt;
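/* For example, an early-break exit condition

     if (a_1 < b_2)

   is rewritten into

     mask_3 = a_1 < b_2;
     if (mask_3 != 0)

   where mask_3 gets a vector mask type, which is the form the
   early-break vectorization code expects.  */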
5971 /* Function vect_recog_bool_pattern
5973 Try to find a pattern like the following:
5975 bool a_b, b_b, c_b, d_b, e_b;
5976 TYPE f_T;
5977 loop:
5978 S1 a_b = x1 CMP1 y1;
5979 S2 b_b = x2 CMP2 y2;
5980 S3 c_b = a_b & b_b;
5981 S4 d_b = x3 CMP3 y3;
5982 S5 e_b = c_b | d_b;
5983 S6 f_T = (TYPE) e_b;
5985 where type 'TYPE' is an integral type. Or a similar pattern
5986 ending in
5988 S6 f_Y = e_b ? r_Y : s_Y;
5990 as results from if-conversion of a complex condition.
5992 Input:
5994 * STMT_VINFO: The stmt at the end from which the pattern
5995 search begins, i.e. cast of a bool to
5996 an integer type.
5998 Output:
6000 * TYPE_OUT: The type of the output of this pattern.
6002 * Return value: A new stmt that will be used to replace the pattern.
6004 Assuming size of TYPE is the same as size of all comparisons
6005 (otherwise some casts would be added where needed), for the above
6006 sequence we create related pattern stmts:
6007 S1' a_T = x1 CMP1 y1 ? 1 : 0;
6008 S3' c_T = x2 CMP2 y2 ? a_T : 0;
6009 S4' d_T = x3 CMP3 y3 ? 1 : 0;
6010 S5' e_T = c_T | d_T;
6011 S6' f_T = e_T;
6013 Instead of the above S3' we could emit:
6014 S2' b_T = x2 CMP2 y2 ? 1 : 0;
6015 S3' c_T = a_T | b_T;
6016 but the above is more efficient. */
6018 static gimple *
6019 vect_recog_bool_pattern (vec_info *vinfo,
6020 stmt_vec_info stmt_vinfo, tree *type_out)
6022 gimple *last_stmt = stmt_vinfo->stmt;
6023 enum tree_code rhs_code;
6024 tree var, lhs, rhs, vectype;
6025 gimple *pattern_stmt;
6027 if (!is_gimple_assign (last_stmt))
6028 return NULL;
6030 var = gimple_assign_rhs1 (last_stmt);
6031 lhs = gimple_assign_lhs (last_stmt);
6032 rhs_code = gimple_assign_rhs_code (last_stmt);
6034 if (rhs_code == VIEW_CONVERT_EXPR)
6035 var = TREE_OPERAND (var, 0);
6037 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
6038 return NULL;
6040 hash_set<gimple *> bool_stmts;
6042 if (CONVERT_EXPR_CODE_P (rhs_code)
6043 || rhs_code == VIEW_CONVERT_EXPR)
6045 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
6046 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6047 return NULL;
6048 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6050 if (check_bool_pattern (var, vinfo, bool_stmts))
6052 rhs = adjust_bool_stmts (vinfo, bool_stmts,
6053 TREE_TYPE (lhs), stmt_vinfo);
6054 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6055 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
6056 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
6057 else
6058 pattern_stmt
6059 = gimple_build_assign (lhs, NOP_EXPR, rhs);
6061 else
6063 tree type = integer_type_for_mask (var, vinfo);
6064 tree cst0, cst1, tmp;
6066 if (!type)
6067 return NULL;
6069 /* We may directly use a cond with a narrowed type to avoid
6070 multiple cond exprs with subsequent result packing and
6071 instead perform a single cond with a packed mask. In case
6072 of widening it is better to do the cond first and then
6073 extract the results. */
6074 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
6075 type = TREE_TYPE (lhs);
6077 cst0 = build_int_cst (type, 0);
6078 cst1 = build_int_cst (type, 1);
6079 tmp = vect_recog_temp_ssa_var (type, NULL);
6080 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
6082 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
6084 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
6085 append_pattern_def_seq (vinfo, stmt_vinfo,
6086 pattern_stmt, new_vectype);
6088 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6089 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
6093 *type_out = vectype;
6094 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6096 return pattern_stmt;
6098 else if (rhs_code == COND_EXPR
6099 && TREE_CODE (var) == SSA_NAME)
6101 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6102 if (vectype == NULL_TREE)
6103 return NULL;
6105 /* Build a scalar type for the boolean result that when
6106 vectorized matches the vector type of the result in
6107 size and number of elements. */
6108 unsigned prec
6109 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
6110 TYPE_VECTOR_SUBPARTS (vectype));
6112 tree type
6113 = build_nonstandard_integer_type (prec,
6114 TYPE_UNSIGNED (TREE_TYPE (var)));
6115 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
6116 return NULL;
6118 enum vect_def_type dt;
6119 if (check_bool_pattern (var, vinfo, bool_stmts))
6120 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
6121 else if (integer_type_for_mask (var, vinfo))
6122 return NULL;
6123 else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
6124 && vect_is_simple_use (var, vinfo, &dt)
6125 && (dt == vect_external_def
6126 || dt == vect_constant_def))
6128 /* If the condition is already a boolean then manually convert it to a
6129 mask of the given integer type but don't set a vectype. */
6130 tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
6131 pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
6132 build_all_ones_cst (type),
6133 build_zero_cst (type));
6134 append_inv_pattern_def_seq (vinfo, pattern_stmt);
6135 var = lhs_ivar;
6138 tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
6139 pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
6140 build_zero_cst (TREE_TYPE (var)));
6142 tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
6143 if (!new_vectype)
6144 return NULL;
6146 new_vectype = truth_type_for (new_vectype);
6147 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
6148 TREE_TYPE (var));
6150 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6151 pattern_stmt
6152 = gimple_build_assign (lhs, COND_EXPR, lhs_var,
6153 gimple_assign_rhs2 (last_stmt),
6154 gimple_assign_rhs3 (last_stmt));
6155 *type_out = vectype;
6156 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6158 return pattern_stmt;
6160 else if (rhs_code == SSA_NAME
6161 && STMT_VINFO_DATA_REF (stmt_vinfo))
6163 stmt_vec_info pattern_stmt_info;
6164 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6165 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
6166 return NULL;
6168 if (check_bool_pattern (var, vinfo, bool_stmts))
6169 rhs = adjust_bool_stmts (vinfo, bool_stmts,
6170 TREE_TYPE (vectype), stmt_vinfo);
6171 else
6173 tree type = integer_type_for_mask (var, vinfo);
6174 tree cst0, cst1, new_vectype;
6176 if (!type)
6177 return NULL;
6179 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
6180 type = TREE_TYPE (vectype);
6182 cst0 = build_int_cst (type, 0);
6183 cst1 = build_int_cst (type, 1);
6184 new_vectype = get_vectype_for_scalar_type (vinfo, type);
6186 rhs = vect_recog_temp_ssa_var (type, NULL);
6187 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
6188 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
6191 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
6192 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
6194 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6195 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
6196 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
6197 rhs = rhs2;
6199 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
6200 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
6201 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6202 *type_out = vectype;
6203 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6205 return pattern_stmt;
6207 else
6208 return NULL;
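/* The memory case above handles e.g. a stored comparison result

     _Bool *p;
     S1  a_b = x_4 > y_5;
     S2  *p = a_b;

   by first turning the mask into a 0/1 integer of the element size,
   roughly

     S1'  a_T = x_4 > y_5 ? 1 : 0;
     S2'  *(T *) p = a_T;

   (the store destination is wrapped in a VIEW_CONVERT_EXPR), since a
   vector mask cannot be stored directly.  */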
6212 /* A helper for vect_recog_mask_conversion_pattern. Build
6213 conversion of MASK to a type suitable for masking VECTYPE.
6214 The built statement gets the required vectype and is appended
6215 to STMT_VINFO's pattern definition sequence.
6217 Return the converted mask. */
6219 static tree
6220 build_mask_conversion (vec_info *vinfo,
6221 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
6223 gimple *stmt;
6224 tree masktype, tmp;
6226 masktype = truth_type_for (vectype);
6227 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
6228 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
6229 append_pattern_def_seq (vinfo, stmt_vinfo,
6230 stmt, masktype, TREE_TYPE (vectype));
6232 return tmp;
6236 /* Function vect_recog_mask_conversion_pattern
6238 Try to find statements which require boolean type
6239 conversion. Additional conversion statements are
6240 added to handle such cases. For example:
6242 bool m_1, m_2, m_3;
6243 int i_4, i_5;
6244 double d_6, d_7;
6245 char c_1, c_2, c_3;
6247 S1 m_1 = i_4 > i_5;
6248 S2 m_2 = d_6 < d_7;
6249 S3 m_3 = m_1 & m_2;
6250 S4 c_1 = m_3 ? c_2 : c_3;
6252 Will be transformed into:
6254 S1 m_1 = i_4 > i_5;
6255 S2 m_2 = d_6 < d_7;
6256 S3'' m_2' = (_Bool[bitsize=32])m_2
6257 S3' m_3' = m_1 & m_2';
6258 S4'' m_3'' = (_Bool[bitsize=8])m_3'
6259 S4' c_1' = m_3'' ? c_2 : c_3; */
6261 static gimple *
6262 vect_recog_mask_conversion_pattern (vec_info *vinfo,
6263 stmt_vec_info stmt_vinfo, tree *type_out)
6265 gimple *last_stmt = stmt_vinfo->stmt;
6266 enum tree_code rhs_code;
6267 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
6268 tree vectype1, vectype2;
6269 stmt_vec_info pattern_stmt_info;
6270 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
6271 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
6273 /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
6274 conversion. */
6275 if (is_gimple_call (last_stmt)
6276 && gimple_call_internal_p (last_stmt))
6278 gcall *pattern_stmt;
6280 internal_fn ifn = gimple_call_internal_fn (last_stmt);
6281 int mask_argno = internal_fn_mask_index (ifn);
6282 if (mask_argno < 0)
6283 return NULL;
6285 bool store_p = internal_store_fn_p (ifn);
6286 bool load_p = internal_load_fn_p (ifn);
6287 if (store_p)
6289 int rhs_index = internal_fn_stored_value_index (ifn);
6290 tree rhs = gimple_call_arg (last_stmt, rhs_index);
6291 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
6293 else
6295 lhs = gimple_call_lhs (last_stmt);
6296 if (!lhs)
6297 return NULL;
6298 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6301 if (!vectype1)
6302 return NULL;
6304 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
6305 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
6306 if (mask_arg_type)
6308 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
6310 if (!vectype2
6311 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6312 TYPE_VECTOR_SUBPARTS (vectype2)))
6313 return NULL;
6315 else if (store_p || load_p)
6316 return NULL;
6318 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
6320 auto_vec<tree, 8> args;
6321 unsigned int nargs = gimple_call_num_args (last_stmt);
6322 args.safe_grow (nargs, true);
6323 for (unsigned int i = 0; i < nargs; ++i)
6324 args[i] = ((int) i == mask_argno
6325 ? tmp
6326 : gimple_call_arg (last_stmt, i));
6327 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
6329 if (!store_p)
6331 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6332 gimple_call_set_lhs (pattern_stmt, lhs);
6335 if (load_p || store_p)
6336 gimple_call_set_nothrow (pattern_stmt, true);
6338 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
6339 if (STMT_VINFO_DATA_REF (stmt_vinfo))
6340 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6342 *type_out = vectype1;
6343 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6345 return pattern_stmt;
6348 if (!is_gimple_assign (last_stmt))
6349 return NULL;
6351 gimple *pattern_stmt;
6352 lhs = gimple_assign_lhs (last_stmt);
6353 rhs1 = gimple_assign_rhs1 (last_stmt);
6354 rhs_code = gimple_assign_rhs_code (last_stmt);
6356 /* Check for cond expression requiring mask conversion. */
6357 if (rhs_code == COND_EXPR)
6359 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6361 if (TREE_CODE (rhs1) == SSA_NAME)
6363 rhs1_type = integer_type_for_mask (rhs1, vinfo);
6364 if (!rhs1_type)
6365 return NULL;
6367 else if (COMPARISON_CLASS_P (rhs1))
6369 /* Check whether we're comparing scalar booleans and (if so)
6370 whether a better mask type exists than the mask associated
6371 with boolean-sized elements. This avoids unnecessary packs
6372 and unpacks if the booleans are set from comparisons of
6373 wider types. E.g. in:
6375 int x1, x2, x3, x4, y1, y2;
6377 bool b1 = (x1 == x2);
6378 bool b2 = (x3 == x4);
6379 ... = b1 == b2 ? y1 : y2;
6381 it is better for b1 and b2 to use the mask type associated
6382 with int elements rather than bool (byte) elements. */
6383 rhs1_op0 = TREE_OPERAND (rhs1, 0);
6384 rhs1_op1 = TREE_OPERAND (rhs1, 1);
6385 if (!rhs1_op0 || !rhs1_op1)
6386 return NULL;
6387 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
6388 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
6390 if (!rhs1_op0_type)
6391 rhs1_type = TREE_TYPE (rhs1_op0);
6392 else if (!rhs1_op1_type)
6393 rhs1_type = TREE_TYPE (rhs1_op1);
6394 else if (TYPE_PRECISION (rhs1_op0_type)
6395 != TYPE_PRECISION (rhs1_op1_type))
6397 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
6398 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6399 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
6400 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6401 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
6403 if (abs (tmp0) > abs (tmp1))
6404 rhs1_type = rhs1_op1_type;
6405 else
6406 rhs1_type = rhs1_op0_type;
6408 else
6409 rhs1_type = build_nonstandard_integer_type
6410 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
6412 else
6413 rhs1_type = rhs1_op0_type;
6415 else
6416 return NULL;
6418 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6420 if (!vectype1 || !vectype2)
6421 return NULL;
6423 /* Continue if a conversion is needed. Also continue if we have
6424 a comparison whose vector type would normally be different from
6425 VECTYPE2 when considered in isolation. In that case we'll
6426 replace the comparison with an SSA name (so that we can record
6427 its vector type) and behave as though the comparison was an SSA
6428 name from the outset. */
6429 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6430 TYPE_VECTOR_SUBPARTS (vectype2))
6431 && !rhs1_op0_type
6432 && !rhs1_op1_type)
6433 return NULL;
6435 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
6436 in place, we can handle it in vectorizable_condition. This avoids
6437 unnecessary promotion stmts and increased vectorization factor. */
6438 if (COMPARISON_CLASS_P (rhs1)
6439 && INTEGRAL_TYPE_P (rhs1_type)
6440 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
6441 TYPE_VECTOR_SUBPARTS (vectype2)))
6443 enum vect_def_type dt;
6444 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
6445 && dt == vect_external_def
6446 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
6447 && (dt == vect_external_def
6448 || dt == vect_constant_def))
6450 tree wide_scalar_type = build_nonstandard_integer_type
6451 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
6452 tree vectype3 = get_vectype_for_scalar_type (vinfo,
6453 wide_scalar_type);
6454 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
6455 return NULL;
6459 /* If rhs1 is a comparison we need to move it into a
6460 separate statement. */
6461 if (TREE_CODE (rhs1) != SSA_NAME)
6463 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
6464 if (rhs1_op0_type
6465 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
6466 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
6467 vectype2, stmt_vinfo);
6468 if (rhs1_op1_type
6469 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
6470 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
6471 vectype2, stmt_vinfo);
6472 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
6473 rhs1_op0, rhs1_op1);
6474 rhs1 = tmp;
6475 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
6476 rhs1_type);
6479 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
6480 TYPE_VECTOR_SUBPARTS (vectype2)))
6481 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6482 else
6483 tmp = rhs1;
6485 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6486 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6487 gimple_assign_rhs2 (last_stmt),
6488 gimple_assign_rhs3 (last_stmt));
6490 *type_out = vectype1;
6491 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6493 return pattern_stmt;
6496 /* Now check for binary boolean operations requiring conversion for
6497 one of operands. */
6498 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6499 return NULL;
6501 if (rhs_code != BIT_IOR_EXPR
6502 && rhs_code != BIT_XOR_EXPR
6503 && rhs_code != BIT_AND_EXPR
6504 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6505 return NULL;
6507 rhs2 = gimple_assign_rhs2 (last_stmt);
6509 rhs1_type = integer_type_for_mask (rhs1, vinfo);
6510 rhs2_type = integer_type_for_mask (rhs2, vinfo);
6512 if (!rhs1_type || !rhs2_type
6513 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6514 return NULL;
6516 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6518 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6519 if (!vectype1)
6520 return NULL;
6521 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
6523 else
6525 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6526 if (!vectype1)
6527 return NULL;
6528 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6531 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6532 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6534 *type_out = vectype1;
6535 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6537 return pattern_stmt;
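/* The internal-call case earlier in this function does the analogous
   thing for masked loads and stores, e.g. (roughly)

     mask_5 = i_1 > i_2;			// mask from a 4-byte compare
     .MASK_STORE (p, align, mask_5, d_6);	// stores 8-byte elements

   becomes

     mask_7 = (<mask for 8-byte elements>) mask_5;
     .MASK_STORE (p, align, mask_7, d_6);

   so that the mask argument has the same number of elements as the
   stored or loaded vector type.  */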
6540 /* STMT_INFO is a load or store. If the load or store is conditional, return
6541 the boolean condition under which it occurs, otherwise return null. */
6543 static tree
6544 vect_get_load_store_mask (stmt_vec_info stmt_info)
6546 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6548 gcc_assert (gimple_assign_single_p (def_assign));
6549 return NULL_TREE;
6552 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6554 internal_fn ifn = gimple_call_internal_fn (def_call);
6555 int mask_index = internal_fn_mask_index (ifn);
6556 return gimple_call_arg (def_call, mask_index);
6559 gcc_unreachable ();
6562 /* Return MASK if MASK is suitable for masking an operation on vectors
6563 of type VECTYPE, otherwise convert it into such a form and return
6564 the result. Associate any conversion statements with STMT_INFO's
6565 pattern. */
6567 static tree
6568 vect_convert_mask_for_vectype (tree mask, tree vectype,
6569 stmt_vec_info stmt_info, vec_info *vinfo)
6571 tree mask_type = integer_type_for_mask (mask, vinfo);
6572 if (mask_type)
6574 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6575 if (mask_vectype
6576 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
6577 TYPE_VECTOR_SUBPARTS (mask_vectype)))
6578 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
6580 return mask;
6583 /* Return the equivalent of:
6585 fold_convert (TYPE, VALUE)
6587 with the expectation that the operation will be vectorized.
6588 If new statements are needed, add them as pattern statements
6589 to STMT_INFO. */
6591 static tree
6592 vect_add_conversion_to_pattern (vec_info *vinfo,
6593 tree type, tree value, stmt_vec_info stmt_info)
6595 if (useless_type_conversion_p (type, TREE_TYPE (value)))
6596 return value;
6598 tree new_value = vect_recog_temp_ssa_var (type, NULL);
6599 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6600 append_pattern_def_seq (vinfo, stmt_info, conversion,
6601 get_vectype_for_scalar_type (vinfo, type));
6602 return new_value;
6605 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6606 internal function. Return the final statement on success and set
6607 *TYPE_OUT to the vector type being loaded or stored.
6609 This function only handles gathers and scatters that were recognized
6610 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
6612 static gimple *
6613 vect_recog_gather_scatter_pattern (vec_info *vinfo,
6614 stmt_vec_info stmt_info, tree *type_out)
6616 /* Currently we only support this for loop vectorization. */
6617 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6618 if (!loop_vinfo)
6619 return NULL;
6621 /* Make sure that we're looking at a gather load or scatter store. */
6622 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6623 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6624 return NULL;
6626 /* Get the boolean that controls whether the load or store happens.
6627 This is null if the operation is unconditional. */
6628 tree mask = vect_get_load_store_mask (stmt_info);
6630 /* Make sure that the target supports an appropriate internal
6631 function for the gather/scatter operation. */
6632 gather_scatter_info gs_info;
6633 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6634 || gs_info.ifn == IFN_LAST)
6635 return NULL;
6637 /* Convert the mask to the right form. */
6638 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6639 gs_info.element_type);
6640 if (mask)
6641 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6642 loop_vinfo);
6643 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6644 || gs_info.ifn == IFN_MASK_GATHER_LOAD
6645 || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6646 || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6647 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6649 /* Get the invariant base and non-invariant offset, converting the
6650 latter to the same width as the vector elements. */
6651 tree base = gs_info.base;
6652 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6653 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6654 gs_info.offset, stmt_info);
6656 /* Build the new pattern statement. */
6657 tree scale = size_int (gs_info.scale);
6658 gcall *pattern_stmt;
6659 if (DR_IS_READ (dr))
6661 tree zero = build_zero_cst (gs_info.element_type);
6662 if (mask != NULL)
6663 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6664 offset, scale, zero, mask);
6665 else
6666 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6667 offset, scale, zero);
6668 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
6669 gimple_call_set_lhs (pattern_stmt, load_lhs);
6671 else
6673 tree rhs = vect_get_store_rhs (stmt_info);
6674 if (mask != NULL)
6675 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6676 base, offset, scale, rhs,
6677 mask);
6678 else
6679 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6680 base, offset, scale, rhs);
6682 gimple_call_set_nothrow (pattern_stmt, true);
6684 /* Copy across relevant vectorization info and associate DR with the
6685 new pattern statement instead of the original statement. */
6686 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6687 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6689 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6690 *type_out = vectype;
6691 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6693 return pattern_stmt;
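/* E.g. a masked gather load recognized via STMT_VINFO_GATHER_SCATTER_P
   from

     if (m_1)
       x_2 = base_3[off_4];

   ends up as the internal call

     x_5 = .MASK_GATHER_LOAD (base_3, off_6, scale, 0, m_1);

   where off_6 is off_4 converted to the offset type expected by the
   target and 0 is a zero of the loaded element type.  */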
6696 /* Helper method of vect_recog_cond_store_pattern, checks to see if COND_ARG
6697 points to a load statement that reads the same data as that of
6698 STORE_VINFO. */
6700 static bool
6701 vect_cond_store_pattern_same_ref (vec_info *vinfo,
6702 stmt_vec_info store_vinfo, tree cond_arg)
6704 stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
6705 if (!load_stmt_vinfo
6706 || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
6707 || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
6708 || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
6709 STMT_VINFO_DATA_REF (load_stmt_vinfo)))
6710 return false;
6712 return true;
6715 /* Function vect_recog_cond_store_pattern
6717 Try to find the following pattern:
6719 x = *_3;
6720 c = a CMP b;
6721 y = c ? t_20 : x;
6722 *_3 = y;
6724 where the store of _3 happens on a conditional select on a value loaded
6725 from the same location. In such a case we can elide the initial load if
6726 MASK_STORE is supported and instead only conditionally write out the result.
6728 The pattern produces for the above:
6730 c = a CMP b;
6731 .MASK_STORE (_3, c, t_20)
6733 Input:
6735 * STMT_VINFO: The stmt from which the pattern search begins. In the
6736 example, the pattern search begins at the store *_3 = y.
6738 Output:
6740 * TYPE_OUT: The type of the output of this pattern.
6742 * Return value: A new stmt that will be used to replace the sequence. */
6744 static gimple *
6745 vect_recog_cond_store_pattern (vec_info *vinfo,
6746 stmt_vec_info stmt_vinfo, tree *type_out)
6748 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6749 if (!loop_vinfo)
6750 return NULL;
6752 gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
6754 /* Needs to be a gimple store that we have DR info for. */
6755 if (!STMT_VINFO_DATA_REF (stmt_vinfo)
6756 || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
6757 || !gimple_store_p (store_stmt))
6758 return NULL;
6760 tree st_rhs = gimple_assign_rhs1 (store_stmt);
6762 if (TREE_CODE (st_rhs) != SSA_NAME)
6763 return NULL;
6765 auto cond_vinfo = vinfo->lookup_def (st_rhs);
6767 /* If the condition isn't part of the loop then bool recog wouldn't have seen
6768 it and so this transformation may not be valid. */
6769 if (!cond_vinfo)
6770 return NULL;
6772 cond_vinfo = vect_stmt_to_vectorize (cond_vinfo);
6773 gassign *cond_stmt = dyn_cast<gassign *> (STMT_VINFO_STMT (cond_vinfo));
6774 if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
6775 return NULL;
6777 /* Check if the else value matches the original loaded one. */
6778 bool invert = false;
6779 tree cmp_ls = gimple_arg (cond_stmt, 0);
6780 if (TREE_CODE (cmp_ls) != SSA_NAME)
6781 return NULL;
6783 tree cond_arg1 = gimple_arg (cond_stmt, 1);
6784 tree cond_arg2 = gimple_arg (cond_stmt, 2);
6786 if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
6787 && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
6788 cond_arg1)))
6789 return NULL;
6791 vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
6793 tree scalar_type = TREE_TYPE (st_rhs);
6794 if (VECTOR_TYPE_P (scalar_type))
6795 return NULL;
6797 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6798 if (vectype == NULL_TREE)
6799 return NULL;
6801 machine_mode mask_mode;
6802 machine_mode vecmode = TYPE_MODE (vectype);
6803 if (!VECTOR_MODE_P (vecmode)
6804 || targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
6805 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
6806 || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
6807 return NULL;
6809 tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
6810 if (may_be_nonaddressable_p (base))
6811 return NULL;
6813 /* Store the value from whichever arm of the COND_EXPR is not the reload. */
6814 tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
6815 tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
6816 gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);
6818 /* This is a rough estimation to check that there aren't any aliasing stores
6819 in between the load and store. It's a bit strict, but for now it's good
6820 enough. */
6821 if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
6822 return NULL;
6824 /* If we have to invert the condition, i.e. use the true argument rather than
6825 the false argument, we have to negate the mask. */
6826 if (invert)
6828 tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
6830 /* Invert the mask using ^ 1. */
6831 tree itype = TREE_TYPE (cmp_ls);
6832 gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
6833 build_int_cst (itype, 1));
6835 tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
6836 append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
6837 cmp_ls = var;
6840 if (TREE_CODE (base) != MEM_REF)
6841 base = build_fold_addr_expr (base);
6843 tree ptr = build_int_cst (reference_alias_ptr_type (base),
6844 get_object_alignment (base));
6846 /* Convert the mask to the right form. */
6847 tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
6848 vinfo);
6850 gcall *call
6851 = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
6852 cond_store_arg);
6853 gimple_set_location (call, gimple_location (store_stmt));
6855 /* Copy across relevant vectorization info and associate DR with the
6856 new pattern statement instead of the original statement. */
6857 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
6858 loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6860 *type_out = vectype;
6861 return call;
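/* When the reloaded value sits in the "then" arm instead, i.e.

     x = *_3;
     c = a CMP b;
     y = c ? x : t_20;
     *_3 = y;

   the same transformation applies with the mask inverted:

     c = a CMP b;
     c' = c ^ 1;
     .MASK_STORE (_3, align, c', t_20);  */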
6864 /* Return true if TYPE is a non-boolean integer type. These are the types
6865 that we want to consider for narrowing. */
6867 static bool
6868 vect_narrowable_type_p (tree type)
6870 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6873 /* Return true if the operation given by CODE can be truncated to N bits
6874 when only N bits of the output are needed. This is only true if bit N+1
6875 of the inputs has no effect on the low N bits of the result. */
6877 static bool
6878 vect_truncatable_operation_p (tree_code code)
6880 switch (code)
6882 case NEGATE_EXPR:
6883 case PLUS_EXPR:
6884 case MINUS_EXPR:
6885 case MULT_EXPR:
6886 case BIT_NOT_EXPR:
6887 case BIT_AND_EXPR:
6888 case BIT_IOR_EXPR:
6889 case BIT_XOR_EXPR:
6890 case COND_EXPR:
6891 return true;
6893 default:
6894 return false;
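/* For instance PLUS_EXPR is truncatable because the low 8 bits of a + b
   depend only on the low 8 bits of the operands:

     (unsigned char) (a + b)
       == (unsigned char) ((unsigned char) a + (unsigned char) b)

   whereas TRUNC_DIV_EXPR is not: 256 / 2 == 128, but truncating the
   dividend first gives (unsigned char) 256 / 2 == 0.  */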
6898 /* Record that STMT_INFO could be changed from operating on TYPE to
6899 operating on a type with the precision and sign given by PRECISION
6900 and SIGN respectively. PRECISION is an arbitrary bit precision;
6901 it might not be a whole number of bytes. */
6903 static void
6904 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6905 unsigned int precision, signop sign)
6907 /* Round the precision up to a whole number of bytes. */
6908 precision = vect_element_precision (precision);
6909 if (precision < TYPE_PRECISION (type)
6910 && (!stmt_info->operation_precision
6911 || stmt_info->operation_precision > precision))
6913 stmt_info->operation_precision = precision;
6914 stmt_info->operation_sign = sign;
6918 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6919 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6920 is an arbitrary bit precision; it might not be a whole number of bytes. */
6922 static void
6923 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6924 unsigned int min_input_precision)
6926 /* This operation in isolation only requires the inputs to have
6927 MIN_INPUT_PRECISION of precision. However, that doesn't mean
6928 that MIN_INPUT_PRECISION is a natural precision for the chain
6929 as a whole. E.g. consider something like:
6931 unsigned short *x, *y;
6932 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6934 The right shift can be done on unsigned chars, and only requires the
6935 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6936 approach would mean turning a natural chain of single-vector unsigned
6937 short operations into one that truncates "*x" and then extends
6938 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6939 operation and one vector for each unsigned char operation.
6940 This would be a significant pessimization.
6942 Instead only propagate the maximum of this precision and the precision
6943 required by the users of the result. This means that we don't pessimize
6944 the case above but continue to optimize things like:
6946 unsigned char *y;
6947 unsigned short *x;
6948 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6950 Here we would truncate two vectors of *x to a single vector of
6951 unsigned chars and use single-vector unsigned char operations for
6952 everything else, rather than doing two unsigned short copies of
6953 "(*x & 0xf0) >> 4" and then truncating the result. */
6954 min_input_precision = MAX (min_input_precision,
6955 stmt_info->min_output_precision);
6957 if (min_input_precision < TYPE_PRECISION (type)
6958 && (!stmt_info->min_input_precision
6959 || stmt_info->min_input_precision > min_input_precision))
6960 stmt_info->min_input_precision = min_input_precision;
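/* Concretely, for the second example above the whole chain can be done
   on unsigned char elements, conceptually

     unsigned char t = (unsigned char) *x;	// single truncation of *x
     *y = ((t & 0xf0) >> 4) | (unsigned char) (*y << 4);

   because only the low 8 bits of *x can influence the stored result.  */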
6963 /* Subroutine of vect_determine_min_output_precision. Return true if
6964 we can calculate a reduced number of output bits for STMT_INFO,
6965 whose result is LHS. */
6967 static bool
6968 vect_determine_min_output_precision_1 (vec_info *vinfo,
6969 stmt_vec_info stmt_info, tree lhs)
6971 /* Take the maximum precision required by users of the result. */
6972 unsigned int precision = 0;
6973 imm_use_iterator iter;
6974 use_operand_p use;
6975 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6977 gimple *use_stmt = USE_STMT (use);
6978 if (is_gimple_debug (use_stmt))
6979 continue;
6980 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6981 if (!use_stmt_info || !use_stmt_info->min_input_precision)
6982 return false;
6983 /* The input precision recorded for COND_EXPRs applies only to the
6984 "then" and "else" values. */
6985 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
6986 if (assign
6987 && gimple_assign_rhs_code (assign) == COND_EXPR
6988 && use->use != gimple_assign_rhs2_ptr (assign)
6989 && use->use != gimple_assign_rhs3_ptr (assign))
6990 return false;
6991 precision = MAX (precision, use_stmt_info->min_input_precision);
6994 if (dump_enabled_p ())
6995 dump_printf_loc (MSG_NOTE, vect_location,
6996 "only the low %d bits of %T are significant\n",
6997 precision, lhs);
6998 stmt_info->min_output_precision = precision;
6999 return true;
7002 /* Calculate min_output_precision for STMT_INFO. */
7004 static void
7005 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
7007 /* We're only interested in statements with a narrowable result. */
7008 tree lhs = gimple_get_lhs (stmt_info->stmt);
7009 if (!lhs
7010 || TREE_CODE (lhs) != SSA_NAME
7011 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
7012 return;
7014 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
7015 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
7018 /* Use range information to decide whether STMT (described by STMT_INFO)
7019 could be done in a narrower type. This is effectively a forward
7020 propagation, since it uses context-independent information that applies
7021 to all users of an SSA name. */
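/* An illustrative case (the range is invented): a 32-bit addition
   whose result is known to lie in [0, 200] can be recorded as being
   computable as an 8-bit unsigned operation, with its inputs (subject
   to what the users of the result need) only requiring their low 8
   bits.  */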
7023 static void
7024 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
7026 tree lhs = gimple_assign_lhs (stmt);
7027 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
7028 return;
7030 tree type = TREE_TYPE (lhs);
7031 if (!vect_narrowable_type_p (type))
7032 return;
7034 /* First see whether we have any useful range information for the result. */
7035 unsigned int precision = TYPE_PRECISION (type);
7036 signop sign = TYPE_SIGN (type);
7037 wide_int min_value, max_value;
7038 if (!vect_get_range_info (lhs, &min_value, &max_value))
7039 return;
7041 tree_code code = gimple_assign_rhs_code (stmt);
7042 unsigned int nops = gimple_num_ops (stmt);
7044 if (!vect_truncatable_operation_p (code))
7046 /* Handle operations that can be computed in type T if all inputs
7047 and outputs can be represented in type T. Also handle left and
7048 right shifts, where (in addition) the maximum shift amount must
7049 be less than the number of bits in T. */
7050 bool is_shift;
7051 switch (code)
7053 case LSHIFT_EXPR:
7054 case RSHIFT_EXPR:
7055 is_shift = true;
7056 break;
7058 case ABS_EXPR:
7059 case MIN_EXPR:
7060 case MAX_EXPR:
7061 case TRUNC_DIV_EXPR:
7062 case CEIL_DIV_EXPR:
7063 case FLOOR_DIV_EXPR:
7064 case ROUND_DIV_EXPR:
7065 case EXACT_DIV_EXPR:
7066 /* Modulus is excluded because it is typically calculated by doing
7067 a division, for which minimum signed / -1 isn't representable in
7068 the original signed type. We could take the division range into
7069 account instead, if handling modulus ever becomes important. */
7070 is_shift = false;
7071 break;
7073 default:
7074 return;
7076 for (unsigned int i = 1; i < nops; ++i)
7078 tree op = gimple_op (stmt, i);
7079 wide_int op_min_value, op_max_value;
7080 if (TREE_CODE (op) == INTEGER_CST)
7082 unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
7083 op_min_value = op_max_value = wi::to_wide (op, op_precision);
7085 else if (TREE_CODE (op) == SSA_NAME)
7087 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
7088 return;
7090 else
7091 return;
7093 if (is_shift && i == 2)
7095 /* There needs to be one more bit than the maximum shift amount.
7097 If the maximum shift amount is already 1 less than PRECISION
7098 then we can't narrow the shift further. Dealing with that
7099 case first ensures that we can safely use an unsigned range
7100 below.
7102 op_min_value isn't relevant, since shifts by negative amounts
7103 are UB. */
7104 if (wi::geu_p (op_max_value, precision - 1))
7105 return;
7106 unsigned int min_bits = op_max_value.to_uhwi () + 1;
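/* For example (an invented bound): if the shift amount is known to be
   at most 11, MIN_BITS is 12; pretending below that the shift operand
   spans the whole range of a 12-bit type guarantees that the narrowed
   operation keeps at least 12 bits, so the shift amount always stays
   below the new precision.  */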
7108 /* As explained below, we can convert a signed shift into an
7109 unsigned shift if the sign bit is always clear. At this
7110 point we've already processed the ranges of the output and
7111 the first input. */
7112 auto op_sign = sign;
7113 if (sign == SIGNED && !wi::neg_p (min_value))
7114 op_sign = UNSIGNED;
7115 op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
7116 precision, op_sign);
7117 op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
7118 precision, op_sign);
7120 min_value = wi::min (min_value, op_min_value, sign);
7121 max_value = wi::max (max_value, op_max_value, sign);
7125 /* Try to switch signed types for unsigned types if we can.
7126 This is better for two reasons. First, unsigned ops tend
7127 to be cheaper than signed ops. Second, it means that we can
7128 handle things like:
7130 signed char c;
7131 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
as:
7135 signed char c;
7136 unsigned short res_1 = (unsigned short) c & 0xff00;
7137 int res = (int) res_1;
7139 where the intermediate result res_1 has unsigned rather than
7140 signed type. */
7141 if (sign == SIGNED && !wi::neg_p (min_value))
7142 sign = UNSIGNED;
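/* A small worked example: a nonnegative range of [0, 0xff00] needs
   17 bits as a signed value (one extra bit to keep the sign clear)
   but only 16 bits as an unsigned value, so switching the sign here
   allows one more step of narrowing below.  */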
7144 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
7145 unsigned int precision1 = wi::min_precision (min_value, sign);
7146 unsigned int precision2 = wi::min_precision (max_value, sign);
7147 unsigned int value_precision = MAX (precision1, precision2);
7148 if (value_precision >= precision)
7149 return;
7151 if (dump_enabled_p ())
7152 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
7153 " without loss of precision: %G",
7154 sign == SIGNED ? "signed" : "unsigned",
7155 value_precision, (gimple *) stmt);
7157 vect_set_operation_type (stmt_info, type, value_precision, sign);
7158 vect_set_min_input_precision (stmt_info, type, value_precision);
7161 /* Use information about the users of STMT's result to decide whether
7162 STMT (described by STMT_INFO) could be done in a narrower type.
7163 This is effectively a backward propagation. */
7165 static void
7166 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
7168 tree_code code = gimple_assign_rhs_code (stmt);
7169 unsigned int opno = (code == COND_EXPR ? 2 : 1);
7170 tree type = TREE_TYPE (gimple_op (stmt, opno));
7171 if (!vect_narrowable_type_p (type))
7172 return;
7174 unsigned int precision = TYPE_PRECISION (type);
7175 unsigned int operation_precision, min_input_precision;
7176 switch (code)
7178 CASE_CONVERT:
7179 /* Only the bits that contribute to the output matter. Don't change
7180 the precision of the operation itself. */
7181 operation_precision = precision;
7182 min_input_precision = stmt_info->min_output_precision;
7183 break;
7185 case LSHIFT_EXPR:
7186 case RSHIFT_EXPR:
7188 tree shift = gimple_assign_rhs2 (stmt);
7189 if (TREE_CODE (shift) != INTEGER_CST
7190 || !wi::ltu_p (wi::to_widest (shift), precision))
7191 return;
7192 unsigned int const_shift = TREE_INT_CST_LOW (shift);
7193 if (code == LSHIFT_EXPR)
7195 /* Avoid creating an undefined shift.
7197 ??? We could instead use min_output_precision as-is and
7198 optimize out-of-range shifts to zero. However, only
7199 degenerate testcases shift away all their useful input data,
7200 and it isn't natural to drop input operations in the middle
7201 of vectorization. This sort of thing should really be
7202 handled before vectorization. */
7203 operation_precision = MAX (stmt_info->min_output_precision,
7204 const_shift + 1);
7205 /* We need CONST_SHIFT fewer bits of the input. */
7206 min_input_precision = (MAX (operation_precision, const_shift)
7207 - const_shift);
7209 else
7211 /* We need CONST_SHIFT extra bits to do the operation. */
7212 operation_precision = (stmt_info->min_output_precision
7213 + const_shift);
7214 min_input_precision = operation_precision;
7216 break;
7219 default:
7220 if (vect_truncatable_operation_p (code))
7222 /* Input bit N has no effect on output bits N-1 and lower. */
7223 operation_precision = stmt_info->min_output_precision;
7224 min_input_precision = operation_precision;
7225 break;
7227 return;
7230 if (operation_precision < precision)
7232 if (dump_enabled_p ())
7233 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
7234 " without affecting users: %G",
7235 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
7236 operation_precision, (gimple *) stmt);
7237 vect_set_operation_type (stmt_info, type, operation_precision,
7238 TYPE_SIGN (type));
7240 vect_set_min_input_precision (stmt_info, type, min_input_precision);
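/* Two worked examples of the scheme above (widths invented): if only
   the low 16 bits of a left shift by 3 matter to its users, the shift
   is recorded as a 16-bit operation and its input as needing only its
   low 16 - 3 = 13 bits; if the users of a right shift by 4 need 8
   bits, both the operation and its input need 8 + 4 = 12 bits.  */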
7243 /* Return true if the statement described by STMT_INFO sets a boolean
7244 SSA_NAME and if we know how to vectorize this kind of statement using
7245 vector mask types. */
7247 static bool
7248 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
7250 tree lhs = gimple_get_lhs (stmt_info->stmt);
7251 tree_code code = ERROR_MARK;
7252 gassign *assign = NULL;
7253 gcond *cond = NULL;
7255 if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
7256 code = gimple_assign_rhs_code (assign);
7257 else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
7259 lhs = gimple_cond_lhs (cond);
7260 code = gimple_cond_code (cond);
7263 if (!lhs
7264 || TREE_CODE (lhs) != SSA_NAME
7265 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
7266 return false;
7268 if (code != ERROR_MARK)
7270 switch (code)
7272 CASE_CONVERT:
7273 case SSA_NAME:
7274 case BIT_NOT_EXPR:
7275 case BIT_IOR_EXPR:
7276 case BIT_XOR_EXPR:
7277 case BIT_AND_EXPR:
7278 return true;
7280 default:
7281 return TREE_CODE_CLASS (code) == tcc_comparison;
7284 else if (is_a <gphi *> (stmt_info->stmt))
7285 return true;
7286 return false;
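/* For example, a comparison such as _1 = a_2 < b_3, a mask operation
   such as _4 = _1 & _5, and boolean PHIs are all handled; a boolean
   result produced by, say, a call matches none of the codes above and
   is rejected.  (The SSA names here are purely illustrative.)  */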
7289 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
7290 a vector mask type instead of a normal vector type. Record the
7291 result in STMT_INFO->mask_precision. */
7293 static void
7294 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
7296 if (!possible_vector_mask_operation_p (stmt_info))
7297 return;
7299 /* If at least one boolean input uses a vector mask type,
7300 pick the mask type with the narrowest elements.
7302 ??? This is the traditional behavior. It should always produce
7303 the smallest number of operations, but isn't necessarily the
7304 optimal choice. For example, if we have:
7306 a = b & c
7308 where:
7310 - the user of a wants it to have a mask type for 16-bit elements (M16)
7311 - b also uses M16
7312 - c uses a mask type for 8-bit elements (M8)
7314 then picking M8 gives:
7316 - 1 M16->M8 pack for b
7317 - 1 M8 AND for a
7318 - 2 M8->M16 unpacks for the user of a
7320 whereas picking M16 would have given:
7322 - 2 M8->M16 unpacks for c
7323 - 2 M16 ANDs for a
7325 The number of operations is the same, but M16 would have given
7326 a shorter dependency chain and allowed more ILP. */
7327 unsigned int precision = ~0U;
7328 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7330 /* If the statement compares two values that shouldn't use vector masks,
7331 try comparing the values as normal scalars instead. */
7332 tree_code code = ERROR_MARK;
7333 tree op0_type;
7334 unsigned int nops = -1;
7335 unsigned int ops_start = 0;
7337 if (gassign *assign = dyn_cast <gassign *> (stmt))
7339 code = gimple_assign_rhs_code (assign);
7340 op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
7341 nops = gimple_num_ops (assign);
7342 ops_start = 1;
7344 else if (gcond *cond = dyn_cast <gcond *> (stmt))
7346 code = gimple_cond_code (cond);
7347 op0_type = TREE_TYPE (gimple_cond_lhs (cond));
7348 nops = 2;
7349 ops_start = 0;
7352 if (code != ERROR_MARK)
7354 for (unsigned int i = ops_start; i < nops; ++i)
7356 tree rhs = gimple_op (stmt, i);
7357 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
7358 continue;
7360 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
7361 if (!def_stmt_info)
7362 /* Don't let external or constant operands influence the choice.
7363 We can convert them to whichever vector type we pick. */
7364 continue;
7366 if (def_stmt_info->mask_precision)
7368 if (precision > def_stmt_info->mask_precision)
7369 precision = def_stmt_info->mask_precision;
7373 if (precision == ~0U
7374 && TREE_CODE_CLASS (code) == tcc_comparison)
7376 scalar_mode mode;
7377 tree vectype, mask_type;
7378 if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
7379 && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
7380 && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
7381 && expand_vec_cmp_expr_p (vectype, mask_type, code))
7382 precision = GET_MODE_BITSIZE (mode);
7385 else
7387 gphi *phi = as_a <gphi *> (stmt_info->stmt);
7388 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
7390 tree rhs = gimple_phi_arg_def (phi, i);
7392 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
7393 if (!def_stmt_info)
7394 /* Don't let external or constant operands influence the choice.
7395 We can convert them to whichever vector type we pick. */
7396 continue;
7398 if (def_stmt_info->mask_precision)
7400 if (precision > def_stmt_info->mask_precision)
7401 precision = def_stmt_info->mask_precision;
7406 if (dump_enabled_p ())
7408 if (precision == ~0U)
7409 dump_printf_loc (MSG_NOTE, vect_location,
7410 "using normal nonmask vectors for %G",
7411 stmt_info->stmt);
7412 else
7413 dump_printf_loc (MSG_NOTE, vect_location,
7414 "using boolean precision %d for %G",
7415 precision, stmt_info->stmt);
7418 stmt_info->mask_precision = precision;
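/* Two illustrative outcomes: for _1 = _2 & _3 where _2 and _3 carry
   mask precisions 16 and 8, the narrower value 8 is chosen; for a
   comparison of two 32-bit integers with no mask-typed operands, the
   fallback above records precision 32, provided the target supports
   the vector comparison.  Otherwise ~0U is kept, which the dump above
   reports as using normal (non-mask) vectors.  */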
7421 /* Handle vect_determine_precisions for STMT_INFO, given that we
7422 have already done so for the users of its result. */
7424 void
7425 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
7427 vect_determine_min_output_precision (vinfo, stmt_info);
7428 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
7430 vect_determine_precisions_from_range (stmt_info, stmt);
7431 vect_determine_precisions_from_users (stmt_info, stmt);
7435 /* Walk backwards through the vectorizable region to determine the
7436 values of these fields:
7438 - min_output_precision
7439 - min_input_precision
7440 - operation_precision
7441 - operation_sign. */
7443 void
7444 vect_determine_precisions (vec_info *vinfo)
7446 basic_block *bbs = vinfo->bbs;
7447 unsigned int nbbs = vinfo->nbbs;
7449 DUMP_VECT_SCOPE ("vect_determine_precisions");
7451 for (unsigned int i = 0; i < nbbs; i++)
7453 basic_block bb = bbs[i];
7454 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7456 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
7457 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7458 vect_determine_mask_precision (vinfo, stmt_info);
7460 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7462 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
7463 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7464 vect_determine_mask_precision (vinfo, stmt_info);
7467 for (unsigned int i = 0; i < nbbs; i++)
7469 basic_block bb = bbs[nbbs - i - 1];
7470 for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
7472 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
7473 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7474 vect_determine_stmt_precisions (vinfo, stmt_info);
7476 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7478 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
7479 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7480 vect_determine_stmt_precisions (vinfo, stmt_info);
7485 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
7487 struct vect_recog_func
7489 vect_recog_func_ptr fn;
7490 const char *name;
7493 /* Note that ordering matters - the first pattern matching on a stmt is
7494 taken which means usually the more complex one needs to precede the
7495 less complex ones (widen_sum only after dot_prod or sad, for example). */
7496 static vect_recog_func vect_vect_recog_func_ptrs[] = {
7497 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
7498 { vect_recog_bit_insert_pattern, "bit_insert" },
7499 { vect_recog_abd_pattern, "abd" },
7500 { vect_recog_over_widening_pattern, "over_widening" },
7501 /* Must come after over_widening, which narrows the shift as much as
7502 possible beforehand. */
7503 { vect_recog_average_pattern, "average" },
7504 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
7505 { vect_recog_mulhs_pattern, "mult_high" },
7506 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
7507 { vect_recog_widen_mult_pattern, "widen_mult" },
7508 { vect_recog_dot_prod_pattern, "dot_prod" },
7509 { vect_recog_sad_pattern, "sad" },
7510 { vect_recog_widen_sum_pattern, "widen_sum" },
7511 { vect_recog_pow_pattern, "pow" },
7512 { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
7513 { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
7514 { vect_recog_widen_shift_pattern, "widen_shift" },
7515 { vect_recog_rotate_pattern, "rotate" },
7516 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
7517 { vect_recog_divmod_pattern, "divmod" },
7518 { vect_recog_mod_var_pattern, "modvar" },
7519 { vect_recog_mult_pattern, "mult" },
7520 { vect_recog_sat_add_pattern, "sat_add" },
7521 { vect_recog_sat_sub_pattern, "sat_sub" },
7522 { vect_recog_sat_trunc_pattern, "sat_trunc" },
7523 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
7524 { vect_recog_gcond_pattern, "gcond" },
7525 { vect_recog_bool_pattern, "bool" },
7526 /* This must come before mask conversion, and includes the parts
7527 of mask conversion that are needed for gather and scatter
7528 internal functions. */
7529 { vect_recog_gather_scatter_pattern, "gather_scatter" },
7530 { vect_recog_cond_store_pattern, "cond_store" },
7531 { vect_recog_mask_conversion_pattern, "mask_conversion" },
7532 { vect_recog_widen_plus_pattern, "widen_plus" },
7533 { vect_recog_widen_minus_pattern, "widen_minus" },
7534 { vect_recog_widen_abd_pattern, "widen_abd" },
7535 /* These must come after the double widening ones. */
7538 /* Mark statements that are involved in a pattern. */
7540 void
7541 vect_mark_pattern_stmts (vec_info *vinfo,
7542 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
7543 tree pattern_vectype)
7545 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
7546 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7548 gimple *orig_pattern_stmt = NULL;
7549 if (is_pattern_stmt_p (orig_stmt_info))
7551 /* We're replacing a statement in an existing pattern definition
7552 sequence. */
7553 orig_pattern_stmt = orig_stmt_info->stmt;
7554 if (dump_enabled_p ())
7555 dump_printf_loc (MSG_NOTE, vect_location,
7556 "replacing earlier pattern %G", orig_pattern_stmt);
7558 /* To keep the book-keeping simple, just swap the lhs of the
7559 old and new statements, so that the old one has a valid but
7560 unused lhs. */
7561 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
7562 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
7563 gimple_set_lhs (pattern_stmt, old_lhs);
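/* For example (illustrative names): if the statement being replaced
   defines patt_1 and the new pattern statement was created with lhs
   patt_2, the swap gives the new statement the established name
   patt_1, so existing uses are unaffected, while the old statement is
   left defining the unused patt_2.  */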
7565 if (dump_enabled_p ())
7566 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
7568 /* Switch to the statement that ORIG replaces. */
7569 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
7571 /* We shouldn't be replacing the main pattern statement. */
7572 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
7573 != orig_pattern_stmt);
7576 if (def_seq)
7577 for (gimple_stmt_iterator si = gsi_start (def_seq);
7578 !gsi_end_p (si); gsi_next (&si))
7580 if (dump_enabled_p ())
7581 dump_printf_loc (MSG_NOTE, vect_location,
7582 "extra pattern stmt: %G", gsi_stmt (si));
7583 stmt_vec_info pattern_stmt_info
7584 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
7585 orig_stmt_info, pattern_vectype);
7586 /* Stmts in the def sequence are not vectorizable cycle or
7587 induction defs; instead they should all be vect_internal_def,
7588 feeding the main pattern stmt, which retains this def type. */
7589 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
7592 if (orig_pattern_stmt)
7594 vect_init_pattern_stmt (vinfo, pattern_stmt,
7595 orig_stmt_info, pattern_vectype);
7597 /* Insert all the new pattern statements before the original one. */
7598 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7599 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
7600 orig_def_seq);
7601 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
7602 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
7604 /* Remove the pattern statement that this new pattern replaces. */
7605 gsi_remove (&gsi, false);
7607 else
7608 vect_set_pattern_stmt (vinfo,
7609 pattern_stmt, orig_stmt_info, pattern_vectype);
7611 /* For any conditionals mark them as vect_condition_def. */
7612 if (is_a <gcond *> (pattern_stmt))
7613 STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
7615 /* Transfer reduction path info to the pattern. */
7616 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
7618 gimple_match_op op;
7619 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
7620 gcc_unreachable ();
7621 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
7622 /* Search the pattern def sequence and the main pattern stmt. Note
7623 we may have inserted everything into a containing pattern def sequence,
7624 so the following is a bit awkward. */
7625 gimple_stmt_iterator si;
7626 gimple *s;
7627 if (def_seq)
7629 si = gsi_start (def_seq);
7630 s = gsi_stmt (si);
7631 gsi_next (&si);
7633 else
7635 si = gsi_none ();
7636 s = pattern_stmt;
7640 bool found = false;
7641 if (gimple_extract_op (s, &op))
7642 for (unsigned i = 0; i < op.num_ops; ++i)
7643 if (op.ops[i] == lookfor)
7645 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7646 lookfor = gimple_get_lhs (s);
7647 found = true;
7648 break;
7650 if (s == pattern_stmt)
7652 if (!found && dump_enabled_p ())
7653 dump_printf_loc (MSG_NOTE, vect_location,
7654 "failed to update reduction index.\n");
7655 break;
7657 if (gsi_end_p (si))
7658 s = pattern_stmt;
7659 else
7661 s = gsi_stmt (si);
7662 if (s == pattern_stmt)
7663 /* Found the end inside a bigger pattern def seq. */
7664 si = gsi_none ();
7665 else
7666 gsi_next (&si);
7668 } while (1);
7672 /* Function vect_pattern_recog_1
7674 Input:
7675 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
7676 computation pattern.
7677 STMT_INFO: A stmt from which the pattern search should start.
7679 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
7680 a sequence of statements that has the same functionality and can be
7681 used to replace STMT_INFO. It returns the last statement in the sequence
7682 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7683 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7684 statement, having first checked that the target supports the new operation
7685 in that type.
7687 This function also does some bookkeeping, as explained in the documentation
7688 for vect_pattern_recog. */
7690 static void
7691 vect_pattern_recog_1 (vec_info *vinfo,
7692 const vect_recog_func &recog_func, stmt_vec_info stmt_info)
7694 gimple *pattern_stmt;
7695 tree pattern_vectype;
7697 /* If this statement has already been replaced with pattern statements,
7698 leave the original statement alone, since the first match wins.
7699 Instead try to match against the definition statements that feed
7700 the main pattern statement. */
7701 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7703 gimple_stmt_iterator gsi;
7704 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7705 !gsi_end_p (gsi); gsi_next (&gsi))
7706 vect_pattern_recog_1 (vinfo, recog_func,
7707 vinfo->lookup_stmt (gsi_stmt (gsi)));
7708 return;
7711 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7712 pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
7713 if (!pattern_stmt)
7715 /* Clear any half-formed pattern definition sequence. */
7716 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
7717 return;
7720 /* Found a vectorizable pattern. */
7721 if (dump_enabled_p ())
7722 dump_printf_loc (MSG_NOTE, vect_location,
7723 "%s pattern recognized: %G",
7724 recog_func.name, pattern_stmt);
7726 /* Mark the stmts that are involved in the pattern. */
7727 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
7731 /* Function vect_pattern_recog
7733 Input:
7734 VINFO - the vec_info describing the loop or basic-block region in which
7735 we want to look for computation idioms.
7737 Output - for each computation idiom that is detected we create a new stmt
7738 that provides the same functionality and that can be vectorized. We
7739 also record some information in the stmt_vec_info of the relevant
7740 stmts, as explained below:
7742 At the entry to this function we have the following stmts, with the
7743 following initial value in the STMT_VINFO fields:
7745 stmt in_pattern_p related_stmt vec_stmt
7746 S1: a_i = .... - - -
7747 S2: a_2 = ..use(a_i).. - - -
7748 S3: a_1 = ..use(a_2).. - - -
7749 S4: a_0 = ..use(a_1).. - - -
7750 S5: ... = ..use(a_0).. - - -
7752 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7753 represented by a single stmt. We then:
7754 - create a new stmt S6 equivalent to the pattern (the stmt is not
7755 inserted into the code)
7756 - fill in the STMT_VINFO fields as follows:
7758 in_pattern_p related_stmt vec_stmt
7759 S1: a_i = .... - - -
7760 S2: a_2 = ..use(a_i).. - - -
7761 S3: a_1 = ..use(a_2).. - - -
7762 S4: a_0 = ..use(a_1).. true S6 -
7763 '---> S6: a_new = .... - S4 -
7764 S5: ... = ..use(a_0).. - - -
7766 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7767 to each other through the RELATED_STMT field).
7769 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7770 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7771 remain irrelevant unless used by stmts other than S4.
7773 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7774 (because they are marked as irrelevant). It will vectorize S6, and record
7775 a pointer to the new vector stmt VS6 from S6 (as usual).
7776 S4 will be skipped, and S5 will be vectorized as usual:
7778 in_pattern_p related_stmt vec_stmt
7779 S1: a_i = .... - - -
7780 S2: a_2 = ..use(a_i).. - - -
7781 S3: a_1 = ..use(a_2).. - - -
7782 > VS6: va_new = .... - - -
7783 S4: a_0 = ..use(a_1).. true S6 VS6
7784 '---> S6: a_new = .... - S4 VS6
7785 > VS5: ... = ..vuse(va_new).. - - -
7786 S5: ... = ..use(a_0).. - - -
7788 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7789 elsewhere), and we'll end up with:
7791 VS6: va_new = ....
7792 VS5: ... = ..vuse(va_new)..
7794 In case of more than one pattern statements, e.g., widen-mult with
7795 intermediate type:
7797 S1 a_t = ;
7798 S2 a_T = (TYPE) a_t;
7799 '--> S3: a_it = (interm_type) a_t;
7800 S4 prod_T = a_T * CONST;
7801 '--> S5: prod_T' = a_it w* CONST;
7803 there may be other users of a_T outside the pattern. In that case S2 will
7804 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7805 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7806 be recorded in S3. */
7808 void
7809 vect_pattern_recog (vec_info *vinfo)
7811 basic_block *bbs = vinfo->bbs;
7812 unsigned int nbbs = vinfo->nbbs;
7814 vect_determine_precisions (vinfo);
7816 DUMP_VECT_SCOPE ("vect_pattern_recog");
7818 /* Scan through the stmts in the region, applying the pattern recognition
7819 functions starting at each stmt visited. */
7820 for (unsigned i = 0; i < nbbs; i++)
7822 basic_block bb = bbs[i];
7824 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7826 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
7828 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
7829 continue;
7831 /* Scan over all generic vect_recog_xxx_pattern functions. */
7832 for (const auto &func_ptr : vect_vect_recog_func_ptrs)
7833 vect_pattern_recog_1 (vinfo, func_ptr,
7834 stmt_info);
7838 /* After this no more add_stmt calls are allowed. */
7839 vinfo->stmt_vec_info_ro = true;
7842 /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code
7843 or internal_fn contained in CH, respectively. */
7844 gimple *
7845 vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7847 gcc_assert (op0 != NULL_TREE);
7848 if (ch.is_tree_code ())
7849 return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
7851 gcc_assert (ch.is_internal_fn ());
7852 gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
7853 op1 == NULL_TREE ? 1 : 2,
7854 op0, op1);
7855 gimple_call_set_lhs (stmt, lhs);
7856 return stmt;
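/* Illustrative uses (operand names invented): with a tree code,
   vect_gimple_build (lhs_1, PLUS_EXPR, a_2, b_3) yields the assignment
   lhs_1 = a_2 + b_3; with a code_helper wrapping an internal function
   it instead yields an internal call with one or two arguments (OP1
   being NULL_TREE in the single-operand case) and sets LHS on the
   call statement.  */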