gcc/tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #define INCLUDE_MEMORY
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "gimple-iterator.h"
30 #include "gimple-fold.h"
31 #include "ssa.h"
32 #include "expmed.h"
33 #include "optabs-tree.h"
34 #include "insn-config.h"
35 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "tree-eh.h"
39 #include "gimplify.h"
40 #include "gimple-iterator.h"
41 #include "gimple-fold.h"
42 #include "gimplify-me.h"
43 #include "cfgloop.h"
44 #include "tree-vectorizer.h"
45 #include "dumpfile.h"
46 #include "builtins.h"
47 #include "internal-fn.h"
48 #include "case-cfn-macros.h"
49 #include "fold-const-call.h"
50 #include "attribs.h"
51 #include "cgraph.h"
52 #include "omp-simd-clone.h"
53 #include "predict.h"
54 #include "tree-vector-builder.h"
55 #include "tree-ssa-loop-ivopts.h"
56 #include "vec-perm-indices.h"
57 #include "gimple-range.h"
58 #include "alias.h"
61 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
62 in the first operand. Disentangling this is future work, the
63 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
66 /* Return true if we have a useful VR_RANGE range for VAR, storing it
67 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
69 bool
70 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
72 int_range_max vr;
73 tree vr_min, vr_max;
74 get_range_query (cfun)->range_of_expr (vr, var);
75 if (vr.undefined_p ())
76 vr.set_varying (TREE_TYPE (var));
77 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
78 *min_value = wi::to_wide (vr_min);
79 *max_value = wi::to_wide (vr_max);
80 wide_int nonzero = get_nonzero_bits (var);
81 signop sgn = TYPE_SIGN (TREE_TYPE (var));
82 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
83 nonzero, sgn) == VR_RANGE)
85 if (dump_enabled_p ())
87 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
88 dump_printf (MSG_NOTE, " has range [");
89 dump_hex (MSG_NOTE, *min_value);
90 dump_printf (MSG_NOTE, ", ");
91 dump_hex (MSG_NOTE, *max_value);
92 dump_printf (MSG_NOTE, "]\n");
94 return true;
96 else
98 if (dump_enabled_p ())
100 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
101 dump_printf (MSG_NOTE, " has no range info\n");
103 return false;
107 /* Report that we've found an instance of pattern PATTERN in
108 statement STMT. */
110 static void
111 vect_pattern_detected (const char *name, gimple *stmt)
113 if (dump_enabled_p ())
114 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
117 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
118 return the pattern statement's stmt_vec_info. Set its vector type to
119 VECTYPE if it doesn't have one already. */
121 static stmt_vec_info
122 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
123 stmt_vec_info orig_stmt_info, tree vectype)
125 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
126 if (pattern_stmt_info == NULL)
127 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
128 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
130 pattern_stmt_info->pattern_stmt_p = true;
131 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
132 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
133 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
134 STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
135 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
137 gcc_assert (!vectype
138 || is_a <gcond *> (pattern_stmt)
139 || (VECTOR_BOOLEAN_TYPE_P (vectype)
140 == vect_use_mask_type_p (orig_stmt_info)));
141 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
142 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
144 return pattern_stmt_info;
147 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
148 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
149 have one already. */
151 static void
152 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
153 stmt_vec_info orig_stmt_info, tree vectype)
155 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
156 STMT_VINFO_RELATED_STMT (orig_stmt_info)
157 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
160 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
161 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
162 be different from the vector type of the final pattern statement.
163 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
164 from which it was derived. */
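/* A minimal usage sketch (illustrative only, mirroring the calls made by the
   pattern recognizers later in this file): after building a helper statement
   NEW_STMT whose result has vector type VECTYPE different from the final
   pattern statement's, record it with

     append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);

   so that NEW_STMT is vectorized as part of STMT_INFO's pattern sequence
   rather than on its own.  */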
166 static inline void
167 append_pattern_def_seq (vec_info *vinfo,
168 stmt_vec_info stmt_info, gimple *new_stmt,
169 tree vectype = NULL_TREE,
170 tree scalar_type_for_mask = NULL_TREE)
172 gcc_assert (!scalar_type_for_mask
173 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
174 if (vectype)
176 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
177 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
178 if (scalar_type_for_mask)
179 new_stmt_info->mask_precision
180 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
182 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
183 new_stmt);
186 /* The caller wants to perform new operations on vect_external variable
187 VAR, so that the result of the operations would also be vect_external.
188 Return the edge on which the operations can be performed, if one exists.
189 Return null if the operations should instead be treated as part of
190 the pattern that needs them. */
192 static edge
193 vect_get_external_def_edge (vec_info *vinfo, tree var)
195 edge e = NULL;
196 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
198 e = loop_preheader_edge (loop_vinfo->loop);
199 if (!SSA_NAME_IS_DEFAULT_DEF (var))
201 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
202 if (bb == NULL
203 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
204 e = NULL;
207 return e;
210 /* Return true if the target supports a vector version of CODE,
211 where CODE is known to map to a direct optab with the given SUBTYPE.
212 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
213 specifies the type of the scalar result.
215 If CODE allows the inputs and outputs to have different type
216 (such as for WIDEN_SUM_EXPR), it is the input mode rather
217 than the output mode that determines the appropriate target pattern.
218 Operand 0 of the target pattern then specifies the mode that the output
219 must have.
221 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
222 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
223 is nonnull. */
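/* A usage sketch (illustrative only, modelled on the DOT_PROD_EXPR check in
   vect_recog_dot_prod_pattern below):

     tree half_vectype;
     if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR,
					   half_type, type_out, &half_vectype))
       return NULL;

   On success *TYPE_OUT and HALF_VECTYPE hold the vector forms of TYPE and
   HALF_TYPE respectively.  */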
225 static bool
226 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
227 tree itype, tree *vecotype_out,
228 tree *vecitype_out = NULL,
229 enum optab_subtype subtype = optab_default)
231 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
232 if (!vecitype)
233 return false;
235 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
236 if (!vecotype)
237 return false;
239 optab optab = optab_for_tree_code (code, vecitype, subtype);
240 if (!optab)
241 return false;
243 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
244 if (icode == CODE_FOR_nothing
245 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
246 return false;
248 *vecotype_out = vecotype;
249 if (vecitype_out)
250 *vecitype_out = vecitype;
251 return true;
254 /* Round bit precision PRECISION up to a full element. */
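/* For example, a precision of 12 rounds up to 16, and a precision of 3
   rounds up to BITS_PER_UNIT (8 on typical targets).  */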
256 static unsigned int
257 vect_element_precision (unsigned int precision)
259 precision = 1 << ceil_log2 (precision);
260 return MAX (precision, BITS_PER_UNIT);
263 /* If OP is defined by a statement that's being considered for vectorization,
264 return information about that statement, otherwise return NULL. */
266 static stmt_vec_info
267 vect_get_internal_def (vec_info *vinfo, tree op)
269 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
270 if (def_stmt_info
271 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
272 return vect_stmt_to_vectorize (def_stmt_info);
273 return NULL;
276 /* Check whether NAME, an ssa-name used in STMT_VINFO,
277 is a result of a type promotion, such that:
278 DEF_STMT: NAME = NOP (name0)
279 If CHECK_SIGN is TRUE, check that either both types are signed or both are
280 unsigned. */
282 static bool
283 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
284 tree *orig_type, gimple **def_stmt, bool *promotion)
286 tree type = TREE_TYPE (name);
287 tree oprnd0;
288 enum vect_def_type dt;
290 stmt_vec_info def_stmt_info;
291 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
292 return false;
294 if (dt != vect_internal_def
295 && dt != vect_external_def && dt != vect_constant_def)
296 return false;
298 if (!*def_stmt)
299 return false;
301 if (!is_gimple_assign (*def_stmt))
302 return false;
304 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
305 return false;
307 oprnd0 = gimple_assign_rhs1 (*def_stmt);
309 *orig_type = TREE_TYPE (oprnd0);
310 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
311 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
312 return false;
314 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
315 *promotion = true;
316 else
317 *promotion = false;
319 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
320 return false;
322 return true;
325 /* Holds information about an input operand after some sign changes
326 and type promotions have been peeled away. */
327 class vect_unpromoted_value {
328 public:
329 vect_unpromoted_value ();
331 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
333 /* The value obtained after peeling away zero or more casts. */
334 tree op;
336 /* The type of OP. */
337 tree type;
339 /* The definition type of OP. */
340 vect_def_type dt;
342 /* If OP is the result of peeling at least one cast, and if the cast
343 of OP itself is a vectorizable statement, CASTER identifies that
344 statement, otherwise it is null. */
345 stmt_vec_info caster;
348 inline vect_unpromoted_value::vect_unpromoted_value ()
349 : op (NULL_TREE),
350 type (NULL_TREE),
351 dt (vect_uninitialized_def),
352 caster (NULL)
356 /* Set the operand to OP_IN, its definition type to DT_IN, and the
357 statement that casts it to CASTER_IN. */
359 inline void
360 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
361 stmt_vec_info caster_in)
363 op = op_in;
364 type = TREE_TYPE (op);
365 dt = dt_in;
366 caster = caster_in;
369 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
370 to reach some vectorizable inner operand OP', continuing as long as it
371 is possible to convert OP' back to OP using a possible sign change
372 followed by a possible promotion P. Return this OP', or null if OP is
373 not a vectorizable SSA name. If there is a promotion P, describe its
374 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
375 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
376 have more than one user.
378 A successful return means that it is possible to go from OP' to OP
379 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
380 whereas the cast from UNPROM to OP might be a promotion, a sign
381 change, or a nop.
383 E.g. say we have:
385 signed short *ptr = ...;
386 signed short C = *ptr;
387 unsigned short B = (unsigned short) C; // sign change
388 signed int A = (signed int) B; // unsigned promotion
389 ...possible other uses of A...
390 unsigned int OP = (unsigned int) A; // sign change
392 In this case it's possible to go directly from C to OP using:
394 OP = (unsigned int) (unsigned short) C;
395 +------------+ +--------------+
396 promotion sign change
398 so OP' would be C. The input to the promotion is B, so UNPROM
399 would describe B. */
401 static tree
402 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
403 vect_unpromoted_value *unprom,
404 bool *single_use_p = NULL)
406 tree op_type = TREE_TYPE (op);
407 if (!INTEGRAL_TYPE_P (op_type))
408 return NULL_TREE;
410 tree res = NULL_TREE;
411 unsigned int orig_precision = TYPE_PRECISION (op_type);
412 unsigned int min_precision = orig_precision;
413 stmt_vec_info caster = NULL;
414 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
416 /* See whether OP is simple enough to vectorize. */
417 stmt_vec_info def_stmt_info;
418 gimple *def_stmt;
419 vect_def_type dt;
420 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
421 break;
423 /* If OP is the input of a demotion, skip over it to see whether
424 OP is itself the result of a promotion. If so, the combined
425 effect of the promotion and the demotion might fit the required
426 pattern, otherwise neither operation fits.
428 This copes with cases such as the result of an arithmetic
429 operation being truncated before being stored, and where that
430 arithmetic operation has been recognized as an over-widened one. */
431 if (TYPE_PRECISION (op_type) <= min_precision)
433 /* Use OP as the UNPROM described above if we haven't yet
434 found a promotion, or if using the new input preserves the
435 sign of the previous promotion. */
436 if (!res
437 || TYPE_PRECISION (unprom->type) == orig_precision
438 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
439 || (TYPE_UNSIGNED (op_type)
440 && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
442 unprom->set_op (op, dt, caster);
443 min_precision = TYPE_PRECISION (op_type);
445 /* Stop if we've already seen a promotion and if this
446 conversion does more than change the sign. */
447 else if (TYPE_PRECISION (op_type)
448 != TYPE_PRECISION (unprom->type))
449 break;
451 /* The sequence now extends to OP. */
452 res = op;
455 /* See whether OP is defined by a cast. Record it as CASTER if
456 the cast is potentially vectorizable. */
457 if (!def_stmt)
458 break;
459 caster = def_stmt_info;
461 /* Ignore pattern statements, since we don't link uses for them. */
462 if (caster
463 && single_use_p
464 && !STMT_VINFO_RELATED_STMT (caster)
465 && !has_single_use (res))
466 *single_use_p = false;
468 gassign *assign = dyn_cast <gassign *> (def_stmt);
469 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
470 break;
472 /* Continue with the input to the cast. */
473 op = gimple_assign_rhs1 (def_stmt);
474 op_type = TREE_TYPE (op);
476 return res;
479 /* OP is an integer operand to an operation that returns TYPE, and we
480 want to treat the operation as a widening one. So far we can treat
481 it as widening from *COMMON_TYPE.
483 Return true if OP is suitable for such a widening operation,
484 either widening from *COMMON_TYPE or from some supertype of it.
485 Update *COMMON_TYPE to the supertype in the latter case.
487 SHIFT_P is true if OP is a shift amount. */
489 static bool
490 vect_joust_widened_integer (tree type, bool shift_p, tree op,
491 tree *common_type)
493 /* Calculate the minimum precision required by OP, without changing
494 the sign of either operand. */
495 unsigned int precision;
496 if (shift_p)
498 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
499 return false;
500 precision = TREE_INT_CST_LOW (op);
502 else
504 precision = wi::min_precision (wi::to_widest (op),
505 TYPE_SIGN (*common_type));
506 if (precision * 2 > TYPE_PRECISION (type))
507 return false;
510 /* If OP requires a wider type, switch to that type. The checks
511 above ensure that this is still narrower than the result. */
512 precision = vect_element_precision (precision);
513 if (TYPE_PRECISION (*common_type) < precision)
514 *common_type = build_nonstandard_integer_type
515 (precision, TYPE_UNSIGNED (*common_type));
516 return true;
519 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
520 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
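/* For example (a sketch of the mismatched-sign case handled below): joining
   unsigned char with signed char requires a signed 16-bit supertype, which
   is only accepted when TYPE is at least 32 bits wide.  */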
522 static bool
523 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
525 if (types_compatible_p (*common_type, new_type))
526 return true;
528 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
529 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
530 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
531 return true;
533 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
534 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
535 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
537 *common_type = new_type;
538 return true;
541 /* We have mismatched signs, with the signed type being
542 no wider than the unsigned type. In this case we need
543 a wider signed type. */
544 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
545 TYPE_PRECISION (new_type));
546 precision *= 2;
548 if (precision * 2 > TYPE_PRECISION (type))
549 return false;
551 *common_type = build_nonstandard_integer_type (precision, false);
552 return true;
555 /* Check whether STMT_INFO can be viewed as a tree of integer operations
556 in which each node either performs CODE or WIDENED_CODE, and where
557 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
558 specifies the maximum number of leaf operands. SHIFT_P says whether
559 CODE and WIDENED_CODE are some sort of shift.
561 If STMT_INFO is such a tree, return the number of leaf operands
562 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
563 to a type that (a) is narrower than the result of STMT_INFO and
564 (b) can hold all leaf operand values.
566 If SUBTYPE is nonnull then allow that the signs of the operands
567 may differ but not their precision. SUBTYPE is updated to reflect
568 this.
570 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
571 exists. */
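/* For example (an illustrative sketch): with CODE == PLUS_EXPR and unsigned
   char values a, b and c, the statement tree

     int t = (int) a + (int) b;
     int r = t + (int) c;

   rooted at the definition of r has the three leaf operands a, b and c, so a
   call with MAX_NOPS >= 3 returns 3 and sets *COMMON_TYPE to the unsigned
   char type.  */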
573 static unsigned int
574 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
575 code_helper widened_code, bool shift_p,
576 unsigned int max_nops,
577 vect_unpromoted_value *unprom, tree *common_type,
578 enum optab_subtype *subtype = NULL)
580 /* Check for an integer operation with the right code. */
581 gimple* stmt = stmt_info->stmt;
582 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
583 return 0;
585 code_helper rhs_code;
586 if (is_gimple_assign (stmt))
587 rhs_code = gimple_assign_rhs_code (stmt);
588 else if (is_gimple_call (stmt))
589 rhs_code = gimple_call_combined_fn (stmt);
590 else
591 return 0;
593 if (rhs_code != code
594 && rhs_code != widened_code)
595 return 0;
597 tree lhs = gimple_get_lhs (stmt);
598 tree type = TREE_TYPE (lhs);
599 if (!INTEGRAL_TYPE_P (type))
600 return 0;
602 /* Assume that both operands will be leaf operands. */
603 max_nops -= 2;
605 /* Check the operands. */
606 unsigned int next_op = 0;
607 for (unsigned int i = 0; i < 2; ++i)
609 vect_unpromoted_value *this_unprom = &unprom[next_op];
610 unsigned int nops = 1;
611 tree op = gimple_arg (stmt, i);
612 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
614 /* We already have a common type from earlier operands.
615 Update it to account for OP. */
616 this_unprom->set_op (op, vect_constant_def);
617 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
618 return 0;
620 else
622 /* Only allow shifts by constants. */
623 if (shift_p && i == 1)
624 return 0;
626 if (rhs_code != code)
628 /* If rhs_code is widened_code, don't look through further
629 possible promotions, there is a promotion already embedded
630 in the WIDEN_*_EXPR. */
631 if (TREE_CODE (op) != SSA_NAME
632 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
633 return 0;
635 stmt_vec_info def_stmt_info;
636 gimple *def_stmt;
637 vect_def_type dt;
638 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
639 &def_stmt))
640 return 0;
641 this_unprom->set_op (op, dt, NULL);
643 else if (!vect_look_through_possible_promotion (vinfo, op,
644 this_unprom))
645 return 0;
647 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
649 /* The operand isn't widened. If STMT_INFO has the code
650 for an unwidened operation, recursively check whether
651 this operand is a node of the tree. */
652 if (rhs_code != code
653 || max_nops == 0
654 || this_unprom->dt != vect_internal_def)
655 return 0;
657 /* Give back the leaf slot allocated above now that we're
658 not treating this as a leaf operand. */
659 max_nops += 1;
661 /* Recursively process the definition of the operand. */
662 stmt_vec_info def_stmt_info
663 = vect_get_internal_def (vinfo, this_unprom->op);
665 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
666 widened_code, shift_p, max_nops,
667 this_unprom, common_type,
668 subtype);
669 if (nops == 0)
670 return 0;
672 max_nops -= nops;
674 else
676 /* Make sure that the operand is narrower than the result. */
677 if (TYPE_PRECISION (this_unprom->type) * 2
678 > TYPE_PRECISION (type))
679 return 0;
681 /* Update COMMON_TYPE for the new operand. */
682 if (i == 0)
683 *common_type = this_unprom->type;
684 else if (!vect_joust_widened_type (type, this_unprom->type,
685 common_type))
687 if (subtype)
689 /* See if we can sign extend the smaller type. */
690 if (TYPE_PRECISION (this_unprom->type)
691 > TYPE_PRECISION (*common_type))
692 *common_type = this_unprom->type;
693 *subtype = optab_vector_mixed_sign;
695 else
696 return 0;
700 next_op += nops;
702 return next_op;
705 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
706 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
708 static tree
709 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
711 return make_temp_ssa_name (type, stmt, "patt");
714 /* STMT2_INFO describes a type conversion that could be split into STMT1
715 followed by a version of STMT2_INFO that takes NEW_RHS as its first
716 input. Try to do this using pattern statements, returning true on
717 success. */
719 static bool
720 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
721 gimple *stmt1, tree vectype)
723 if (is_pattern_stmt_p (stmt2_info))
725 /* STMT2_INFO is part of a pattern. Get the statement to which
726 the pattern is attached. */
727 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
728 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_NOTE, vect_location,
732 "Splitting pattern statement: %G", stmt2_info->stmt);
734 /* Since STMT2_INFO is a pattern statement, we can change it
735 in-situ without worrying about changing the code for the
736 containing block. */
737 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
739 if (dump_enabled_p ())
741 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
742 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
743 stmt2_info->stmt);
746 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
747 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
748 /* STMT2_INFO is the actual pattern statement. Add STMT1
749 to the end of the definition sequence. */
750 gimple_seq_add_stmt_without_update (def_seq, stmt1);
751 else
753 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
754 before it. */
755 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
756 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
758 return true;
760 else
762 /* STMT2_INFO doesn't yet have a pattern. Try to create a
763 two-statement pattern now. */
764 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
765 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
766 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
767 if (!lhs_vectype)
768 return false;
770 if (dump_enabled_p ())
771 dump_printf_loc (MSG_NOTE, vect_location,
772 "Splitting statement: %G", stmt2_info->stmt);
774 /* Add STMT1 as a singleton pattern definition sequence. */
775 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
776 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
777 gimple_seq_add_stmt_without_update (def_seq, stmt1);
779 /* Build the second of the two pattern statements. */
780 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
781 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
782 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
784 if (dump_enabled_p ())
786 dump_printf_loc (MSG_NOTE, vect_location,
787 "into pattern statements: %G", stmt1);
788 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
789 (gimple *) new_stmt2);
792 return true;
796 /* Look for the following pattern
797 X = x[i]
798 Y = y[i]
799 DIFF = X - Y
800 DAD = ABS_EXPR<DIFF>
802 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
803 HALF_TYPE and UNPROM will be set should the statement be found to
804 be a widened operation.
805 DIFF_STMT will be set to the MINUS_EXPR
806 statement that precedes the ABS_STMT if it is a MINUS_EXPR.  */
808 static bool
809 vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
810 tree *half_type,
811 vect_unpromoted_value unprom[2],
812 gassign **diff_stmt)
814 if (!abs_stmt)
815 return false;
817 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in
818 a phi inside the loop (in case we are analyzing an outer-loop). */
819 enum tree_code code = gimple_assign_rhs_code (abs_stmt);
820 if (code != ABS_EXPR && code != ABSU_EXPR)
821 return false;
823 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
824 if (!abs_oprnd)
825 return false;
826 tree abs_type = TREE_TYPE (abs_oprnd);
827 if (!ANY_INTEGRAL_TYPE_P (abs_type)
828 || TYPE_OVERFLOW_WRAPS (abs_type)
829 || TYPE_UNSIGNED (abs_type))
830 return false;
832 /* Peel off conversions from the ABS input. This can involve sign
833 changes (e.g. from an unsigned subtraction to a signed ABS input)
834 or signed promotion, but it can't include unsigned promotion.
835 (Note that ABS of an unsigned promotion should have been folded
836 away before now anyway.) */
837 vect_unpromoted_value unprom_diff;
838 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
839 &unprom_diff);
840 if (!abs_oprnd)
841 return false;
842 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
843 && TYPE_UNSIGNED (unprom_diff.type))
844 return false;
846 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
847 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
848 if (!diff_stmt_vinfo)
849 return false;
851 gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
852 if (diff_stmt && diff
853 && gimple_assign_rhs_code (diff) == MINUS_EXPR
854 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
855 *diff_stmt = diff;
857 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in
858 a phi inside the loop (in case we are analyzing an outer-loop). */
859 if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
860 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
861 false, 2, unprom, half_type))
862 return true;
864 return false;
867 /* Convert UNPROM to TYPE and return the result, adding new statements
868 to STMT_INFO's pattern definition statements if no better way is
869 available. VECTYPE is the vector form of TYPE.
871 If SUBTYPE then convert the type based on the subtype. */
873 static tree
874 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
875 vect_unpromoted_value *unprom, tree vectype,
876 enum optab_subtype subtype = optab_default)
878 /* Update the type if the signs differ. */
879 if (subtype == optab_vector_mixed_sign)
881 gcc_assert (!TYPE_UNSIGNED (type));
882 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
884 type = unsigned_type_for (type);
885 vectype = unsigned_type_for (vectype);
889 /* Check for a no-op conversion. */
890 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
891 return unprom->op;
893 /* Allow the caller to create constant vect_unpromoted_values. */
894 if (TREE_CODE (unprom->op) == INTEGER_CST)
895 return wide_int_to_tree (type, wi::to_widest (unprom->op));
897 tree input = unprom->op;
898 if (unprom->caster)
900 tree lhs = gimple_get_lhs (unprom->caster->stmt);
901 tree lhs_type = TREE_TYPE (lhs);
903 /* If the result of the existing cast is the right width, use it
904 instead of the source of the cast. */
905 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
906 input = lhs;
907 /* If the precision we want is between the source and result
908 precisions of the existing cast, try splitting the cast into
909 two and tapping into a mid-way point. */
910 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
911 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
913 /* In order to preserve the semantics of the original cast,
914 give the mid-way point the same signedness as the input value.
916 It would be possible to use a signed type here instead if
917 TYPE is signed and UNPROM->TYPE is unsigned, but that would
918 make the sign of the midtype sensitive to the order in
919 which we process the statements, since the signedness of
920 TYPE is the signedness required by just one of possibly
921 many users. Also, unsigned promotions are usually as cheap
922 as or cheaper than signed ones, so it's better to keep an
923 unsigned promotion. */
924 tree midtype = build_nonstandard_integer_type
925 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
926 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
927 if (vec_midtype)
929 input = vect_recog_temp_ssa_var (midtype, NULL);
930 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
931 unprom->op);
932 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
933 vec_midtype))
934 append_pattern_def_seq (vinfo, stmt_info,
935 new_stmt, vec_midtype);
939 /* See if we can reuse an existing result. */
940 if (types_compatible_p (type, TREE_TYPE (input)))
941 return input;
944 /* We need a new conversion statement. */
945 tree new_op = vect_recog_temp_ssa_var (type, NULL);
946 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
948 /* If OP is an external value, see if we can insert the new statement
949 on an incoming edge. */
950 if (input == unprom->op && unprom->dt == vect_external_def)
951 if (edge e = vect_get_external_def_edge (vinfo, input))
953 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
954 gcc_assert (!new_bb);
955 return new_op;
958 /* As a (common) last resort, add the statement to the pattern itself. */
959 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
960 return new_op;
963 /* Invoke vect_convert_input for N elements of UNPROM and store the
964 result in the corresponding elements of RESULT.
966 If SUBTYPE then convert the type based on the subtype. */
968 static void
969 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
970 tree *result, tree type, vect_unpromoted_value *unprom,
971 tree vectype, enum optab_subtype subtype = optab_default)
973 for (unsigned int i = 0; i < n; ++i)
975 unsigned int j;
976 for (j = 0; j < i; ++j)
977 if (unprom[j].op == unprom[i].op)
978 break;
980 if (j < i)
981 result[i] = result[j];
982 else
983 result[i] = vect_convert_input (vinfo, stmt_info,
984 type, &unprom[i], vectype, subtype);
988 /* The caller has created a (possibly empty) sequence of pattern definition
989 statements followed by a single statement PATTERN_STMT. Cast the result
990 of this final statement to TYPE. If a new statement is needed, add
991 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
992 and return the new statement, otherwise return PATTERN_STMT as-is.
993 VECITYPE is the vector form of PATTERN_STMT's result type. */
995 static gimple *
996 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
997 gimple *pattern_stmt, tree vecitype)
999 tree lhs = gimple_get_lhs (pattern_stmt);
1000 if (!types_compatible_p (type, TREE_TYPE (lhs)))
1002 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
1003 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
1004 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
1006 return pattern_stmt;
1009 /* Return true if STMT_VINFO describes a reduction for which reassociation
1010 is allowed. If STMT_INFO is part of a group, assume that it's part of
1011 a reduction chain and optimistically assume that all statements
1012 except the last allow reassociation.
1013 Also require it to have code CODE and to be a reduction
1014 in the outermost loop. When returning true, store the operands in
1015 *OP0_OUT and *OP1_OUT. */
1017 static bool
1018 vect_reassociating_reduction_p (vec_info *vinfo,
1019 stmt_vec_info stmt_info, tree_code code,
1020 tree *op0_out, tree *op1_out)
1022 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1023 if (!loop_info)
1024 return false;
1026 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1027 if (!assign || gimple_assign_rhs_code (assign) != code)
1028 return false;
1030 /* We don't allow changing the order of the computation in the inner-loop
1031 when doing outer-loop vectorization. */
1032 class loop *loop = LOOP_VINFO_LOOP (loop_info);
1033 if (loop && nested_in_vect_loop_p (loop, stmt_info))
1034 return false;
1036 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1038 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1039 code))
1040 return false;
1042 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1043 return false;
1045 *op0_out = gimple_assign_rhs1 (assign);
1046 *op1_out = gimple_assign_rhs2 (assign);
1047 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1048 std::swap (*op0_out, *op1_out);
1049 return true;
1052 /* match.pd function to match
1053 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1054 with conditions:
1055 1) @1, @2, c, d, a, b are all integral type.
1056 2) There's single_use for both @1 and @2.
1057 3) a, c have same precision.
1058 4) c and @1 have different precision.
1059 5) c, d are the same type or they can differ in sign when convert is
1060 truncation.
1062 Record a, c, d and @3. */
1064 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1066 /* Function vect_recog_cond_expr_convert
1068 Try to find the following pattern:
1070 TYPE_AB A,B;
1071 TYPE_CD C,D;
1072 TYPE_E E;
1073 TYPE_E op_true = (TYPE_E) A;
1074 TYPE_E op_false = (TYPE_E) B;
1076 E = C cmp D ? op_true : op_false;
1078 where
1079 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1080 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1081 single_use of op_true and op_false.
1082 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1084 Input:
1086 * STMT_VINFO: The stmt from which the pattern search begins.
1087 here it starts with E = c cmp D ? op_true : op_false;
1089 Output:
1091 TYPE1 E' = C cmp D ? A : B;
1092 TYPE3 E = (TYPE3) E';
1094 There may be an extra nop_convert for A or B to handle different signedness.
1096 * TYPE_OUT: The vector type of the output of this pattern.
1098 * Return value: A new stmt that will be used to replace the sequence of
1099 stmts that constitute the pattern. In this case it will be:
1100 E = (TYPE3)E';
1101 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
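/* An illustrative scalar example (a sketch; any integer types with the
   stated precision relationships work):

     int a, b, c, d;
     long e = c < d ? (long) a : (long) b;

   becomes

     int tmp = c < d ? a : b;
     long e = (long) tmp;

   so that the COND_EXPR operates on the narrower type.  */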
1103 static gimple *
1104 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1105 stmt_vec_info stmt_vinfo, tree *type_out)
1107 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1108 tree lhs, match[4], temp, type, new_lhs, op2;
1109 gimple *cond_stmt;
1110 gimple *pattern_stmt;
1112 if (!last_stmt)
1113 return NULL;
1115 lhs = gimple_assign_lhs (last_stmt);
1117 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1118 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1119 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1120 return NULL;
1122 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1124 op2 = match[2];
1125 type = TREE_TYPE (match[1]);
1126 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1128 op2 = vect_recog_temp_ssa_var (type, NULL);
1129 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1130 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1131 get_vectype_for_scalar_type (vinfo, type));
1134 temp = vect_recog_temp_ssa_var (type, NULL);
1135 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1136 match[1], op2));
1137 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1138 get_vectype_for_scalar_type (vinfo, type));
1139 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1140 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1141 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "created pattern stmt: %G", pattern_stmt);
1146 return pattern_stmt;
1149 /* Function vect_recog_dot_prod_pattern
1151 Try to find the following pattern:
1153 type1a x_t
1154 type1b y_t;
1155 TYPE1 prod;
1156 TYPE2 sum = init;
1157 loop:
1158 sum_0 = phi <init, sum_1>
1159 S1 x_t = ...
1160 S2 y_t = ...
1161 S3 x_T = (TYPE1) x_t;
1162 S4 y_T = (TYPE1) y_t;
1163 S5 prod = x_T * y_T;
1164 [S6 prod = (TYPE2) prod; #optional]
1165 S7 sum_1 = prod + sum_0;
1167 where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1168 the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
1169 but 'type1a' and 'type1b' can differ in sign.
1171 Input:
1173 * STMT_VINFO: The stmt from which the pattern search begins. In the
1174 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1175 will be detected.
1177 Output:
1179 * TYPE_OUT: The type of the output of this pattern.
1181 * Return value: A new stmt that will be used to replace the sequence of
1182 stmts that constitute the pattern. In this case it will be:
1183 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1185 Note: The dot-prod idiom is a widening reduction pattern that is
1186 vectorized without preserving all the intermediate results. It
1187 produces only N/2 (widened) results (by summing up pairs of
1188 intermediate results) rather than all N results. Therefore, we
1189 cannot allow this pattern when we want to get all the results and in
1190 the correct order (as is the case when this computation is in an
1191 inner-loop nested in an outer-loop that is being vectorized). */
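/* An illustrative scalar source for this pattern (a sketch):

     short x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i] * y[i];		// S3-S5 widen and multiply, S7 accumulates

   which is replaced by a DOT_PROD_EXPR accumulation per vector iteration
   when the target provides a suitable dot-product optab.  */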
1193 static gimple *
1194 vect_recog_dot_prod_pattern (vec_info *vinfo,
1195 stmt_vec_info stmt_vinfo, tree *type_out)
1197 tree oprnd0, oprnd1;
1198 gimple *last_stmt = stmt_vinfo->stmt;
1199 tree type, half_type;
1200 gimple *pattern_stmt;
1201 tree var;
1203 /* Look for the following pattern
1204 DX = (TYPE1) X;
1205 DY = (TYPE1) Y;
1206 DPROD = DX * DY;
1207 DDPROD = (TYPE2) DPROD;
1208 sum_1 = DDPROD + sum_0;
1209 In which
1210 - DX is double the size of X
1211 - DY is double the size of Y
1212 - DX, DY, DPROD all have the same type but the sign
1213 between X, Y and DPROD can differ.
1214 - sum is the same size as DPROD or bigger
1215 - sum has been recognized as a reduction variable.
1217 This is equivalent to:
1218 DPROD = X w* Y; #widen mult
1219 sum_1 = DPROD w+ sum_0; #widen summation
1221 DPROD = X w* Y; #widen mult
1222 sum_1 = DPROD + sum_0; #summation
1225 /* Starting from LAST_STMT, follow the defs of its uses in search
1226 of the above pattern. */
1228 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1229 &oprnd0, &oprnd1))
1230 return NULL;
1232 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1234 vect_unpromoted_value unprom_mult;
1235 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1237 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1238 we know that oprnd1 is the reduction variable (defined by a loop-header
1239 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1240 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1241 if (!oprnd0)
1242 return NULL;
1244 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1245 if (!mult_vinfo)
1246 return NULL;
1248 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in
1249 a phi inside the loop (in case we are analyzing an outer-loop). */
1250 vect_unpromoted_value unprom0[2];
1251 enum optab_subtype subtype = optab_vector;
1252 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1253 false, 2, unprom0, &half_type, &subtype))
1254 return NULL;
1256 /* If there are two widening operations, make sure they agree on the sign
1257 of the extension. The result of an optab_vector_mixed_sign operation
1258 is signed; otherwise, the result has the same sign as the operands. */
1259 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1260 && (subtype == optab_vector_mixed_sign
1261 ? TYPE_UNSIGNED (unprom_mult.type)
1262 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1263 return NULL;
1265 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1267 /* If the inputs have mixed signs, canonicalize on using the signed
1268 input type for analysis. This also helps when emulating mixed-sign
1269 operations using signed operations. */
1270 if (subtype == optab_vector_mixed_sign)
1271 half_type = signed_type_for (half_type);
1273 tree half_vectype;
1274 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1275 type_out, &half_vectype, subtype))
1277 /* We can emulate a mixed-sign dot-product using a sequence of
1278 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1279 if (subtype != optab_vector_mixed_sign
1280 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1281 DOT_PROD_EXPR, half_type,
1282 type_out, &half_vectype,
1283 optab_vector))
1284 return NULL;
1286 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1287 *type_out);
1290 /* Get the inputs in the appropriate types. */
1291 tree mult_oprnd[2];
1292 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1293 unprom0, half_vectype, subtype);
1295 var = vect_recog_temp_ssa_var (type, NULL);
1296 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1297 mult_oprnd[0], mult_oprnd[1], oprnd1);
1299 return pattern_stmt;
1303 /* Function vect_recog_sad_pattern
1305 Try to find the following Sum of Absolute Difference (SAD) pattern:
1307 type x_t, y_t;
1308 signed TYPE1 diff, abs_diff;
1309 TYPE2 sum = init;
1310 loop:
1311 sum_0 = phi <init, sum_1>
1312 S1 x_t = ...
1313 S2 y_t = ...
1314 S3 x_T = (TYPE1) x_t;
1315 S4 y_T = (TYPE1) y_t;
1316 S5 diff = x_T - y_T;
1317 S6 abs_diff = ABS_EXPR <diff>;
1318 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1319 S8 sum_1 = abs_diff + sum_0;
1321 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1322 same size as 'TYPE1' or bigger. This is a special case of a reduction
1323 computation.
1325 Input:
1327 * STMT_VINFO: The stmt from which the pattern search begins. In the
1328 example, when this function is called with S8, the pattern
1329 {S3,S4,S5,S6,S7,S8} will be detected.
1331 Output:
1333 * TYPE_OUT: The type of the output of this pattern.
1335 * Return value: A new stmt that will be used to replace the sequence of
1336 stmts that constitute the pattern. In this case it will be:
1337 SAD_EXPR <x_t, y_t, sum_0>  */
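/* An illustrative scalar source for this pattern (a sketch):

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += __builtin_abs (x[i] - y[i]);	// S5, S6 and S8 above

   which is replaced by a SAD_EXPR accumulation when the target supports the
   corresponding optab.  */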
1340 static gimple *
1341 vect_recog_sad_pattern (vec_info *vinfo,
1342 stmt_vec_info stmt_vinfo, tree *type_out)
1344 gimple *last_stmt = stmt_vinfo->stmt;
1345 tree half_type;
1347 /* Look for the following pattern
1348 DX = (TYPE1) X;
1349 DY = (TYPE1) Y;
1350 DDIFF = DX - DY;
1351 DAD = ABS_EXPR <DDIFF>;
1352 DAD = (TYPE2) DAD; #optional
1353 sum_1 = DAD + sum_0;
1354 In which
1355 - DX is at least double the size of X
1356 - DY is at least double the size of Y
1357 - DX, DY, DDIFF, DAD all have the same type
1358 - sum is the same size as DAD or bigger
1359 - sum has been recognized as a reduction variable.
1361 This is equivalent to:
1362 DDIFF = X w- Y; #widen sub
1363 DAD = ABS_EXPR <DDIFF>;
1364 sum_1 = DAD w+ sum_0; #widen summation
1366 DDIFF = X w- Y; #widen sub
1367 DAD = ABS_EXPR <DDIFF>;
1368 sum_1 = DAD + sum_0; #summation
1371 /* Starting from LAST_STMT, follow the defs of its uses in search
1372 of the above pattern. */
1374 tree plus_oprnd0, plus_oprnd1;
1375 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1376 &plus_oprnd0, &plus_oprnd1))
1377 return NULL;
1379 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1381 /* Any non-truncating sequence of conversions is OK here, since
1382 with a successful match, the result of the ABS(U) is known to fit
1383 within the nonnegative range of the result type. (It cannot be the
1384 negative of the minimum signed value due to the range of the widening
1385 MINUS_EXPR.) */
1386 vect_unpromoted_value unprom_abs;
1387 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1388 &unprom_abs);
1390 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1391 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1392 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1393 Then check that plus_oprnd0 is defined by an abs_expr. */
1395 if (!plus_oprnd0)
1396 return NULL;
1398 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1399 if (!abs_stmt_vinfo)
1400 return NULL;
1402 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in
1403 a phi inside the loop (in case we are analyzing an outer-loop). */
1404 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1405 vect_unpromoted_value unprom[2];
1407 if (!abs_stmt)
1409 gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1410 if (!abd_stmt
1411 || !gimple_call_internal_p (abd_stmt)
1412 || gimple_call_num_args (abd_stmt) != 2)
1413 return NULL;
1415 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1416 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1418 if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
1420 if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
1421 &unprom[0])
1422 || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
1423 &unprom[1]))
1424 return NULL;
1426 else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1428 unprom[0].op = abd_oprnd0;
1429 unprom[0].type = TREE_TYPE (abd_oprnd0);
1430 unprom[1].op = abd_oprnd1;
1431 unprom[1].type = TREE_TYPE (abd_oprnd1);
1433 else
1434 return NULL;
1436 half_type = unprom[0].type;
1438 else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1439 unprom, NULL))
1440 return NULL;
1442 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1444 tree half_vectype;
1445 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1446 type_out, &half_vectype))
1447 return NULL;
1449 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1450 tree sad_oprnd[2];
1451 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1452 unprom, half_vectype);
1454 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1455 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1456 sad_oprnd[1], plus_oprnd1);
1458 return pattern_stmt;
1461 /* Function vect_recog_abd_pattern
1463 Try to find the following ABsolute Difference (ABD) or
1464 widening ABD (WIDEN_ABD) pattern:
1466 TYPE1 x;
1467 TYPE2 y;
1468 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1469 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1470 TYPE3 diff = x_cast - y_cast;
1471 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1472 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1474 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1475 twice as wide as TYPE3.
1477 Input:
1479 * STMT_VINFO: The stmt from which the pattern search begins
1481 Output:
1483 * TYPE_OUT: The type of the output of this pattern
1485 * Return value: A new stmt that will be used to replace the sequence of
1486 stmts that constitute the pattern, principally:
1487 out = IFN_ABD (x, y)
1488 out = IFN_WIDEN_ABD (x, y)  */
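/* An illustrative scalar source (a sketch):

     unsigned char x[N], y[N], out[N];
     for (int i = 0; i < N; i++)
       out[i] = __builtin_abs (x[i] - y[i]);

   maps to IFN_ABD on the char vectors; storing into a sufficiently wide
   result array instead can map to the widening variant (IFN_VEC_WIDEN_ABD
   in the code below).  */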
1491 static gimple *
1492 vect_recog_abd_pattern (vec_info *vinfo,
1493 stmt_vec_info stmt_vinfo, tree *type_out)
1495 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1496 if (!last_stmt)
1497 return NULL;
1499 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1501 vect_unpromoted_value unprom[2];
1502 gassign *diff_stmt = NULL;
1503 tree abd_in_type;
1504 if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
1505 unprom, &diff_stmt))
1507 /* We cannot try further without having a non-widening MINUS. */
1508 if (!diff_stmt)
1509 return NULL;
1511 unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1512 unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1513 abd_in_type = signed_type_for (out_type);
1516 tree abd_out_type = abd_in_type;
1518 tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1519 if (!vectype_in)
1520 return NULL;
1522 internal_fn ifn = IFN_ABD;
1523 tree vectype_out = vectype_in;
1525 if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1526 && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1528 tree mid_type
1529 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1530 TYPE_UNSIGNED (abd_in_type));
1531 tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1533 code_helper dummy_code;
1534 int dummy_int;
1535 auto_vec<tree> dummy_vec;
1536 if (mid_vectype
1537 && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1538 stmt_vinfo, mid_vectype,
1539 vectype_in,
1540 &dummy_code, &dummy_code,
1541 &dummy_int, &dummy_vec))
1543 ifn = IFN_VEC_WIDEN_ABD;
1544 abd_out_type = mid_type;
1545 vectype_out = mid_vectype;
1549 if (ifn == IFN_ABD
1550 && !direct_internal_fn_supported_p (ifn, vectype_in,
1551 OPTIMIZE_FOR_SPEED))
1552 return NULL;
1554 vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1556 tree abd_oprnds[2];
1557 vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1558 abd_in_type, unprom, vectype_in);
1560 *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1562 tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1563 gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1564 abd_oprnds[0], abd_oprnds[1]);
1565 gimple_call_set_lhs (abd_stmt, abd_result);
1566 gimple_set_location (abd_stmt, gimple_location (last_stmt));
1568 gimple *stmt = abd_stmt;
1569 if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1570 && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1571 && !TYPE_UNSIGNED (abd_out_type))
1573 tree unsign = unsigned_type_for (abd_out_type);
1574 stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
1575 vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
1578 return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1581 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1582 so that it can be treated as though it had the form:
1584 A_TYPE a;
1585 B_TYPE b;
1586 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1587 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1588 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1589 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1590 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1592 Try to replace the pattern with:
1594 A_TYPE a;
1595 B_TYPE b;
1596 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1597 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1598 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1599 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1601 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1603 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1604 name of the pattern being matched, for dump purposes. */
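/* An illustrative scalar example (a sketch) for ORIG_CODE == MULT_EXPR and
   WIDE_CODE == WIDEN_MULT_EXPR:

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] * b[i];		// gimple: (int) a[i] * (int) b[i]

   where the multiplication is rewritten as a single widening multiply of
   the two short vectors.  */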
1606 static gimple *
1607 vect_recog_widen_op_pattern (vec_info *vinfo,
1608 stmt_vec_info last_stmt_info, tree *type_out,
1609 tree_code orig_code, code_helper wide_code,
1610 bool shift_p, const char *name)
1612 gimple *last_stmt = last_stmt_info->stmt;
1614 vect_unpromoted_value unprom[2];
1615 tree half_type;
1616 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1617 shift_p, 2, unprom, &half_type))
1619 return NULL;
1621 /* Pattern detected. */
1622 vect_pattern_detected (name, last_stmt);
1624 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1625 tree itype = type;
1626 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1627 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1628 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1629 TYPE_UNSIGNED (half_type));
1631 /* Check target support */
1632 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1633 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1634 tree ctype = itype;
1635 tree vecctype = vecitype;
1636 if (orig_code == MINUS_EXPR
1637 && TYPE_UNSIGNED (itype)
1638 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1640 /* Subtraction is special, even if half_type is unsigned and no matter
1641 whether type is signed or unsigned, if type is wider than itype,
1642 we need to sign-extend from the widening operation result to the
1643 result type.
1644 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1645 itype unsigned short and type either int or unsigned int.
1646 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1647 (unsigned short) 0xffff, but for type int we want the result -1
1648 and for type unsigned int 0xffffffff rather than 0xffff. */
1649 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1650 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1653 code_helper dummy_code;
1654 int dummy_int;
1655 auto_vec<tree> dummy_vec;
1656 if (!vectype
1657 || !vecitype
1658 || !vecctype
1659 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1660 vecitype, vectype,
1661 &dummy_code, &dummy_code,
1662 &dummy_int, &dummy_vec))
1663 return NULL;
1665 *type_out = get_vectype_for_scalar_type (vinfo, type);
1666 if (!*type_out)
1667 return NULL;
1669 tree oprnd[2];
1670 vect_convert_inputs (vinfo, last_stmt_info,
1671 2, oprnd, half_type, unprom, vectype);
1673 tree var = vect_recog_temp_ssa_var (itype, NULL);
1674 gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1676 if (vecctype != vecitype)
1677 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1678 pattern_stmt, vecitype);
1680 return vect_convert_output (vinfo, last_stmt_info,
1681 type, pattern_stmt, vecctype);
1684 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1685 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1687 static gimple *
1688 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1689 tree *type_out)
1691 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1692 MULT_EXPR, WIDEN_MULT_EXPR, false,
1693 "vect_recog_widen_mult_pattern");
1696 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1697 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1699 static gimple *
1700 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1701 tree *type_out)
1703 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1704 PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1705 false, "vect_recog_widen_plus_pattern");
1708 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1709 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1710 static gimple *
1711 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1712 tree *type_out)
1714 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1715 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1716 false, "vect_recog_widen_minus_pattern");
1719 /* Try to detect abd on widened inputs, converting IFN_ABD
1720 to IFN_VEC_WIDEN_ABD. */
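/* For instance (an illustrative sketch, assuming the target supports the
   widening optab): with unsigned char operands A and B,

     unsigned char t = .ABD (A, B);
     unsigned short r = (unsigned short) t;

   becomes

     unsigned short r = .VEC_WIDEN_ABD (A, B);  */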
1721 static gimple *
1722 vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1723 tree *type_out)
1725 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1726 if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1727 return NULL;
1729 tree last_rhs = gimple_assign_rhs1 (last_stmt);
1731 tree in_type = TREE_TYPE (last_rhs);
1732 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1733 if (!INTEGRAL_TYPE_P (in_type)
1734 || !INTEGRAL_TYPE_P (out_type)
1735 || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1736 || !TYPE_UNSIGNED (in_type))
1737 return NULL;
1739 vect_unpromoted_value unprom;
1740 tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1741 if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1742 return NULL;
1744 stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1745 if (!abd_pattern_vinfo)
1746 return NULL;
1748 gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1749 if (!abd_stmt
1750 || !gimple_call_internal_p (abd_stmt)
1751 || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1752 return NULL;
1754 tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1755 tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1757 code_helper dummy_code;
1758 int dummy_int;
1759 auto_vec<tree> dummy_vec;
1760 if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1761 vectype_out, vectype_in,
1762 &dummy_code, &dummy_code,
1763 &dummy_int, &dummy_vec))
1764 return NULL;
1766 vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1768 *type_out = vectype_out;
1770 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1771 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1772 tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1773 gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1774 abd_oprnd0, abd_oprnd1);
1775 gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1776 gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1777 return widen_abd_stmt;
1780 /* Function vect_recog_ctz_ffs_pattern
1782 Try to find the following pattern:
1784 TYPE1 A;
1785 TYPE1 B;
1787 B = __builtin_ctz{,l,ll} (A);
1791 B = __builtin_ffs{,l,ll} (A);
1793 Input:
1795 * STMT_VINFO: The stmt from which the pattern search begins.
1796 here it starts with B = __builtin_* (A);
1798 Output:
1800 * TYPE_OUT: The vector type of the output of this pattern.
1802 * Return value: A new stmt that will be used to replace the sequence of
1803 stmts that constitute the pattern, using clz or popcount builtins. */
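/* For instance (an illustrative sketch): on a target whose vector ISA
   provides .POPCOUNT but neither .CTZ nor .CLZ, B = __builtin_ctz (A)
   is rewritten via the identity .CTZ (X) = .POPCOUNT ((X - 1) & ~X):

     tmp1 = A + -1;
     tmp2 = ~A;
     tmp3 = tmp1 & tmp2;
     B = .POPCOUNT (tmp3);

   If the original call had a defined value at zero that the new sequence
   does not already produce, a final COND_EXPR on A != 0 selects it.  */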
1805 static gimple *
1806 vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1807 tree *type_out)
1809 gimple *call_stmt = stmt_vinfo->stmt;
1810 gimple *pattern_stmt;
1811 tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1812 tree new_var;
1813 internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1814 bool defined_at_zero = true, defined_at_zero_new = false;
1815 int val = 0, val_new = 0, val_cmp = 0;
1816 int prec;
1817 int sub = 0, add = 0;
1818 location_t loc;
1820 if (!is_gimple_call (call_stmt))
1821 return NULL;
1823 if (gimple_call_num_args (call_stmt) != 1
1824 && gimple_call_num_args (call_stmt) != 2)
1825 return NULL;
1827 rhs_oprnd = gimple_call_arg (call_stmt, 0);
1828 rhs_type = TREE_TYPE (rhs_oprnd);
1829 lhs_oprnd = gimple_call_lhs (call_stmt);
1830 if (!lhs_oprnd)
1831 return NULL;
1832 lhs_type = TREE_TYPE (lhs_oprnd);
1833 if (!INTEGRAL_TYPE_P (lhs_type)
1834 || !INTEGRAL_TYPE_P (rhs_type)
1835 || !type_has_mode_precision_p (rhs_type)
1836 || TREE_CODE (rhs_oprnd) != SSA_NAME)
1837 return NULL;
1839 switch (gimple_call_combined_fn (call_stmt))
1841 CASE_CFN_CTZ:
1842 ifn = IFN_CTZ;
1843 if (!gimple_call_internal_p (call_stmt)
1844 || gimple_call_num_args (call_stmt) != 2)
1845 defined_at_zero = false;
1846 else
1847 val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
1848 break;
1849 CASE_CFN_FFS:
1850 ifn = IFN_FFS;
1851 break;
1852 default:
1853 return NULL;
1856 prec = TYPE_PRECISION (rhs_type);
1857 loc = gimple_location (call_stmt);
1859 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1860 if (!vec_type)
1861 return NULL;
1863 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1864 if (!vec_rhs_type)
1865 return NULL;
1867 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1868 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1869 popcount<vector_mode>2. */
1870 if (!vec_type
1871 || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1872 OPTIMIZE_FOR_SPEED))
1873 return NULL;
1875 if (ifn == IFN_FFS
1876 && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1877 OPTIMIZE_FOR_SPEED))
1879 ifnnew = IFN_CTZ;
1880 defined_at_zero_new
1881 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1882 val_new) == 2;
1884 else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1885 OPTIMIZE_FOR_SPEED))
1887 ifnnew = IFN_CLZ;
1888 defined_at_zero_new
1889 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1890 val_new) == 2;
1892 if ((ifnnew == IFN_LAST
1893 || (defined_at_zero && !defined_at_zero_new))
1894 && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1895 OPTIMIZE_FOR_SPEED))
1897 ifnnew = IFN_POPCOUNT;
1898 defined_at_zero_new = true;
1899 val_new = prec;
1901 if (ifnnew == IFN_LAST)
1902 return NULL;
1904 vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
1906 val_cmp = val_new;
1907 if ((ifnnew == IFN_CLZ
1908 && defined_at_zero
1909 && defined_at_zero_new
1910 && val == prec
1911 && val_new == prec)
1912 || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1914 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1915 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1916 if (ifnnew == IFN_CLZ)
1917 sub = prec;
1918 val_cmp = prec;
1920 if (!TYPE_UNSIGNED (rhs_type))
1922 rhs_type = unsigned_type_for (rhs_type);
1923 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1924 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1925 pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1926 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1927 vec_rhs_type);
1928 rhs_oprnd = new_var;
1931 tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1932 pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1933 build_int_cst (rhs_type, -1));
1934 gimple_set_location (pattern_stmt, loc);
1935 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1937 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1938 pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1939 gimple_set_location (pattern_stmt, loc);
1940 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1941 rhs_oprnd = new_var;
1943 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1944 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1945 m1, rhs_oprnd);
1946 gimple_set_location (pattern_stmt, loc);
1947 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1948 rhs_oprnd = new_var;
1950 else if (ifnnew == IFN_CLZ)
1952 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1953 .FFS (X) = PREC - .CLZ (X & -X). */
1954 sub = prec - (ifn == IFN_CTZ);
1955 val_cmp = sub - val_new;
1957 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1958 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1959 gimple_set_location (pattern_stmt, loc);
1960 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1962 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1963 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1964 rhs_oprnd, neg);
1965 gimple_set_location (pattern_stmt, loc);
1966 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1967 rhs_oprnd = new_var;
1969 else if (ifnnew == IFN_POPCOUNT)
1971 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1972 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1973 sub = prec + (ifn == IFN_FFS);
1974 val_cmp = sub;
1976 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1977 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1978 gimple_set_location (pattern_stmt, loc);
1979 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1981 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1982 pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1983 rhs_oprnd, neg);
1984 gimple_set_location (pattern_stmt, loc);
1985 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1986 rhs_oprnd = new_var;
1988 else if (ifnnew == IFN_CTZ)
1990 /* .FFS (X) = .CTZ (X) + 1. */
1991 add = 1;
1992 val_cmp++;
1995 /* Create B = .IFNNEW (A). */
1996 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1997 if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
1998 pattern_stmt
1999 = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
2000 build_int_cst (integer_type_node,
2001 val_new));
2002 else
2003 pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2004 gimple_call_set_lhs (pattern_stmt, new_var);
2005 gimple_set_location (pattern_stmt, loc);
2006 *type_out = vec_type;
2008 if (sub)
2010 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2011 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2012 pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2013 build_int_cst (lhs_type, sub),
2014 new_var);
2015 gimple_set_location (pattern_stmt, loc);
2016 new_var = ret_var;
2018 else if (add)
2020 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2021 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2022 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2023 build_int_cst (lhs_type, add));
2024 gimple_set_location (pattern_stmt, loc);
2025 new_var = ret_var;
2028 if (defined_at_zero
2029 && (!defined_at_zero_new || val != val_cmp))
2031 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2032 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2033 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2034 rhs_type = TREE_TYPE (rhs_oprnd);
2035 tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
2036 rhs_oprnd, build_zero_cst (rhs_type));
2037 pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2038 new_var,
2039 build_int_cst (lhs_type, val));
2042 if (dump_enabled_p ())
2043 dump_printf_loc (MSG_NOTE, vect_location,
2044 "created pattern stmt: %G", pattern_stmt);
2046 return pattern_stmt;
2049 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2051 Try to find the following pattern:
2053 UTYPE1 A;
2054 TYPE1 B;
2055 UTYPE2 temp_in;
2056 TYPE3 temp_out;
2057 temp_in = (UTYPE2)A;
2059 temp_out = __builtin_popcount{,l,ll} (temp_in);
2060 B = (TYPE1) temp_out;
2062 TYPE2 may or may not be equal to TYPE3;
2063 e.g. TYPE2 is equal to TYPE3 for __builtin_popcount
2064 but TYPE2 is not equal to TYPE3 for __builtin_popcountll.
2066 Input:
2068 * STMT_VINFO: The stmt from which the pattern search begins.
2069 here it starts with B = (TYPE1) temp_out;
2071 Output:
2073 * TYPE_OUT: The vector type of the output of this pattern.
2075 * Return value: A new stmt that will be used to replace the sequence of
2076 stmts that constitute the pattern. In this case it will be:
2077 B = .POPCOUNT (A);
2079 Similarly for clz, ctz and ffs.
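/* As a concrete illustration (assuming the target has a vector popcount
   for the 16-bit element type): with A of type unsigned short and B of
   type short,

     temp_in = (unsigned int) A;
     temp_out = __builtin_popcount (temp_in);
     B = (short) temp_out;

   is replaced by the single pattern statement

     B = .POPCOUNT (A);

   since zero-extending A does not change the population count and the
   precisions of A and B match.  */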
2082 static gimple *
2083 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2084 stmt_vec_info stmt_vinfo,
2085 tree *type_out)
2087 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2088 gimple *call_stmt, *pattern_stmt;
2089 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2090 internal_fn ifn = IFN_LAST;
2091 int addend = 0;
2093 /* Find B = (TYPE1) temp_out. */
2094 if (!last_stmt)
2095 return NULL;
2096 tree_code code = gimple_assign_rhs_code (last_stmt);
2097 if (!CONVERT_EXPR_CODE_P (code))
2098 return NULL;
2100 lhs_oprnd = gimple_assign_lhs (last_stmt);
2101 lhs_type = TREE_TYPE (lhs_oprnd);
2102 if (!INTEGRAL_TYPE_P (lhs_type))
2103 return NULL;
2105 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2106 if (TREE_CODE (rhs_oprnd) != SSA_NAME
2107 || !has_single_use (rhs_oprnd))
2108 return NULL;
2109 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2111 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2112 if (!is_gimple_call (call_stmt))
2113 return NULL;
2114 switch (gimple_call_combined_fn (call_stmt))
2116 int val;
2117 CASE_CFN_POPCOUNT:
2118 ifn = IFN_POPCOUNT;
2119 break;
2120 CASE_CFN_CLZ:
2121 ifn = IFN_CLZ;
2122 /* Punt if call result is unsigned and defined value at zero
2123 is negative, as the negative value doesn't extend correctly. */
2124 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2125 && gimple_call_internal_p (call_stmt)
2126 && CLZ_DEFINED_VALUE_AT_ZERO
2127 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2128 && val < 0)
2129 return NULL;
2130 break;
2131 CASE_CFN_CTZ:
2132 ifn = IFN_CTZ;
2133 /* Punt if call result is unsigned and defined value at zero
2134 is negative, as the negative value doesn't extend correctly. */
2135 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2136 && gimple_call_internal_p (call_stmt)
2137 && CTZ_DEFINED_VALUE_AT_ZERO
2138 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2139 && val < 0)
2140 return NULL;
2141 break;
2142 CASE_CFN_FFS:
2143 ifn = IFN_FFS;
2144 break;
2145 default:
2146 return NULL;
2149 if (gimple_call_num_args (call_stmt) != 1
2150 && gimple_call_num_args (call_stmt) != 2)
2151 return NULL;
2153 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2154 vect_unpromoted_value unprom_diff;
2155 rhs_origin
2156 = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2158 if (!rhs_origin)
2159 return NULL;
2161 /* Input and output of .POPCOUNT should be integers of the same precision. */
2162 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2163 return NULL;
2165 /* Also, A should be unsigned or have the same precision as temp_in;
2166 otherwise different builtins/internal functions have different behaviors. */
2167 if (TYPE_PRECISION (unprom_diff.type)
2168 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2169 switch (ifn)
2171 case IFN_POPCOUNT:
2172 /* For popcount require zero extension, which doesn't add any
2173 further bits to the count. */
2174 if (!TYPE_UNSIGNED (unprom_diff.type))
2175 return NULL;
2176 break;
2177 case IFN_CLZ:
2178 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2179 if it is undefined at zero or if it matches also for the
2180 defined value there. */
2181 if (!TYPE_UNSIGNED (unprom_diff.type))
2182 return NULL;
2183 if (!type_has_mode_precision_p (lhs_type)
2184 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2185 return NULL;
2186 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2187 - TYPE_PRECISION (lhs_type));
2188 if (gimple_call_internal_p (call_stmt)
2189 && gimple_call_num_args (call_stmt) == 2)
2191 int val1, val2;
2192 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2193 int d2
2194 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2195 val2);
2196 if (d2 != 2 || val1 != val2 + addend)
2197 return NULL;
2199 break;
2200 case IFN_CTZ:
2201 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2202 if it is undefined at zero or if it matches also for the
2203 defined value there. */
2204 if (gimple_call_internal_p (call_stmt)
2205 && gimple_call_num_args (call_stmt) == 2)
2207 int val1, val2;
2208 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2209 int d2
2210 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2211 val2);
2212 if (d2 != 2 || val1 != val2)
2213 return NULL;
2215 break;
2216 case IFN_FFS:
2217 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2218 break;
2219 default:
2220 gcc_unreachable ();
2223 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2224 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2225 if (!vec_type)
2226 return NULL;
2228 bool supported
2229 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2230 if (!supported)
2231 switch (ifn)
2233 case IFN_POPCOUNT:
2234 case IFN_CLZ:
2235 return NULL;
2236 case IFN_FFS:
2237 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2238 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2239 OPTIMIZE_FOR_SPEED))
2240 break;
2241 /* FALLTHRU */
2242 case IFN_CTZ:
2243 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2244 clz or popcount. */
2245 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2246 OPTIMIZE_FOR_SPEED))
2247 break;
2248 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2249 OPTIMIZE_FOR_SPEED))
2250 break;
2251 return NULL;
2252 default:
2253 gcc_unreachable ();
2256 vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2257 call_stmt);
2259 /* Create B = .POPCOUNT (A). */
2260 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2261 tree arg2 = NULL_TREE;
2262 int val;
2263 if (ifn == IFN_CLZ
2264 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2265 val) == 2)
2266 arg2 = build_int_cst (integer_type_node, val);
2267 else if (ifn == IFN_CTZ
2268 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2269 val) == 2)
2270 arg2 = build_int_cst (integer_type_node, val);
2271 if (arg2)
2272 pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
2273 else
2274 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2275 gimple_call_set_lhs (pattern_stmt, new_var);
2276 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2277 *type_out = vec_type;
2279 if (dump_enabled_p ())
2280 dump_printf_loc (MSG_NOTE, vect_location,
2281 "created pattern stmt: %G", pattern_stmt);
2283 if (addend)
2285 gcc_assert (supported);
2286 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2287 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2288 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2289 build_int_cst (lhs_type, addend));
2291 else if (!supported)
2293 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2294 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2295 pattern_stmt
2296 = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2297 if (pattern_stmt == NULL)
2298 return NULL;
2299 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2301 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2302 gimple_seq_add_seq_without_update (pseq, seq);
2305 return pattern_stmt;
2308 /* Function vect_recog_pow_pattern
2310 Try to find the following pattern:
2312 x = POW (y, N);
2314 with POW being one of pow, powf, powi, powif and N being
2315 either 2 or 0.5.
2317 Input:
2319 * STMT_VINFO: The stmt from which the pattern search begins.
2321 Output:
2323 * TYPE_OUT: The type of the output of this pattern.
2325 * Return value: A new stmt that will be used to replace the sequence of
2326 stmts that constitute the pattern. In this case it will be:
2327 x = x * x
2329 x = sqrt (x)
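/* For example, powf (x, 2.0f) is rewritten as x * x and pow (x, 0.5) as a
   call to .SQRT (x), provided the target supports vector multiplication or
   a vector sqrt, respectively, for the element type (illustrative; the
   exponent must be the literal constant 2 or 0.5).  */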
2332 static gimple *
2333 vect_recog_pow_pattern (vec_info *vinfo,
2334 stmt_vec_info stmt_vinfo, tree *type_out)
2336 gimple *last_stmt = stmt_vinfo->stmt;
2337 tree base, exp;
2338 gimple *stmt;
2339 tree var;
2341 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2342 return NULL;
2344 switch (gimple_call_combined_fn (last_stmt))
2346 CASE_CFN_POW:
2347 CASE_CFN_POWI:
2348 break;
2350 default:
2351 return NULL;
2354 base = gimple_call_arg (last_stmt, 0);
2355 exp = gimple_call_arg (last_stmt, 1);
2356 if (TREE_CODE (exp) != REAL_CST
2357 && TREE_CODE (exp) != INTEGER_CST)
2359 if (flag_unsafe_math_optimizations
2360 && TREE_CODE (base) == REAL_CST
2361 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2363 combined_fn log_cfn;
2364 built_in_function exp_bfn;
2365 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2367 case BUILT_IN_POW:
2368 log_cfn = CFN_BUILT_IN_LOG;
2369 exp_bfn = BUILT_IN_EXP;
2370 break;
2371 case BUILT_IN_POWF:
2372 log_cfn = CFN_BUILT_IN_LOGF;
2373 exp_bfn = BUILT_IN_EXPF;
2374 break;
2375 case BUILT_IN_POWL:
2376 log_cfn = CFN_BUILT_IN_LOGL;
2377 exp_bfn = BUILT_IN_EXPL;
2378 break;
2379 default:
2380 return NULL;
2382 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2383 tree exp_decl = builtin_decl_implicit (exp_bfn);
2384 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2385 does that, but if C is a power of 2, we want to use
2386 exp2 (log2 (C) * x) in the non-vectorized version, but for
2387 vectorization we don't have vectorized exp2. */
2388 if (logc
2389 && TREE_CODE (logc) == REAL_CST
2390 && exp_decl
2391 && lookup_attribute ("omp declare simd",
2392 DECL_ATTRIBUTES (exp_decl)))
2394 cgraph_node *node = cgraph_node::get_create (exp_decl);
2395 if (node->simd_clones == NULL)
2397 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2398 || node->definition)
2399 return NULL;
2400 expand_simd_clones (node);
2401 if (node->simd_clones == NULL)
2402 return NULL;
2404 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2405 if (!*type_out)
2406 return NULL;
2407 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2408 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2409 append_pattern_def_seq (vinfo, stmt_vinfo, g);
2410 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2411 g = gimple_build_call (exp_decl, 1, def);
2412 gimple_call_set_lhs (g, res);
2413 return g;
2417 return NULL;
2420 /* We now have a pow or powi builtin function call with a constant
2421 exponent. */
2423 /* Catch squaring. */
2424 if ((tree_fits_shwi_p (exp)
2425 && tree_to_shwi (exp) == 2)
2426 || (TREE_CODE (exp) == REAL_CST
2427 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2429 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2430 TREE_TYPE (base), type_out))
2431 return NULL;
2433 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2434 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2435 return stmt;
2438 /* Catch square root. */
2439 if (TREE_CODE (exp) == REAL_CST
2440 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2442 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2443 if (*type_out
2444 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2445 OPTIMIZE_FOR_SPEED))
2447 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2448 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2449 gimple_call_set_lhs (stmt, var);
2450 gimple_call_set_nothrow (stmt, true);
2451 return stmt;
2455 return NULL;
2459 /* Function vect_recog_widen_sum_pattern
2461 Try to find the following pattern:
2463 type x_t;
2464 TYPE x_T, sum = init;
2465 loop:
2466 sum_0 = phi <init, sum_1>
2467 S1 x_t = *p;
2468 S2 x_T = (TYPE) x_t;
2469 S3 sum_1 = x_T + sum_0;
2471 where type 'TYPE' is at least double the size of type 'type', i.e - we're
2472 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2473 a special case of a reduction computation.
2475 Input:
2477 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2478 when this function is called with S3, the pattern {S2,S3} will be detected.
2480 Output:
2482 * TYPE_OUT: The type of the output of this pattern.
2484 * Return value: A new stmt that will be used to replace the sequence of
2485 stmts that constitute the pattern. In this case it will be:
2486 WIDEN_SUM <x_t, sum_0>
2488 Note: The widening-sum idiom is a widening reduction pattern that is
2489 vectorized without preserving all the intermediate results. It
2490 produces only N/2 (widened) results (by summing up pairs of
2491 intermediate results) rather than all N results. Therefore, we
2492 cannot allow this pattern when we want to get all the results and in
2493 the correct order (as is the case when this computation is in an
2494 inner-loop nested in an outer-loop that is being vectorized). */
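/* As an illustrative instance: with 'type' unsigned char and 'TYPE' int,

     S2  x_T = (int) x_t;
     S3  sum_1 = x_T + sum_0;

   becomes

     sum_1 = WIDEN_SUM_EXPR <x_t, sum_0>;

   assuming sum_1 has been recognized as a summation reduction and the
   target provides a widening-sum optab for the chosen vector types.  */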
2496 static gimple *
2497 vect_recog_widen_sum_pattern (vec_info *vinfo,
2498 stmt_vec_info stmt_vinfo, tree *type_out)
2500 gimple *last_stmt = stmt_vinfo->stmt;
2501 tree oprnd0, oprnd1;
2502 tree type;
2503 gimple *pattern_stmt;
2504 tree var;
2506 /* Look for the following pattern
2507 DX = (TYPE) X;
2508 sum_1 = DX + sum_0;
2509 In which DX is at least double the size of X, and sum_1 has been
2510 recognized as a reduction variable.
2513 /* Starting from LAST_STMT, follow the defs of its uses in search
2514 of the above pattern. */
2516 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2517 &oprnd0, &oprnd1)
2518 || TREE_CODE (oprnd0) != SSA_NAME
2519 || !vinfo->lookup_def (oprnd0))
2520 return NULL;
2522 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2524 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2525 we know that oprnd1 is the reduction variable (defined by a loop-header
2526 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2527 Left to check that oprnd0 is defined by a cast from type 'type' to type
2528 'TYPE'. */
2530 vect_unpromoted_value unprom0;
2531 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2532 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2533 return NULL;
2535 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2537 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2538 unprom0.type, type_out))
2539 return NULL;
2541 var = vect_recog_temp_ssa_var (type, NULL);
2542 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2544 return pattern_stmt;
2547 /* Function vect_recog_bitfield_ref_pattern
2549 Try to find the following pattern:
2551 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2552 result = (type_out) bf_value;
2556 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2558 where type_out is a non-bitfield type, that is to say, its precision matches
2559 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2561 Input:
2563 * STMT_VINFO: The stmt from which the pattern search begins.
2564 here it starts with:
2565 result = (type_out) bf_value;
2569 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2571 Output:
2573 * TYPE_OUT: The vector type of the output of this pattern.
2575 * Return value: A new stmt that will be used to replace the sequence of
2576 stmts that constitute the pattern. If the precision of type_out is bigger
2577 than the precision type of _1 we perform the widening before the shifting,
2578 since the new precision will be large enough to shift the value and moving
2579 widening operations up the statement chain enables the generation of
2580 widening loads. If we are widening and the operation after the pattern is
2581 an addition then we mask first and shift later, to enable the generation of
2582 shifting adds. In the case of narrowing we will always mask first, shift
2583 last and then perform a narrowing operation. This will enable the
2584 generation of narrowing shifts.
2586 Widening with mask first, shift later:
2587 container = (type_out) container;
2588 masked = container & (((1 << bitsize) - 1) << bitpos);
2589 result = masked >> bitpos;
2591 Widening with shift first, mask last:
2592 container = (type_out) container;
2593 shifted = container >> bitpos;
2594 result = shifted & ((1 << bitsize) - 1);
2596 Narrowing:
2597 masked = container & (((1 << bitsize) - 1) << bitpos);
2598 result = masked >> bitpos;
2599 result = (type_out) result;
2601 If the bitfield is signed and it's wider than type_out, we need to
2602 keep the result sign-extended:
2603 container = (type) container;
2604 masked = container << (prec - bitsize - bitpos);
2605 result = (type_out) (masked >> (prec - bitsize));
2607 Here type is the signed variant of the wider of type_out and the type
2608 of container.
2610 The shifting is optional, depending on whether bitpos != 0.
2612 When the original bitfield was inside a gcond then a new gcond is also
2613 generated with the new `result` as the operand to the comparison.
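/* Illustrative example (arbitrary numbers, little-endian layout): for an
   unsigned 32-bit container, bitsize 5 and bitpos 3, the "shift first"
   variant produces

     shifted = container >> 3;
     result = shifted & 0x1f;

   and the "mask first" variant produces

     masked = container & (0x1f << 3);
     result = masked >> 3;

   both leaving the five extracted bits in the low bits of result;
   BYTES_BIG_ENDIAN adjusts the bit position accordingly.  */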
2617 static gimple *
2618 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2619 tree *type_out)
2621 gimple *bf_stmt = NULL;
2622 tree lhs = NULL_TREE;
2623 tree ret_type = NULL_TREE;
2624 gimple *stmt = STMT_VINFO_STMT (stmt_info);
2625 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
2627 tree op = gimple_cond_lhs (cond_stmt);
2628 if (TREE_CODE (op) != SSA_NAME)
2629 return NULL;
2630 bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2631 if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2632 return NULL;
2634 else if (is_gimple_assign (stmt)
2635 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2636 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2638 gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2639 bf_stmt = dyn_cast <gassign *> (second_stmt);
2640 lhs = gimple_assign_lhs (stmt);
2641 ret_type = TREE_TYPE (lhs);
2644 if (!bf_stmt
2645 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2646 return NULL;
2648 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2649 tree container = TREE_OPERAND (bf_ref, 0);
2650 ret_type = ret_type ? ret_type : TREE_TYPE (container);
2652 if (!bit_field_offset (bf_ref).is_constant ()
2653 || !bit_field_size (bf_ref).is_constant ()
2654 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2655 return NULL;
2657 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2658 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2659 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2660 return NULL;
2662 gimple *use_stmt, *pattern_stmt;
2663 use_operand_p use_p;
2664 bool shift_first = true;
2665 tree container_type = TREE_TYPE (container);
2666 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2668 /* Calculate shift_n before the adjustments for widening loads, otherwise
2669 the container may change and we have to consider offset change for
2670 widening loads on big endianness. The shift_n calculated here can be
2671 independent of widening. */
2672 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2673 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2674 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2675 if (BYTES_BIG_ENDIAN)
2676 shift_n = prec - shift_n - mask_width;
2678 bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref))
2679 && TYPE_PRECISION (ret_type) > mask_width);
2680 bool load_widen = (TYPE_PRECISION (TREE_TYPE (container))
2681 < TYPE_PRECISION (ret_type));
2683 /* We move the conversion earlier if the loaded type is smaller than the
2684 return type to enable the use of widening loads. And if we need a
2685 sign extension, we need to convert the loaded value early to a signed
2686 type as well. */
2687 if (ref_sext || load_widen)
2689 tree type = load_widen ? ret_type : container_type;
2690 if (ref_sext)
2691 type = gimple_signed_type (type);
2692 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2693 NOP_EXPR, container);
2694 container = gimple_get_lhs (pattern_stmt);
2695 container_type = TREE_TYPE (container);
2696 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2697 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2698 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2700 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2701 /* If we are doing the conversion last then also delay the shift as we may
2702 be able to combine the shift and conversion in certain cases. */
2703 shift_first = false;
2705 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2706 PLUS_EXPR then do the shift last as some targets can combine the shift and
2707 add into a single instruction. */
2708 if (lhs && !is_pattern_stmt_p (stmt_info)
2709 && single_imm_use (lhs, &use_p, &use_stmt))
2711 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2712 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2713 shift_first = false;
2716 /* If we don't have to shift we only generate the mask, so just fix the
2717 code-path to shift_first. */
2718 if (shift_n == 0)
2719 shift_first = true;
2721 tree result;
2722 if (shift_first && !ref_sext)
2724 tree shifted = container;
2725 if (shift_n)
2727 pattern_stmt
2728 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2729 RSHIFT_EXPR, container,
2730 build_int_cst (sizetype, shift_n));
2731 shifted = gimple_assign_lhs (pattern_stmt);
2732 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2735 tree mask = wide_int_to_tree (container_type,
2736 wi::mask (mask_width, false, prec));
2738 pattern_stmt
2739 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2740 BIT_AND_EXPR, shifted, mask);
2741 result = gimple_assign_lhs (pattern_stmt);
2743 else
2745 tree temp = vect_recog_temp_ssa_var (container_type);
2746 if (!ref_sext)
2748 tree mask = wide_int_to_tree (container_type,
2749 wi::shifted_mask (shift_n,
2750 mask_width,
2751 false, prec));
2752 pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2753 container, mask);
2755 else
2757 HOST_WIDE_INT shl = prec - shift_n - mask_width;
2758 shift_n += shl;
2759 pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2760 container,
2761 build_int_cst (sizetype,
2762 shl));
2765 tree masked = gimple_assign_lhs (pattern_stmt);
2766 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2767 pattern_stmt
2768 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2769 RSHIFT_EXPR, masked,
2770 build_int_cst (sizetype, shift_n));
2771 result = gimple_assign_lhs (pattern_stmt);
2774 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2776 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2777 pattern_stmt
2778 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2779 NOP_EXPR, result);
2782 if (!lhs)
2784 if (!vectype)
2785 return NULL;
2787 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2788 vectype = truth_type_for (vectype);
2790 /* FIXME: This part extracts the boolean value out of the bitfield in the
2791 same way as vect_recog_gcond_pattern does. However because
2792 patterns cannot match the same root twice, when we handle and
2793 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2794 apply anymore. We should really fix it so that we don't need to
2795 duplicate transformations like these. */
2796 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2797 gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
2798 tree cond_cst = gimple_cond_rhs (cond_stmt);
2799 gimple *new_stmt
2800 = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
2801 gimple_get_lhs (pattern_stmt),
2802 fold_convert (container_type, cond_cst));
2803 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
2804 pattern_stmt
2805 = gimple_build_cond (NE_EXPR, new_lhs,
2806 build_zero_cst (TREE_TYPE (new_lhs)),
2807 NULL_TREE, NULL_TREE);
2810 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2811 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2813 return pattern_stmt;
2816 /* Function vect_recog_bit_insert_pattern
2818 Try to find the following pattern:
2820 written = BIT_INSERT_EXPR (container, value, bitpos);
2822 Input:
2824 * STMT_VINFO: The stmt we want to replace.
2826 Output:
2828 * TYPE_OUT: The vector type of the output of this pattern.
2830 * Return value: A new stmt that will be used to replace the sequence of
2831 stmts that constitute the pattern. In this case it will be:
2832 value = (container_type) value; // Make sure
2833 shifted = value << bitpos; // Shift value into place
2834 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2835 // the 'to-write value'.
2836 cleared = container & ~(mask << bitpos); // Clear, in the container, the
2837 // bits that we are about to
2838 // write to.
2839 written = cleared | masked; // Write bits.
2842 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask that keeps only the
2843 bits corresponding to the real size of the bitfield value being written.
2844 The shifting is optional, depending on whether bitpos != 0.
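/* Illustrative example (arbitrary numbers, little-endian layout): writing
   a 5-bit value at bitpos 3 of an unsigned 32-bit container gives
   mask = 0x1f and

     shifted = value << 3;
     masked  = shifted & 0xf8;
     cleared = container & ~0xf8;
     written = cleared | masked;  */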
2848 static gimple *
2849 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2850 tree *type_out)
2852 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2853 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2854 return NULL;
2856 tree container = gimple_assign_rhs1 (bf_stmt);
2857 tree value = gimple_assign_rhs2 (bf_stmt);
2858 tree shift = gimple_assign_rhs3 (bf_stmt);
2860 tree bf_type = TREE_TYPE (value);
2861 tree container_type = TREE_TYPE (container);
2863 if (!INTEGRAL_TYPE_P (container_type)
2864 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2865 return NULL;
2867 gimple *pattern_stmt;
2869 vect_unpromoted_value unprom;
2870 unprom.set_op (value, vect_internal_def);
2871 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2872 get_vectype_for_scalar_type (vinfo,
2873 container_type));
2875 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2876 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2877 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2878 if (BYTES_BIG_ENDIAN)
2880 shift_n = prec - shift_n - mask_width;
2881 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2884 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2886 pattern_stmt =
2887 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2888 NOP_EXPR, value);
2889 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2890 value = gimple_get_lhs (pattern_stmt);
2893 /* Shift VALUE into place. */
2894 tree shifted = value;
2895 if (shift_n)
2897 gimple_seq stmts = NULL;
2898 shifted
2899 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2900 if (!gimple_seq_empty_p (stmts))
2901 append_pattern_def_seq (vinfo, stmt_info,
2902 gimple_seq_first_stmt (stmts));
2905 tree mask_t
2906 = wide_int_to_tree (container_type,
2907 wi::shifted_mask (shift_n, mask_width, false, prec));
2909 /* Clear bits we don't want to write back from SHIFTED. */
2910 gimple_seq stmts = NULL;
2911 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2912 mask_t);
2913 if (!gimple_seq_empty_p (stmts))
2915 pattern_stmt = gimple_seq_first_stmt (stmts);
2916 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2919 /* Mask off the bits in the container that we are to write to. */
2920 mask_t = wide_int_to_tree (container_type,
2921 wi::shifted_mask (shift_n, mask_width, true, prec));
2922 tree cleared = vect_recog_temp_ssa_var (container_type);
2923 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2924 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2926 /* Write MASKED into CLEARED. */
2927 pattern_stmt
2928 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2929 BIT_IOR_EXPR, cleared, masked);
2931 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2932 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2934 return pattern_stmt;
2938 /* Recognize cases in which an operation is performed in one type WTYPE
2939 but could be done more efficiently in a narrower type NTYPE. For example,
2940 if we have:
2942 ATYPE a; // narrower than NTYPE
2943 BTYPE b; // narrower than NTYPE
2944 WTYPE aw = (WTYPE) a;
2945 WTYPE bw = (WTYPE) b;
2946 WTYPE res = aw + bw; // only uses of aw and bw
2948 then it would be more efficient to do:
2950 NTYPE an = (NTYPE) a;
2951 NTYPE bn = (NTYPE) b;
2952 NTYPE resn = an + bn;
2953 WTYPE res = (WTYPE) resn;
2955 Other situations include things like:
2957 ATYPE a; // NTYPE or narrower
2958 WTYPE aw = (WTYPE) a;
2959 WTYPE res = aw + b;
2961 when only "(NTYPE) res" is significant. In that case it's more efficient
2962 to truncate "b" and do the operation on NTYPE instead:
2964 NTYPE an = (NTYPE) a;
2965 NTYPE bn = (NTYPE) b; // truncation
2966 NTYPE resn = an + bn;
2967 WTYPE res = (WTYPE) resn;
2969 All users of "res" should then use "resn" instead, making the final
2970 statement dead (not marked as relevant). The final statement is still
2971 needed to maintain the type correctness of the IR.
2973 vect_determine_precisions has already determined the minimum
2974 precision of the operation and the minimum precision required
2975 by users of the result. */
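/* As an illustrative instantiation of the first example above: with ATYPE
   and BTYPE both unsigned char, WTYPE int, and only the low 16 bits of res
   needed, the addition can be carried out in a 16-bit NTYPE:

     unsigned short an = (unsigned short) a;
     unsigned short bn = (unsigned short) b;
     unsigned short resn = an + bn;
     int res = (int) resn;

   The exact NTYPE is derived from operation_precision and
   min_output_precision rather than hard-coded.  */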
2977 static gimple *
2978 vect_recog_over_widening_pattern (vec_info *vinfo,
2979 stmt_vec_info last_stmt_info, tree *type_out)
2981 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2982 if (!last_stmt)
2983 return NULL;
2985 /* See whether we have found that this operation can be done on a
2986 narrower type without changing its semantics. */
2987 unsigned int new_precision = last_stmt_info->operation_precision;
2988 if (!new_precision)
2989 return NULL;
2991 tree lhs = gimple_assign_lhs (last_stmt);
2992 tree type = TREE_TYPE (lhs);
2993 tree_code code = gimple_assign_rhs_code (last_stmt);
2995 /* Punt for reductions where we don't handle the type conversions. */
2996 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
2997 return NULL;
2999 /* Keep the first operand of a COND_EXPR as-is: only the other two
3000 operands are interesting. */
3001 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
3003 /* Check the operands. */
3004 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
3005 auto_vec <vect_unpromoted_value, 3> unprom (nops);
3006 unprom.quick_grow_cleared (nops);
3007 unsigned int min_precision = 0;
3008 bool single_use_p = false;
3009 for (unsigned int i = 0; i < nops; ++i)
3011 tree op = gimple_op (last_stmt, first_op + i);
3012 if (TREE_CODE (op) == INTEGER_CST)
3013 unprom[i].set_op (op, vect_constant_def);
3014 else if (TREE_CODE (op) == SSA_NAME)
3016 bool op_single_use_p = true;
3017 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
3018 &op_single_use_p))
3019 return NULL;
3020 /* If:
3022 (1) N bits of the result are needed;
3023 (2) all inputs are widened from M<N bits; and
3024 (3) one operand OP is a single-use SSA name
3026 we can shift the M->N widening from OP to the output
3027 without changing the number or type of extensions involved.
3028 This then reduces the number of copies of STMT_INFO.
3030 If instead of (3) more than one operand is a single-use SSA name,
3031 shifting the extension to the output is even more of a win.
3033 If instead:
3035 (1) N bits of the result are needed;
3036 (2) one operand OP2 is widened from M2<N bits;
3037 (3) another operand OP1 is widened from M1<M2 bits; and
3038 (4) both OP1 and OP2 are single-use
3040 the choice is between:
3042 (a) truncating OP2 to M1, doing the operation on M1,
3043 and then widening the result to N
3045 (b) widening OP1 to M2, doing the operation on M2, and then
3046 widening the result to N
3048 Both shift the M2->N widening of the inputs to the output.
3049 (a) additionally shifts the M1->M2 widening to the output;
3050 it requires fewer copies of STMT_INFO but requires an extra
3051 M2->M1 truncation.
3053 Which is better will depend on the complexity and cost of
3054 STMT_INFO, which is hard to predict at this stage. However,
3055 a clear tie-breaker in favor of (b) is the fact that the
3056 truncation in (a) increases the length of the operation chain.
3058 If instead of (4) only one of OP1 or OP2 is single-use,
3059 (b) is still a win over doing the operation in N bits:
3060 it still shifts the M2->N widening on the single-use operand
3061 to the output and reduces the number of STMT_INFO copies.
3063 If neither operand is single-use then operating on fewer than
3064 N bits might lead to more extensions overall. Whether it does
3065 or not depends on global information about the vectorization
3066 region, and whether that's a good trade-off would again
3067 depend on the complexity and cost of the statements involved,
3068 as well as things like register pressure that are not normally
3069 modelled at this stage. We therefore ignore these cases
3070 and just optimize the clear single-use wins above.
3072 Thus we take the maximum precision of the unpromoted operands
3073 and record whether any operand is single-use. */
3074 if (unprom[i].dt == vect_internal_def)
3076 min_precision = MAX (min_precision,
3077 TYPE_PRECISION (unprom[i].type));
3078 single_use_p |= op_single_use_p;
3081 else
3082 return NULL;
3085 /* Although the operation could be done in operation_precision, we have
3086 to balance that against introducing extra truncations or extensions.
3087 Calculate the minimum precision that can be handled efficiently.
3089 The loop above determined that the operation could be handled
3090 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3091 extension from the inputs to the output without introducing more
3092 instructions, and would reduce the number of instructions required
3093 for STMT_INFO itself.
3095 vect_determine_precisions has also determined that the result only
3096 needs min_output_precision bits. Truncating by a factor of N times
3097 requires a tree of N - 1 instructions, so if TYPE is N times wider
3098 than min_output_precision, doing the operation in TYPE and truncating
3099 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3100 In contrast:
3102 - truncating the input to a unary operation and doing the operation
3103 in the new type requires at most N - 1 + 1 = N instructions per
3104 output vector
3106 - doing the same for a binary operation requires at most
3107 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3109 Both unary and binary operations require fewer instructions than
3110 this if the operands were extended from a suitable truncated form.
3111 Thus there is usually nothing to lose by doing operations in
3112 min_output_precision bits, but there can be something to gain. */
3113 if (!single_use_p)
3114 min_precision = last_stmt_info->min_output_precision;
3115 else
3116 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3118 /* Apply the minimum efficient precision we just calculated. */
3119 if (new_precision < min_precision)
3120 new_precision = min_precision;
3121 new_precision = vect_element_precision (new_precision);
3122 if (new_precision >= TYPE_PRECISION (type))
3123 return NULL;
3125 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3127 *type_out = get_vectype_for_scalar_type (vinfo, type);
3128 if (!*type_out)
3129 return NULL;
3131 /* We've found a viable pattern. Get the new type of the operation. */
3132 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3133 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3135 /* If we're truncating an operation, we need to make sure that we
3136 don't introduce new undefined overflow. The codes tested here are
3137 a subset of those accepted by vect_truncatable_operation_p. */
3138 tree op_type = new_type;
3139 if (TYPE_OVERFLOW_UNDEFINED (new_type)
3140 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3141 op_type = build_nonstandard_integer_type (new_precision, true);
3143 /* We specifically don't check here whether the target supports the
3144 new operation, since it might be something that a later pattern
3145 wants to rewrite anyway. If targets have a minimum element size
3146 for some optabs, we should pattern-match smaller ops to larger ops
3147 where beneficial. */
3148 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3149 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3150 if (!new_vectype || !op_vectype)
3151 return NULL;
3153 if (dump_enabled_p ())
3154 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3155 type, new_type);
3157 /* Calculate the rhs operands for an operation on OP_TYPE. */
3158 tree ops[3] = {};
3159 for (unsigned int i = 1; i < first_op; ++i)
3160 ops[i - 1] = gimple_op (last_stmt, i);
3161 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3162 op_type, &unprom[0], op_vectype);
3164 /* Use the operation to produce a result of type OP_TYPE. */
3165 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3166 gimple *pattern_stmt = gimple_build_assign (new_var, code,
3167 ops[0], ops[1], ops[2]);
3168 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3170 if (dump_enabled_p ())
3171 dump_printf_loc (MSG_NOTE, vect_location,
3172 "created pattern stmt: %G", pattern_stmt);
3174 /* Convert back to the original signedness, if OP_TYPE is different
3175 from NEW_TYPE. */
3176 if (op_type != new_type)
3177 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3178 pattern_stmt, op_vectype);
3180 /* Promote the result to the original type. */
3181 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3182 pattern_stmt, new_vectype);
3184 return pattern_stmt;
3187 /* Recognize the following patterns:
3189 ATYPE a; // narrower than TYPE
3190 BTYPE b; // narrower than TYPE
3192 1) Multiply high with scaling
3193 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3194 Here, c is bitsize (TYPE) / 2 - 1.
3196 2) ... or also with rounding
3197 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
3198 Here, d is bitsize (TYPE) / 2 - 2.
3200 3) Normal multiply high
3201 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3202 Here, e is bitsize (TYPE) / 2.
3204 where only the bottom half of res is used. */
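/* Illustrative instance of 3): with a and b of type short,

     int res = ((int) a * (int) b) >> 16;

   where only the low 16 bits of res are used becomes, assuming the target
   provides IFN_MULH for the 16-bit element type,

     short tmp = .MULH (a, b);
     int res = (int) tmp;  */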
3206 static gimple *
3207 vect_recog_mulhs_pattern (vec_info *vinfo,
3208 stmt_vec_info last_stmt_info, tree *type_out)
3210 /* Check for a right shift. */
3211 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3212 if (!last_stmt
3213 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3214 return NULL;
3216 /* Check that the shift result is wider than the users of the
3217 result need (i.e. that narrowing would be a natural choice). */
3218 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3219 unsigned int target_precision
3220 = vect_element_precision (last_stmt_info->min_output_precision);
3221 if (!INTEGRAL_TYPE_P (lhs_type)
3222 || target_precision >= TYPE_PRECISION (lhs_type))
3223 return NULL;
3225 /* Look through any change in sign on the outer shift input. */
3226 vect_unpromoted_value unprom_rshift_input;
3227 tree rshift_input = vect_look_through_possible_promotion
3228 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3229 if (!rshift_input
3230 || TYPE_PRECISION (TREE_TYPE (rshift_input))
3231 != TYPE_PRECISION (lhs_type))
3232 return NULL;
3234 /* Get the definition of the shift input. */
3235 stmt_vec_info rshift_input_stmt_info
3236 = vect_get_internal_def (vinfo, rshift_input);
3237 if (!rshift_input_stmt_info)
3238 return NULL;
3239 gassign *rshift_input_stmt
3240 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3241 if (!rshift_input_stmt)
3242 return NULL;
3244 stmt_vec_info mulh_stmt_info;
3245 tree scale_term;
3246 bool rounding_p = false;
3248 /* Check for the presence of the rounding term. */
3249 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3251 /* Check that the outer shift was by 1. */
3252 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3253 return NULL;
3255 /* Check that the second operand of the PLUS_EXPR is 1. */
3256 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3257 return NULL;
3259 /* Look through any change in sign on the addition input. */
3260 vect_unpromoted_value unprom_plus_input;
3261 tree plus_input = vect_look_through_possible_promotion
3262 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3263 if (!plus_input
3264 || TYPE_PRECISION (TREE_TYPE (plus_input))
3265 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3266 return NULL;
3268 /* Get the definition of the multiply-high-scale part. */
3269 stmt_vec_info plus_input_stmt_info
3270 = vect_get_internal_def (vinfo, plus_input);
3271 if (!plus_input_stmt_info)
3272 return NULL;
3273 gassign *plus_input_stmt
3274 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3275 if (!plus_input_stmt
3276 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3277 return NULL;
3279 /* Look through any change in sign on the scaling input. */
3280 vect_unpromoted_value unprom_scale_input;
3281 tree scale_input = vect_look_through_possible_promotion
3282 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3283 if (!scale_input
3284 || TYPE_PRECISION (TREE_TYPE (scale_input))
3285 != TYPE_PRECISION (TREE_TYPE (plus_input)))
3286 return NULL;
3288 /* Get the definition of the multiply-high part. */
3289 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3290 if (!mulh_stmt_info)
3291 return NULL;
3293 /* Get the scaling term. */
3294 scale_term = gimple_assign_rhs2 (plus_input_stmt);
3295 rounding_p = true;
3297 else
3299 mulh_stmt_info = rshift_input_stmt_info;
3300 scale_term = gimple_assign_rhs2 (last_stmt);
3303 /* Check that the scaling factor is constant. */
3304 if (TREE_CODE (scale_term) != INTEGER_CST)
3305 return NULL;
3307 /* Check whether the scaling input term can be seen as two widened
3308 inputs multiplied together. */
3309 vect_unpromoted_value unprom_mult[2];
3310 tree new_type;
3311 unsigned int nops
3312 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3313 false, 2, unprom_mult, &new_type);
3314 if (nops != 2)
3315 return NULL;
3317 /* Adjust output precision. */
3318 if (TYPE_PRECISION (new_type) < target_precision)
3319 new_type = build_nonstandard_integer_type
3320 (target_precision, TYPE_UNSIGNED (new_type));
3322 unsigned mult_precision = TYPE_PRECISION (new_type);
3323 internal_fn ifn;
3324 /* Check that the scaling factor is expected. Instead of
3325 target_precision, we should use the one that we actually
3326 use for the internal function. */
3327 if (rounding_p)
3329 /* Check pattern 2). */
3330 if (wi::to_widest (scale_term) + mult_precision + 2
3331 != TYPE_PRECISION (lhs_type))
3332 return NULL;
3334 ifn = IFN_MULHRS;
3336 else
3338 /* Check for pattern 1). */
3339 if (wi::to_widest (scale_term) + mult_precision + 1
3340 == TYPE_PRECISION (lhs_type))
3341 ifn = IFN_MULHS;
3342 /* Check for pattern 3). */
3343 else if (wi::to_widest (scale_term) + mult_precision
3344 == TYPE_PRECISION (lhs_type))
3345 ifn = IFN_MULH;
3346 else
3347 return NULL;
3350 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3352 /* Check for target support. */
3353 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3354 if (!new_vectype
3355 || !direct_internal_fn_supported_p
3356 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3357 return NULL;
3359 /* The IR requires a valid vector type for the cast result, even though
3360 it's likely to be discarded. */
3361 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3362 if (!*type_out)
3363 return NULL;
3365 /* Generate the IFN_MULHRS call. */
3366 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3367 tree new_ops[2];
3368 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3369 unprom_mult, new_vectype);
3370 gcall *mulhrs_stmt
3371 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3372 gimple_call_set_lhs (mulhrs_stmt, new_var);
3373 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3375 if (dump_enabled_p ())
3376 dump_printf_loc (MSG_NOTE, vect_location,
3377 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3379 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3380 mulhrs_stmt, new_vectype);
3383 /* Recognize the patterns:
3385 ATYPE a; // narrower than TYPE
3386 BTYPE b; // narrower than TYPE
3387 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3388 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3390 where only the bottom half of avg is used. Try to transform them into:
3392 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3393 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3395 followed by:
3397 TYPE avg = (TYPE) avg';
3399 where NTYPE is no wider than half of TYPE. Since only the bottom half
3400 of avg is used, all or part of the cast of avg' should become redundant.
3402 If there is no target support available, generate code to distribute rshift
3403 over plus and add a carry. */
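/* Illustrative instance: with a and b of type unsigned char,

     int avg = ((int) a + (int) b + 1) >> 1;

   where only the low 8 bits of avg are used becomes, assuming the target
   provides IFN_AVG_CEIL for the 8-bit element type,

     unsigned char tmp = .AVG_CEIL (a, b);
     int avg = (int) tmp;

   otherwise the fallback code distributes the right shift over the
   addition and adds the carry bit back.  */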
3405 static gimple *
3406 vect_recog_average_pattern (vec_info *vinfo,
3407 stmt_vec_info last_stmt_info, tree *type_out)
3409 /* Check for a shift right by one bit. */
3410 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3411 if (!last_stmt
3412 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3413 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3414 return NULL;
3416 /* Check that the shift result is wider than the users of the
3417 result need (i.e. that narrowing would be a natural choice). */
3418 tree lhs = gimple_assign_lhs (last_stmt);
3419 tree type = TREE_TYPE (lhs);
3420 unsigned int target_precision
3421 = vect_element_precision (last_stmt_info->min_output_precision);
3422 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3423 return NULL;
3425 /* Look through any change in sign on the shift input. */
3426 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3427 vect_unpromoted_value unprom_plus;
3428 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3429 &unprom_plus);
3430 if (!rshift_rhs
3431 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3432 return NULL;
3434 /* Get the definition of the shift input. */
3435 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3436 if (!plus_stmt_info)
3437 return NULL;
3439 /* Check whether the shift input can be seen as a tree of additions on
3440 2 or 3 widened inputs.
3442 Note that the pattern should be a win even if the result of one or
3443 more additions is reused elsewhere: if the pattern matches, we'd be
3444 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3445 internal_fn ifn = IFN_AVG_FLOOR;
3446 vect_unpromoted_value unprom[3];
3447 tree new_type;
3448 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3449 IFN_VEC_WIDEN_PLUS, false, 3,
3450 unprom, &new_type);
3451 if (nops == 0)
3452 return NULL;
3453 if (nops == 3)
3455 /* Check that one operand is 1. */
3456 unsigned int i;
3457 for (i = 0; i < 3; ++i)
3458 if (integer_onep (unprom[i].op))
3459 break;
3460 if (i == 3)
3461 return NULL;
3462 /* Throw away the 1 operand and keep the other two. */
3463 if (i < 2)
3464 unprom[i] = unprom[2];
3465 ifn = IFN_AVG_CEIL;
3468 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3470 /* We know that:
3472 (a) the operation can be viewed as:
3474 TYPE widened0 = (TYPE) UNPROM[0];
3475 TYPE widened1 = (TYPE) UNPROM[1];
3476 TYPE tmp1 = widened0 + widened1 {+ 1};
3477 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3479 (b) the first two statements are equivalent to:
3481 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3482 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3484 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3485 where sensible;
3487 (d) all the operations can be performed correctly at twice the width of
3488 NEW_TYPE, due to the nature of the average operation; and
3490 (e) users of the result of the right shift need only TARGET_PRECISION
3491 bits, where TARGET_PRECISION is no more than half of TYPE's
3492 precision.
3494 Under these circumstances, the only situation in which NEW_TYPE
3495 could be narrower than TARGET_PRECISION is if widened0, widened1
3496 and an addition result are all used more than once. Thus we can
3497 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3498 as "free", whereas widening the result of the average instruction
3499 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3500 therefore better not to go narrower than TARGET_PRECISION. */
3501 if (TYPE_PRECISION (new_type) < target_precision)
3502 new_type = build_nonstandard_integer_type (target_precision,
3503 TYPE_UNSIGNED (new_type));
3505 /* Check for target support. */
3506 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3507 if (!new_vectype)
3508 return NULL;
3510 bool fallback_p = false;
3512 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3514 else if (TYPE_UNSIGNED (new_type)
3515 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3516 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3517 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3518 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3519 fallback_p = true;
3520 else
3521 return NULL;
3523 /* The IR requires a valid vector type for the cast result, even though
3524 it's likely to be discarded. */
3525 *type_out = get_vectype_for_scalar_type (vinfo, type);
3526 if (!*type_out)
3527 return NULL;
3529 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3530 tree new_ops[2];
3531 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3532 unprom, new_vectype);
3534 if (fallback_p)
3536 /* As a fallback, generate code for following sequence:
3538 shifted_op0 = new_ops[0] >> 1;
3539 shifted_op1 = new_ops[1] >> 1;
3540 sum_of_shifted = shifted_op0 + shifted_op1;
3541 unmasked_carry = new_ops[0] and/or new_ops[1];
3542 carry = unmasked_carry & 1;
3543 new_var = sum_of_shifted + carry;
3546 tree one_cst = build_one_cst (new_type);
3547 gassign *g;
3549 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3550 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3551 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3553 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3554 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3555 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3557 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3558 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3559 shifted_op0, shifted_op1);
3560 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3562 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3563 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3564 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3565 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3567 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3568 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3569 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3571 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3572 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3575 /* Generate the IFN_AVG* call. */
3576 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3577 new_ops[1]);
3578 gimple_call_set_lhs (average_stmt, new_var);
3579 gimple_set_location (average_stmt, gimple_location (last_stmt));
3581 if (dump_enabled_p ())
3582 dump_printf_loc (MSG_NOTE, vect_location,
3583 "created pattern stmt: %G", (gimple *) average_stmt);
3585 return vect_convert_output (vinfo, last_stmt_info,
3586 type, average_stmt, new_vectype);
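/* Illustrative example (not part of the original source) of the fallback
   sequence above, assuming 8-bit unsigned elements and IFN_AVG_CEIL with
   a == 5 and b == 6: the direct form is (5 + 6 + 1) >> 1 == 6, while the
   fallback computes (5 >> 1) + (6 >> 1) == 5 plus the carry
   (5 | 6) & 1 == 1, giving 6 as well.  For IFN_AVG_FLOOR the carry uses
   BIT_AND_EXPR instead: (5 & 6) & 1 == 0, so the result is
   5 == (5 + 6) >> 1.  */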
3589 /* Recognize cases in which the input to a cast is wider than its
3590 output, and the input is fed by a widening operation. Fold this
3591 by removing the unnecessary intermediate widening. E.g.:
3593 unsigned char a;
3594 unsigned int b = (unsigned int) a;
3595 unsigned short c = (unsigned short) b;
3597 would become:
3599 unsigned short c = (unsigned short) a;
3601 Although this is rare in input IR, it is an expected side-effect
3602 of the over-widening pattern above.
3604 This is beneficial also for integer-to-float conversions, if the
3605 widened integer has more bits than the float, and if the unwidened
3606 input doesn't. */
3608 static gimple *
3609 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3610 stmt_vec_info last_stmt_info, tree *type_out)
3612 /* Check for a cast, including an integer-to-float conversion. */
3613 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3614 if (!last_stmt)
3615 return NULL;
3616 tree_code code = gimple_assign_rhs_code (last_stmt);
3617 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3618 return NULL;
3620 /* Make sure that the rhs is a scalar with a natural bitsize. */
3621 tree lhs = gimple_assign_lhs (last_stmt);
3622 if (!lhs)
3623 return NULL;
3624 tree lhs_type = TREE_TYPE (lhs);
3625 scalar_mode lhs_mode;
3626 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3627 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3628 return NULL;
3630 /* Check for a narrowing operation (from a vector point of view). */
3631 tree rhs = gimple_assign_rhs1 (last_stmt);
3632 tree rhs_type = TREE_TYPE (rhs);
3633 if (!INTEGRAL_TYPE_P (rhs_type)
3634 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3635 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3636 return NULL;
3638 /* Try to find an unpromoted input. */
3639 vect_unpromoted_value unprom;
3640 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3641 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3642 return NULL;
3644 /* If the bits above RHS_TYPE matter, make sure that they're the
3645 same when extending from UNPROM as they are when extending from RHS. */
3646 if (!INTEGRAL_TYPE_P (lhs_type)
3647 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3648 return NULL;
3650 /* We can get the same result by casting UNPROM directly, to avoid
3651 the unnecessary widening and narrowing. */
3652 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3654 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3655 if (!*type_out)
3656 return NULL;
3658 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3659 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3660 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3662 return pattern_stmt;
3665 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3666 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3668 static gimple *
3669 vect_recog_widen_shift_pattern (vec_info *vinfo,
3670 stmt_vec_info last_stmt_info, tree *type_out)
3672 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3673 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3674 "vect_recog_widen_shift_pattern");
3677 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
3679 type a_t, b_t, c_t;
3681 S0 a_t = b_t r<< c_t;
3683 Input/Output:
3685 * STMT_VINFO: The stmt from which the pattern search begins,
3686 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3687 with a sequence:
3689 S1 d_t = -c_t;
3690 S2 e_t = d_t & (B - 1);
3691 S3 f_t = b_t << c_t;
3692 S4 g_t = b_t >> e_t;
3693 S0 a_t = f_t | g_t;
3695 where B is element bitsize of type.
3697 Output:
3699 * TYPE_OUT: The type of the output of this pattern.
3701 * Return value: A new stmt that will be used to replace the rotate
3702 S0 stmt. */
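/* Illustrative example (not part of the original source), assuming an
   8-bit element type so that B == 8 and c_t == 3:

     S1  d_t = -3;
     S2  e_t = (-3) & 7;        // == 5
     S3  f_t = b_t << 3;
     S4  g_t = b_t >> 5;
     S0  a_t = f_t | g_t;       // == b_t rotated left by 3  */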
3704 static gimple *
3705 vect_recog_rotate_pattern (vec_info *vinfo,
3706 stmt_vec_info stmt_vinfo, tree *type_out)
3708 gimple *last_stmt = stmt_vinfo->stmt;
3709 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3710 gimple *pattern_stmt, *def_stmt;
3711 enum tree_code rhs_code;
3712 enum vect_def_type dt;
3713 optab optab1, optab2;
3714 edge ext_def = NULL;
3715 bool bswap16_p = false;
3717 if (is_gimple_assign (last_stmt))
3719 rhs_code = gimple_assign_rhs_code (last_stmt);
3720 switch (rhs_code)
3722 case LROTATE_EXPR:
3723 case RROTATE_EXPR:
3724 break;
3725 default:
3726 return NULL;
3729 lhs = gimple_assign_lhs (last_stmt);
3730 oprnd0 = gimple_assign_rhs1 (last_stmt);
3731 type = TREE_TYPE (oprnd0);
3732 oprnd1 = gimple_assign_rhs2 (last_stmt);
3734 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3736 /* __builtin_bswap16 (x) is another form of x r>> 8.
3737 The vectorizer has bswap support, but only if the argument isn't
3738 promoted. */
3739 lhs = gimple_call_lhs (last_stmt);
3740 oprnd0 = gimple_call_arg (last_stmt, 0);
3741 type = TREE_TYPE (oprnd0);
3742 if (!lhs
3743 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3744 || TYPE_PRECISION (type) <= 16
3745 || TREE_CODE (oprnd0) != SSA_NAME
3746 || BITS_PER_UNIT != 8)
3747 return NULL;
3749 stmt_vec_info def_stmt_info;
3750 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3751 return NULL;
3753 if (dt != vect_internal_def)
3754 return NULL;
3756 if (gimple_assign_cast_p (def_stmt))
3758 def = gimple_assign_rhs1 (def_stmt);
3759 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3760 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3761 oprnd0 = def;
3764 type = TREE_TYPE (lhs);
3765 vectype = get_vectype_for_scalar_type (vinfo, type);
3766 if (vectype == NULL_TREE)
3767 return NULL;
3769 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3771 /* The encoding uses one stepped pattern for each byte in the
3772 16-bit word. */
3773 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3774 for (unsigned i = 0; i < 3; ++i)
3775 for (unsigned j = 0; j < 2; ++j)
3776 elts.quick_push ((i + 1) * 2 - j - 1);
3778 vec_perm_indices indices (elts, 1,
3779 TYPE_VECTOR_SUBPARTS (char_vectype));
3780 machine_mode vmode = TYPE_MODE (char_vectype);
3781 if (can_vec_perm_const_p (vmode, vmode, indices))
3783 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3784 undo the argument promotion. */
3785 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3787 def = vect_recog_temp_ssa_var (type, NULL);
3788 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3789 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3790 oprnd0 = def;
3793 /* Pattern detected. */
3794 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3796 *type_out = vectype;
3798 /* Pattern supported. Create a stmt to be used to replace the
3799 pattern, with the unpromoted argument. */
3800 var = vect_recog_temp_ssa_var (type, NULL);
3801 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3802 1, oprnd0);
3803 gimple_call_set_lhs (pattern_stmt, var);
3804 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3805 gimple_call_fntype (last_stmt));
3806 return pattern_stmt;
3810 oprnd1 = build_int_cst (integer_type_node, 8);
3811 rhs_code = LROTATE_EXPR;
3812 bswap16_p = true;
3814 else
3815 return NULL;
3817 if (TREE_CODE (oprnd0) != SSA_NAME
3818 || !INTEGRAL_TYPE_P (type)
3819 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3820 return NULL;
3822 stmt_vec_info def_stmt_info;
3823 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3824 return NULL;
3826 if (dt != vect_internal_def
3827 && dt != vect_constant_def
3828 && dt != vect_external_def)
3829 return NULL;
3831 vectype = get_vectype_for_scalar_type (vinfo, type);
3832 if (vectype == NULL_TREE)
3833 return NULL;
3835 /* If vector/vector or vector/scalar rotate is supported by the target,
3836 don't do anything here. */
3837 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3838 if (optab1
3839 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3841 use_rotate:
3842 if (bswap16_p)
3844 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3846 def = vect_recog_temp_ssa_var (type, NULL);
3847 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3848 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3849 oprnd0 = def;
3852 /* Pattern detected. */
3853 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3855 *type_out = vectype;
3857 /* Pattern supported. Create a stmt to be used to replace the
3858 pattern. */
3859 var = vect_recog_temp_ssa_var (type, NULL);
3860 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3861 oprnd1);
3862 return pattern_stmt;
3864 return NULL;
3867 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3869 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3870 if (optab2
3871 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3872 goto use_rotate;
3875 tree utype = unsigned_type_for (type);
3876 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3877 if (!uvectype)
3878 return NULL;
3880 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3881 don't do anything here either. */
3882 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3883 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3884 if (!optab1
3885 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3886 || !optab2
3887 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3889 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3890 return NULL;
3891 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3892 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3893 if (!optab1
3894 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3895 || !optab2
3896 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3897 return NULL;
3900 *type_out = vectype;
3902 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3904 def = vect_recog_temp_ssa_var (utype, NULL);
3905 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3906 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3907 oprnd0 = def;
3910 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3911 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3913 def = NULL_TREE;
3914 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3915 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3916 def = oprnd1;
3917 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3919 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3920 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3921 && TYPE_PRECISION (TREE_TYPE (rhs1))
3922 == TYPE_PRECISION (type))
3923 def = rhs1;
3926 if (def == NULL_TREE)
3928 def = vect_recog_temp_ssa_var (utype, NULL);
3929 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3930 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3932 stype = TREE_TYPE (def);
3934 if (TREE_CODE (def) == INTEGER_CST)
3936 if (!tree_fits_uhwi_p (def)
3937 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3938 || integer_zerop (def))
3939 return NULL;
3940 def2 = build_int_cst (stype,
3941 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3943 else
3945 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3947 if (vecstype == NULL_TREE)
3948 return NULL;
3949 def2 = vect_recog_temp_ssa_var (stype, NULL);
3950 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3951 if (ext_def)
3953 basic_block new_bb
3954 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3955 gcc_assert (!new_bb);
3957 else
3958 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3960 def2 = vect_recog_temp_ssa_var (stype, NULL);
3961 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3962 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3963 gimple_assign_lhs (def_stmt), mask);
3964 if (ext_def)
3966 basic_block new_bb
3967 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3968 gcc_assert (!new_bb);
3970 else
3971 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3974 var1 = vect_recog_temp_ssa_var (utype, NULL);
3975 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3976 ? LSHIFT_EXPR : RSHIFT_EXPR,
3977 oprnd0, def);
3978 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3980 var2 = vect_recog_temp_ssa_var (utype, NULL);
3981 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3982 ? RSHIFT_EXPR : LSHIFT_EXPR,
3983 oprnd0, def2);
3984 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3986 /* Pattern detected. */
3987 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3989 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3990 var = vect_recog_temp_ssa_var (utype, NULL);
3991 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3993 if (!useless_type_conversion_p (type, utype))
3995 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
3996 tree result = vect_recog_temp_ssa_var (type, NULL);
3997 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
3999 return pattern_stmt;
4002 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
4003 vectorized:
4005 type a_t;
4006 TYPE b_T, res_T;
4008 S1 a_t = ;
4009 S2 b_T = ;
4010 S3 res_T = b_T op a_t;
4012 where type 'TYPE' is a type with different size than 'type',
4013 and op is <<, >> or rotate.
4015 Also detect cases:
4017 type a_t;
4018 TYPE b_T, c_T, res_T;
4020 S0 c_T = ;
4021 S1 a_t = (type) c_T;
4022 S2 b_T = ;
4023 S3 res_T = b_T op a_t;
4025 Input/Output:
4027 * STMT_VINFO: The stmt from which the pattern search begins,
4028 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4029 with a shift/rotate which has same type on both operands, in the
4030 second case just b_T op c_T, in the first case with added cast
4031 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4033 Output:
4035 * TYPE_OUT: The type of the output of this pattern.
4037 * Return value: A new stmt that will be used to replace the shift/rotate
4038 S3 stmt. */
4040 static gimple *
4041 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4042 stmt_vec_info stmt_vinfo,
4043 tree *type_out)
4045 gimple *last_stmt = stmt_vinfo->stmt;
4046 tree oprnd0, oprnd1, lhs, var;
4047 gimple *pattern_stmt;
4048 enum tree_code rhs_code;
4050 if (!is_gimple_assign (last_stmt))
4051 return NULL;
4053 rhs_code = gimple_assign_rhs_code (last_stmt);
4054 switch (rhs_code)
4056 case LSHIFT_EXPR:
4057 case RSHIFT_EXPR:
4058 case LROTATE_EXPR:
4059 case RROTATE_EXPR:
4060 break;
4061 default:
4062 return NULL;
4065 lhs = gimple_assign_lhs (last_stmt);
4066 oprnd0 = gimple_assign_rhs1 (last_stmt);
4067 oprnd1 = gimple_assign_rhs2 (last_stmt);
4068 if (TREE_CODE (oprnd0) != SSA_NAME
4069 || TREE_CODE (oprnd1) != SSA_NAME
4070 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4071 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4072 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4073 || TYPE_PRECISION (TREE_TYPE (lhs))
4074 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4075 return NULL;
4077 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
4078 if (!def_vinfo)
4079 return NULL;
4081 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4082 if (*type_out == NULL_TREE)
4083 return NULL;
4085 tree def = NULL_TREE;
4086 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4087 if (def_stmt && gimple_assign_cast_p (def_stmt))
4089 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4090 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4091 && TYPE_PRECISION (TREE_TYPE (rhs1))
4092 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4094 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4095 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4096 def = rhs1;
4097 else
4099 tree mask
4100 = build_low_bits_mask (TREE_TYPE (rhs1),
4101 TYPE_PRECISION (TREE_TYPE (oprnd1)));
4102 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4103 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4104 tree vecstype = get_vectype_for_scalar_type (vinfo,
4105 TREE_TYPE (rhs1));
4106 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4111 if (def == NULL_TREE)
4113 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4114 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4115 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4118 /* Pattern detected. */
4119 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4121 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4122 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4123 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4125 return pattern_stmt;
4128 /* Return true iff the target has a vector optab implementing the operation
4129 CODE on type VECTYPE. */
4131 static bool
4132 target_has_vecop_for_code (tree_code code, tree vectype)
4134 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4135 return voptab
4136 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
4139 /* Verify that the target has optabs of VECTYPE to perform all the steps
4140 needed by the multiplication-by-immediate synthesis algorithm described by
4141 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4142 present. Return true iff the target supports all the steps. */
4144 static bool
4145 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4146 tree vectype, bool synth_shift_p)
4148 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4149 return false;
4151 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
4152 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4154 if (var == negate_variant
4155 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4156 return false;
4158 /* If we must synthesize shifts with additions make sure that vector
4159 addition is available. */
4160 if ((var == add_variant || synth_shift_p) && !supports_vplus)
4161 return false;
4163 for (int i = 1; i < alg->ops; i++)
4165 switch (alg->op[i])
4167 case alg_shift:
4168 break;
4169 case alg_add_t_m2:
4170 case alg_add_t2_m:
4171 case alg_add_factor:
4172 if (!supports_vplus)
4173 return false;
4174 break;
4175 case alg_sub_t_m2:
4176 case alg_sub_t2_m:
4177 case alg_sub_factor:
4178 if (!supports_vminus)
4179 return false;
4180 break;
4181 case alg_unknown:
4182 case alg_m:
4183 case alg_zero:
4184 case alg_impossible:
4185 return false;
4186 default:
4187 gcc_unreachable ();
4191 return true;
4194 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
4195 putting the final result in DEST. Append all statements but the last to
4196 STMT_INFO's pattern definition sequence. Return the last statement. */
4198 static gimple *
4199 synth_lshift_by_additions (vec_info *vinfo,
4200 tree dest, tree op, HOST_WIDE_INT amnt,
4201 stmt_vec_info stmt_info)
4203 HOST_WIDE_INT i;
4204 tree itype = TREE_TYPE (op);
4205 tree prev_res = op;
4206 gcc_assert (amnt >= 0);
4207 for (i = 0; i < amnt; i++)
4209 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
4210 : dest;
4211 gimple *stmt
4212 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4213 prev_res = tmp_var;
4214 if (i < amnt - 1)
4215 append_pattern_def_seq (vinfo, stmt_info, stmt);
4216 else
4217 return stmt;
4219 gcc_unreachable ();
4220 return NULL;
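/* Illustrative example (not part of the original source): for AMNT == 3
   the loop above emits three self-additions,

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   so DEST ends up holding op << 3, i.e. op * 8.  */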
4223 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4224 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4225 the process if necessary. Append the resulting assignment statements
4226 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4227 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4228 left shifts using additions. */
4230 static tree
4231 apply_binop_and_append_stmt (vec_info *vinfo,
4232 tree_code code, tree op1, tree op2,
4233 stmt_vec_info stmt_vinfo, bool synth_shift_p)
4235 if (integer_zerop (op2)
4236 && (code == LSHIFT_EXPR
4237 || code == PLUS_EXPR))
4239 gcc_assert (TREE_CODE (op1) == SSA_NAME);
4240 return op1;
4243 gimple *stmt;
4244 tree itype = TREE_TYPE (op1);
4245 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
4247 if (code == LSHIFT_EXPR
4248 && synth_shift_p)
4250 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
4251 TREE_INT_CST_LOW (op2), stmt_vinfo);
4252 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4253 return tmp_var;
4256 stmt = gimple_build_assign (tmp_var, code, op1, op2);
4257 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4258 return tmp_var;
4261 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4262 and simple arithmetic operations to be vectorized. Record the statements
4263 produced in STMT_VINFO and return the last statement in the sequence or
4264 NULL if it's not possible to synthesize such a multiplication.
4265 This function mirrors the behavior of expand_mult_const in expmed.cc but
4266 works on tree-ssa form. */
4268 static gimple *
4269 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4270 stmt_vec_info stmt_vinfo)
4272 tree itype = TREE_TYPE (op);
4273 machine_mode mode = TYPE_MODE (itype);
4274 struct algorithm alg;
4275 mult_variant variant;
4276 if (!tree_fits_shwi_p (val))
4277 return NULL;
4279 /* Multiplication synthesis by shifts, adds and subs can introduce
4280 signed overflow where the original operation didn't. Perform the
4281 operations on an unsigned type and cast back to avoid this.
4282 In the future we may want to relax this for synthesis algorithms
4283 that we can prove do not cause unexpected overflow. */
4284 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4286 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4287 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4288 if (!vectype)
4289 return NULL;
4291 /* Targets that don't support vector shifts but support vector additions
4292 can synthesize shifts that way. */
4293 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4295 HOST_WIDE_INT hwval = tree_to_shwi (val);
4296 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4297 The vectorizer's benefit analysis will decide whether it's beneficial
4298 to do this. */
4299 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4300 ? TYPE_MODE (vectype) : mode,
4301 hwval, &alg, &variant, MAX_COST);
4302 if (!possible)
4303 return NULL;
4305 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
4306 return NULL;
4308 tree accumulator;
4310 /* Clear out the sequence of statements so we can populate it below. */
4311 gimple *stmt = NULL;
4313 if (cast_to_unsigned_p)
4315 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4316 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4317 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4318 op = tmp_op;
4321 if (alg.op[0] == alg_zero)
4322 accumulator = build_int_cst (multtype, 0);
4323 else
4324 accumulator = op;
4326 bool needs_fixup = (variant == negate_variant)
4327 || (variant == add_variant);
4329 for (int i = 1; i < alg.ops; i++)
4331 tree shft_log = build_int_cst (multtype, alg.log[i]);
4332 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4333 tree tmp_var = NULL_TREE;
4335 switch (alg.op[i])
4337 case alg_shift:
4338 if (synth_shift_p)
4339 stmt
4340 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4341 alg.log[i], stmt_vinfo);
4342 else
4343 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4344 shft_log);
4345 break;
4346 case alg_add_t_m2:
4347 tmp_var
4348 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4349 stmt_vinfo, synth_shift_p);
4350 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4351 tmp_var);
4352 break;
4353 case alg_sub_t_m2:
4354 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4355 shft_log, stmt_vinfo,
4356 synth_shift_p);
4357 /* In some algorithms the first step involves zeroing the
4358 accumulator. If subtracting from such an accumulator
4359 just emit the negation directly. */
4360 if (integer_zerop (accumulator))
4361 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4362 else
4363 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4364 tmp_var);
4365 break;
4366 case alg_add_t2_m:
4367 tmp_var
4368 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4369 shft_log, stmt_vinfo, synth_shift_p);
4370 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4371 break;
4372 case alg_sub_t2_m:
4373 tmp_var
4374 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4375 shft_log, stmt_vinfo, synth_shift_p);
4376 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4377 break;
4378 case alg_add_factor:
4379 tmp_var
4380 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4381 shft_log, stmt_vinfo, synth_shift_p);
4382 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4383 tmp_var);
4384 break;
4385 case alg_sub_factor:
4386 tmp_var
4387 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4388 shft_log, stmt_vinfo, synth_shift_p);
4389 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4390 accumulator);
4391 break;
4392 default:
4393 gcc_unreachable ();
4395 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4396 but rather return it directly. */
4398 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4399 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4400 accumulator = accum_tmp;
4402 if (variant == negate_variant)
4404 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4405 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4406 accumulator = accum_tmp;
4407 if (cast_to_unsigned_p)
4408 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4410 else if (variant == add_variant)
4412 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4413 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4414 accumulator = accum_tmp;
4415 if (cast_to_unsigned_p)
4416 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4418 /* Move back to a signed type if needed. */
4419 if (cast_to_unsigned_p)
4421 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4422 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4425 return stmt;
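/* Illustrative example (not part of the original source) of the synthesis
   above: a multiplication by 9 might be chosen by choose_mult_variant as a
   single alg_add_t_m2 step with log 3, i.e.

     tmp = op << 3;         // or three self-additions if SYNTH_SHIFT_P
     result = op + tmp;     // op * 9

   The exact sequence depends on the constant and on target costs.  */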
4428 /* Detect multiplication by a constant and convert it into a sequence of
4429 shifts, additions, subtractions and negations. We reuse the
4430 choose_mult_variant algorithms from expmed.cc.
4432 Input/Output:
4434 STMT_VINFO: The stmt from which the pattern search begins,
4435 i.e. the mult stmt.
4437 Output:
4439 * TYPE_OUT: The type of the output of this pattern.
4441 * Return value: A new stmt that will be used to replace
4442 the multiplication. */
4444 static gimple *
4445 vect_recog_mult_pattern (vec_info *vinfo,
4446 stmt_vec_info stmt_vinfo, tree *type_out)
4448 gimple *last_stmt = stmt_vinfo->stmt;
4449 tree oprnd0, oprnd1, vectype, itype;
4450 gimple *pattern_stmt;
4452 if (!is_gimple_assign (last_stmt))
4453 return NULL;
4455 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4456 return NULL;
4458 oprnd0 = gimple_assign_rhs1 (last_stmt);
4459 oprnd1 = gimple_assign_rhs2 (last_stmt);
4460 itype = TREE_TYPE (oprnd0);
4462 if (TREE_CODE (oprnd0) != SSA_NAME
4463 || TREE_CODE (oprnd1) != INTEGER_CST
4464 || !INTEGRAL_TYPE_P (itype)
4465 || !type_has_mode_precision_p (itype))
4466 return NULL;
4468 vectype = get_vectype_for_scalar_type (vinfo, itype);
4469 if (vectype == NULL_TREE)
4470 return NULL;
4472 /* If the target can handle vectorized multiplication natively,
4473 don't attempt to optimize this. */
4474 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4475 if (mul_optab != unknown_optab)
4477 machine_mode vec_mode = TYPE_MODE (vectype);
4478 int icode = (int) optab_handler (mul_optab, vec_mode);
4479 if (icode != CODE_FOR_nothing)
4480 return NULL;
4483 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4484 oprnd0, oprnd1, stmt_vinfo);
4485 if (!pattern_stmt)
4486 return NULL;
4488 /* Pattern detected. */
4489 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4491 *type_out = vectype;
4493 return pattern_stmt;
4496 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
4497 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
4498 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
4500 extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
4502 static gimple *
4503 vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
4504 internal_fn fn, tree *type_out,
4505 tree lhs, tree op_0, tree op_1)
4507 tree itype = TREE_TYPE (op_0);
4508 tree otype = TREE_TYPE (lhs);
4509 tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4510 tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4512 if (v_itype != NULL_TREE && v_otype != NULL_TREE
4513 && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
4515 gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4516 tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);
4518 gimple_call_set_lhs (call, in_ssa);
4519 gimple_call_set_nothrow (call, /* nothrow_p */ false);
4520 gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info)));
4522 *type_out = v_otype;
4524 if (types_compatible_p (itype, otype))
4525 return call;
4526 else
4528 append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
4529 tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
4531 return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
4535 return NULL;
4539 * Try to detect the saturation add pattern (SAT_ADD), aka the gimple below:
4540 * _7 = _4 + _6;
4541 * _8 = _4 > _7;
4542 * _9 = (long unsigned int) _8;
4543 * _10 = -_9;
4544 * _12 = _7 | _10;
4546 * And then simplified to
4547 * _12 = .SAT_ADD (_4, _6);
4550 static gimple *
4551 vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4552 tree *type_out)
4554 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4556 if (!is_gimple_assign (last_stmt))
4557 return NULL;
4559 tree ops[2];
4560 tree lhs = gimple_assign_lhs (last_stmt);
4562 if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
4563 || gimple_signed_integer_sat_add (lhs, ops, NULL))
4565 if (TREE_CODE (ops[1]) == INTEGER_CST)
4566 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4568 gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
4569 IFN_SAT_ADD, type_out,
4570 lhs, ops[0], ops[1]);
4571 if (stmt)
4573 vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
4574 return stmt;
4578 return NULL;
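/* Illustrative source-level equivalent (not part of the original file) of
   the unsigned SAT_ADD gimple above, assuming operands of the same
   unsigned type:

     sum = a + b;
     res = sum | -(TYPE) (sum < a);   // all-ones mask on overflow

   i.e. res is a + b when no overflow occurs and the type's maximum value
   otherwise.  */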
4582 * Try to transform the truncation for the .SAT_SUB pattern, which mostly
4583 * occurs in the benchmark zip. Aka:
4585 * unsigned int _1;
4586 * unsigned int _2;
4587 * unsigned short int _4;
4588 * _9 = (unsigned short int).SAT_SUB (_1, _2);
4590 * if _1 is known to be in the range of unsigned short int. For example
4591 * there is a def _1 = (unsigned short int)_4. Then we can transform the
4592 * truncation to:
4594 * _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
4595 * _9 = .SAT_SUB (_4, _3);
4597 * Then, we can generate better vectorized code and avoid the unnecessary
4598 * narrowing stmt during vectorization with the below stmt(s).
4600 * _3 = .SAT_TRUNC(_2); // SI => HI
4601 * _9 = .SAT_SUB (_4, _3);
4603 static void
4604 vect_recog_sat_sub_pattern_transform (vec_info *vinfo,
4605 stmt_vec_info stmt_vinfo,
4606 tree lhs, tree *ops)
4608 tree otype = TREE_TYPE (lhs);
4609 tree itype = TREE_TYPE (ops[0]);
4610 unsigned itype_prec = TYPE_PRECISION (itype);
4611 unsigned otype_prec = TYPE_PRECISION (otype);
4613 if (types_compatible_p (otype, itype) || otype_prec >= itype_prec)
4614 return;
4616 tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4617 tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4618 tree_pair v_pair = tree_pair (v_otype, v_itype);
4620 if (v_otype == NULL_TREE || v_itype == NULL_TREE
4621 || !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair,
4622 OPTIMIZE_FOR_BOTH))
4623 return;
4625 /* 1. Find the _4 and update ops[0] as above example. */
4626 vect_unpromoted_value unprom;
4627 tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom);
4629 if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec)
4630 return;
4632 ops[0] = tmp;
4634 /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example. */
4635 tree trunc_lhs_ssa = vect_recog_temp_ssa_var (otype, NULL);
4636 gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[1]);
4638 gimple_call_set_lhs (call, trunc_lhs_ssa);
4639 gimple_call_set_nothrow (call, /* nothrow_p */ false);
4640 append_pattern_def_seq (vinfo, stmt_vinfo, call, v_otype);
4642 ops[1] = trunc_lhs_ssa;
4646 * Try to detect the saturation sub pattern (SAT_SUB), aka the gimple below:
4647 * _7 = _1 >= _2;
4648 * _8 = _1 - _2;
4649 * _10 = (long unsigned int) _7;
4650 * _9 = _8 * _10;
4652 * And then simplified to
4653 * _9 = .SAT_SUB (_1, _2);
4656 static gimple *
4657 vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4658 tree *type_out)
4660 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4662 if (!is_gimple_assign (last_stmt))
4663 return NULL;
4665 tree ops[2];
4666 tree lhs = gimple_assign_lhs (last_stmt);
4668 if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
4670 vect_recog_sat_sub_pattern_transform (vinfo, stmt_vinfo, lhs, ops);
4671 gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
4672 IFN_SAT_SUB, type_out,
4673 lhs, ops[0], ops[1]);
4674 if (stmt)
4676 vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt);
4677 return stmt;
4681 return NULL;
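/* Illustrative source-level equivalent (not part of the original file) of
   the unsigned SAT_SUB gimple above:

     res = (a - b) * (a >= b);

   i.e. res is a - b when a >= b and zero otherwise.  */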
4685 * Try to detect the saturation truncation pattern (SAT_TRUNC), aka the gimple below:
4686 * overflow_5 = x_4(D) > 4294967295;
4687 * _1 = (unsigned int) x_4(D);
4688 * _2 = (unsigned int) overflow_5;
4689 * _3 = -_2;
4690 * _6 = _1 | _3;
4692 * And then simplified to
4693 * _6 = .SAT_TRUNC (x_4(D));
4696 static gimple *
4697 vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4698 tree *type_out)
4700 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4702 if (!is_gimple_assign (last_stmt))
4703 return NULL;
4705 tree ops[1];
4706 tree lhs = gimple_assign_lhs (last_stmt);
4707 tree otype = TREE_TYPE (lhs);
4709 if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
4710 && type_has_mode_precision_p (otype))
4712 tree itype = TREE_TYPE (ops[0]);
4713 tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4714 tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4715 internal_fn fn = IFN_SAT_TRUNC;
4717 if (v_itype != NULL_TREE && v_otype != NULL_TREE
4718 && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
4719 OPTIMIZE_FOR_BOTH))
4721 gcall *call = gimple_build_call_internal (fn, 1, ops[0]);
4722 tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
4724 gimple_call_set_lhs (call, out_ssa);
4725 gimple_call_set_nothrow (call, /* nothrow_p */ false);
4726 gimple_set_location (call, gimple_location (last_stmt));
4728 *type_out = v_otype;
4730 return call;
4734 return NULL;
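/* Illustrative source-level equivalent (not part of the original file) of
   the SAT_TRUNC gimple above, truncating a 64-bit x to 32 bits:

     res = (unsigned int) x | -(unsigned int) (x > 4294967295);

   i.e. res is (unsigned int) x when x fits and 0xffffffff otherwise.  */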
4737 /* Detect a signed division by a constant that wouldn't be
4738 otherwise vectorized:
4740 type a_t, b_t;
4742 S1 a_t = b_t / N;
4744 where type 'type' is an integral type and N is a constant.
4746 Similarly handle modulo by a constant:
4748 S4 a_t = b_t % N;
4750 Input/Output:
4752 * STMT_VINFO: The stmt from which the pattern search begins,
4753 i.e. the division stmt. S1 is replaced, if N is a power
4754 of two constant and type is signed, by:
4755 S3 y_t = b_t < 0 ? N - 1 : 0;
4756 S2 x_t = b_t + y_t;
4757 S1' a_t = x_t >> log2 (N);
4759 S4 is replaced if N is a power of two constant and
4760 type is signed by (where *_T temporaries have unsigned type):
4761 S9 y_T = b_t < 0 ? -1U : 0U;
4762 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4763 S7 z_t = (type) z_T;
4764 S6 w_t = b_t + z_t;
4765 S5 x_t = w_t & (N - 1);
4766 S4' a_t = x_t - z_t;
4768 Output:
4770 * TYPE_OUT: The type of the output of this pattern.
4772 * Return value: A new stmt that will be used to replace the division
4773 S1 or modulo S4 stmt. */
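/* Illustrative example (not part of the original source): for b_t == -7
   and N == 4 the signed-division replacement computes

     S3  y_t = -7 < 0 ? 3 : 0;    // == 3
     S2  x_t = -7 + 3;            // == -4
     S1' a_t = -4 >> 2;           // == -1 == trunc (-7 / 4)

   matching C's truncation towards zero, whereas a plain arithmetic shift
   of -7 by 2 would give -2.  */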
4775 static gimple *
4776 vect_recog_divmod_pattern (vec_info *vinfo,
4777 stmt_vec_info stmt_vinfo, tree *type_out)
4779 gimple *last_stmt = stmt_vinfo->stmt;
4780 tree oprnd0, oprnd1, vectype, itype, cond;
4781 gimple *pattern_stmt, *def_stmt;
4782 enum tree_code rhs_code;
4783 optab optab;
4784 tree q, cst;
4785 int prec;
4787 if (!is_gimple_assign (last_stmt))
4788 return NULL;
4790 rhs_code = gimple_assign_rhs_code (last_stmt);
4791 switch (rhs_code)
4793 case TRUNC_DIV_EXPR:
4794 case EXACT_DIV_EXPR:
4795 case TRUNC_MOD_EXPR:
4796 break;
4797 default:
4798 return NULL;
4801 oprnd0 = gimple_assign_rhs1 (last_stmt);
4802 oprnd1 = gimple_assign_rhs2 (last_stmt);
4803 itype = TREE_TYPE (oprnd0);
4804 if (TREE_CODE (oprnd0) != SSA_NAME
4805 || TREE_CODE (oprnd1) != INTEGER_CST
4806 || TREE_CODE (itype) != INTEGER_TYPE
4807 || !type_has_mode_precision_p (itype))
4808 return NULL;
4810 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4811 vectype = get_vectype_for_scalar_type (vinfo, itype);
4812 if (vectype == NULL_TREE)
4813 return NULL;
4815 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
4817 /* If the target can handle vectorized division or modulo natively,
4818 don't attempt to optimize this, since native division is likely
4819 to give smaller code. */
4820 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4821 if (optab != unknown_optab)
4823 machine_mode vec_mode = TYPE_MODE (vectype);
4824 int icode = (int) optab_handler (optab, vec_mode);
4825 if (icode != CODE_FOR_nothing)
4826 return NULL;
4830 prec = TYPE_PRECISION (itype);
4831 if (integer_pow2p (oprnd1))
4833 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4834 return NULL;
4836 /* Pattern detected. */
4837 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4839 *type_out = vectype;
4841 /* Check if the target supports this internal function. */
4842 internal_fn ifn = IFN_DIV_POW2;
4843 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4845 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4847 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
4848 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4849 gimple_call_set_lhs (div_stmt, var_div);
4851 if (rhs_code == TRUNC_MOD_EXPR)
4853 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
4854 def_stmt
4855 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4856 LSHIFT_EXPR, var_div, shift);
4857 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4858 pattern_stmt
4859 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4860 MINUS_EXPR, oprnd0,
4861 gimple_assign_lhs (def_stmt));
4863 else
4864 pattern_stmt = div_stmt;
4865 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
4867 return pattern_stmt;
4870 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4871 build_int_cst (itype, 0));
4872 if (rhs_code == TRUNC_DIV_EXPR
4873 || rhs_code == EXACT_DIV_EXPR)
4875 tree var = vect_recog_temp_ssa_var (itype, NULL);
4876 tree shift;
4877 def_stmt
4878 = gimple_build_assign (var, COND_EXPR, cond,
4879 fold_build2 (MINUS_EXPR, itype, oprnd1,
4880 build_int_cst (itype, 1)),
4881 build_int_cst (itype, 0));
4882 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4883 var = vect_recog_temp_ssa_var (itype, NULL);
4884 def_stmt
4885 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4886 gimple_assign_lhs (def_stmt));
4887 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4889 shift = build_int_cst (itype, tree_log2 (oprnd1));
4890 pattern_stmt
4891 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4892 RSHIFT_EXPR, var, shift);
4894 else
4896 tree signmask;
4897 if (compare_tree_int (oprnd1, 2) == 0)
4899 signmask = vect_recog_temp_ssa_var (itype, NULL);
4900 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4901 build_int_cst (itype, 1),
4902 build_int_cst (itype, 0));
4903 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4905 else
4907 tree utype
4908 = build_nonstandard_integer_type (prec, 1);
4909 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4910 tree shift
4911 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
4912 - tree_log2 (oprnd1));
4913 tree var = vect_recog_temp_ssa_var (utype, NULL);
4915 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4916 build_int_cst (utype, -1),
4917 build_int_cst (utype, 0));
4918 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4919 var = vect_recog_temp_ssa_var (utype, NULL);
4920 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4921 gimple_assign_lhs (def_stmt),
4922 shift);
4923 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4924 signmask = vect_recog_temp_ssa_var (itype, NULL);
4925 def_stmt
4926 = gimple_build_assign (signmask, NOP_EXPR, var);
4927 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4929 def_stmt
4930 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4931 PLUS_EXPR, oprnd0, signmask);
4932 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4933 def_stmt
4934 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4935 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
4936 fold_build2 (MINUS_EXPR, itype, oprnd1,
4937 build_int_cst (itype, 1)));
4938 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4940 pattern_stmt
4941 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4942 MINUS_EXPR, gimple_assign_lhs (def_stmt),
4943 signmask);
4946 return pattern_stmt;
4949 if ((cst = uniform_integer_cst_p (oprnd1))
4950 && TYPE_UNSIGNED (itype)
4951 && rhs_code == TRUNC_DIV_EXPR
4952 && vectype
4953 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4955 /* We can use the relationship:
4957 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4959 to optimize cases where N+1 is a power of 2, and where // (N+1)
4960 is therefore a shift right. When operating in modes that are
4961 multiples of a byte in size, there are two cases:
4963 (1) N(N+3) is not representable, in which case the question
4964 becomes whether the replacement expression overflows.
4965 It is enough to test that x+N+2 does not overflow,
4966 i.e. that x < MAX-(N+1).
4968 (2) N(N+3) is representable, in which case it is the (only)
4969 bound that we need to check.
4971 ??? For now we just handle the case where // (N+1) is a shift
4972 right by half the precision, since some architectures can
4973 optimize the associated addition and shift combinations
4974 into single instructions. */
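/* Illustrative example (not part of the original source): with a 16-bit
   type, N == 255 and therefore N + 1 == 256 == 1 << (prec / 2), dividing
   x == 1000 proceeds as

     _1 = x + 257;        // 1257
     _2 = _1 >> 8;        // 4
     _3 = _2 + x;         // 1004
     q  = _3 >> 8;        // 3 == 1000 / 255

   provided the ranger can show that x + 257 does not overflow.  */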
4976 auto wcst = wi::to_wide (cst);
4977 int pow = wi::exact_log2 (wcst + 1);
4978 if (pow == prec / 2)
4980 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4982 gimple_ranger ranger;
4983 int_range_max r;
4985 /* Check that no overflow will occur. If we don't have range
4986 information we can't perform the optimization. */
4988 if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
4990 wide_int max = r.upper_bound ();
4991 wide_int one = wi::shwi (1, prec);
4992 wide_int adder = wi::add (one, wi::lshift (one, pow));
4993 wi::overflow_type ovf;
4994 wi::add (max, adder, UNSIGNED, &ovf);
4995 if (ovf == wi::OVF_NONE)
4997 *type_out = vectype;
4998 tree tadder = wide_int_to_tree (itype, adder);
4999 tree rshift = wide_int_to_tree (itype, pow);
5001 tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
5002 gassign *patt1
5003 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
5004 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5006 tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
5007 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
5008 rshift);
5009 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5011 tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
5012 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
5013 oprnd0);
5014 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5016 tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
5017 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
5018 new_lhs3, rshift);
5020 return pattern_stmt;
5026 if (prec > HOST_BITS_PER_WIDE_INT
5027 || integer_zerop (oprnd1))
5028 return NULL;
5030 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
5031 return NULL;
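/* Illustrative example (not part of the original source): for a 32-bit
   unsigned division by 3, choose_multiplier below typically returns
   ml == 0xAAAAAAAB with post_shift == 1 and mh == 0, so the division is
   rewritten as

     t  = oprnd0 h* 0xAAAAAAAB;
     q  = t >> 1;

   using a high-part multiply instead of a division.  */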
5033 if (TYPE_UNSIGNED (itype))
5035 unsigned HOST_WIDE_INT mh, ml;
5036 int pre_shift, post_shift;
5037 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
5038 & GET_MODE_MASK (itype_mode));
5039 tree t1, t2, t3, t4;
5041 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
5042 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
5043 return NULL;
5045 /* Find a suitable multiplier and right shift count instead of
5046 directly dividing by D. */
5047 mh = choose_multiplier (d, prec, prec, &ml, &post_shift);
5049 /* If the suggested multiplier is more than PREC bits, we can do better
5050 for even divisors, using an initial right shift. */
5051 if (mh != 0 && (d & 1) == 0)
5053 pre_shift = ctz_or_zero (d);
5054 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
5055 &ml, &post_shift);
5056 gcc_assert (!mh);
5058 else
5059 pre_shift = 0;
5061 if (mh != 0)
5063 if (post_shift - 1 >= prec)
5064 return NULL;
5066 /* t1 = oprnd0 h* ml;
5067 t2 = oprnd0 - t1;
5068 t3 = t2 >> 1;
5069 t4 = t1 + t3;
5070 q = t4 >> (post_shift - 1); */
5071 t1 = vect_recog_temp_ssa_var (itype, NULL);
5072 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
5073 build_int_cst (itype, ml));
5074 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5076 t2 = vect_recog_temp_ssa_var (itype, NULL);
5077 def_stmt
5078 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
5079 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5081 t3 = vect_recog_temp_ssa_var (itype, NULL);
5082 def_stmt
5083 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
5084 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5086 t4 = vect_recog_temp_ssa_var (itype, NULL);
5087 def_stmt
5088 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
5090 if (post_shift != 1)
5092 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5094 q = vect_recog_temp_ssa_var (itype, NULL);
5095 pattern_stmt
5096 = gimple_build_assign (q, RSHIFT_EXPR, t4,
5097 build_int_cst (itype, post_shift - 1));
5099 else
5101 q = t4;
5102 pattern_stmt = def_stmt;
5105 else
5107 if (pre_shift >= prec || post_shift >= prec)
5108 return NULL;
5110 /* t1 = oprnd0 >> pre_shift;
5111 t2 = t1 h* ml;
5112 q = t2 >> post_shift; */
5113 if (pre_shift)
5115 t1 = vect_recog_temp_ssa_var (itype, NULL);
5116 def_stmt
5117 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
5118 build_int_cst (NULL, pre_shift));
5119 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5121 else
5122 t1 = oprnd0;
5124 t2 = vect_recog_temp_ssa_var (itype, NULL);
5125 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
5126 build_int_cst (itype, ml));
5128 if (post_shift)
5130 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5132 q = vect_recog_temp_ssa_var (itype, NULL);
5133 def_stmt
5134 = gimple_build_assign (q, RSHIFT_EXPR, t2,
5135 build_int_cst (itype, post_shift));
5137 else
5138 q = t2;
5140 pattern_stmt = def_stmt;
5143 else
5145 unsigned HOST_WIDE_INT ml;
5146 int post_shift;
5147 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
5148 unsigned HOST_WIDE_INT abs_d;
5149 bool add = false;
5150 tree t1, t2, t3, t4;
5152 /* Give up for -1. */
5153 if (d == -1)
5154 return NULL;
5156 /* Since d might be INT_MIN, we have to cast to
5157 unsigned HOST_WIDE_INT before negating to avoid
5158 undefined signed overflow. */
5159 abs_d = (d >= 0
5160 ? (unsigned HOST_WIDE_INT) d
5161 : - (unsigned HOST_WIDE_INT) d);
5163 /* n rem d = n rem -d */
5164 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
5166 d = abs_d;
5167 oprnd1 = build_int_cst (itype, abs_d);
5169 if (HOST_BITS_PER_WIDE_INT >= prec
5170 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
5171 /* This case is not handled correctly below. */
5172 return NULL;
5174 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift);
5175 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
5177 add = true;
5178 ml |= HOST_WIDE_INT_M1U << (prec - 1);
5180 if (post_shift >= prec)
5181 return NULL;
5183 /* t1 = oprnd0 h* ml; */
5184 t1 = vect_recog_temp_ssa_var (itype, NULL);
5185 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
5186 build_int_cst (itype, ml));
5188 if (add)
5190 /* t2 = t1 + oprnd0; */
5191 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5192 t2 = vect_recog_temp_ssa_var (itype, NULL);
5193 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
5195 else
5196 t2 = t1;
5198 if (post_shift)
5200 /* t3 = t2 >> post_shift; */
5201 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5202 t3 = vect_recog_temp_ssa_var (itype, NULL);
5203 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
5204 build_int_cst (itype, post_shift));
5206 else
5207 t3 = t2;
5209 int msb = 1;
5210 int_range_max r;
5211 get_range_query (cfun)->range_of_expr (r, oprnd0);
5212 if (!r.varying_p () && !r.undefined_p ())
5214 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
5215 msb = 0;
5216 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
5217 msb = -1;
5220 if (msb == 0 && d >= 0)
5222 /* q = t3; */
5223 q = t3;
5224 pattern_stmt = def_stmt;
5226 else
5228 /* t4 = oprnd0 >> (prec - 1);
5229 or if we know from VRP that oprnd0 >= 0
5230 t4 = 0;
5231 or if we know from VRP that oprnd0 < 0
5232 t4 = -1; */
5233 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5234 t4 = vect_recog_temp_ssa_var (itype, NULL);
5235 if (msb != 1)
5236 def_stmt = gimple_build_assign (t4, INTEGER_CST,
5237 build_int_cst (itype, msb));
5238 else
5239 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
5240 build_int_cst (itype, prec - 1));
5241 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5243 /* q = t3 - t4; or q = t4 - t3; */
5244 q = vect_recog_temp_ssa_var (itype, NULL);
5245 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
5246 d < 0 ? t3 : t4);
5250 if (rhs_code == TRUNC_MOD_EXPR)
5252 tree r, t1;
5254 /* We divided. Now finish by:
5255 t1 = q * oprnd1;
5256 r = oprnd0 - t1; */
5257 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5259 t1 = vect_recog_temp_ssa_var (itype, NULL);
5260 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5261 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5263 r = vect_recog_temp_ssa_var (itype, NULL);
5264 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5267 /* Pattern detected. */
5268 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5270 *type_out = vectype;
5271 return pattern_stmt;
5274 /* Detects a pattern with a modulo operation (S1) where both arguments
5275 are variables of integral type.
5276 The statement is replaced by division, multiplication, and subtraction.
5277 The last statement (S4) is returned.
5279 Example:
5280 S1 c_t = a_t % b_t;
5282 is replaced by
5283 S2 x_t = a_t / b_t;
5284 S3 y_t = x_t * b_t;
5285 S4 z_t = a_t - y_t; */
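/* Illustrative check (not part of the original comment): with a_t = -7 and
   b_t = 3, truncating division gives x_t = -2, hence y_t = -6 and z_t = -1,
   which matches the result of -7 % 3 in C.  */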
5287 static gimple *
5288 vect_recog_mod_var_pattern (vec_info *vinfo,
5289 stmt_vec_info stmt_vinfo, tree *type_out)
5291 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5292 tree oprnd0, oprnd1, vectype, itype;
5293 gimple *pattern_stmt, *def_stmt;
5294 enum tree_code rhs_code;
5296 if (!is_gimple_assign (last_stmt))
5297 return NULL;
5299 rhs_code = gimple_assign_rhs_code (last_stmt);
5300 if (rhs_code != TRUNC_MOD_EXPR)
5301 return NULL;
5303 oprnd0 = gimple_assign_rhs1 (last_stmt);
5304 oprnd1 = gimple_assign_rhs2 (last_stmt);
5305 itype = TREE_TYPE (oprnd0);
5306 if (TREE_CODE (oprnd0) != SSA_NAME
5307 || TREE_CODE (oprnd1) != SSA_NAME
5308 || TREE_CODE (itype) != INTEGER_TYPE)
5309 return NULL;
5311 vectype = get_vectype_for_scalar_type (vinfo, itype);
5313 if (!vectype
5314 || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
5315 || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
5316 || !target_has_vecop_for_code (MULT_EXPR, vectype)
5317 || !target_has_vecop_for_code (MINUS_EXPR, vectype))
5318 return NULL;
5320 tree q, tmp, r;
5321 q = vect_recog_temp_ssa_var (itype, NULL);
5322 def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
5323 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
5325 tmp = vect_recog_temp_ssa_var (itype, NULL);
5326 def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
5327 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
5329 r = vect_recog_temp_ssa_var (itype, NULL);
5330 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
5332 /* Pattern detected. */
5333 *type_out = vectype;
5334 vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
5336 return pattern_stmt;
5339 /* Function vect_recog_mixed_size_cond_pattern
5341 Try to find the following pattern:
5343 type x_t, y_t;
5344 TYPE a_T, b_T, c_T;
5345 loop:
5346 S1 a_T = x_t CMP y_t ? b_T : c_T;
5348 where type 'TYPE' is an integral type which has different size
5349 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5350 than 'type', the constants need to fit into an integer type
5351 with the same width as 'type') or results of conversion from 'type'.
5353 Input:
5355 * STMT_VINFO: The stmt from which the pattern search begins.
5357 Output:
5359 * TYPE_OUT: The type of the output of this pattern.
5361 * Return value: A new stmt that will be used to replace the pattern.
5362 Additionally a def_stmt is added.
5364 a_it = x_t CMP y_t ? b_it : c_it;
5365 a_T = (TYPE) a_it; */
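/* Illustrative instance (made-up names): for

     short x_t, y_t;
     int a_T;
     a_T = x_t < y_t ? 5 : 10;

   the comparison works on 16-bit elements while the result is 32-bit, so
   the pattern first builds a 16-bit COND_EXPR and then widens:

     a_it = x_t < y_t ? 5 : 10;
     a_T = (int) a_it;  */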
5367 static gimple *
5368 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5369 stmt_vec_info stmt_vinfo, tree *type_out)
5371 gimple *last_stmt = stmt_vinfo->stmt;
5372 tree cond_expr, then_clause, else_clause;
5373 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5374 gimple *pattern_stmt, *def_stmt;
5375 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5376 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5377 bool promotion;
5378 tree comp_scalar_type;
5380 if (!is_gimple_assign (last_stmt)
5381 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
5382 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5383 return NULL;
5385 cond_expr = gimple_assign_rhs1 (last_stmt);
5386 then_clause = gimple_assign_rhs2 (last_stmt);
5387 else_clause = gimple_assign_rhs3 (last_stmt);
5389 if (!COMPARISON_CLASS_P (cond_expr))
5390 return NULL;
5392 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5393 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5394 if (comp_vectype == NULL_TREE)
5395 return NULL;
5397 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5398 if (types_compatible_p (type, comp_scalar_type)
5399 || ((TREE_CODE (then_clause) != INTEGER_CST
5400 || TREE_CODE (else_clause) != INTEGER_CST)
5401 && !INTEGRAL_TYPE_P (comp_scalar_type))
5402 || !INTEGRAL_TYPE_P (type))
5403 return NULL;
5405 if ((TREE_CODE (then_clause) != INTEGER_CST
5406 && !type_conversion_p (vinfo, then_clause, false,
5407 &orig_type0, &def_stmt0, &promotion))
5408 || (TREE_CODE (else_clause) != INTEGER_CST
5409 && !type_conversion_p (vinfo, else_clause, false,
5410 &orig_type1, &def_stmt1, &promotion)))
5411 return NULL;
5413 if (orig_type0 && orig_type1
5414 && !types_compatible_p (orig_type0, orig_type1))
5415 return NULL;
5417 if (orig_type0)
5419 if (!types_compatible_p (orig_type0, comp_scalar_type))
5420 return NULL;
5421 then_clause = gimple_assign_rhs1 (def_stmt0);
5422 itype = orig_type0;
5425 if (orig_type1)
5427 if (!types_compatible_p (orig_type1, comp_scalar_type))
5428 return NULL;
5429 else_clause = gimple_assign_rhs1 (def_stmt1);
5430 itype = orig_type1;
5434 HOST_WIDE_INT cmp_mode_size
5435 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5437 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5438 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
5439 return NULL;
5441 vectype = get_vectype_for_scalar_type (vinfo, type);
5442 if (vectype == NULL_TREE)
5443 return NULL;
5445 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5446 return NULL;
5448 if (itype == NULL_TREE)
5449 itype = build_nonstandard_integer_type (cmp_mode_size,
5450 TYPE_UNSIGNED (type));
5452 if (itype == NULL_TREE
5453 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5454 return NULL;
5456 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5457 if (vecitype == NULL_TREE)
5458 return NULL;
5460 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5461 return NULL;
5463 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
5465 if ((TREE_CODE (then_clause) == INTEGER_CST
5466 && !int_fits_type_p (then_clause, itype))
5467 || (TREE_CODE (else_clause) == INTEGER_CST
5468 && !int_fits_type_p (else_clause, itype)))
5469 return NULL;
5472 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5473 COND_EXPR, unshare_expr (cond_expr),
5474 fold_convert (itype, then_clause),
5475 fold_convert (itype, else_clause));
5476 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5477 NOP_EXPR, gimple_assign_lhs (def_stmt));
5479 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
5480 *type_out = vectype;
5482 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
5484 return pattern_stmt;
5488 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5489 true if bool VAR can and should be optimized that way. Assume it shouldn't
5490 in case it's a result of a comparison which can be directly vectorized into
5491 a vector comparison. Fills in STMTS with all stmts visited during the
5492 walk. */
5494 static bool
5495 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5497 tree rhs1;
5498 enum tree_code rhs_code;
5500 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5501 if (!def_stmt_info)
5502 return false;
5504 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
5505 if (!def_stmt)
5506 return false;
5508 if (stmts.contains (def_stmt))
5509 return true;
5511 rhs1 = gimple_assign_rhs1 (def_stmt);
5512 rhs_code = gimple_assign_rhs_code (def_stmt);
5513 switch (rhs_code)
5515 case SSA_NAME:
5516 if (! check_bool_pattern (rhs1, vinfo, stmts))
5517 return false;
5518 break;
5520 CASE_CONVERT:
5521 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5522 return false;
5523 if (! check_bool_pattern (rhs1, vinfo, stmts))
5524 return false;
5525 break;
5527 case BIT_NOT_EXPR:
5528 if (! check_bool_pattern (rhs1, vinfo, stmts))
5529 return false;
5530 break;
5532 case BIT_AND_EXPR:
5533 case BIT_IOR_EXPR:
5534 case BIT_XOR_EXPR:
5535 if (! check_bool_pattern (rhs1, vinfo, stmts)
5536 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
5537 return false;
5538 break;
5540 default:
5541 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5543 tree vecitype, comp_vectype;
5545 /* If the comparison can throw, then is_gimple_condexpr will be
5546 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5547 if (stmt_could_throw_p (cfun, def_stmt))
5548 return false;
5550 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5551 if (comp_vectype == NULL_TREE)
5552 return false;
5554 tree mask_type = get_mask_type_for_scalar_type (vinfo,
5555 TREE_TYPE (rhs1));
5556 if (mask_type
5557 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5558 return false;
5560 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5562 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5563 tree itype
5564 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5565 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5566 if (vecitype == NULL_TREE)
5567 return false;
5569 else
5570 vecitype = comp_vectype;
5571 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5572 return false;
5574 else
5575 return false;
5576 break;
5579 bool res = stmts.add (def_stmt);
5580 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5581 gcc_assert (!res);
5583 return true;
5587 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
5588 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
5589 pattern sequence. */
5591 static tree
5592 adjust_bool_pattern_cast (vec_info *vinfo,
5593 tree type, tree var, stmt_vec_info stmt_info)
5595 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5596 NOP_EXPR, var);
5597 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
5598 get_vectype_for_scalar_type (vinfo, type));
5599 return gimple_assign_lhs (cast_stmt);
5602 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5603 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5604 type, OUT_TYPE is the desired final integer type of the whole pattern.
5605 STMT_INFO is the info of the pattern root and is where pattern stmts should
5606 be associated with. DEFS is a map of pattern defs. */
5608 static void
5609 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5610 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5612 gimple *stmt = SSA_NAME_DEF_STMT (var);
5613 enum tree_code rhs_code, def_rhs_code;
5614 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5615 location_t loc;
5616 gimple *pattern_stmt, *def_stmt;
5617 tree trueval = NULL_TREE;
5619 rhs1 = gimple_assign_rhs1 (stmt);
5620 rhs2 = gimple_assign_rhs2 (stmt);
5621 rhs_code = gimple_assign_rhs_code (stmt);
5622 loc = gimple_location (stmt);
5623 switch (rhs_code)
5625 case SSA_NAME:
5626 CASE_CONVERT:
5627 irhs1 = *defs.get (rhs1);
5628 itype = TREE_TYPE (irhs1);
5629 pattern_stmt
5630 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5631 SSA_NAME, irhs1);
5632 break;
5634 case BIT_NOT_EXPR:
5635 irhs1 = *defs.get (rhs1);
5636 itype = TREE_TYPE (irhs1);
5637 pattern_stmt
5638 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5639 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5640 break;
5642 case BIT_AND_EXPR:
5643 /* Try to optimize x = y & (a < b ? 1 : 0); into
5644 x = (a < b ? y : 0);
5646 E.g. for:
5647 bool a_b, b_b, c_b;
5648 TYPE d_T;
5650 S1 a_b = x1 CMP1 y1;
5651 S2 b_b = x2 CMP2 y2;
5652 S3 c_b = a_b & b_b;
5653 S4 d_T = (TYPE) c_b;
5655 we would normally emit:
5657 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5658 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5659 S3' c_T = a_T & b_T;
5660 S4' d_T = c_T;
5662 but we can save one stmt by using the
5663 result of one of the COND_EXPRs in the other COND_EXPR and leave
5664 BIT_AND_EXPR stmt out:
5666 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5667 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5668 S4' d_T = c_T;
5670 At least when VEC_COND_EXPR is implemented using masks
5671 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
5672 computes the comparison masks and ands it, in one case with
5673 all ones vector, in the other case with a vector register.
5674 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5675 often more expensive. */
5676 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5677 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5678 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5680 irhs1 = *defs.get (rhs1);
5681 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5682 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5683 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5685 rhs_code = def_rhs_code;
5686 rhs1 = def_rhs1;
5687 rhs2 = gimple_assign_rhs2 (def_stmt);
5688 trueval = irhs1;
5689 goto do_compare;
5691 else
5692 irhs2 = *defs.get (rhs2);
5693 goto and_ior_xor;
5695 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5696 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5697 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5699 irhs2 = *defs.get (rhs2);
5700 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5701 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5702 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5704 rhs_code = def_rhs_code;
5705 rhs1 = def_rhs1;
5706 rhs2 = gimple_assign_rhs2 (def_stmt);
5707 trueval = irhs2;
5708 goto do_compare;
5710 else
5711 irhs1 = *defs.get (rhs1);
5712 goto and_ior_xor;
5714 /* FALLTHRU */
5715 case BIT_IOR_EXPR:
5716 case BIT_XOR_EXPR:
5717 irhs1 = *defs.get (rhs1);
5718 irhs2 = *defs.get (rhs2);
5719 and_ior_xor:
5720 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5721 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5723 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5724 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5725 int out_prec = TYPE_PRECISION (out_type);
5726 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
5727 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
5728 stmt_info);
5729 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
5730 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
5731 stmt_info);
5732 else
5734 irhs1 = adjust_bool_pattern_cast (vinfo,
5735 out_type, irhs1, stmt_info);
5736 irhs2 = adjust_bool_pattern_cast (vinfo,
5737 out_type, irhs2, stmt_info);
5740 itype = TREE_TYPE (irhs1);
5741 pattern_stmt
5742 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5743 rhs_code, irhs1, irhs2);
5744 break;
5746 default:
5747 do_compare:
5748 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5749 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5750 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5751 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5752 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5754 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5755 itype
5756 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5758 else
5759 itype = TREE_TYPE (rhs1);
5760 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
5761 if (trueval == NULL_TREE)
5762 trueval = build_int_cst (itype, 1);
5763 else
5764 gcc_checking_assert (useless_type_conversion_p (itype,
5765 TREE_TYPE (trueval)));
5766 pattern_stmt
5767 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5768 COND_EXPR, cond_expr, trueval,
5769 build_int_cst (itype, 0));
5770 break;
5773 gimple_set_location (pattern_stmt, loc);
5774 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
5775 get_vectype_for_scalar_type (vinfo, itype));
5776 defs.put (var, gimple_assign_lhs (pattern_stmt));
5779 /* Comparison function to qsort a vector of gimple stmts after UID. */
5781 static int
5782 sort_after_uid (const void *p1, const void *p2)
5784 const gimple *stmt1 = *(const gimple * const *)p1;
5785 const gimple *stmt2 = *(const gimple * const *)p2;
5786 return gimple_uid (stmt1) - gimple_uid (stmt2);
5789 /* Create pattern stmts for all stmts participating in the bool pattern
5790 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5791 OUT_TYPE. Return the def of the pattern root. */
5793 static tree
5794 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5795 tree out_type, stmt_vec_info stmt_info)
5797 /* Gather original stmts in the bool pattern in their order of appearance
5798 in the IL. */
5799 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5800 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5801 i != bool_stmt_set.end (); ++i)
5802 bool_stmts.quick_push (*i);
5803 bool_stmts.qsort (sort_after_uid);
5805 /* Now process them in that order, producing pattern stmts. */
5806 hash_map <tree, tree> defs;
5807 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5808 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
5809 out_type, stmt_info, defs);
5811 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5812 gimple *pattern_stmt
5813 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5814 return gimple_assign_lhs (pattern_stmt);
5817 /* Return the proper type for converting bool VAR into
5818 an integer value or NULL_TREE if no such type exists.
5819 The type is chosen so that the converted value has the
5820 same number of elements as VAR's vector type. */
5822 static tree
5823 integer_type_for_mask (tree var, vec_info *vinfo)
5825 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5826 return NULL_TREE;
5828 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5829 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
5830 return NULL_TREE;
5832 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5835 /* Function vect_recog_gcond_pattern
5837 Try to find pattern like following:
5839 if (a op b)
5841 where operator 'op' is not != and convert it to an adjusted boolean pattern
5843 mask = a op b
5844 if (mask != 0)
5846 and set the mask type on MASK.
5848 Input:
5850 * STMT_VINFO: The stmt from which the pattern
5851 search begins, i.e. the GIMPLE_COND of a loop
5852 exit condition.
5854 Output:
5856 * TYPE_OUT: The type of the output of this pattern.
5858 * Return value: A new stmt that will be used to replace the pattern. */
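/* Illustrative instance (not from the original comment): an early-break
   test such as

     if (a[i] > b[i])

   becomes

     mask_1 = a[i] > b[i];
     if (mask_1 != 0)

   so the comparison gets a vector mask type and the exit test only checks
   the mask against zero.  */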
5860 static gimple *
5861 vect_recog_gcond_pattern (vec_info *vinfo,
5862 stmt_vec_info stmt_vinfo, tree *type_out)
5864 /* Currently we only support this for loop vectorization and when the loop
5865 has multiple exits. */
5866 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5867 if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
5868 return NULL;
5870 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5871 gcond* cond = NULL;
5872 if (!(cond = dyn_cast <gcond *> (last_stmt)))
5873 return NULL;
5875 auto lhs = gimple_cond_lhs (cond);
5876 auto rhs = gimple_cond_rhs (cond);
5877 auto code = gimple_cond_code (cond);
5879 tree scalar_type = TREE_TYPE (lhs);
5880 if (VECTOR_TYPE_P (scalar_type))
5881 return NULL;
5883 if (code == NE_EXPR
5884 && zerop (rhs)
5885 && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
5886 return NULL;
5888 tree vecitype = get_vectype_for_scalar_type (vinfo, scalar_type);
5889 if (vecitype == NULL_TREE)
5890 return NULL;
5892 tree vectype = truth_type_for (vecitype);
5894 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5895 gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
5896 append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
5898 gimple *pattern_stmt
5899 = gimple_build_cond (NE_EXPR, new_lhs,
5900 build_int_cst (TREE_TYPE (new_lhs), 0),
5901 NULL_TREE, NULL_TREE);
5902 *type_out = vectype;
5903 vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
5904 return pattern_stmt;
5907 /* Function vect_recog_bool_pattern
5909 Try to find pattern like following:
5911 bool a_b, b_b, c_b, d_b, e_b;
5912 TYPE f_T;
5913 loop:
5914 S1 a_b = x1 CMP1 y1;
5915 S2 b_b = x2 CMP2 y2;
5916 S3 c_b = a_b & b_b;
5917 S4 d_b = x3 CMP3 y3;
5918 S5 e_b = c_b | d_b;
5919 S6 f_T = (TYPE) e_b;
5921 where type 'TYPE' is an integral type. Or a similar pattern
5922 ending in
5924 S6 f_Y = e_b ? r_Y : s_Y;
5926 as results from if-conversion of a complex condition.
5928 Input:
5930 * STMT_VINFO: The stmt at the end from which the pattern
5931 search begins, i.e. cast of a bool to
5932 an integer type.
5934 Output:
5936 * TYPE_OUT: The type of the output of this pattern.
5938 * Return value: A new stmt that will be used to replace the pattern.
5940 Assuming size of TYPE is the same as size of all comparisons
5941 (otherwise some casts would be added where needed), for the above
5942 sequence we create related pattern stmts:
5943 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5944 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5945 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5946 S5' e_T = c_T | d_T;
5947 S6' f_T = e_T;
5949 Instead of the above S3' we could emit:
5950 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5951 S3' c_T = a_T & b_T;
5952 but the above is more efficient. */
5954 static gimple *
5955 vect_recog_bool_pattern (vec_info *vinfo,
5956 stmt_vec_info stmt_vinfo, tree *type_out)
5958 gimple *last_stmt = stmt_vinfo->stmt;
5959 enum tree_code rhs_code;
5960 tree var, lhs, rhs, vectype;
5961 gimple *pattern_stmt;
5963 if (!is_gimple_assign (last_stmt))
5964 return NULL;
5966 var = gimple_assign_rhs1 (last_stmt);
5967 lhs = gimple_assign_lhs (last_stmt);
5968 rhs_code = gimple_assign_rhs_code (last_stmt);
5970 if (rhs_code == VIEW_CONVERT_EXPR)
5971 var = TREE_OPERAND (var, 0);
5973 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5974 return NULL;
5976 hash_set<gimple *> bool_stmts;
5978 if (CONVERT_EXPR_CODE_P (rhs_code)
5979 || rhs_code == VIEW_CONVERT_EXPR)
5981 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5982 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5983 return NULL;
5984 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5986 if (check_bool_pattern (var, vinfo, bool_stmts))
5988 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5989 TREE_TYPE (lhs), stmt_vinfo);
5990 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5991 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5992 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5993 else
5994 pattern_stmt
5995 = gimple_build_assign (lhs, NOP_EXPR, rhs);
5997 else
5999 tree type = integer_type_for_mask (var, vinfo);
6000 tree cst0, cst1, tmp;
6002 if (!type)
6003 return NULL;
6005 /* We may directly use the cond with a narrowed type to avoid
6006 multiple cond exprs with subsequent result packing and instead
6007 perform a single cond with a packed mask. In case
6008 of widening it is better to do the cond first and then extract
6009 the results. */
6010 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
6011 type = TREE_TYPE (lhs);
6013 cst0 = build_int_cst (type, 0);
6014 cst1 = build_int_cst (type, 1);
6015 tmp = vect_recog_temp_ssa_var (type, NULL);
6016 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
6018 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
6020 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
6021 append_pattern_def_seq (vinfo, stmt_vinfo,
6022 pattern_stmt, new_vectype);
6024 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6025 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
6029 *type_out = vectype;
6030 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6032 return pattern_stmt;
6034 else if (rhs_code == COND_EXPR
6035 && TREE_CODE (var) == SSA_NAME)
6037 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6038 if (vectype == NULL_TREE)
6039 return NULL;
6041 /* Build a scalar type for the boolean result that when
6042 vectorized matches the vector type of the result in
6043 size and number of elements. */
6044 unsigned prec
6045 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
6046 TYPE_VECTOR_SUBPARTS (vectype));
6048 tree type
6049 = build_nonstandard_integer_type (prec,
6050 TYPE_UNSIGNED (TREE_TYPE (var)));
6051 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
6052 return NULL;
6054 if (check_bool_pattern (var, vinfo, bool_stmts))
6055 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
6056 else if (integer_type_for_mask (var, vinfo))
6057 return NULL;
6059 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6060 pattern_stmt
6061 = gimple_build_assign (lhs, COND_EXPR,
6062 build2 (NE_EXPR, boolean_type_node,
6063 var, build_int_cst (TREE_TYPE (var), 0)),
6064 gimple_assign_rhs2 (last_stmt),
6065 gimple_assign_rhs3 (last_stmt));
6066 *type_out = vectype;
6067 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6069 return pattern_stmt;
6071 else if (rhs_code == SSA_NAME
6072 && STMT_VINFO_DATA_REF (stmt_vinfo))
6074 stmt_vec_info pattern_stmt_info;
6075 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6076 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
6077 return NULL;
6079 if (check_bool_pattern (var, vinfo, bool_stmts))
6080 rhs = adjust_bool_stmts (vinfo, bool_stmts,
6081 TREE_TYPE (vectype), stmt_vinfo);
6082 else
6084 tree type = integer_type_for_mask (var, vinfo);
6085 tree cst0, cst1, new_vectype;
6087 if (!type)
6088 return NULL;
6090 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
6091 type = TREE_TYPE (vectype);
6093 cst0 = build_int_cst (type, 0);
6094 cst1 = build_int_cst (type, 1);
6095 new_vectype = get_vectype_for_scalar_type (vinfo, type);
6097 rhs = vect_recog_temp_ssa_var (type, NULL);
6098 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
6099 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
6102 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
6103 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
6105 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6106 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
6107 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
6108 rhs = rhs2;
6110 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
6111 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
6112 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6113 *type_out = vectype;
6114 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6116 return pattern_stmt;
6118 else
6119 return NULL;
6123 /* A helper for vect_recog_mask_conversion_pattern. Build
6124 conversion of MASK to a type suitable for masking VECTYPE.
6125 Built statement gets required vectype and is appended to
6126 a pattern sequence of STMT_VINFO.
6128 Return converted mask. */
6130 static tree
6131 build_mask_conversion (vec_info *vinfo,
6132 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
6134 gimple *stmt;
6135 tree masktype, tmp;
6137 masktype = truth_type_for (vectype);
6138 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
6139 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
6140 append_pattern_def_seq (vinfo, stmt_vinfo,
6141 stmt, masktype, TREE_TYPE (vectype));
6143 return tmp;
6147 /* Function vect_recog_mask_conversion_pattern
6149 Try to find statements which require boolean type
6150 conversion. Additional conversion statements are
6151 added to handle such cases. For example:
6153 bool m_1, m_2, m_3;
6154 int i_4, i_5;
6155 double d_6, d_7;
6156 char c_1, c_2, c_3;
6158 S1 m_1 = i_4 > i_5;
6159 S2 m_2 = d_6 < d_7;
6160 S3 m_3 = m_1 & m_2;
6161 S4 c_1 = m_3 ? c_2 : c_3;
6163 Will be transformed into:
6165 S1 m_1 = i_4 > i_5;
6166 S2 m_2 = d_6 < d_7;
6167 S3'' m_2' = (_Bool[bitsize=32])m_2
6168 S3' m_3' = m_1 & m_2';
6169 S4'' m_3'' = (_Bool[bitsize=8])m_3'
6170 S4' c_1' = m_3'' ? c_2 : c_3; */
6172 static gimple *
6173 vect_recog_mask_conversion_pattern (vec_info *vinfo,
6174 stmt_vec_info stmt_vinfo, tree *type_out)
6176 gimple *last_stmt = stmt_vinfo->stmt;
6177 enum tree_code rhs_code;
6178 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
6179 tree vectype1, vectype2;
6180 stmt_vec_info pattern_stmt_info;
6181 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
6182 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
6184 /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
6185 conversion. */
6186 if (is_gimple_call (last_stmt)
6187 && gimple_call_internal_p (last_stmt))
6189 gcall *pattern_stmt;
6191 internal_fn ifn = gimple_call_internal_fn (last_stmt);
6192 int mask_argno = internal_fn_mask_index (ifn);
6193 if (mask_argno < 0)
6194 return NULL;
6196 bool store_p = internal_store_fn_p (ifn);
6197 bool load_p = internal_load_fn_p (ifn);
6198 if (store_p)
6200 int rhs_index = internal_fn_stored_value_index (ifn);
6201 tree rhs = gimple_call_arg (last_stmt, rhs_index);
6202 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
6204 else
6206 lhs = gimple_call_lhs (last_stmt);
6207 if (!lhs)
6208 return NULL;
6209 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6212 if (!vectype1)
6213 return NULL;
6215 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
6216 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
6217 if (mask_arg_type)
6219 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
6221 if (!vectype2
6222 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6223 TYPE_VECTOR_SUBPARTS (vectype2)))
6224 return NULL;
6226 else if (store_p || load_p)
6227 return NULL;
6229 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
6231 auto_vec<tree, 8> args;
6232 unsigned int nargs = gimple_call_num_args (last_stmt);
6233 args.safe_grow (nargs, true);
6234 for (unsigned int i = 0; i < nargs; ++i)
6235 args[i] = ((int) i == mask_argno
6236 ? tmp
6237 : gimple_call_arg (last_stmt, i));
6238 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
6240 if (!store_p)
6242 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6243 gimple_call_set_lhs (pattern_stmt, lhs);
6246 if (load_p || store_p)
6247 gimple_call_set_nothrow (pattern_stmt, true);
6249 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
6250 if (STMT_VINFO_DATA_REF (stmt_vinfo))
6251 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6253 *type_out = vectype1;
6254 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6256 return pattern_stmt;
6259 if (!is_gimple_assign (last_stmt))
6260 return NULL;
6262 gimple *pattern_stmt;
6263 lhs = gimple_assign_lhs (last_stmt);
6264 rhs1 = gimple_assign_rhs1 (last_stmt);
6265 rhs_code = gimple_assign_rhs_code (last_stmt);
6267 /* Check for cond expression requiring mask conversion. */
6268 if (rhs_code == COND_EXPR)
6270 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6272 if (TREE_CODE (rhs1) == SSA_NAME)
6274 rhs1_type = integer_type_for_mask (rhs1, vinfo);
6275 if (!rhs1_type)
6276 return NULL;
6278 else if (COMPARISON_CLASS_P (rhs1))
6280 /* Check whether we're comparing scalar booleans and (if so)
6281 whether a better mask type exists than the mask associated
6282 with boolean-sized elements. This avoids unnecessary packs
6283 and unpacks if the booleans are set from comparisons of
6284 wider types. E.g. in:
6286 int x1, x2, x3, x4, y1, y2;
6288 bool b1 = (x1 == x2);
6289 bool b2 = (x3 == x4);
6290 ... = b1 == b2 ? y1 : y2;
6292 it is better for b1 and b2 to use the mask type associated
6293 with int elements rather than bool (byte) elements. */
6294 rhs1_op0 = TREE_OPERAND (rhs1, 0);
6295 rhs1_op1 = TREE_OPERAND (rhs1, 1);
6296 if (!rhs1_op0 || !rhs1_op1)
6297 return NULL;
6298 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
6299 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
6301 if (!rhs1_op0_type)
6302 rhs1_type = TREE_TYPE (rhs1_op0);
6303 else if (!rhs1_op1_type)
6304 rhs1_type = TREE_TYPE (rhs1_op1);
6305 else if (TYPE_PRECISION (rhs1_op0_type)
6306 != TYPE_PRECISION (rhs1_op1_type))
6308 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
6309 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6310 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
6311 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6312 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
6314 if (abs (tmp0) > abs (tmp1))
6315 rhs1_type = rhs1_op1_type;
6316 else
6317 rhs1_type = rhs1_op0_type;
6319 else
6320 rhs1_type = build_nonstandard_integer_type
6321 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
6323 else
6324 rhs1_type = rhs1_op0_type;
6326 else
6327 return NULL;
6329 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6331 if (!vectype1 || !vectype2)
6332 return NULL;
6334 /* Continue if a conversion is needed. Also continue if we have
6335 a comparison whose vector type would normally be different from
6336 VECTYPE2 when considered in isolation. In that case we'll
6337 replace the comparison with an SSA name (so that we can record
6338 its vector type) and behave as though the comparison was an SSA
6339 name from the outset. */
6340 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6341 TYPE_VECTOR_SUBPARTS (vectype2))
6342 && !rhs1_op0_type
6343 && !rhs1_op1_type)
6344 return NULL;
6346 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
6347 in place, we can handle it in vectorizable_condition. This avoids
6348 unnecessary promotion stmts and increased vectorization factor. */
6349 if (COMPARISON_CLASS_P (rhs1)
6350 && INTEGRAL_TYPE_P (rhs1_type)
6351 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
6352 TYPE_VECTOR_SUBPARTS (vectype2)))
6354 enum vect_def_type dt;
6355 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
6356 && dt == vect_external_def
6357 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
6358 && (dt == vect_external_def
6359 || dt == vect_constant_def))
6361 tree wide_scalar_type = build_nonstandard_integer_type
6362 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
6363 tree vectype3 = get_vectype_for_scalar_type (vinfo,
6364 wide_scalar_type);
6365 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
6366 return NULL;
6370 /* If rhs1 is a comparison we need to move it into a
6371 separate statement. */
6372 if (TREE_CODE (rhs1) != SSA_NAME)
6374 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
6375 if (rhs1_op0_type
6376 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
6377 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
6378 vectype2, stmt_vinfo);
6379 if (rhs1_op1_type
6380 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
6381 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
6382 vectype2, stmt_vinfo);
6383 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
6384 rhs1_op0, rhs1_op1);
6385 rhs1 = tmp;
6386 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
6387 rhs1_type);
6390 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
6391 TYPE_VECTOR_SUBPARTS (vectype2)))
6392 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6393 else
6394 tmp = rhs1;
6396 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6397 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6398 gimple_assign_rhs2 (last_stmt),
6399 gimple_assign_rhs3 (last_stmt));
6401 *type_out = vectype1;
6402 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6404 return pattern_stmt;
6407 /* Now check for binary boolean operations requiring conversion for
6408 one of operands. */
6409 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6410 return NULL;
6412 if (rhs_code != BIT_IOR_EXPR
6413 && rhs_code != BIT_XOR_EXPR
6414 && rhs_code != BIT_AND_EXPR
6415 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6416 return NULL;
6418 rhs2 = gimple_assign_rhs2 (last_stmt);
6420 rhs1_type = integer_type_for_mask (rhs1, vinfo);
6421 rhs2_type = integer_type_for_mask (rhs2, vinfo);
6423 if (!rhs1_type || !rhs2_type
6424 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6425 return NULL;
6427 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6429 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6430 if (!vectype1)
6431 return NULL;
6432 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
6434 else
6436 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6437 if (!vectype1)
6438 return NULL;
6439 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6442 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6443 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6445 *type_out = vectype1;
6446 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6448 return pattern_stmt;
6451 /* STMT_INFO is a load or store. If the load or store is conditional, return
6452 the boolean condition under which it occurs, otherwise return null. */
6454 static tree
6455 vect_get_load_store_mask (stmt_vec_info stmt_info)
6457 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6459 gcc_assert (gimple_assign_single_p (def_assign));
6460 return NULL_TREE;
6463 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6465 internal_fn ifn = gimple_call_internal_fn (def_call);
6466 int mask_index = internal_fn_mask_index (ifn);
6467 return gimple_call_arg (def_call, mask_index);
6470 gcc_unreachable ();
6473 /* Return MASK if MASK is suitable for masking an operation on vectors
6474 of type VECTYPE, otherwise convert it into such a form and return
6475 the result. Associate any conversion statements with STMT_INFO's
6476 pattern. */
6478 static tree
6479 vect_convert_mask_for_vectype (tree mask, tree vectype,
6480 stmt_vec_info stmt_info, vec_info *vinfo)
6482 tree mask_type = integer_type_for_mask (mask, vinfo);
6483 if (mask_type)
6485 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6486 if (mask_vectype
6487 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
6488 TYPE_VECTOR_SUBPARTS (mask_vectype)))
6489 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
6491 return mask;
6494 /* Return the equivalent of:
6496 fold_convert (TYPE, VALUE)
6498 with the expectation that the operation will be vectorized.
6499 If new statements are needed, add them as pattern statements
6500 to STMT_INFO. */
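/* For instance (illustrative only), converting a 16-bit gather offset for
   use with 32-bit offset elements appends

     offset_32 = (int) offset_16;

   to STMT_INFO's pattern sequence and returns offset_32.  */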
6502 static tree
6503 vect_add_conversion_to_pattern (vec_info *vinfo,
6504 tree type, tree value, stmt_vec_info stmt_info)
6506 if (useless_type_conversion_p (type, TREE_TYPE (value)))
6507 return value;
6509 tree new_value = vect_recog_temp_ssa_var (type, NULL);
6510 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6511 append_pattern_def_seq (vinfo, stmt_info, conversion,
6512 get_vectype_for_scalar_type (vinfo, type));
6513 return new_value;
6516 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6517 internal function. Return the final statement on success and set
6518 *TYPE_OUT to the vector type being loaded or stored.
6520 This function only handles gathers and scatters that were recognized
6521 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
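/* Illustrative shape of the result, following the argument order used
   below: a conditional gather read becomes

     lhs_1 = .MASK_GATHER_LOAD (base, offset, scale, 0, mask);

   and a conditional scatter write becomes

     .MASK_SCATTER_STORE (base, offset, scale, rhs, mask);

   with an all-ones mask synthesized when the chosen ifn expects a mask but
   the original access was unconditional.  */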
6523 static gimple *
6524 vect_recog_gather_scatter_pattern (vec_info *vinfo,
6525 stmt_vec_info stmt_info, tree *type_out)
6527 /* Currently we only support this for loop vectorization. */
6528 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6529 if (!loop_vinfo)
6530 return NULL;
6532 /* Make sure that we're looking at a gather load or scatter store. */
6533 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6534 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6535 return NULL;
6537 /* Get the boolean that controls whether the load or store happens.
6538 This is null if the operation is unconditional. */
6539 tree mask = vect_get_load_store_mask (stmt_info);
6541 /* Make sure that the target supports an appropriate internal
6542 function for the gather/scatter operation. */
6543 gather_scatter_info gs_info;
6544 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6545 || gs_info.ifn == IFN_LAST)
6546 return NULL;
6548 /* Convert the mask to the right form. */
6549 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6550 gs_info.element_type);
6551 if (mask)
6552 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6553 loop_vinfo);
6554 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6555 || gs_info.ifn == IFN_MASK_GATHER_LOAD
6556 || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6557 || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6558 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6560 /* Get the invariant base and non-invariant offset, converting the
6561 latter to the same width as the vector elements. */
6562 tree base = gs_info.base;
6563 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6564 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6565 gs_info.offset, stmt_info);
6567 /* Build the new pattern statement. */
6568 tree scale = size_int (gs_info.scale);
6569 gcall *pattern_stmt;
6570 if (DR_IS_READ (dr))
6572 tree zero = build_zero_cst (gs_info.element_type);
6573 if (mask != NULL)
6574 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6575 offset, scale, zero, mask);
6576 else
6577 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6578 offset, scale, zero);
6579 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
6580 gimple_call_set_lhs (pattern_stmt, load_lhs);
6582 else
6584 tree rhs = vect_get_store_rhs (stmt_info);
6585 if (mask != NULL)
6586 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6587 base, offset, scale, rhs,
6588 mask);
6589 else
6590 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6591 base, offset, scale, rhs);
6593 gimple_call_set_nothrow (pattern_stmt, true);
6595 /* Copy across relevant vectorization info and associate DR with the
6596 new pattern statement instead of the original statement. */
6597 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6598 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6600 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6601 *type_out = vectype;
6602 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6604 return pattern_stmt;
6607 /* Helper method of vect_recog_cond_store_pattern, checks to see if COND_ARG
6608 points to a load statement that reads the same data as that of
6609 STORE_VINFO. */
6611 static bool
6612 vect_cond_store_pattern_same_ref (vec_info *vinfo,
6613 stmt_vec_info store_vinfo, tree cond_arg)
6615 stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
6616 if (!load_stmt_vinfo
6617 || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
6618 || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
6619 || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
6620 STMT_VINFO_DATA_REF (load_stmt_vinfo)))
6621 return false;
6623 return true;
6626 /* Function vect_recog_cond_store_pattern
6628 Try to find the following pattern:
6630 x = *_3;
6631 c = a CMP b;
6632 y = c ? t_20 : x;
6633 *_3 = y;
6635 where the store of _3 happens on a conditional select on a value loaded
6636 from the same location. In such case we can elide the initial load if
6637 MASK_STORE is supported and instead only conditionally write out the result.
6639 The pattern produces for the above:
6641 c = a CMP b;
6642 .MASK_STORE (_3, c, t_20)
6644 Input:
6646 * STMT_VINFO: The stmt from which the pattern search begins. In the
6647 example, when this function is called with _3 then the search begins.
6649 Output:
6651 * TYPE_OUT: The type of the output of this pattern.
6653 * Return value: A new stmt that will be used to replace the sequence. */
6655 static gimple *
6656 vect_recog_cond_store_pattern (vec_info *vinfo,
6657 stmt_vec_info stmt_vinfo, tree *type_out)
6659 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6660 if (!loop_vinfo)
6661 return NULL;
6663 gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
6665 /* Needs to be a gimple store for which we have DR info. */
6666 if (!STMT_VINFO_DATA_REF (stmt_vinfo)
6667 || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
6668 || !gimple_store_p (store_stmt))
6669 return NULL;
6671 tree st_rhs = gimple_assign_rhs1 (store_stmt);
6673 if (TREE_CODE (st_rhs) != SSA_NAME)
6674 return NULL;
6676 auto cond_vinfo = vinfo->lookup_def (st_rhs);
6678 /* If the condition isn't part of the loop then bool recog wouldn't have seen
6679 it and so this transformation may not be valid. */
6680 if (!cond_vinfo)
6681 return NULL;
6683 cond_vinfo = vect_stmt_to_vectorize (cond_vinfo);
6684 gassign *cond_stmt = dyn_cast<gassign *> (STMT_VINFO_STMT (cond_vinfo));
6685 if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
6686 return NULL;
6688 /* Check if the else value matches the original loaded one. */
6689 bool invert = false;
6690 tree cmp_ls = gimple_arg (cond_stmt, 0);
6691 if (TREE_CODE (cmp_ls) != SSA_NAME)
6692 return NULL;
6694 tree cond_arg1 = gimple_arg (cond_stmt, 1);
6695 tree cond_arg2 = gimple_arg (cond_stmt, 2);
6697 if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
6698 && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
6699 cond_arg1)))
6700 return NULL;
6702 vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
6704 tree scalar_type = TREE_TYPE (st_rhs);
6705 if (VECTOR_TYPE_P (scalar_type))
6706 return NULL;
6708 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6709 if (vectype == NULL_TREE)
6710 return NULL;
6712 machine_mode mask_mode;
6713 machine_mode vecmode = TYPE_MODE (vectype);
6714 if (!VECTOR_MODE_P (vecmode)
6715 || targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
6716 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
6717 || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
6718 return NULL;
6720 tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
6721 if (may_be_nonaddressable_p (base))
6722 return NULL;
6724 /* We need to use the false parameter of the conditional select. */
6725 tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
6726 tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
6727 gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);
6729 /* This is a rough estimation to check that there aren't any aliasing stores
6730 in between the load and store. It's a bit strict, but for now it's good
6731 enough. */
6732 if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
6733 return NULL;
6735 /* If we have to invert the condition, i.e. use the true argument rather than
6736 the false argument, we have to negate the mask. */
6737 if (invert)
6739 tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
6741 /* Invert the mask using ^ 1. */
6742 tree itype = TREE_TYPE (cmp_ls);
6743 gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
6744 build_int_cst (itype, 1));
6746 tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
6747 append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
6748 cmp_ls = var;
6751 if (TREE_CODE (base) != MEM_REF)
6752 base = build_fold_addr_expr (base);
6754 tree ptr = build_int_cst (reference_alias_ptr_type (base),
6755 get_object_alignment (base));
6757 /* Convert the mask to the right form. */
6758 tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
6759 vinfo);
6761 gcall *call
6762 = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
6763 cond_store_arg);
6764 gimple_set_location (call, gimple_location (store_stmt));
6766 /* Copy across relevant vectorization info and associate DR with the
6767 new pattern statement instead of the original statement. */
6768 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
6769 loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6771 *type_out = vectype;
6772 return call;
6775 /* Return true if TYPE is a non-boolean integer type. These are the types
6776 that we want to consider for narrowing. */
6778 static bool
6779 vect_narrowable_type_p (tree type)
6781 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6784 /* Return true if the operation given by CODE can be truncated to N bits
6785 when only N bits of the output are needed. This is only true if bit N+1
6786 of the inputs has no effect on the low N bits of the result. */
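/* For example (illustrative): PLUS_EXPR qualifies because
   ((0x1ff + 1) & 0xff) == ((0xff + 1) & 0xff) == 0, whereas TRUNC_DIV_EXPR
   does not, since (0x1ff / 2) & 0xff == 0xff but 0xff / 2 == 0x7f.  */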
6788 static bool
6789 vect_truncatable_operation_p (tree_code code)
6791 switch (code)
6793 case NEGATE_EXPR:
6794 case PLUS_EXPR:
6795 case MINUS_EXPR:
6796 case MULT_EXPR:
6797 case BIT_NOT_EXPR:
6798 case BIT_AND_EXPR:
6799 case BIT_IOR_EXPR:
6800 case BIT_XOR_EXPR:
6801 case COND_EXPR:
6802 return true;
6804 default:
6805 return false;
6809 /* Record that STMT_INFO could be changed from operating on TYPE to
6810 operating on a type with the precision and sign given by PRECISION
6811 and SIGN respectively. PRECISION is an arbitrary bit precision;
6812 it might not be a whole number of bytes. */
6814 static void
6815 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6816 unsigned int precision, signop sign)
6818 /* Round the precision up to a whole number of bytes. */
6819 precision = vect_element_precision (precision);
6820 if (precision < TYPE_PRECISION (type)
6821 && (!stmt_info->operation_precision
6822 || stmt_info->operation_precision > precision))
6824 stmt_info->operation_precision = precision;
6825 stmt_info->operation_sign = sign;
6829 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6830 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6831 is an arbitrary bit precision; it might not be a whole number of bytes. */
6833 static void
6834 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6835 unsigned int min_input_precision)
6837 /* This operation in isolation only requires the inputs to have
6838 MIN_INPUT_PRECISION of precision. However, that doesn't mean
6839 that MIN_INPUT_PRECISION is a natural precision for the chain
6840 as a whole. E.g. consider something like:
6842 unsigned short *x, *y;
6843 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6845 The right shift can be done on unsigned chars, and only requires the
6846 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6847 approach would mean turning a natural chain of single-vector unsigned
6848 short operations into one that truncates "*x" and then extends
6849 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6850 operation and one vector for each unsigned char operation.
6851 This would be a significant pessimization.
6853 Instead only propagate the maximum of this precision and the precision
6854 required by the users of the result. This means that we don't pessimize
6855 the case above but continue to optimize things like:
6857 unsigned char *y;
6858 unsigned short *x;
6859 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6861 Here we would truncate two vectors of *x to a single vector of
6862 unsigned chars and use single-vector unsigned char operations for
6863 everything else, rather than doing two unsigned short copies of
6864 "(*x & 0xf0) >> 4" and then truncating the result. */
6865 min_input_precision = MAX (min_input_precision,
6866 stmt_info->min_output_precision);
6868 if (min_input_precision < TYPE_PRECISION (type)
6869 && (!stmt_info->min_input_precision
6870 || stmt_info->min_input_precision > min_input_precision))
6871 stmt_info->min_input_precision = min_input_precision;
6874 /* Subroutine of vect_determine_min_output_precision. Return true if
6875 we can calculate a reduced number of output bits for STMT_INFO,
6876 whose result is LHS. */
6878 static bool
6879 vect_determine_min_output_precision_1 (vec_info *vinfo,
6880 stmt_vec_info stmt_info, tree lhs)
6882 /* Take the maximum precision required by users of the result. */
6883 unsigned int precision = 0;
6884 imm_use_iterator iter;
6885 use_operand_p use;
6886 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6888 gimple *use_stmt = USE_STMT (use);
6889 if (is_gimple_debug (use_stmt))
6890 continue;
6891 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6892 if (!use_stmt_info || !use_stmt_info->min_input_precision)
6893 return false;
6894 /* The input precision recorded for COND_EXPRs applies only to the
6895 "then" and "else" values. */
6896 gassign *assign = dyn_cast <gassign *> (use_stmt);
6897 if (assign
6898 && gimple_assign_rhs_code (assign) == COND_EXPR
6899 && use->use != gimple_assign_rhs2_ptr (assign)
6900 && use->use != gimple_assign_rhs3_ptr (assign))
6901 return false;
6902 precision = MAX (precision, use_stmt_info->min_input_precision);
6905 if (dump_enabled_p ())
6906 dump_printf_loc (MSG_NOTE, vect_location,
6907 "only the low %d bits of %T are significant\n",
6908 precision, lhs);
6909 stmt_info->min_output_precision = precision;
6910 return true;
6913 /* Calculate min_output_precision for STMT_INFO. */
6915 static void
6916 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6918 /* We're only interested in statements with a narrowable result. */
6919 tree lhs = gimple_get_lhs (stmt_info->stmt);
6920 if (!lhs
6921 || TREE_CODE (lhs) != SSA_NAME
6922 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6923 return;
6925 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6926 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6929 /* Use range information to decide whether STMT (described by STMT_INFO)
6930 could be done in a narrower type. This is effectively a forward
6931 propagation, since it uses context-independent information that applies
6932 to all users of an SSA name. */
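/* Small example (illustrative): if ranger knows that the result of

     int res = x + y;

   always lies in [0, 200], then for this truncatable PLUS_EXPR only the
   output range matters and the statement can be recorded as computable on
   8-bit unsigned elements.  */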
6934 static void
6935 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6937 tree lhs = gimple_assign_lhs (stmt);
6938 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6939 return;
6941 tree type = TREE_TYPE (lhs);
6942 if (!vect_narrowable_type_p (type))
6943 return;
6945 /* First see whether we have any useful range information for the result. */
6946 unsigned int precision = TYPE_PRECISION (type);
6947 signop sign = TYPE_SIGN (type);
6948 wide_int min_value, max_value;
6949 if (!vect_get_range_info (lhs, &min_value, &max_value))
6950 return;
6952 tree_code code = gimple_assign_rhs_code (stmt);
6953 unsigned int nops = gimple_num_ops (stmt);
6955 if (!vect_truncatable_operation_p (code))
6957 /* Handle operations that can be computed in type T if all inputs
6958 and outputs can be represented in type T. Also handle left and
6959 right shifts, where (in addition) the maximum shift amount must
6960 be less than the number of bits in T. */
6961 bool is_shift;
6962 switch (code)
6964 case LSHIFT_EXPR:
6965 case RSHIFT_EXPR:
6966 is_shift = true;
6967 break;
6969 case ABS_EXPR:
6970 case MIN_EXPR:
6971 case MAX_EXPR:
6972 case TRUNC_DIV_EXPR:
6973 case CEIL_DIV_EXPR:
6974 case FLOOR_DIV_EXPR:
6975 case ROUND_DIV_EXPR:
6976 case EXACT_DIV_EXPR:
6977 /* Modulus is excluded because it is typically calculated by doing
6978 a division, for which minimum signed / -1 isn't representable in
6979 the original signed type. We could take the division range into
6980 account instead, if handling modulus ever becomes important. */
6981 is_shift = false;
6982 break;
6984 default:
6985 return;
6987 for (unsigned int i = 1; i < nops; ++i)
6989 tree op = gimple_op (stmt, i);
6990 wide_int op_min_value, op_max_value;
6991 if (TREE_CODE (op) == INTEGER_CST)
6993 unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
6994 op_min_value = op_max_value = wi::to_wide (op, op_precision);
6996 else if (TREE_CODE (op) == SSA_NAME)
6998 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
6999 return;
7001 else
7002 return;
7004 if (is_shift && i == 2)
7006 /* There needs to be one more bit than the maximum shift amount.
7008 If the maximum shift amount is already 1 less than PRECISION
7009 then we can't narrow the shift further. Dealing with that
7010 case first ensures that we can safely use an unsigned range
7011 below.
7013 op_min_value isn't relevant, since shifts by negative amounts
7014 are UB. */
7015 if (wi::geu_p (op_max_value, precision - 1))
7016 return;
7017 unsigned int min_bits = op_max_value.to_uhwi () + 1;
7019 /* As explained below, we can convert a signed shift into an
7020 unsigned shift if the sign bit is always clear. At this
7021 point we've already processed the ranges of the output and
7022 the first input. */
7023 auto op_sign = sign;
7024 if (sign == SIGNED && !wi::neg_p (min_value))
7025 op_sign = UNSIGNED;
7026 op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
7027 precision, op_sign);
7028 op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
7029 precision, op_sign);
7031 min_value = wi::min (min_value, op_min_value, sign);
7032 max_value = wi::max (max_value, op_max_value, sign);
7036 /* Try to switch signed types for unsigned types if we can.
7037 This is better for two reasons. First, unsigned ops tend
7038 to be cheaper than signed ops. Second, it means that we can
7039 handle things like:
7041 signed char c;
7042 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
7044 as:
7046 signed char c;
7047 unsigned short res_1 = (unsigned short) c & 0xff00;
7048 int res = (int) res_1;
7050 where the intermediate result res_1 has unsigned rather than
7051 signed type. */
7052 if (sign == SIGNED && !wi::neg_p (min_value))
7053 sign = UNSIGNED;
7055 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
7056 unsigned int precision1 = wi::min_precision (min_value, sign);
7057 unsigned int precision2 = wi::min_precision (max_value, sign);
7058 unsigned int value_precision = MAX (precision1, precision2);
7059 if (value_precision >= precision)
7060 return;
7062 if (dump_enabled_p ())
7063 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
7064 " without loss of precision: %G",
7065 sign == SIGNED ? "signed" : "unsigned",
7066 value_precision, (gimple *) stmt);
7068 vect_set_operation_type (stmt_info, type, value_precision, sign);
7069 vect_set_min_input_precision (stmt_info, type, value_precision);
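/* Editor's note: an illustrative sketch, not part of the vectorizer; the
variable names are hypothetical. The narrowing that the range-based
analysis above enables corresponds to a scalar rewrite such as:
unsigned char a, b;
int res = (int) a + (int) b; // range [0, 510], fits in 9 bits
which, because the result provably fits in 16 bits, could later be
carried out on narrower elements as:
unsigned short res_1 = (unsigned short) a + (unsigned short) b;
int res = (int) res_1;
without changing the value of res. */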
7072 /* Use information about the users of STMT's result to decide whether
7073 STMT (described by STMT_INFO) could be done in a narrower type.
7074 This is effectively a backward propagation. */
7076 static void
7077 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
7079 tree_code code = gimple_assign_rhs_code (stmt);
7080 unsigned int opno = (code == COND_EXPR ? 2 : 1);
7081 tree type = TREE_TYPE (gimple_op (stmt, opno));
7082 if (!vect_narrowable_type_p (type))
7083 return;
7085 unsigned int precision = TYPE_PRECISION (type);
7086 unsigned int operation_precision, min_input_precision;
7087 switch (code)
7089 CASE_CONVERT:
7090 /* Only the bits that contribute to the output matter. Don't change
7091 the precision of the operation itself. */
7092 operation_precision = precision;
7093 min_input_precision = stmt_info->min_output_precision;
7094 break;
7096 case LSHIFT_EXPR:
7097 case RSHIFT_EXPR:
7099 tree shift = gimple_assign_rhs2 (stmt);
7100 if (TREE_CODE (shift) != INTEGER_CST
7101 || !wi::ltu_p (wi::to_widest (shift), precision))
7102 return;
7103 unsigned int const_shift = TREE_INT_CST_LOW (shift);
7104 if (code == LSHIFT_EXPR)
7106 /* Avoid creating an undefined shift.
7108 ??? We could instead use min_output_precision as-is and
7109 optimize out-of-range shifts to zero. However, only
7110 degenerate testcases shift away all their useful input data,
7111 and it isn't natural to drop input operations in the middle
7112 of vectorization. This sort of thing should really be
7113 handled before vectorization. */
7114 operation_precision = MAX (stmt_info->min_output_precision,
7115 const_shift + 1);
7116 /* We need CONST_SHIFT fewer bits of the input. */
7117 min_input_precision = (MAX (operation_precision, const_shift)
7118 - const_shift);
7120 else
7122 /* We need CONST_SHIFT extra bits to do the operation. */
7123 operation_precision = (stmt_info->min_output_precision
7124 + const_shift);
7125 min_input_precision = operation_precision;
7127 break;
7130 default:
7131 if (vect_truncatable_operation_p (code))
7133 /* Input bit N has no effect on output bits N-1 and lower. */
7134 operation_precision = stmt_info->min_output_precision;
7135 min_input_precision = operation_precision;
7136 break;
7138 return;
7141 if (operation_precision < precision)
7143 if (dump_enabled_p ())
7144 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
7145 " without affecting users: %G",
7146 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
7147 operation_precision, (gimple *) stmt);
7148 vect_set_operation_type (stmt_info, type, operation_precision,
7149 TYPE_SIGN (type));
7151 vect_set_min_input_precision (stmt_info, type, min_input_precision);
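/* Editor's note: an illustrative sketch, not part of the vectorizer; names
are hypothetical. The backward propagation works from the users: if the
only use of a 32-bit shift is a truncation to 8 bits, as in
int tmp = x << 3;
unsigned char res = (unsigned char) tmp;
then (assuming the truncated value is all that is ultimately used)
min_output_precision of the shift is 8, operation_precision becomes
MAX (8, 3 + 1) = 8, and min_input_precision becomes MAX (8, 3) - 3 = 5,
so only the low bits of x matter and the shift itself is a candidate
for being done in a narrower type. */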
7154 /* Return true if the statement described by STMT_INFO sets a boolean
7155 SSA_NAME and if we know how to vectorize this kind of statement using
7156 vector mask types. */
7158 static bool
7159 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
7161 tree lhs = gimple_get_lhs (stmt_info->stmt);
7162 tree_code code = ERROR_MARK;
7163 gassign *assign = NULL;
7164 gcond *cond = NULL;
7166 if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
7167 code = gimple_assign_rhs_code (assign);
7168 else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
7170 lhs = gimple_cond_lhs (cond);
7171 code = gimple_cond_code (cond);
7174 if (!lhs
7175 || TREE_CODE (lhs) != SSA_NAME
7176 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
7177 return false;
7179 if (code != ERROR_MARK)
7181 switch (code)
7183 CASE_CONVERT:
7184 case SSA_NAME:
7185 case BIT_NOT_EXPR:
7186 case BIT_IOR_EXPR:
7187 case BIT_XOR_EXPR:
7188 case BIT_AND_EXPR:
7189 return true;
7191 default:
7192 return TREE_CODE_CLASS (code) == tcc_comparison;
7195 else if (is_a <gphi *> (stmt_info->stmt))
7196 return true;
7197 return false;
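/* Editor's note: illustrative, hypothetical GIMPLE. Statements that
satisfy the check above include, for example:
_1 = a_2 < b_3; comparison defining a boolean
_4 = _1 & _5; BIT_AND_EXPR of booleans
_6 = (_Bool) c_7; conversion to a boolean
if (_1 != 0) gcond testing a boolean
whereas a statement whose result is not a scalar boolean SSA_NAME is
rejected. */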
7200 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
7201 a vector mask type instead of a normal vector type. Record the
7202 result in STMT_INFO->mask_precision. */
7204 static void
7205 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
7207 if (!possible_vector_mask_operation_p (stmt_info))
7208 return;
7210 /* If at least one boolean input uses a vector mask type,
7211 pick the mask type with the narrowest elements.
7213 ??? This is the traditional behavior. It should always produce
7214 the smallest number of operations, but isn't necessarily the
7215 optimal choice. For example, if we have:
7217 a = b & c
7219 where:
7221 - the user of a wants it to have a mask type for 16-bit elements (M16)
7222 - b also uses M16
7223 - c uses a mask type for 8-bit elements (M8)
7225 then picking M8 gives:
7227 - 1 M16->M8 pack for b
7228 - 1 M8 AND for a
7229 - 2 M8->M16 unpacks for the user of a
7231 whereas picking M16 would have given:
7233 - 2 M8->M16 unpacks for c
7234 - 2 M16 ANDs for a
7236 The number of operations is the same, but M16 would have given
7237 a shorter dependency chain and allowed more ILP. */
7238 unsigned int precision = ~0U;
7239 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7241 /* If the statement compares two values that shouldn't use vector masks,
7242 try comparing the values as normal scalars instead. */
7243 tree_code code = ERROR_MARK;
7244 tree op0_type;
7245 unsigned int nops = -1;
7246 unsigned int ops_start = 0;
7248 if (gassign *assign = dyn_cast <gassign *> (stmt))
7250 code = gimple_assign_rhs_code (assign);
7251 op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
7252 nops = gimple_num_ops (assign);
7253 ops_start = 1;
7255 else if (gcond *cond = dyn_cast <gcond *> (stmt))
7257 code = gimple_cond_code (cond);
7258 op0_type = TREE_TYPE (gimple_cond_lhs (cond));
7259 nops = 2;
7260 ops_start = 0;
7263 if (code != ERROR_MARK)
7265 for (unsigned int i = ops_start; i < nops; ++i)
7267 tree rhs = gimple_op (stmt, i);
7268 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
7269 continue;
7271 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
7272 if (!def_stmt_info)
7273 /* Don't let external or constant operands influence the choice.
7274 We can convert them to whichever vector type we pick. */
7275 continue;
7277 if (def_stmt_info->mask_precision)
7279 if (precision > def_stmt_info->mask_precision)
7280 precision = def_stmt_info->mask_precision;
7284 if (precision == ~0U
7285 && TREE_CODE_CLASS (code) == tcc_comparison)
7287 scalar_mode mode;
7288 tree vectype, mask_type;
7289 if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
7290 && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
7291 && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
7292 && expand_vec_cmp_expr_p (vectype, mask_type, code))
7293 precision = GET_MODE_BITSIZE (mode);
7296 else
7298 gphi *phi = as_a <gphi *> (stmt_info->stmt);
7299 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
7301 tree rhs = gimple_phi_arg_def (phi, i);
7303 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
7304 if (!def_stmt_info)
7305 /* Don't let external or constant operands influence the choice.
7306 We can convert them to whichever vector type we pick. */
7307 continue;
7309 if (def_stmt_info->mask_precision)
7311 if (precision > def_stmt_info->mask_precision)
7312 precision = def_stmt_info->mask_precision;
7317 if (dump_enabled_p ())
7319 if (precision == ~0U)
7320 dump_printf_loc (MSG_NOTE, vect_location,
7321 "using normal nonmask vectors for %G",
7322 stmt_info->stmt);
7323 else
7324 dump_printf_loc (MSG_NOTE, vect_location,
7325 "using boolean precision %d for %G",
7326 precision, stmt_info->stmt);
7329 stmt_info->mask_precision = precision;
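/* Editor's note: an illustrative sketch, hypothetical GIMPLE, assuming
32-bit int and 16-bit short and a target that supports the corresponding
vector comparisons. With the rules above, a mixed comparison chain like
_1 = a_2 < b_3; a, b are int -> mask_precision 32
_4 = c_5 < d_6; c, d are short -> mask_precision 16
_7 = _1 & _4; boolean inputs -> mask_precision 16
picks the precision of the compared operands for the leaf comparisons
and the narrowest boolean input for the AND. */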
7332 /* Handle vect_determine_precisions for STMT_INFO, given that we
7333 have already done so for the users of its result. */
7335 void
7336 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
7338 vect_determine_min_output_precision (vinfo, stmt_info);
7339 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
7341 vect_determine_precisions_from_range (stmt_info, stmt);
7342 vect_determine_precisions_from_users (stmt_info, stmt);
7346 /* Walk backwards through the vectorizable region to determine the
7347 values of these fields:
7349 - min_output_precision
7350 - min_input_precision
7351 - operation_precision
7352 - operation_sign. */
7354 void
7355 vect_determine_precisions (vec_info *vinfo)
7357 basic_block *bbs = vinfo->bbs;
7358 unsigned int nbbs = vinfo->nbbs;
7360 DUMP_VECT_SCOPE ("vect_determine_precisions");
7362 for (unsigned int i = 0; i < nbbs; i++)
7364 basic_block bb = bbs[i];
7365 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7367 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
7368 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7369 vect_determine_mask_precision (vinfo, stmt_info);
7371 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7373 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
7374 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7375 vect_determine_mask_precision (vinfo, stmt_info);
7378 for (unsigned int i = 0; i < nbbs; i++)
7380 basic_block bb = bbs[nbbs - i - 1];
7381 for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
7383 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
7384 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7385 vect_determine_stmt_precisions (vinfo, stmt_info);
7387 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7389 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
7390 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7391 vect_determine_stmt_precisions (vinfo, stmt_info);
7396 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
7398 struct vect_recog_func
7400 vect_recog_func_ptr fn;
7401 const char *name;
7404 /* Note that ordering matters - the first pattern matching on a stmt is
7405 taken, which means the more complex pattern usually needs to precede the
7406 less complex ones (widen_sum only after dot_prod or sad, for example). */
7407 static vect_recog_func vect_vect_recog_func_ptrs[] = {
7408 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
7409 { vect_recog_bit_insert_pattern, "bit_insert" },
7410 { vect_recog_abd_pattern, "abd" },
7411 { vect_recog_over_widening_pattern, "over_widening" },
7412 /* Must come after over_widening, which narrows the shift as much as
7413 possible beforehand. */
7414 { vect_recog_average_pattern, "average" },
7415 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
7416 { vect_recog_mulhs_pattern, "mult_high" },
7417 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
7418 { vect_recog_widen_mult_pattern, "widen_mult" },
7419 { vect_recog_dot_prod_pattern, "dot_prod" },
7420 { vect_recog_sad_pattern, "sad" },
7421 { vect_recog_widen_sum_pattern, "widen_sum" },
7422 { vect_recog_pow_pattern, "pow" },
7423 { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
7424 { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
7425 { vect_recog_widen_shift_pattern, "widen_shift" },
7426 { vect_recog_rotate_pattern, "rotate" },
7427 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
7428 { vect_recog_divmod_pattern, "divmod" },
7429 { vect_recog_mod_var_pattern, "modvar" },
7430 { vect_recog_mult_pattern, "mult" },
7431 { vect_recog_sat_add_pattern, "sat_add" },
7432 { vect_recog_sat_sub_pattern, "sat_sub" },
7433 { vect_recog_sat_trunc_pattern, "sat_trunc" },
7434 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
7435 { vect_recog_gcond_pattern, "gcond" },
7436 { vect_recog_bool_pattern, "bool" },
7437 /* This must come before mask conversion, and includes the parts
7438 of mask conversion that are needed for gather and scatter
7439 internal functions. */
7440 { vect_recog_gather_scatter_pattern, "gather_scatter" },
7441 { vect_recog_cond_store_pattern, "cond_store" },
7442 { vect_recog_mask_conversion_pattern, "mask_conversion" },
7443 { vect_recog_widen_plus_pattern, "widen_plus" },
7444 { vect_recog_widen_minus_pattern, "widen_minus" },
7445 { vect_recog_widen_abd_pattern, "widen_abd" },
7446 /* These must come after the double widening ones. */
7449 /* Mark statements that are involved in a pattern. */
7451 void
7452 vect_mark_pattern_stmts (vec_info *vinfo,
7453 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
7454 tree pattern_vectype)
7456 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
7457 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7459 gimple *orig_pattern_stmt = NULL;
7460 if (is_pattern_stmt_p (orig_stmt_info))
7462 /* We're replacing a statement in an existing pattern definition
7463 sequence. */
7464 orig_pattern_stmt = orig_stmt_info->stmt;
7465 if (dump_enabled_p ())
7466 dump_printf_loc (MSG_NOTE, vect_location,
7467 "replacing earlier pattern %G", orig_pattern_stmt);
7469 /* To keep the book-keeping simple, just swap the lhs of the
7470 old and new statements, so that the old one has a valid but
7471 unused lhs. */
7472 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
7473 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
7474 gimple_set_lhs (pattern_stmt, old_lhs);
7476 if (dump_enabled_p ())
7477 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
7479 /* Switch to the statement that ORIG replaces. */
7480 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
7482 /* We shouldn't be replacing the main pattern statement. */
7483 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
7484 != orig_pattern_stmt);
7487 if (def_seq)
7488 for (gimple_stmt_iterator si = gsi_start (def_seq);
7489 !gsi_end_p (si); gsi_next (&si))
7491 if (dump_enabled_p ())
7492 dump_printf_loc (MSG_NOTE, vect_location,
7493 "extra pattern stmt: %G", gsi_stmt (si));
7494 stmt_vec_info pattern_stmt_info
7495 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
7496 orig_stmt_info, pattern_vectype);
7497 /* Stmts in the def sequence are not vectorizable cycle or
7498 induction defs, instead they should all be vect_internal_def
7499 feeding the main pattern stmt which retains this def type. */
7500 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
7503 if (orig_pattern_stmt)
7505 vect_init_pattern_stmt (vinfo, pattern_stmt,
7506 orig_stmt_info, pattern_vectype);
7508 /* Insert all the new pattern statements before the original one. */
7509 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7510 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
7511 orig_def_seq);
7512 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
7513 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
7515 /* Remove the pattern statement that this new pattern replaces. */
7516 gsi_remove (&gsi, false);
7518 else
7519 vect_set_pattern_stmt (vinfo,
7520 pattern_stmt, orig_stmt_info, pattern_vectype);
7522 /* For any conditionals mark them as vect_condition_def. */
7523 if (is_a <gcond *> (pattern_stmt))
7524 STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
7526 /* Transfer reduction path info to the pattern. */
7527 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
7529 gimple_match_op op;
7530 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
7531 gcc_unreachable ();
7532 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
7533 /* Search the pattern def sequence and the main pattern stmt. Note
7534 we may have inserted all into a containing pattern def sequence
7535 so the following is a bit awkward. */
7536 gimple_stmt_iterator si;
7537 gimple *s;
7538 if (def_seq)
7540 si = gsi_start (def_seq);
7541 s = gsi_stmt (si);
7542 gsi_next (&si);
7544 else
7546 si = gsi_none ();
7547 s = pattern_stmt;
7551 bool found = false;
7552 if (gimple_extract_op (s, &op))
7553 for (unsigned i = 0; i < op.num_ops; ++i)
7554 if (op.ops[i] == lookfor)
7556 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7557 lookfor = gimple_get_lhs (s);
7558 found = true;
7559 break;
7561 if (s == pattern_stmt)
7563 if (!found && dump_enabled_p ())
7564 dump_printf_loc (MSG_NOTE, vect_location,
7565 "failed to update reduction index.\n");
7566 break;
7568 if (gsi_end_p (si))
7569 s = pattern_stmt;
7570 else
7572 s = gsi_stmt (si);
7573 if (s == pattern_stmt)
7574 /* Found the end inside a bigger pattern def seq. */
7575 si = gsi_none ();
7576 else
7577 gsi_next (&si);
7579 } while (1);
7583 /* Function vect_pattern_recog_1
7585 Input:
7586 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
7587 computation pattern.
7588 STMT_INFO: A stmt from which the pattern search should start.
7590 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
7591 a sequence of statements that has the same functionality and can be
7592 used to replace STMT_INFO. It returns the last statement in the sequence
7593 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7594 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7595 statement, having first checked that the target supports the new operation
7596 in that type.
7598 This function also does some bookkeeping, as explained in the documentation
7599 for vect_pattern_recog. */
7601 static void
7602 vect_pattern_recog_1 (vec_info *vinfo,
7603 const vect_recog_func &recog_func, stmt_vec_info stmt_info)
7605 gimple *pattern_stmt;
7606 tree pattern_vectype;
7608 /* If this statement has already been replaced with pattern statements,
7609 leave the original statement alone, since the first match wins.
7610 Instead try to match against the definition statements that feed
7611 the main pattern statement. */
7612 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7614 gimple_stmt_iterator gsi;
7615 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7616 !gsi_end_p (gsi); gsi_next (&gsi))
7617 vect_pattern_recog_1 (vinfo, recog_func,
7618 vinfo->lookup_stmt (gsi_stmt (gsi)));
7619 return;
7622 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7623 pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
7624 if (!pattern_stmt)
7626 /* Clear any half-formed pattern definition sequence. */
7627 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
7628 return;
7631 /* Found a vectorizable pattern. */
7632 if (dump_enabled_p ())
7633 dump_printf_loc (MSG_NOTE, vect_location,
7634 "%s pattern recognized: %G",
7635 recog_func.name, pattern_stmt);
7637 /* Mark the stmts that are involved in the pattern. */
7638 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
7642 /* Function vect_pattern_recog
7644 Input:
7645 VINFO - a vec_info describing the loop or basic-block region in which
7646 we want to look for computation idioms.
7648 Output - for each computation idiom that is detected we create a new stmt
7649 that provides the same functionality and that can be vectorized. We
7650 also record some information in the stmt_vec_info of the relevant
7651 stmts, as explained below:
7653 At the entry to this function we have the following stmts, with the
7654 following initial value in the STMT_VINFO fields:
7656 stmt in_pattern_p related_stmt vec_stmt
7657 S1: a_i = .... - - -
7658 S2: a_2 = ..use(a_i).. - - -
7659 S3: a_1 = ..use(a_2).. - - -
7660 S4: a_0 = ..use(a_1).. - - -
7661 S5: ... = ..use(a_0).. - - -
7663 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7664 represented by a single stmt. We then:
7665 - create a new stmt S6 equivalent to the pattern (the stmt is not
7666 inserted into the code)
7667 - fill in the STMT_VINFO fields as follows:
7669 in_pattern_p related_stmt vec_stmt
7670 S1: a_i = .... - - -
7671 S2: a_2 = ..use(a_i).. - - -
7672 S3: a_1 = ..use(a_2).. - - -
7673 S4: a_0 = ..use(a_1).. true S6 -
7674 '---> S6: a_new = .... - S4 -
7675 S5: ... = ..use(a_0).. - - -
7677 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7678 to each other through the RELATED_STMT field).
7680 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7681 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7682 remain irrelevant unless used by stmts other than S4.
7684 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7685 (because they are marked as irrelevant). It will vectorize S6, and record
7686 a pointer to the new vector stmt VS6 from S6 (as usual).
7687 S4 will be skipped, and S5 will be vectorized as usual:
7689 in_pattern_p related_stmt vec_stmt
7690 S1: a_i = .... - - -
7691 S2: a_2 = ..use(a_i).. - - -
7692 S3: a_1 = ..use(a_2).. - - -
7693 > VS6: va_new = .... - - -
7694 S4: a_0 = ..use(a_1).. true S6 VS6
7695 '---> S6: a_new = .... - S4 VS6
7696 > VS5: ... = ..vuse(va_new).. - - -
7697 S5: ... = ..use(a_0).. - - -
7699 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7700 elsewhere), and we'll end up with:
7702 VS6: va_new = ....
7703 VS5: ... = ..vuse(va_new)..
7705 In case of more than one pattern statements, e.g., widen-mult with
7706 intermediate type:
7708 S1 a_t = ;
7709 S2 a_T = (TYPE) a_t;
7710 '--> S3: a_it = (interm_type) a_t;
7711 S4 prod_T = a_T * CONST;
7712 '--> S5: prod_T' = a_it w* CONST;
7714 there may be other users of a_T outside the pattern. In that case S2 will
7715 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7716 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7717 be recorded in S3. */
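/* Editor's note: as a concrete, hypothetical source-level instance of the
widen-mult case above, a loop such as
signed char a[N];
int prod[N];
for (int i = 0; i < N; ++i)
prod[i] = a[i] * 37;
gives rise to statements of the S1/S2/S4 shape, and the pattern adds the
intermediate-type statements S3/S5 so that the multiplication can be
vectorized as a widening multiply. */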
7719 void
7720 vect_pattern_recog (vec_info *vinfo)
7722 basic_block *bbs = vinfo->bbs;
7723 unsigned int nbbs = vinfo->nbbs;
7725 vect_determine_precisions (vinfo);
7727 DUMP_VECT_SCOPE ("vect_pattern_recog");
7729 /* Scan through the stmts in the region, applying the pattern recognition
7730 functions starting at each stmt visited. */
7731 for (unsigned i = 0; i < nbbs; i++)
7733 basic_block bb = bbs[i];
7735 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7737 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
7739 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
7740 continue;
7742 /* Scan over all generic vect_recog_xxx_pattern functions. */
7743 for (const auto &func_ptr : vect_vect_recog_func_ptrs)
7744 vect_pattern_recog_1 (vinfo, func_ptr,
7745 stmt_info);
7749 /* After this no more add_stmt calls are allowed. */
7750 vinfo->stmt_vec_info_ro = true;
7753 /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code
7754 or internal_fn contained in ch, respectively. */
7755 gimple *
7756 vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7758 gcc_assert (op0 != NULL_TREE);
7759 if (ch.is_tree_code ())
7760 return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
7762 gcc_assert (ch.is_internal_fn ());
7763 gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
7764 op1 == NULL_TREE ? 1 : 2,
7765 op0, op1);
7766 gimple_call_set_lhs (stmt, lhs);
7767 return stmt;
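/* Editor's note: an illustrative, hypothetical use, assuming lhs, a and b
are existing SSA_NAME trees of a suitable type:
vect_gimple_build (lhs, PLUS_EXPR, a, b);
-> builds the assignment lhs = a + b
vect_gimple_build (lhs, as_combined_fn (IFN_SQRT), a, NULL_TREE);
-> builds the internal call lhs = .SQRT (a)
the code_helper argument is constructed implicitly from either a
tree_code or a combined_fn. */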