gcc/tree-vect-generic.cc
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2025 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "gimple-fold.h"
44 #include "gimple-match.h"
45 #include "recog.h" /* FIXME: for insn_data */
46 #include "optabs-libfuncs.h"
47 #include "cfgloop.h"
48 #include "tree-vectorizer.h"
51 /* Build a ternary operation and gimplify it. Emit code before GSI.
52 Return the gimple_val holding the result. */
54 static tree
55 gimplify_build3 (gimple_stmt_iterator *gsi, enum tree_code code,
56 tree type, tree a, tree b, tree c)
58 location_t loc = gimple_location (gsi_stmt (*gsi));
59 return gimple_build (gsi, true, GSI_SAME_STMT, loc, code, type, a, b, c);
62 /* Build a binary operation and gimplify it. Emit code before GSI.
63 Return the gimple_val holding the result. */
65 static tree
66 gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code,
67 tree type, tree a, tree b)
69 location_t loc = gimple_location (gsi_stmt (*gsi));
70 return gimple_build (gsi, true, GSI_SAME_STMT, loc, code, type, a, b);
73 /* Build a unary operation and gimplify it. Emit code before GSI.
74 Return the gimple_val holding the result. */
76 static tree
77 gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
78 tree a)
80 location_t loc = gimple_location (gsi_stmt (*gsi));
81 return gimple_build (gsi, true, GSI_SAME_STMT, loc, code, type, a);
85 /* Return the number of elements in a vector type TYPE that we have
86 already decided needs to be expanded piecewise. We don't support
87 this kind of expansion for variable-length vectors, since we should
88 always check for target support before introducing uses of those. */
89 static unsigned int
90 nunits_for_known_piecewise_op (const_tree type)
92 return TYPE_VECTOR_SUBPARTS (type).to_constant ();
95 /* Return true if TYPE1 has more elements than TYPE2, where either
96 type may be a vector or a scalar. */
98 static inline bool
99 subparts_gt (tree type1, tree type2)
101 poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
102 poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
103 return known_gt (n1, n2);
106 static GTY(()) tree vector_inner_type;
107 static GTY(()) tree vector_last_type;
108 static GTY(()) int vector_last_nunits;
110 /* Return a suitable vector type made of NUNITS units each of mode
111    "word_mode" (the global variable).  */
112 static tree
113 build_word_mode_vector_type (int nunits)
115 if (!vector_inner_type)
116 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
117 else if (vector_last_nunits == nunits)
119 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
120 return vector_last_type;
123 vector_last_nunits = nunits;
124 vector_last_type = build_vector_type (vector_inner_type, nunits);
125 return vector_last_type;
128 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
129 tree, tree, tree, tree, tree, enum tree_code,
130 tree);
132 /* Extract the vector element of type TYPE at BITPOS with BITSIZE from T
133 and return it. */
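   /* For instance, extracting the third element of a V4SI vector T would
      produce BIT_FIELD_REF <T, 32, 64>, i.e. 32 bits at bit offset 64.  */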
135 tree
136 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
137 tree t, tree bitsize, tree bitpos)
139 /* We're using the resimplify API and maybe_push_res_to_seq to
140 simplify the BIT_FIELD_REF but restrict the simplification to
141 a single stmt while at the same time following SSA edges for
142 simplification with already emitted CTORs. */
143 gimple_match_op opr;
144 opr.set_op (BIT_FIELD_REF, type, t, bitsize, bitpos);
145 opr.resimplify (NULL, follow_all_ssa_edges);
146 gimple_seq stmts = NULL;
147 tree res = maybe_push_res_to_seq (&opr, &stmts);
148 if (!res)
150       /* This can happen if SSA_NAMEs with SSA_NAME_OCCURS_IN_ABNORMAL_PHI set
151          are involved.  Build the BIT_FIELD_REF manually in that case.  */
152 t = build3 (BIT_FIELD_REF, type, t, bitsize, bitpos);
153 res = make_ssa_name (type);
154 gimple *g = gimple_build_assign (res, t);
155 gsi_insert_before (gsi, g, GSI_SAME_STMT);
156 return res;
158 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
159 return res;
162 static tree
163 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
164 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
165 enum tree_code code, tree type ATTRIBUTE_UNUSED)
167 tree rhs_type = inner_type;
169 /* For ABSU_EXPR, use the signed type for the rhs if the rhs was signed. */
170 if (code == ABSU_EXPR
171 && ANY_INTEGRAL_TYPE_P (TREE_TYPE (a))
172 && !TYPE_UNSIGNED (TREE_TYPE (a)))
173 rhs_type = signed_type_for (rhs_type);
175 a = tree_vec_extract (gsi, rhs_type, a, bitsize, bitpos);
176 return gimplify_build1 (gsi, code, inner_type, a);
179 static tree
180 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
181 tree bitpos, tree bitsize, enum tree_code code,
182 tree type ATTRIBUTE_UNUSED)
184 if (VECTOR_TYPE_P (TREE_TYPE (a)))
185 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
186 if (VECTOR_TYPE_P (TREE_TYPE (b)))
187 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
188 return gimplify_build2 (gsi, code, inner_type, a, b);
191 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
193    INNER_TYPE is the type of the elements of A and B.  The returned
195    expression is of a signed integer type with the same size as
196    INNER_TYPE.  */
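/* For instance, comparing two 32-bit integer elements with LT_EXPR yields
   the element value 0xffffffff (-1) when the comparison holds and 0
   otherwise.  */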
197 static tree
198 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
199 tree bitpos, tree bitsize, enum tree_code code, tree type)
201 tree stype = TREE_TYPE (type);
202 tree cst_false = build_zero_cst (stype);
203 tree cst_true = build_all_ones_cst (stype);
204 tree cmp;
206 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
207 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
209 cmp = build2 (code, boolean_type_node, a, b);
210 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
213 /* Expand vector addition to scalars. This does bit twiddling
214 in order to increase parallelism:
216 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
217 (a ^ b) & 0x80808080
219 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
220 (a ^ ~b) & 0x80808080
222 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
224 This optimization should be done only if 4 vector items or more
225 fit into a word. */
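/* For instance, with 8-bit elements packed four to a 32-bit word, adding
   the lanes 0xff and 0x01 gives 0x7f + 0x01 = 0x80 in the low seven bits,
   and xoring with ((0xff ^ 0x01) & 0x80) = 0x80 fixes up the sign bit,
   producing the correctly wrapped result 0x00 with no carry leaking into
   the neighbouring lane.  */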
226 static tree
227 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
228 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
229 enum tree_code code, tree type ATTRIBUTE_UNUSED)
231 unsigned int width = vector_element_bits (TREE_TYPE (a));
232 tree inner_type = TREE_TYPE (TREE_TYPE (a));
233 unsigned HOST_WIDE_INT max;
234 tree low_bits, high_bits, a_low, b_low, result_low, signs;
236 max = GET_MODE_MASK (TYPE_MODE (inner_type));
237 low_bits = build_replicated_int_cst (word_type, width, max >> 1);
238 high_bits = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
240 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
241 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
243 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
244 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
245 if (code == PLUS_EXPR)
246 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
247 else
249 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
250 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
253 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
254 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
255 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
258 static tree
259 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
260 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
261 tree bitsize ATTRIBUTE_UNUSED,
262 enum tree_code code ATTRIBUTE_UNUSED,
263 tree type ATTRIBUTE_UNUSED)
265 unsigned int width = vector_element_bits (TREE_TYPE (b));
266 tree inner_type = TREE_TYPE (TREE_TYPE (b));
267 HOST_WIDE_INT max;
268 tree low_bits, high_bits, b_low, result_low, signs;
270 max = GET_MODE_MASK (TYPE_MODE (inner_type));
271 low_bits = build_replicated_int_cst (word_type, width, max >> 1);
272 high_bits = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
274 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
276 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
277 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
278 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
279 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
280 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
283 /* Expand a vector operation to scalars, by using many operations
284 whose type is the vector type's inner type. */
285 static tree
286 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
287 tree type, tree inner_type,
288 tree a, tree b, enum tree_code code,
289 bool parallel_p, tree ret_type = NULL_TREE)
291 vec<constructor_elt, va_gc> *v;
292 tree part_width = TYPE_SIZE (inner_type);
293 tree index = bitsize_int (0);
294 int nunits = nunits_for_known_piecewise_op (type);
295 int delta = (VECTOR_TYPE_P (inner_type)
296 ? nunits_for_known_piecewise_op (inner_type) : 1);
297 int i;
298 location_t loc = gimple_location (gsi_stmt (*gsi));
300 if (nunits == 1
301 || warning_suppressed_p (gsi_stmt (*gsi),
302 OPT_Wvector_operation_performance))
303 /* Do not diagnose decomposing single element vectors or when
304 decomposing vectorizer produced operations. */
306 else if (ret_type || !parallel_p)
307 warning_at (loc, OPT_Wvector_operation_performance,
308 "vector operation will be expanded piecewise");
309 else
310 warning_at (loc, OPT_Wvector_operation_performance,
311 "vector operation will be expanded in parallel");
313 if (!ret_type)
314 ret_type = type;
315 vec_alloc (v, (nunits + delta - 1) / delta);
316 bool constant_p = true;
317 for (i = 0; i < nunits;
318 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
320 tree result = f (gsi, inner_type, a, b, index, part_width, code,
321 ret_type);
322 if (!CONSTANT_CLASS_P (result))
323 constant_p = false;
324 constructor_elt ce = {NULL_TREE, result};
325 v->quick_push (ce);
328 if (constant_p)
329 return build_vector_from_ctor (ret_type, v);
330 else
331 return build_constructor (ret_type, v);
334 /* Expand a vector operation to scalars with the freedom to use
335 a scalar integer type, or to use a different size for the items
336 in the vector type. */
337 static tree
338 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
339 tree a, tree b, enum tree_code code)
341 tree result, compute_type;
342 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
343 location_t loc = gimple_location (gsi_stmt (*gsi));
345 /* We have three strategies. If the type is already correct, just do
346 the operation an element at a time. Else, if the vector is wider than
347 one word, do it a word at a time; finally, if the vector is smaller
348 than one word, do it as a scalar. */
349 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
350 return expand_vector_piecewise (gsi, f,
351 type, TREE_TYPE (type),
352 a, b, code, true);
353 else if (n_words > 1)
355 tree word_type = build_word_mode_vector_type (n_words);
356 result = expand_vector_piecewise (gsi, f,
357 word_type, TREE_TYPE (word_type),
358 a, b, code, true);
359 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
360 GSI_SAME_STMT);
362 else
364 /* Use a single scalar operation with a mode no wider than word_mode. */
365 if (!warning_suppressed_p (gsi_stmt (*gsi),
366 OPT_Wvector_operation_performance))
367 warning_at (loc, OPT_Wvector_operation_performance,
368 "vector operation will be expanded with a "
369 "single scalar operation");
370 scalar_int_mode mode
371 = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
372 compute_type = lang_hooks.types.type_for_mode (mode, 1);
373 result = f (gsi, compute_type, a, b, bitsize_zero_node,
374 TYPE_SIZE (compute_type), code, type);
377 return result;
380 /* Expand a vector operation to scalars; for integer types we can use
381 special bit twiddling tricks to do the sums a word at a time, using
382 function F_PARALLEL instead of F. These tricks are done only if
383 they can process at least four items, that is, only if the vector
384 holds at least four items and if a word can hold four items. */
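/* For example, on a 32-bit target a V8QI addition (four 8-bit elements per
   word, eight elements in total) is expanded a word at a time via
   F_PARALLEL, whereas a V2SI addition does not qualify and is expanded
   piecewise.  */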
385 static tree
386 expand_vector_addition (gimple_stmt_iterator *gsi,
387 elem_op_func f, elem_op_func f_parallel,
388 tree type, tree a, tree b, enum tree_code code)
390 int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
392 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
393 && parts_per_word >= 4
394 && nunits_for_known_piecewise_op (type) >= 4)
395 return expand_vector_parallel (gsi, f_parallel,
396 type, a, b, code);
397 else
398 return expand_vector_piecewise (gsi, f,
399 type, TREE_TYPE (type),
400 a, b, code, false);
403 /* Expand vector comparison expression OP0 CODE OP1 if the compare optab
404 is not implemented. */
406 static tree
407 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
408 tree op1, enum tree_code code)
410 if (expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code))
411 return NULL_TREE;
413 tree t;
414 if (VECTOR_BOOLEAN_TYPE_P (type)
415 && SCALAR_INT_MODE_P (TYPE_MODE (type))
416 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
417 TYPE_VECTOR_SUBPARTS (type)
418 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
419 (TREE_TYPE (type)))))
421 tree inner_type = TREE_TYPE (TREE_TYPE (op0));
422 tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
423 tree index = bitsize_int (0);
424 int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
425 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
426 tree ret_type = build_nonstandard_integer_type (prec, 1);
427 tree ret_inner_type = boolean_type_node;
428 int i;
429 location_t loc = gimple_location (gsi_stmt (*gsi));
430 t = build_zero_cst (ret_type);
432 if (TYPE_PRECISION (ret_inner_type) != 1)
433 ret_inner_type = build_nonstandard_integer_type (1, 1);
434 if (!warning_suppressed_p (gsi_stmt (*gsi),
435 OPT_Wvector_operation_performance))
436 warning_at (loc, OPT_Wvector_operation_performance,
437 "vector operation will be expanded piecewise");
438 for (i = 0; i < nunits;
439 i++, index = int_const_binop (PLUS_EXPR, index, part_width))
441 tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
442 index);
443 tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
444 index);
445 tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
446 t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
447 bitsize_int (i));
449 t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
451 else
452 t = expand_vector_piecewise (gsi, do_compare, type,
453 TREE_TYPE (TREE_TYPE (op0)), op0, op1,
454 code, false);
456 return t;
459 /* Helper function of expand_vector_divmod.  Gimplify an RSHIFT_EXPR in the
460    type of OP0 with the shift counts from the SHIFTCNTS array and return the
461    temporary holding the result if successful, otherwise return NULL_TREE.  */
462 static tree
463 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
465 optab op;
466 unsigned int i, nunits = nunits_for_known_piecewise_op (type);
467 bool scalar_shift = true;
469 for (i = 1; i < nunits; i++)
471 if (shiftcnts[i] != shiftcnts[0])
472 scalar_shift = false;
475 if (scalar_shift && shiftcnts[0] == 0)
476 return op0;
478 if (scalar_shift)
480 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
481 if (op != unknown_optab
482 && can_implement_p (op, TYPE_MODE (type)))
483 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
484 build_int_cst (NULL_TREE, shiftcnts[0]));
487 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
488 if (op != unknown_optab
489 && can_implement_p (op, TYPE_MODE (type)))
491 tree_vector_builder vec (type, nunits, 1);
492 for (i = 0; i < nunits; i++)
493 vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
494 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
497 return NULL_TREE;
500 /* Try to expand integer vector division by constant using
501 widening multiply, shifts and additions. */
502 static tree
503 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
504 tree op1, enum tree_code code)
506 bool use_pow2 = true;
507 bool has_vector_shift = true;
508 bool use_abs_op1 = false;
509 int mode = -1, this_mode;
510 int pre_shift = -1, post_shift;
511 unsigned int nunits = nunits_for_known_piecewise_op (type);
512 int *shifts = XALLOCAVEC (int, nunits * 4);
513 int *pre_shifts = shifts + nunits;
514 int *post_shifts = pre_shifts + nunits;
515 int *shift_temps = post_shifts + nunits;
516 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
517 int prec = TYPE_PRECISION (TREE_TYPE (type));
518 unsigned int i;
519 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
520 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
521 tree cur_op, mulcst, tem;
522 optab op;
524 if (prec > HOST_BITS_PER_WIDE_INT)
525 return NULL_TREE;
527 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
528 if (op == unknown_optab
529 || !can_implement_p (op, TYPE_MODE (type)))
530 has_vector_shift = false;
532   /* Analysis phase.  Determine whether all op1 elements are powers of two,
533      in which case the operation can be expanded using shifts (or, for
534      remainder, using masking).  Additionally compute the multiplicative
535      constants and the pre- and post-shifts if the division is to be expanded
536      using widening or high-part multiplication plus shifts.  */
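  /* For instance, an unsigned 32-bit division by the constant 3 is expanded
     as q = (op0 h* 0xaaaaaaab) >> 1, i.e. a MULT_HIGHPART_EXPR by the
     reciprocal multiplier followed by a post-shift of 1, with no pre-shift.  */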
537 for (i = 0; i < nunits; i++)
539 tree cst = VECTOR_CST_ELT (op1, i);
540 unsigned HOST_WIDE_INT ml;
542 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
543 return NULL_TREE;
544 pre_shifts[i] = 0;
545 post_shifts[i] = 0;
546 mulc[i] = 0;
547 if (use_pow2
548 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
549 use_pow2 = false;
550 if (use_pow2)
552 shifts[i] = tree_log2 (cst);
553 if (shifts[i] != shifts[0]
554 && code == TRUNC_DIV_EXPR
555 && !has_vector_shift)
556 use_pow2 = false;
558 if (mode == -2)
559 continue;
560 if (sign_p == UNSIGNED)
562 unsigned HOST_WIDE_INT mh;
563 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
565 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
566 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
567 return NULL_TREE;
569 if (d <= 1)
571 mode = -2;
572 continue;
575 /* Find a suitable multiplier and right shift count instead of
576 directly dividing by D. */
577 mh = choose_multiplier (d, prec, prec, &ml, &post_shift);
579 /* If the suggested multiplier is more than PREC bits, we can
580 do better for even divisors, using an initial right shift. */
581 if ((mh != 0 && (d & 1) == 0)
582 || (!has_vector_shift && pre_shift != -1))
584 if (has_vector_shift)
585 pre_shift = ctz_or_zero (d);
586 else if (pre_shift == -1)
588 unsigned int j;
589 for (j = 0; j < nunits; j++)
591 tree cst2 = VECTOR_CST_ELT (op1, j);
592 unsigned HOST_WIDE_INT d2;
593 int this_pre_shift;
595 if (!tree_fits_uhwi_p (cst2))
596 return NULL_TREE;
597 d2 = tree_to_uhwi (cst2) & mask;
598 if (d2 == 0)
599 return NULL_TREE;
600 this_pre_shift = floor_log2 (d2 & -d2);
601 if (pre_shift == -1 || this_pre_shift < pre_shift)
602 pre_shift = this_pre_shift;
604 if (i != 0 && pre_shift != 0)
606 /* Restart. */
607 i = -1U;
608 mode = -1;
609 continue;
612 if (pre_shift != 0)
614 if ((d >> pre_shift) <= 1)
616 mode = -2;
617 continue;
619 mh = choose_multiplier (d >> pre_shift, prec,
620 prec - pre_shift,
621 &ml, &post_shift);
622 gcc_assert (!mh);
623 pre_shifts[i] = pre_shift;
626 if (!mh)
627 this_mode = 0;
628 else
629 this_mode = 1;
631 else
633 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
634 unsigned HOST_WIDE_INT abs_d;
636 if (d == -1)
637 return NULL_TREE;
639 /* Since d might be INT_MIN, we have to cast to
640 unsigned HOST_WIDE_INT before negating to avoid
641 undefined signed overflow. */
642 abs_d = (d >= 0
643 ? (unsigned HOST_WIDE_INT) d
644 : - (unsigned HOST_WIDE_INT) d);
646 /* n rem d = n rem -d */
647 if (code == TRUNC_MOD_EXPR && d < 0)
649 d = abs_d;
650 use_abs_op1 = true;
652 if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
654 /* This case is not handled correctly below. */
655 mode = -2;
656 continue;
658 if (abs_d <= 1)
660 mode = -2;
661 continue;
664 choose_multiplier (abs_d, prec, prec - 1, &ml,
665 &post_shift);
666 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
668 this_mode = 4 + (d < 0);
669 ml |= HOST_WIDE_INT_M1U << (prec - 1);
671 else
672 this_mode = 2 + (d < 0);
674 mulc[i] = ml;
675 post_shifts[i] = post_shift;
676 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
677 || post_shift >= prec
678 || pre_shifts[i] >= prec)
679 this_mode = -2;
681 if (i == 0)
682 mode = this_mode;
683 else if (mode != this_mode)
684 mode = -2;
687 if (use_pow2)
689 tree addend = NULL_TREE;
690 if (sign_p == SIGNED)
692 tree uns_type;
694 /* Both division and remainder sequences need
695 op0 < 0 ? mask : 0 computed. It can be either computed as
696 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
697 if none of the shifts is 0, or as the conditional. */
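	  /* For instance, for 32-bit elements divided by 4 the addend is
	     op0 < 0 ? 3 : 0, so that -5 / 4 is computed as (-5 + 3) >> 2 == -1
	     rather than the floor result -2.  */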
698 for (i = 0; i < nunits; i++)
699 if (shifts[i] == 0)
700 break;
701 uns_type
702 = build_vector_type (build_nonstandard_integer_type (prec, 1),
703 nunits);
704 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
706 for (i = 0; i < nunits; i++)
707 shift_temps[i] = prec - 1;
708 cur_op = add_rshift (gsi, type, op0, shift_temps);
709 if (cur_op != NULL_TREE)
711 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
712 uns_type, cur_op);
713 for (i = 0; i < nunits; i++)
714 shift_temps[i] = prec - shifts[i];
715 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
716 if (cur_op != NULL_TREE)
717 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
718 type, cur_op);
721 tree mask_type = truth_type_for (type);
722 if (addend == NULL_TREE
723 && expand_vec_cmp_expr_p (type, mask_type, LT_EXPR)
724 && expand_vec_cond_expr_p (type, mask_type))
726 tree zero, cst, mask_type, mask;
727 gimple *stmt, *cond;
729 mask_type = truth_type_for (type);
730 zero = build_zero_cst (type);
731 mask = make_ssa_name (mask_type);
732 cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
733 gsi_insert_before (gsi, cond, GSI_SAME_STMT);
734 tree_vector_builder vec (type, nunits, 1);
735 for (i = 0; i < nunits; i++)
736 vec.quick_push (build_int_cst (TREE_TYPE (type),
737 (HOST_WIDE_INT_1U
738 << shifts[i]) - 1));
739 cst = vec.build ();
740 addend = make_ssa_name (type);
741 stmt
742 = gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
743 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
746 if (code == TRUNC_DIV_EXPR)
748 if (sign_p == UNSIGNED)
750 /* q = op0 >> shift; */
751 cur_op = add_rshift (gsi, type, op0, shifts);
752 if (cur_op != NULL_TREE)
753 return cur_op;
755 else if (addend != NULL_TREE)
757 /* t1 = op0 + addend;
758 q = t1 >> shift; */
759 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
760 if (op != unknown_optab
761 && can_implement_p (op, TYPE_MODE (type)))
763 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
764 cur_op = add_rshift (gsi, type, cur_op, shifts);
765 if (cur_op != NULL_TREE)
766 return cur_op;
770 else
772 tree mask;
773 tree_vector_builder vec (type, nunits, 1);
774 for (i = 0; i < nunits; i++)
775 vec.quick_push (build_int_cst (TREE_TYPE (type),
776 (HOST_WIDE_INT_1U
777 << shifts[i]) - 1));
778 mask = vec.build ();
779 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
780 if (op != unknown_optab
781 && can_implement_p (op, TYPE_MODE (type)))
783 if (sign_p == UNSIGNED)
784 /* r = op0 & mask; */
785 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
786 else if (addend != NULL_TREE)
788 /* t1 = op0 + addend;
789 t2 = t1 & mask;
790 r = t2 - addend; */
791 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
792 if (op != unknown_optab
793 && can_implement_p (op, TYPE_MODE (type)))
795 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
796 addend);
797 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
798 cur_op, mask);
799 op = optab_for_tree_code (MINUS_EXPR, type,
800 optab_default);
801 if (op != unknown_optab
802 && can_implement_p (op, TYPE_MODE (type)))
803 return gimplify_build2 (gsi, MINUS_EXPR, type,
804 cur_op, addend);
811 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
812 return NULL_TREE;
814 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
815 return NULL_TREE;
817 cur_op = op0;
819 switch (mode)
821 case 0:
822 gcc_assert (sign_p == UNSIGNED);
823 /* t1 = oprnd0 >> pre_shift;
824 t2 = t1 h* ml;
825 q = t2 >> post_shift; */
826 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
827 if (cur_op == NULL_TREE)
828 return NULL_TREE;
829 break;
830 case 1:
831 gcc_assert (sign_p == UNSIGNED);
832 for (i = 0; i < nunits; i++)
834 shift_temps[i] = 1;
835 post_shifts[i]--;
837 break;
838 case 2:
839 case 3:
840 case 4:
841 case 5:
842 gcc_assert (sign_p == SIGNED);
843 for (i = 0; i < nunits; i++)
844 shift_temps[i] = prec - 1;
845 break;
846 default:
847 return NULL_TREE;
850 tree_vector_builder vec (type, nunits, 1);
851 for (i = 0; i < nunits; i++)
852 vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
853 mulcst = vec.build ();
855 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
857 switch (mode)
859 case 0:
860 /* t1 = oprnd0 >> pre_shift;
861 t2 = t1 h* ml;
862 q = t2 >> post_shift; */
863 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
864 break;
865 case 1:
866 /* t1 = oprnd0 h* ml;
867 t2 = oprnd0 - t1;
868 t3 = t2 >> 1;
869 t4 = t1 + t3;
870 q = t4 >> (post_shift - 1); */
871 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
872 if (op == unknown_optab
873 || !can_implement_p (op, TYPE_MODE (type)))
874 return NULL_TREE;
875 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
876 tem = add_rshift (gsi, type, tem, shift_temps);
877 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
878 if (op == unknown_optab
879 || !can_implement_p (op, TYPE_MODE (type)))
880 return NULL_TREE;
881 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
882 cur_op = add_rshift (gsi, type, tem, post_shifts);
883 if (cur_op == NULL_TREE)
884 return NULL_TREE;
885 break;
886 case 2:
887 case 3:
888 case 4:
889 case 5:
890 /* t1 = oprnd0 h* ml;
891 t2 = t1; [ iff (mode & 2) != 0 ]
892 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
893 t3 = t2 >> post_shift;
894 t4 = oprnd0 >> (prec - 1);
895 q = t3 - t4; [ iff (mode & 1) == 0 ]
896 q = t4 - t3; [ iff (mode & 1) != 0 ] */
897 if ((mode & 2) == 0)
899 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
900 if (op == unknown_optab
901 || !can_implement_p (op, TYPE_MODE (type)))
902 return NULL_TREE;
903 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
905 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
906 if (cur_op == NULL_TREE)
907 return NULL_TREE;
908 tem = add_rshift (gsi, type, op0, shift_temps);
909 if (tem == NULL_TREE)
910 return NULL_TREE;
911 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
912 if (op == unknown_optab
913 || !can_implement_p (op, TYPE_MODE (type)))
914 return NULL_TREE;
915 if ((mode & 1) == 0)
916 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
917 else
918 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
919 break;
920 default:
921 gcc_unreachable ();
924 if (code == TRUNC_DIV_EXPR)
925 return cur_op;
927 /* We divided. Now finish by:
928 t1 = q * oprnd1;
929 r = oprnd0 - t1; */
930 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
931 if (op == unknown_optab
932 || !can_implement_p (op, TYPE_MODE (type)))
933 return NULL_TREE;
934 if (use_abs_op1)
936 tree_vector_builder elts;
937 if (!elts.new_unary_operation (type, op1, false))
938 return NULL_TREE;
939 unsigned int count = elts.encoded_nelts ();
940 for (unsigned int i = 0; i < count; ++i)
942 tree elem1 = VECTOR_CST_ELT (op1, i);
944 tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
945 if (elt == NULL_TREE)
946 return NULL_TREE;
947 elts.quick_push (elt);
949 op1 = elts.build ();
951 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
952 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
953 if (op == unknown_optab
954 || !can_implement_p (op, TYPE_MODE (type)))
955 return NULL_TREE;
956 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
959 /* Expand a vector condition to scalars, by using many conditions
960 on the vector's elements. */
962 static bool
963 expand_vector_condition (gimple_stmt_iterator *gsi)
965 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
966 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
967 tree a = gimple_assign_rhs1 (stmt);
968 tree a1 = a;
969 tree a2 = NULL_TREE;
970 bool a_is_scalar_bitmask = false;
971 tree b = gimple_assign_rhs2 (stmt);
972 tree c = gimple_assign_rhs3 (stmt);
973 vec<constructor_elt, va_gc> *v;
974 tree constr;
975 tree inner_type = TREE_TYPE (type);
976 tree width = vector_element_bits_tree (type);
977 tree cond_type = TREE_TYPE (TREE_TYPE (a));
978 tree index = bitsize_int (0);
979 tree comp_width = width;
980 tree comp_index = index;
981 location_t loc = gimple_location (gsi_stmt (*gsi));
983 gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a)));
985 if (expand_vec_cond_expr_p (type, TREE_TYPE (a)))
986 return true;
988 /* Handle vector boolean types with bitmasks. We can transform
989 vbfld_1 = tmp_6 ? vbfld_4 : vbfld_5;
990 into
991 tmp_7 = tmp_6 & vbfld_4;
992 tmp_8 = ~tmp_6;
993 tmp_9 = tmp_8 & vbfld_5;
994 vbfld_1 = tmp_7 | tmp_9; */
995 if (VECTOR_BOOLEAN_TYPE_P (type)
996 && SCALAR_INT_MODE_P (TYPE_MODE (type))
997 && useless_type_conversion_p (type, TREE_TYPE (a)))
999 a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
1000 a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
1001 a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
1002 a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
1003 gimple_assign_set_rhs_from_tree (gsi, a);
1004 update_stmt (gsi_stmt (*gsi));
1005 return true;
1008 /* TODO: try and find a smaller vector type. */
1010 if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
1011 warning_at (loc, OPT_Wvector_operation_performance,
1012 "vector condition will be expanded piecewise");
1014 if (SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1015 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1016 TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1017 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1018 (TREE_TYPE (TREE_TYPE (a))))))
1020 a_is_scalar_bitmask = true;
1021 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1022 tree atype = build_nonstandard_integer_type (prec, 1);
1023 a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
1025 else
1026 comp_width = vector_element_bits_tree (TREE_TYPE (a));
1028 int nunits = nunits_for_known_piecewise_op (type);
1029 vec_alloc (v, nunits);
1030 bool constant_p = true;
1031 for (int i = 0; i < nunits; i++)
1033 tree aa, result;
1034 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1035 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1036 if (a_is_scalar_bitmask)
1038 wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1039 result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1040 a, wide_int_to_tree (TREE_TYPE (a), w));
1041 aa = gimplify_build2 (gsi, NE_EXPR, boolean_type_node, result,
1042 build_zero_cst (TREE_TYPE (a)));
1044 else
1046 result = tree_vec_extract (gsi, cond_type, a, comp_width, comp_index);
1047 aa = gimplify_build2 (gsi, NE_EXPR, boolean_type_node, result,
1048 build_zero_cst (cond_type));
1050 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1051 if (!CONSTANT_CLASS_P (result))
1052 constant_p = false;
1053 constructor_elt ce = {NULL_TREE, result};
1054 v->quick_push (ce);
1055 index = int_const_binop (PLUS_EXPR, index, width);
1056 if (width == comp_width)
1057 comp_index = index;
1058 else
1059 comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1062 if (constant_p)
1063 constr = build_vector_from_ctor (type, v);
1064 else
1065 constr = build_constructor (type, v);
1066 gimple_assign_set_rhs_from_tree (gsi, constr);
1067 update_stmt (gsi_stmt (*gsi));
1069 return false;
1072 static tree
1073 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1074 gassign *assign, enum tree_code code)
1076 machine_mode compute_mode = TYPE_MODE (compute_type);
1078 /* If the compute mode is not a vector mode (hence we are not decomposing
1079 a BLKmode vector to smaller, hardware-supported vectors), we may want
1080 to expand the operations in parallel. */
1081 if (!VECTOR_MODE_P (compute_mode))
1082 switch (code)
1084 case PLUS_EXPR:
1085 case MINUS_EXPR:
1086 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1087 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1088 gimple_assign_rhs1 (assign),
1089 gimple_assign_rhs2 (assign), code);
1090 break;
1092 case NEGATE_EXPR:
1093 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1094 return expand_vector_addition (gsi, do_unop, do_negate, type,
1095 gimple_assign_rhs1 (assign),
1096 NULL_TREE, code);
1097 break;
1099 case BIT_AND_EXPR:
1100 case BIT_IOR_EXPR:
1101 case BIT_XOR_EXPR:
1102 return expand_vector_parallel (gsi, do_binop, type,
1103 gimple_assign_rhs1 (assign),
1104 gimple_assign_rhs2 (assign), code);
1106 case BIT_NOT_EXPR:
1107 return expand_vector_parallel (gsi, do_unop, type,
1108 gimple_assign_rhs1 (assign),
1109 NULL_TREE, code);
1110 case EQ_EXPR:
1111 case NE_EXPR:
1112 case GT_EXPR:
1113 case LT_EXPR:
1114 case GE_EXPR:
1115 case LE_EXPR:
1116 case UNEQ_EXPR:
1117 case UNGT_EXPR:
1118 case UNLT_EXPR:
1119 case UNGE_EXPR:
1120 case UNLE_EXPR:
1121 case LTGT_EXPR:
1122 case ORDERED_EXPR:
1123 case UNORDERED_EXPR:
1125 tree rhs1 = gimple_assign_rhs1 (assign);
1126 tree rhs2 = gimple_assign_rhs2 (assign);
1128 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
1131 case TRUNC_DIV_EXPR:
1132 case TRUNC_MOD_EXPR:
1134 tree rhs1 = gimple_assign_rhs1 (assign);
1135 tree rhs2 = gimple_assign_rhs2 (assign);
1136 tree ret;
1138 if (!optimize
1139 || !VECTOR_INTEGER_TYPE_P (type)
1140 || TREE_CODE (rhs2) != VECTOR_CST
1141 || !VECTOR_MODE_P (TYPE_MODE (type)))
1142 break;
1144 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1145 if (ret != NULL_TREE)
1146 return ret;
1147 break;
1150 default:
1151 break;
1154 if (TREE_CODE_CLASS (code) == tcc_unary)
1155 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1156 gimple_assign_rhs1 (assign),
1157 NULL_TREE, code, false);
1158 else
1159 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1160 gimple_assign_rhs1 (assign),
1161 gimple_assign_rhs2 (assign), code, false);
1164 /* Try to optimize
1165 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1166 style stmts into:
1167 _9 = { b_7, b_7, b_7, b_7 };
1168 a_5 = _9 + { 0, 3, 6, 9 };
1169    because a vector splat operation is usually more efficient
1170    than piecewise initialization of the vector.  */
1172 static void
1173 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1175 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1176 tree lhs = gimple_assign_lhs (stmt);
1177 tree rhs = gimple_assign_rhs1 (stmt);
1178 tree type = TREE_TYPE (rhs);
1179 unsigned int i, j;
1180 unsigned HOST_WIDE_INT nelts;
1181 bool all_same = true;
1182 constructor_elt *elt;
1183 gimple *g;
1184 tree base = NULL_TREE;
1185 optab op;
1187 if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1188 || nelts <= 2
1189 || CONSTRUCTOR_NELTS (rhs) != nelts)
1190 return;
1191 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1192 if (op == unknown_optab
1193 || !can_implement_p (op, TYPE_MODE (type)))
1194 return;
1195 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1196 if (TREE_CODE (elt->value) != SSA_NAME
1197 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1198 return;
1199 else
1201 tree this_base = elt->value;
1202 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1203 all_same = false;
1204 for (j = 0; j < nelts + 1; j++)
1206 g = SSA_NAME_DEF_STMT (this_base);
1207 if (is_gimple_assign (g)
1208 && gimple_assign_rhs_code (g) == PLUS_EXPR
1209 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1210 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1211 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1212 this_base = gimple_assign_rhs1 (g);
1213 else
1214 break;
1216 if (i == 0)
1217 base = this_base;
1218 else if (this_base != base)
1219 return;
1221 if (all_same)
1222 return;
1223 tree_vector_builder cst (type, nelts, 1);
1224 for (i = 0; i < nelts; i++)
1226 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1227 tree elt = build_zero_cst (TREE_TYPE (base));
1228 while (this_base != base)
1230 g = SSA_NAME_DEF_STMT (this_base);
1231 elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1232 elt, gimple_assign_rhs2 (g));
1233 if (elt == NULL_TREE
1234 || TREE_CODE (elt) != INTEGER_CST
1235 || TREE_OVERFLOW (elt))
1236 return;
1237 this_base = gimple_assign_rhs1 (g);
1239 cst.quick_push (elt);
1241 for (i = 0; i < nelts; i++)
1242 CONSTRUCTOR_ELT (rhs, i)->value = base;
1243 g = gimple_build_assign (make_ssa_name (type), rhs);
1244 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1245 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1246 cst.build ());
1247 gsi_replace (gsi, g, false);
1250 /* Return a type for the widest vector mode with the same element type as
1251 type ORIGINAL_VECTOR_TYPE, with at most the same number of elements as type
1252 ORIGINAL_VECTOR_TYPE and that is supported by the target for an operation
1253 with optab OP, or return NULL_TREE if none is found. */
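/* For instance, if the target supports OP only on V4SF, a V16SF
   ORIGINAL_VECTOR_TYPE is given the compute type V4SF, i.e. four elements
   per operation.  */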
1255 static tree
1256 type_for_widest_vector_mode (tree original_vector_type, optab op)
1258 gcc_assert (VECTOR_TYPE_P (original_vector_type));
1259 tree type = TREE_TYPE (original_vector_type);
1260 machine_mode inner_mode = TYPE_MODE (type);
1261 machine_mode best_mode = VOIDmode, mode;
1262 poly_int64 best_nunits = 0;
1264 if (SCALAR_FLOAT_MODE_P (inner_mode))
1265 mode = MIN_MODE_VECTOR_FLOAT;
1266 else if (SCALAR_FRACT_MODE_P (inner_mode))
1267 mode = MIN_MODE_VECTOR_FRACT;
1268 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1269 mode = MIN_MODE_VECTOR_UFRACT;
1270 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1271 mode = MIN_MODE_VECTOR_ACCUM;
1272 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1273 mode = MIN_MODE_VECTOR_UACCUM;
1274 else if (inner_mode == BImode)
1275 mode = MIN_MODE_VECTOR_BOOL;
1276 else
1277 mode = MIN_MODE_VECTOR_INT;
1279 FOR_EACH_MODE_FROM (mode, mode)
1280 if (GET_MODE_INNER (mode) == inner_mode
1281 && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1282 && can_implement_p (op, mode)
1283 && known_le (GET_MODE_NUNITS (mode),
1284 TYPE_VECTOR_SUBPARTS (original_vector_type)))
1285 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1287 if (best_mode == VOIDmode)
1288 return NULL_TREE;
1289 else
1290 return build_vector_type_for_mode (type, best_mode);
1294 /* Build a reference to the element of the vector VECT.  Function
1295    returns either the element itself, a BIT_FIELD_REF, or an
1296    ARRAY_REF expression.
1298    GSI is required to insert temporary variables while building a
1299    reference to the element of the vector VECT.
1301    PTMPVEC is a pointer to a temporary variable used for caching
1302    purposes.  If PTMPVEC is NULL, a new temporary variable will
1303    be created.  */
1304 static tree
1305 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1307 tree vect_type, vect_elt_type;
1308 gimple *asgn;
1309 tree tmpvec;
1310 tree arraytype;
1311 bool need_asgn = true;
1312 unsigned int elements;
1314 vect_type = TREE_TYPE (vect);
1315 vect_elt_type = TREE_TYPE (vect_type);
1316 elements = nunits_for_known_piecewise_op (vect_type);
1318 if (TREE_CODE (idx) == INTEGER_CST)
1320 unsigned HOST_WIDE_INT index;
1322 /* Given that we're about to compute a binary modulus,
1323 we don't care about the high bits of the value. */
1324 index = TREE_INT_CST_LOW (idx);
1325 if (!tree_fits_uhwi_p (idx) || index >= elements)
1327 index &= elements - 1;
1328 idx = build_int_cst (TREE_TYPE (idx), index);
1331 /* When lowering a vector statement sequence do some easy
1332 simplification by looking through intermediate vector results. */
1333 if (TREE_CODE (vect) == SSA_NAME)
1335 gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1336 if (is_gimple_assign (def_stmt)
1337 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1338 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1339 vect = gimple_assign_rhs1 (def_stmt);
1342 if (TREE_CODE (vect) == VECTOR_CST)
1343 return VECTOR_CST_ELT (vect, index);
1344 else if (TREE_CODE (vect) == CONSTRUCTOR
1345 && (CONSTRUCTOR_NELTS (vect) == 0
1346 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1347 != VECTOR_TYPE))
1349 if (index < CONSTRUCTOR_NELTS (vect))
1350 return CONSTRUCTOR_ELT (vect, index)->value;
1351 return build_zero_cst (vect_elt_type);
1353 else
1355 tree size = vector_element_bits_tree (vect_type);
1356 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1357 size);
1358 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1362 if (!ptmpvec)
1363 tmpvec = create_tmp_var (vect_type, "vectmp");
1364 else if (!*ptmpvec)
1365 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1366 else
1368 tmpvec = *ptmpvec;
1369 need_asgn = false;
1372 if (need_asgn)
1374 TREE_ADDRESSABLE (tmpvec) = 1;
1375 asgn = gimple_build_assign (tmpvec, vect);
1376 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1379 arraytype = build_array_type_nelts (vect_elt_type, elements);
1380 return build4 (ARRAY_REF, vect_elt_type,
1381 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1382 idx, NULL_TREE, NULL_TREE);
1385 /* Check if VEC_PERM_EXPR within the given setting is supported
1386 by hardware, or lower it piecewise.
1388 When VEC_PERM_EXPR has the same first and second operands:
1389 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1390 {v0[mask[0]], v0[mask[1]], ...}
1391 MASK and V0 must have the same number of elements.
1393 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1394 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1395 V0 and V1 must have the same type. MASK, V0, V1 must have the
1396 same number of arguments. */
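/* For instance, VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h}, {0,5,2,7}> is
   lowered to the constructor {a, f, c, h}.  */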
1398 static void
1399 lower_vec_perm (gimple_stmt_iterator *gsi)
1401 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1402 tree mask = gimple_assign_rhs3 (stmt);
1403 tree vec0 = gimple_assign_rhs1 (stmt);
1404 tree vec1 = gimple_assign_rhs2 (stmt);
1405 tree res_vect_type = TREE_TYPE (gimple_assign_lhs (stmt));
1406 tree vect_type = TREE_TYPE (vec0);
1407 tree mask_type = TREE_TYPE (mask);
1408 tree vect_elt_type = TREE_TYPE (vect_type);
1409 tree mask_elt_type = TREE_TYPE (mask_type);
1410 unsigned HOST_WIDE_INT elements;
1411 vec<constructor_elt, va_gc> *v;
1412 tree constr, t, si, i_val;
1413 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1414 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1415 location_t loc = gimple_location (gsi_stmt (*gsi));
1416 unsigned i;
1418 if (!TYPE_VECTOR_SUBPARTS (res_vect_type).is_constant (&elements))
1419 return;
1421 if (TREE_CODE (mask) == SSA_NAME)
1423 gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1424 if (is_gimple_assign (def_stmt)
1425 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1426 mask = gimple_assign_rhs1 (def_stmt);
1429 vec_perm_builder sel_int;
1431 if (TREE_CODE (mask) == VECTOR_CST
1432 && tree_to_vec_perm_builder (&sel_int, mask))
1434 vec_perm_indices indices (sel_int, 2, elements);
1435 machine_mode vmode = TYPE_MODE (vect_type);
1436 tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
1437 machine_mode lhs_mode = TYPE_MODE (lhs_type);
1438 if (can_vec_perm_const_p (lhs_mode, vmode, indices))
1440 gimple_assign_set_rhs3 (stmt, mask);
1441 update_stmt (stmt);
1442 return;
1444 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1445 vector as VEC1 and a right element shift MASK. */
1446 if (can_implement_p (vec_shr_optab, TYPE_MODE (vect_type))
1447 && TREE_CODE (vec1) == VECTOR_CST
1448 && initializer_zerop (vec1)
1449 && maybe_ne (indices[0], 0)
1450 && known_lt (poly_uint64 (indices[0]), elements))
1452 bool ok_p = indices.series_p (0, 1, indices[0], 1);
1453 if (!ok_p)
1455 for (i = 1; i < elements; ++i)
1457 poly_uint64 actual = indices[i];
1458 poly_uint64 expected = i + indices[0];
1459 /* Indices into the second vector are all equivalent. */
1460 if (maybe_lt (actual, elements)
1461 ? maybe_ne (actual, expected)
1462 : maybe_lt (expected, elements))
1463 break;
1465 ok_p = i == elements;
1467 if (ok_p)
1469 gimple_assign_set_rhs3 (stmt, mask);
1470 update_stmt (stmt);
1471 return;
1474 /* And similarly vec_shl pattern. */
1475 if (can_implement_p (vec_shl_optab, TYPE_MODE (vect_type))
1476 && TREE_CODE (vec0) == VECTOR_CST
1477 && initializer_zerop (vec0))
1479 unsigned int first = 0;
1480 for (i = 0; i < elements; ++i)
1481 if (known_eq (poly_uint64 (indices[i]), elements))
1483 if (i == 0 || first)
1484 break;
1485 first = i;
1487 else if (first
1488 ? maybe_ne (poly_uint64 (indices[i]),
1489 elements + i - first)
1490 : maybe_ge (poly_uint64 (indices[i]), elements))
1491 break;
1492 if (first && i == elements)
1494 gimple_assign_set_rhs3 (stmt, mask);
1495 update_stmt (stmt);
1496 return;
1500 else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1501 return;
1503 if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
1504 warning_at (loc, OPT_Wvector_operation_performance,
1505 "vector shuffling operation will be expanded piecewise");
1507 vec_alloc (v, elements);
1508 bool constant_p = true;
1509 for (i = 0; i < elements; i++)
1511 si = size_int (i);
1512 i_val = vector_element (gsi, mask, si, &masktmp);
1514 if (TREE_CODE (i_val) == INTEGER_CST)
1516 unsigned HOST_WIDE_INT index;
1518 index = TREE_INT_CST_LOW (i_val);
1519 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1520 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1522 if (two_operand_p && (index & elements) != 0)
1523 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1524 else
1525 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1527 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1528 true, GSI_SAME_STMT);
1530 else
1532 tree cond = NULL_TREE, v0_val;
1534 if (two_operand_p)
1536 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1537 build_int_cst (mask_elt_type, elements));
1538 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1539 true, GSI_SAME_STMT);
1542 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1543 build_int_cst (mask_elt_type, elements - 1));
1544 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1545 true, GSI_SAME_STMT);
1547 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1548 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1549 true, GSI_SAME_STMT);
1551 if (two_operand_p)
1553 tree v1_val;
1555 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1556 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1557 true, GSI_SAME_STMT);
1559 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1560 cond, build_zero_cst (mask_elt_type));
1561 cond = fold_build3 (COND_EXPR, vect_elt_type,
1562 cond, v0_val, v1_val);
1563 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1564 true, GSI_SAME_STMT);
1566 else
1567 t = v0_val;
1570 if (!CONSTANT_CLASS_P (t))
1571 constant_p = false;
1572 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1575 if (constant_p)
1576 constr = build_vector_from_ctor (res_vect_type, v);
1577 else
1578 constr = build_constructor (res_vect_type, v);
1579 gimple_assign_set_rhs_from_tree (gsi, constr);
1580 update_stmt (gsi_stmt (*gsi));
1583 /* If OP is a uniform vector return the element it is a splat from. */
1585 static tree
1586 ssa_uniform_vector_p (tree op)
1588 if (TREE_CODE (op) == VECTOR_CST
1589 || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1590 || TREE_CODE (op) == CONSTRUCTOR)
1591 return uniform_vector_p (op);
1592 if (TREE_CODE (op) == SSA_NAME)
1594 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1595 if (gimple_assign_single_p (def_stmt))
1596 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1598 return NULL_TREE;
1601 /* Return the type that should be used to implement OP on type TYPE.
1602 This is TYPE itself if the target can do the operation directly,
1603 otherwise it is a scalar type or a smaller vector type. */
1605 static tree
1606 get_compute_type (optab op, tree type)
1608 if (op)
1610 if (VECTOR_MODE_P (TYPE_MODE (type))
1611 && can_implement_p (op, TYPE_MODE (type)))
1612 return type;
1614 /* For very wide vectors, try using a smaller vector mode. */
1615 tree vector_compute_type = type_for_widest_vector_mode (type, op);
1616 if (vector_compute_type != NULL_TREE
1617 && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1618 && can_implement_p (op, TYPE_MODE (vector_compute_type)))
1619 return vector_compute_type;
1622 /* There is no operation in hardware, so fall back to scalars. */
1623 return TREE_TYPE (type);
1626 static tree
1627 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1628 tree bitpos, tree bitsize, enum tree_code code,
1629 tree type ATTRIBUTE_UNUSED)
1631 if (VECTOR_TYPE_P (TREE_TYPE (a)))
1632 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1633 if (VECTOR_TYPE_P (TREE_TYPE (b)))
1634 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1635 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1636 return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1639 /* Expand a vector COND_EXPR to scalars, piecewise. */
1640 static void
1641 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1643 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1644 tree lhs = gimple_assign_lhs (stmt);
1645 tree type = TREE_TYPE (lhs);
1646 tree compute_type = get_compute_type (mov_optab, type);
1647 machine_mode compute_mode = TYPE_MODE (compute_type);
1648 gcc_assert (compute_mode != BLKmode);
1649 tree rhs2 = gimple_assign_rhs2 (stmt);
1650 tree rhs3 = gimple_assign_rhs3 (stmt);
1651 tree new_rhs;
1653 /* If the compute mode is not a vector mode (hence we are not decomposing
1654 a BLKmode vector to smaller, hardware-supported vectors), we may want
1655 to expand the operations in parallel. */
1656 if (!VECTOR_MODE_P (compute_mode))
1657 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1658 COND_EXPR);
1659 else
1660 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1661 rhs2, rhs3, COND_EXPR, false);
1662 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1663 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1664 new_rhs);
1666   /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree.  One
1667      way to do that is to change expand_vector_operation and its callees to
1668      return a tree_code, RHS1 and RHS2 instead of a tree.  */
1669 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1670 update_stmt (gsi_stmt (*gsi));
1673 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1674 lowering. If INNER_TYPE is not a vector type, this is a scalar
1675 fallback. */
1677 static tree
1678 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1679 tree decl, tree bitpos, tree bitsize,
1680 enum tree_code code, tree type)
1682 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1683 if (!VECTOR_TYPE_P (inner_type))
1684 return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1685 if (code == CALL_EXPR)
1687 gimple *g = gimple_build_call (decl, 1, a);
1688 tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1689 gimple_call_set_lhs (g, lhs);
1690 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1691 return lhs;
1693 else
1695 tree outer_type = build_vector_type (TREE_TYPE (type),
1696 TYPE_VECTOR_SUBPARTS (inner_type));
1697 return gimplify_build1 (gsi, code, outer_type, a);
1701 /* Similarly, but for narrowing conversion. */
1703 static tree
1704 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1705 tree, tree bitpos, tree, enum tree_code code,
1706 tree type)
1708 tree itype = build_vector_type (TREE_TYPE (inner_type),
1709 exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1710 2));
1711 tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1712 tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1713 int_const_binop (PLUS_EXPR, bitpos,
1714 TYPE_SIZE (itype)));
1715 tree outer_type = build_vector_type (TREE_TYPE (type),
1716 TYPE_VECTOR_SUBPARTS (inner_type));
1717 return gimplify_build2 (gsi, code, outer_type, b, c);
1720 /* Expand VEC_CONVERT ifn call. */
1722 static void
1723 expand_vector_conversion (gimple_stmt_iterator *gsi)
1725 gimple *stmt = gsi_stmt (*gsi);
1726 gimple *g;
1727 tree lhs = gimple_call_lhs (stmt);
1728 if (lhs == NULL_TREE)
1730 g = gimple_build_nop ();
1731 gsi_replace (gsi, g, false);
1732 return;
1734 tree arg = gimple_call_arg (stmt, 0);
1735 tree ret_type = TREE_TYPE (lhs);
1736 tree arg_type = TREE_TYPE (arg);
1737 tree new_rhs, new_lhs, compute_type = TREE_TYPE (arg_type);
1738 enum tree_code code = NOP_EXPR;
1739 enum tree_code code1 = ERROR_MARK;
1740 enum { NARROW, NONE, WIDEN } modifier = NONE;
1741 optab optab1 = unknown_optab;
1743 gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1744 if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1745 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1746 code = FIX_TRUNC_EXPR;
1747 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1748 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1749 code = FLOAT_EXPR;
1750 unsigned int ret_elt_bits = vector_element_bits (ret_type);
1751 unsigned int arg_elt_bits = vector_element_bits (arg_type);
1752 if (ret_elt_bits < arg_elt_bits)
1753 modifier = NARROW;
1754 else if (ret_elt_bits > arg_elt_bits)
1755 modifier = WIDEN;
1757 auto_vec<std::pair<tree, tree_code> > converts;
1758 if (supportable_indirect_convert_operation (code,
1759 ret_type, arg_type,
1760 converts,
1761 arg))
1763 new_rhs = arg;
1764 for (unsigned int i = 0; i < converts.length () - 1; i++)
1766 new_lhs = make_ssa_name (converts[i].first);
1767 g = gimple_build_assign (new_lhs, converts[i].second, new_rhs);
1768 new_rhs = new_lhs;
1769 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1771 g = gimple_build_assign (lhs,
1772 converts[converts.length() - 1].second,
1773 new_rhs);
1774 gsi_replace (gsi, g, false);
1775 return;
1778 if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1780 /* Can't use get_compute_type here, as supportable_convert_operation
1781 doesn't necessarily use an optab and needs two arguments. */
1782 tree vec_compute_type
1783 = type_for_widest_vector_mode (arg_type, mov_optab);
1784 if (vec_compute_type
1785 && VECTOR_MODE_P (TYPE_MODE (vec_compute_type)))
1787 unsigned HOST_WIDE_INT nelts
1788 = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
1789 while (nelts > 1)
1791 tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1792 tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1793 if (supportable_convert_operation (code, ret1_type, arg1_type,
1794 &code1))
1796 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1797 ret_type, arg1_type, arg,
1798 NULL_TREE, code1, false);
1799 g = gimple_build_assign (lhs, new_rhs);
1800 gsi_replace (gsi, g, false);
1801 return;
1803 nelts = nelts / 2;
1807 else if (modifier == NARROW)
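/* Narrowing: use a VEC_PACK_* operation that packs two input vectors into one vector with narrower elements. */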
1809 switch (code)
1811 CASE_CONVERT:
1812 code1 = VEC_PACK_TRUNC_EXPR;
1813 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1814 break;
1815 case FIX_TRUNC_EXPR:
1816 code1 = VEC_PACK_FIX_TRUNC_EXPR;
1817 /* The signedness is determined from the output operand. */
1818 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1819 break;
1820 case FLOAT_EXPR:
1821 code1 = VEC_PACK_FLOAT_EXPR;
1822 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1823 break;
1824 default:
1825 gcc_unreachable ();
1828 if (optab1)
1829 compute_type = get_compute_type (optab1, arg_type);
1830 enum insn_code icode1;
1831 if (VECTOR_TYPE_P (compute_type)
1832 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1833 != CODE_FOR_nothing)
1834 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1836 tree cretd_type
1837 = build_vector_type (TREE_TYPE (ret_type),
1838 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1839 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1841 if (compute_type == arg_type)
1843 new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1844 arg, build_zero_cst (arg_type));
1845 new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1846 TYPE_SIZE (ret_type),
1847 bitsize_int (0));
1848 g = gimple_build_assign (lhs, new_rhs);
1849 gsi_replace (gsi, g, false);
1850 return;
1852 tree dcompute_type
1853 = build_vector_type (TREE_TYPE (compute_type),
1854 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1855 if (TYPE_MAIN_VARIANT (dcompute_type)
1856 == TYPE_MAIN_VARIANT (arg_type))
1857 new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1858 NULL_TREE, bitsize_int (0),
1859 NULL_TREE, code1,
1860 ret_type);
1861 else
1862 new_rhs = expand_vector_piecewise (gsi,
1863 do_vec_narrow_conversion,
1864 arg_type, dcompute_type,
1865 arg, NULL_TREE, code1,
1866 false, ret_type);
1867 g = gimple_build_assign (lhs, new_rhs);
1868 gsi_replace (gsi, g, false);
1869 return;
1873 else if (modifier == WIDEN)
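/* Widening: unpack the source with a pair of VEC_UNPACK_*_LO/HI operations, each producing half of the result. */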
1875 enum tree_code code2 = ERROR_MARK;
1876 optab optab2 = unknown_optab;
1877 switch (code)
1879 CASE_CONVERT:
1880 code1 = VEC_UNPACK_LO_EXPR;
1881 code2 = VEC_UNPACK_HI_EXPR;
1882 break;
1883 case FIX_TRUNC_EXPR:
1884 code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
1885 code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
1886 break;
1887 case FLOAT_EXPR:
1888 code1 = VEC_UNPACK_FLOAT_LO_EXPR;
1889 code2 = VEC_UNPACK_FLOAT_HI_EXPR;
1890 break;
1891 default:
1892 gcc_unreachable ();
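/* On big-endian targets the roles of the low and high parts are swapped. */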
1894 if (BYTES_BIG_ENDIAN)
1895 std::swap (code1, code2);
1897 if (code == FIX_TRUNC_EXPR)
1899 /* The signedness is determined from the output operand. */
1900 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1901 optab2 = optab_for_tree_code (code2, ret_type, optab_default);
1903 else
1905 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1906 optab2 = optab_for_tree_code (code2, arg_type, optab_default);
1909 if (optab1 && optab2)
1910 compute_type = get_compute_type (optab1, arg_type);
1912 enum insn_code icode1, icode2;
1913 if (VECTOR_TYPE_P (compute_type)
1914 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1915 != CODE_FOR_nothing)
1916 && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
1917 != CODE_FOR_nothing)
1918 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
1919 && (insn_data[icode1].operand[0].mode
1920 == insn_data[icode2].operand[0].mode))
1922 poly_uint64 nunits
1923 = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
1924 tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
1925 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1927 vec<constructor_elt, va_gc> *v;
1928 tree part_width = TYPE_SIZE (compute_type);
1929 tree index = bitsize_int (0);
1930 int nunits = nunits_for_known_piecewise_op (arg_type);
1931 int delta = tree_to_uhwi (part_width) / arg_elt_bits;
1932 int i;
1933 location_t loc = gimple_location (gsi_stmt (*gsi));
1935 if (compute_type != arg_type)
1937 if (!warning_suppressed_p (gsi_stmt (*gsi),
1938 OPT_Wvector_operation_performance))
1939 warning_at (loc, OPT_Wvector_operation_performance,
1940 "vector operation will be expanded piecewise");
1942 else
1944 nunits = 1;
1945 delta = 1;
1948 vec_alloc (v, (nunits + delta - 1) / delta * 2);
1949 bool constant_p = true;
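/* For each extracted piece of the source, emit one LO and one HI unpack and collect the results into a constructor for the whole result vector. */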
1950 for (i = 0; i < nunits;
1951 i += delta, index = int_const_binop (PLUS_EXPR, index,
1952 part_width))
1954 tree a = arg;
1955 if (compute_type != arg_type)
1956 a = tree_vec_extract (gsi, compute_type, a, part_width,
1957 index);
1958 tree result = gimplify_build1 (gsi, code1, cretd_type, a);
1959 constructor_elt ce = { NULL_TREE, result };
1960 if (!CONSTANT_CLASS_P (ce.value))
1961 constant_p = false;
1962 v->quick_push (ce);
1963 ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
1964 if (!CONSTANT_CLASS_P (ce.value))
1965 constant_p = false;
1966 v->quick_push (ce);
1969 if (constant_p)
1970 new_rhs = build_vector_from_ctor (ret_type, v);
1971 else
1972 new_rhs = build_constructor (ret_type, v);
1973 g = gimple_build_assign (lhs, new_rhs);
1974 gsi_replace (gsi, g, false);
1975 return;
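/* None of the vector-wide strategies applied; expand the conversion one element at a time. */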
1980 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
1981 TREE_TYPE (arg_type), arg,
1982 NULL_TREE, code, false, ret_type);
1983 g = gimple_build_assign (lhs, new_rhs);
1984 gsi_replace (gsi, g, false);
1987 /* Process one statement. If we identify a vector operation that the target cannot handle directly, expand it. */
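/* For example, a vector addition whose mode the target does not support is expanded piecewise (see expand_vector_piecewise) and the partial results are collected into a CONSTRUCTOR of the original vector type. */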
1989 static void
1990 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1992 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
1993 enum tree_code code;
1994 optab op = unknown_optab;
1995 enum gimple_rhs_class rhs_class;
1996 tree new_rhs;
1998 /* Apart from IFN_VEC_CONVERT calls, handled just below, only consider GIMPLE_ASSIGN statements. */
1999 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
2000 if (!stmt)
2002 if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
2003 expand_vector_conversion (gsi);
2004 return;
2007 code = gimple_assign_rhs_code (stmt);
2008 rhs_class = get_gimple_rhs_class (code);
2009 lhs = gimple_assign_lhs (stmt);
2011 if (code == VEC_PERM_EXPR)
2013 lower_vec_perm (gsi);
2014 return;
2017 if (code == VEC_COND_EXPR)
2019 expand_vector_condition (gsi);
2020 return;
2023 if (code == COND_EXPR
2024 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
2025 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
2027 expand_vector_scalar_condition (gsi);
2028 return;
2031 if (code == CONSTRUCTOR
2032 && TREE_CODE (lhs) == SSA_NAME
2033 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
2034 && !gimple_clobber_p (stmt)
2035 && optimize)
2037 optimize_vector_constructor (gsi);
2038 return;
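/* Beyond this point only unary and binary vector operations are handled. */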
2041 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
2042 return;
2044 rhs1 = gimple_assign_rhs1 (stmt);
2045 if (rhs_class == GIMPLE_BINARY_RHS)
2046 rhs2 = gimple_assign_rhs2 (stmt);
2048 type = TREE_TYPE (lhs);
2049 if (!VECTOR_TYPE_P (type)
2050 || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2051 return;
2053 /* A scalar operation pretending to be a vector one. */
2054 if (VECTOR_BOOLEAN_TYPE_P (type)
2055 && !VECTOR_MODE_P (TYPE_MODE (type))
2056 && TYPE_MODE (type) != BLKmode
2057 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2058 || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2059 && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2060 && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2061 return;
2063 /* If the vector operation operates on operands whose vector elements
2064 are all the same, implement it with a scalar operation and a splat,
2065 provided the target supports the scalar operation. */
2066 tree srhs1, srhs2 = NULL_TREE;
2067 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
2068 && (rhs2 == NULL_TREE
2069 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2070 && (srhs2 = rhs2))
2071 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2072 /* As we query direct optabs, restrict this to non-convert operations. */
2073 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2075 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2076 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2077 && can_implement_p (op, TYPE_MODE (TREE_TYPE (type))))
2079 tree stype = TREE_TYPE (TREE_TYPE (lhs));
2080 tree slhs = (rhs2 != NULL_TREE)
2081 ? gimplify_build2 (gsi, code, stype, srhs1, srhs2)
2082 : gimplify_build1 (gsi, code, stype, srhs1);
2083 gimple_assign_set_rhs_from_tree (gsi,
2084 build_vector_from_val (type, slhs));
2085 update_stmt (stmt);
2086 return;
2090 /* Plain moves do not need lowering. */
2091 if (code == SSA_NAME
2092 || code == VIEW_CONVERT_EXPR
2093 || code == PAREN_EXPR)
2094 return;
2096 if (CONVERT_EXPR_CODE_P (code)
2097 || code == FLOAT_EXPR
2098 || code == FIX_TRUNC_EXPR)
2099 return;
2101 /* The signedness is determined from the input argument. */
2102 if (code == VEC_UNPACK_FLOAT_HI_EXPR
2103 || code == VEC_UNPACK_FLOAT_LO_EXPR
2104 || code == VEC_PACK_FLOAT_EXPR)
2106 /* We do not know how to scalarize those. */
2107 return;
2110 /* For widening/narrowing vector operations, the relevant type is that of
2111 the arguments, not of the widened result. VEC_UNPACK_FLOAT_*_EXPR is
2112 handled in the same way above. */
2113 if (code == WIDEN_SUM_EXPR
2114 || code == VEC_WIDEN_MULT_HI_EXPR
2115 || code == VEC_WIDEN_MULT_LO_EXPR
2116 || code == VEC_WIDEN_MULT_EVEN_EXPR
2117 || code == VEC_WIDEN_MULT_ODD_EXPR
2118 || code == VEC_UNPACK_HI_EXPR
2119 || code == VEC_UNPACK_LO_EXPR
2120 || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2121 || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2122 || code == VEC_PACK_TRUNC_EXPR
2123 || code == VEC_PACK_SAT_EXPR
2124 || code == VEC_PACK_FIX_TRUNC_EXPR
2125 || code == VEC_WIDEN_LSHIFT_HI_EXPR
2126 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2128 /* We do not know how to scalarize those. */
2129 return;
2132 /* Choose between a vector shift/rotate by vector and a vector shift/rotate
2133 by scalar. */
2134 if (code == LSHIFT_EXPR
2135 || code == RSHIFT_EXPR
2136 || code == LROTATE_EXPR
2137 || code == RROTATE_EXPR)
2139 optab opv;
2141 /* Check whether we have vector <op> {x,x,x,x} where x
2142 could be a scalar variable or a constant. Transform
2143 vector <op> {x,x,x,x} ==> vector <op> scalar. */
2144 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2146 tree first;
2148 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2150 gimple_assign_set_rhs2 (stmt, first);
2151 update_stmt (stmt);
2152 rhs2 = first;
2156 opv = optab_for_tree_code (code, type, optab_vector);
2157 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2158 op = opv;
2159 else
2161 op = optab_for_tree_code (code, type, optab_scalar);
2163 compute_type = get_compute_type (op, type);
2164 if (compute_type == type)
2165 return;
2166 /* The rtl expander will expand vector/scalar as vector/vector
2167 if necessary. Pick the one with the wider vector type. */
2168 tree compute_vtype = get_compute_type (opv, type);
2169 if (subparts_gt (compute_vtype, compute_type))
2171 compute_type = compute_vtype;
2172 op = opv;
2176 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2178 if (compute_type == NULL_TREE)
2179 compute_type = get_compute_type (op, type);
2180 if (compute_type == type)
2181 return;
2182 /* Before splitting vector rotates into scalar rotates,
2183 see whether we can use vector shifts and BIT_IOR_EXPR
2184 instead. For vector-by-vector rotates we would also
2185 need to check BIT_AND_EXPR and NEGATE_EXPR; punt on those
2186 for now, as fold does not seem to create such rotates anyway. */
2187 if (compute_type == TREE_TYPE (type)
2188 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2190 optab oplv = vashl_optab, opl = ashl_optab;
2191 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2192 tree compute_lvtype = get_compute_type (oplv, type);
2193 tree compute_rvtype = get_compute_type (oprv, type);
2194 tree compute_otype = get_compute_type (opo, type);
2195 tree compute_ltype = get_compute_type (opl, type);
2196 tree compute_rtype = get_compute_type (opr, type);
2197 /* The rtl expander will expand vector/scalar as vector/vector
2198 if necessary. Pick the one with the wider vector type. */
2199 if (subparts_gt (compute_lvtype, compute_ltype))
2201 compute_ltype = compute_lvtype;
2202 opl = oplv;
2204 if (subparts_gt (compute_rvtype, compute_rtype))
2206 compute_rtype = compute_rvtype;
2207 opr = oprv;
2209 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2210 BIT_IOR_EXPR. */
2211 compute_type = compute_ltype;
2212 if (subparts_gt (compute_type, compute_rtype))
2213 compute_type = compute_rtype;
2214 if (subparts_gt (compute_type, compute_otype))
2215 compute_type = compute_otype;
2216 /* Verify that all three operations can be performed in that type. */
2217 if (compute_type != TREE_TYPE (type))
2219 if (!can_implement_p (opl, TYPE_MODE (compute_type))
2220 || !can_implement_p (opr, TYPE_MODE (compute_type))
2221 || !can_implement_p (opo, TYPE_MODE (compute_type)))
2222 compute_type = TREE_TYPE (type);
2227 else
2228 op = optab_for_tree_code (code, type, optab_default);
2230 /* Optabs will try converting a negation into a subtraction, so
2231 look for it as well. TODO: negation of floating-point vectors
2232 might be turned into an exclusive OR toggling the sign bit. */
2233 if (op == unknown_optab
2234 && code == NEGATE_EXPR
2235 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2236 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2238 if (compute_type == NULL_TREE)
2239 compute_type = get_compute_type (op, type);
2240 if (compute_type == type)
2241 return;
2243 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
2246 /* Leave the expression untouched for later expansion. */
2246 if (new_rhs == NULL_TREE)
2247 return;
2249 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2250 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2251 new_rhs);
2253 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
2254 way to do that is to change expand_vector_operation and its callees to
2255 return a tree_code, RHS1 and RHS2 instead of a tree. */
2256 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2257 update_stmt (gsi_stmt (*gsi));
2260 /* Use this to lower vector operations introduced by the vectorizer,
2261 if it may need the bit-twiddling tricks implemented in this file. */
2263 static unsigned int
2264 expand_vector_operations (void)
2266 gimple_stmt_iterator gsi;
2267 basic_block bb;
2268 bool cfg_changed = false;
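/* Walk every statement in every basic block, lowering vector operations, cleaning up EH as needed, and folding stale .LOOP_DIST_ALIAS calls. */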
2270 FOR_EACH_BB_FN (bb, cfun)
2272 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2274 expand_vector_operations_1 (&gsi);
2275 /* ??? If we do not clean up EH then we will ICE in
2276 verification. But in reality we have created wrong-code,
2277 as we did not properly transition EH info and edges to
2278 the piecewise computations. */
2279 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2280 && gimple_purge_dead_eh_edges (bb))
2281 cfg_changed = true;
2282 /* If a .LOOP_DIST_ALIAS call prevailed, loops got elided
2283 before vectorization got a chance to get at them. Simply
2284 fold as if loop distribution wasn't performed. */
2285 if (gimple_call_internal_p (gsi_stmt (gsi), IFN_LOOP_DIST_ALIAS))
2287 fold_loop_internal_call (gsi_stmt (gsi), boolean_false_node);
2288 cfg_changed = true;
2293 return cfg_changed ? TODO_cleanup_cfg : 0;
2296 namespace {
2298 const pass_data pass_data_lower_vector =
2300 GIMPLE_PASS, /* type */
2301 "veclower", /* name */
2302 OPTGROUP_VEC, /* optinfo_flags */
2303 TV_NONE, /* tv_id */
2304 PROP_cfg, /* properties_required */
2305 PROP_gimple_lvec, /* properties_provided */
2306 0, /* properties_destroyed */
2307 0, /* todo_flags_start */
2308 TODO_update_ssa, /* todo_flags_finish */
2311 class pass_lower_vector : public gimple_opt_pass
2313 public:
2314 pass_lower_vector (gcc::context *ctxt)
2315 : gimple_opt_pass (pass_data_lower_vector, ctxt)
2318 /* opt_pass methods: */
2319 bool gate (function *fun) final override
2321 return !(fun->curr_properties & PROP_gimple_lvec);
2324 unsigned int execute (function *) final override
2326 return expand_vector_operations ();
2329 }; // class pass_lower_vector
2331 } // anon namespace
2333 gimple_opt_pass *
2334 make_pass_lower_vector (gcc::context *ctxt)
2336 return new pass_lower_vector (ctxt);
2339 namespace {
2341 const pass_data pass_data_lower_vector_ssa =
2343 GIMPLE_PASS, /* type */
2344 "veclower2", /* name */
2345 OPTGROUP_VEC, /* optinfo_flags */
2346 TV_NONE, /* tv_id */
2347 PROP_cfg, /* properties_required */
2348 PROP_gimple_lvec, /* properties_provided */
2349 0, /* properties_destroyed */
2350 0, /* todo_flags_start */
2351 ( TODO_update_ssa
2352 | TODO_cleanup_cfg ), /* todo_flags_finish */
2355 class pass_lower_vector_ssa : public gimple_opt_pass
2357 public:
2358 pass_lower_vector_ssa (gcc::context *ctxt)
2359 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2362 /* opt_pass methods: */
2363 opt_pass * clone () final override
2365 return new pass_lower_vector_ssa (m_ctxt);
2367 unsigned int execute (function *) final override
2369 return expand_vector_operations ();
2372 }; // class pass_lower_vector_ssa
2374 } // anon namespace
2376 gimple_opt_pass *
2377 make_pass_lower_vector_ssa (gcc::context *ctxt)
2379 return new pass_lower_vector_ssa (ctxt);
2382 #include "gt-tree-vect-generic.h"