gcc/internal-fn.cc
1 /* Internal functions.
2 Copyright (C) 2011-2025 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #define INCLUDE_MEMORY
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "predict.h"
30 #include "stringpool.h"
31 #include "tree-vrp.h"
32 #include "tree-ssanames.h"
33 #include "expmed.h"
34 #include "memmodel.h"
35 #include "optabs.h"
36 #include "emit-rtl.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "internal-fn.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "expr.h"
43 #include "stringpool.h"
44 #include "attribs.h"
45 #include "asan.h"
46 #include "ubsan.h"
47 #include "recog.h"
48 #include "builtins.h"
49 #include "optabs-tree.h"
50 #include "gimple-ssa.h"
51 #include "tree-phinodes.h"
52 #include "ssa-iterators.h"
53 #include "explow.h"
54 #include "rtl-iter.h"
55 #include "gimple-range.h"
56 #include "fold-const-call.h"
57 #include "tree-ssa-live.h"
58 #include "tree-outof-ssa.h"
59 #include "gcc-urlifier.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
 64 /* The name of each internal function, indexed by function number. */
65 const char *const internal_fn_name_array[] = {
66 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) #CODE,
67 #include "internal-fn.def"
68 "<invalid-fn>"
71 /* The ECF_* flags of each internal function, indexed by function number. */
72 const int internal_fn_flags_array[] = {
73 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) FLAGS,
74 #include "internal-fn.def"
78 /* Return the internal function called NAME, or IFN_LAST if there's
79 no such function. */
81 internal_fn
82 lookup_internal_fn (const char *name)
84 typedef hash_map<nofree_string_hash, internal_fn> name_to_fn_map_type;
85 static name_to_fn_map_type *name_to_fn_map;
87 if (!name_to_fn_map)
89 name_to_fn_map = new name_to_fn_map_type (IFN_LAST);
90 for (unsigned int i = 0; i < IFN_LAST; ++i)
91 name_to_fn_map->put (internal_fn_name (internal_fn (i)),
92 internal_fn (i));
94 internal_fn *entry = name_to_fn_map->get (name);
95 return entry ? *entry : IFN_LAST;
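/* Illustrative usage note (not part of the original file): the lookup key
   is the enumerator name without the IFN_ prefix, so for example
   lookup_internal_fn ("UBSAN_CHECK_ADD") would be expected to return
   IFN_UBSAN_CHECK_ADD, while an unrecognized string yields IFN_LAST.  */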
 98 /* Given an internal_fn IFN that is a widening function, return its
 99 corresponding LO and HI internal_fns in *LO and *HI. */
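/* For example (illustrative): a widening function such as
   IFN_VEC_WIDEN_PLUS is expected to map to IFN_VEC_WIDEN_PLUS_LO and
   IFN_VEC_WIDEN_PLUS_HI here, and to the _EVEN/_ODD variants in the
   function after this one.  */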
101 extern void
102 lookup_hilo_internal_fn (internal_fn ifn, internal_fn *lo, internal_fn *hi)
104 gcc_assert (widening_fn_p (ifn));
106 switch (ifn)
108 default:
109 gcc_unreachable ();
110 #define DEF_INTERNAL_FN(NAME, FLAGS, TYPE)
111 #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T) \
112 case IFN_##NAME: \
113 *lo = internal_fn (IFN_##NAME##_LO); \
114 *hi = internal_fn (IFN_##NAME##_HI); \
115 break;
116 #include "internal-fn.def"
120 /* Given an internal_fn IFN that is a widening function, return its
121 corresponding _EVEN and _ODD internal_fns in *EVEN and *ODD. */
123 extern void
124 lookup_evenodd_internal_fn (internal_fn ifn, internal_fn *even,
125 internal_fn *odd)
127 gcc_assert (widening_fn_p (ifn));
129 switch (ifn)
131 default:
132 gcc_unreachable ();
133 #define DEF_INTERNAL_FN(NAME, FLAGS, TYPE)
134 #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T) \
135 case IFN_##NAME: \
136 *even = internal_fn (IFN_##NAME##_EVEN); \
137 *odd = internal_fn (IFN_##NAME##_ODD); \
138 break;
139 #include "internal-fn.def"
144 /* Fnspec of each internal function, indexed by function number. */
145 const_tree internal_fn_fnspec_array[IFN_LAST + 1];
147 void
148 init_internal_fns ()
150 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
151 if (FNSPEC) internal_fn_fnspec_array[IFN_##CODE] = \
152 build_string ((int) sizeof (FNSPEC) - 1, FNSPEC ? FNSPEC : "");
153 #include "internal-fn.def"
154 internal_fn_fnspec_array[IFN_LAST] = 0;
157 /* Create static initializers for the information returned by
158 direct_internal_fn. */
159 #define not_direct { -2, -2, false }
160 #define mask_load_direct { -1, 2, false }
161 #define load_lanes_direct { -1, -1, false }
162 #define mask_load_lanes_direct { -1, -1, false }
163 #define gather_load_direct { 3, 1, false }
164 #define strided_load_direct { -1, -1, false }
165 #define len_load_direct { -1, -1, false }
166 #define mask_len_load_direct { -1, 4, false }
167 #define mask_store_direct { 3, 2, false }
168 #define store_lanes_direct { 0, 0, false }
169 #define mask_store_lanes_direct { 0, 0, false }
170 #define vec_cond_mask_direct { 1, 0, false }
171 #define vec_cond_mask_len_direct { 1, 1, false }
172 #define vec_cond_direct { 2, 0, false }
173 #define scatter_store_direct { 3, 1, false }
174 #define strided_store_direct { 1, 1, false }
175 #define len_store_direct { 3, 3, false }
176 #define mask_len_store_direct { 4, 5, false }
177 #define vec_set_direct { 3, 3, false }
178 #define vec_extract_direct { 0, -1, false }
179 #define unary_direct { 0, 0, true }
180 #define unary_convert_direct { -1, 0, true }
181 #define binary_direct { 0, 0, true }
182 #define ternary_direct { 0, 0, true }
183 #define cond_unary_direct { 1, 1, true }
184 #define cond_binary_direct { 1, 1, true }
185 #define cond_ternary_direct { 1, 1, true }
186 #define cond_len_unary_direct { 1, 1, true }
187 #define cond_len_binary_direct { 1, 1, true }
188 #define cond_len_ternary_direct { 1, 1, true }
189 #define while_direct { 0, 2, false }
190 #define fold_extract_direct { 2, 2, false }
191 #define fold_len_extract_direct { 2, 2, false }
192 #define fold_left_direct { 1, 1, false }
193 #define mask_fold_left_direct { 1, 1, false }
194 #define mask_len_fold_left_direct { 1, 1, false }
195 #define check_ptrs_direct { 0, 0, false }
196 #define crc_direct { 1, -1, true }
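/* Illustrative note (an assumption based on direct_internal_fn_info in
   internal-fn.h): each triple above is { type0, type1, vectorizable },
   where a non-negative value selects the call argument whose type supplies
   the corresponding optab mode and -1 selects the lhs type.  For example,
   mask_load_direct { -1, 2, false } takes its first mode from the return
   type and its second from the type of argument 2, while not_direct's -2
   marks functions with no direct optab mapping.  */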
198 const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
199 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
200 #define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) TYPE##_direct,
201 #define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
202 UNSIGNED_OPTAB, TYPE) TYPE##_direct,
203 #include "internal-fn.def"
204 not_direct
207 /* Like create_output_operand, but for callers that will use
208 assign_call_lhs afterwards. */
210 static void
211 create_call_lhs_operand (expand_operand *op, rtx lhs_rtx, machine_mode mode)
213 /* Do not assign directly to a promoted subreg, since there is no
214 guarantee that the instruction will leave the upper bits of the
215 register in the state required by SUBREG_PROMOTED_SIGN. */
216 rtx dest = lhs_rtx;
217 if (dest && GET_CODE (dest) == SUBREG && SUBREG_PROMOTED_VAR_P (dest))
218 dest = NULL_RTX;
219 create_output_operand (op, dest, mode);
222 /* Move the result of an expanded instruction into the lhs of a gimple call.
223 LHS is the lhs of the call, LHS_RTX is its expanded form, and OP is the
224 result of the expanded instruction. OP should have been set up by
225 create_call_lhs_operand. */
227 static void
228 assign_call_lhs (tree lhs, rtx lhs_rtx, expand_operand *op)
230 if (rtx_equal_p (lhs_rtx, op->value))
231 return;
233 /* If the return value has an integral type, convert the instruction
234 result to that type. This is useful for things that return an
235 int regardless of the size of the input. If the instruction result
236 is smaller than required, assume that it is signed.
238 If the return value has a nonintegral type, its mode must match
239 the instruction result. */
240 if (GET_CODE (lhs_rtx) == SUBREG && SUBREG_PROMOTED_VAR_P (lhs_rtx))
242 /* If this is a scalar in a register that is stored in a wider
243 mode than the declared mode, compute the result into its
244 declared mode and then convert to the wider mode. */
245 gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs)));
246 rtx tmp = convert_to_mode (GET_MODE (lhs_rtx), op->value, 0);
247 convert_move (SUBREG_REG (lhs_rtx), tmp,
248 SUBREG_PROMOTED_SIGN (lhs_rtx));
250 else if (GET_MODE (lhs_rtx) == GET_MODE (op->value))
251 emit_move_insn (lhs_rtx, op->value);
252 else
254 gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs)));
255 convert_move (lhs_rtx, op->value, 0);
259 /* Expand STMT using instruction ICODE. The instruction has NOUTPUTS
260 output operands and NINPUTS input operands, where NOUTPUTS is either
261 0 or 1. The output operand (if any) comes first, followed by the
262 NINPUTS input operands. */
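/* Illustrative note (assumption about typical usage): a direct unary optab
   function such as .SQRT (x) reaches this with NOUTPUTS == 1 and
   NINPUTS == 1, so ops[0] holds the call lhs and ops[1] the expanded
   argument before expand_insn is invoked.  */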
264 static void
265 expand_fn_using_insn (gcall *stmt, insn_code icode, unsigned int noutputs,
266 unsigned int ninputs)
268 gcc_assert (icode != CODE_FOR_nothing);
270 expand_operand *ops = XALLOCAVEC (expand_operand, noutputs + ninputs);
271 unsigned int opno = 0;
272 rtx lhs_rtx = NULL_RTX;
273 tree lhs = gimple_call_lhs (stmt);
275 if (noutputs)
277 gcc_assert (noutputs == 1);
278 if (lhs)
279 lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
280 create_call_lhs_operand (&ops[opno], lhs_rtx,
281 insn_data[icode].operand[opno].mode);
282 opno += 1;
284 else
285 gcc_assert (!lhs);
287 for (unsigned int i = 0; i < ninputs; ++i)
289 tree rhs = gimple_call_arg (stmt, i);
290 tree rhs_type = TREE_TYPE (rhs);
291 rtx rhs_rtx = expand_normal (rhs);
292 if (INTEGRAL_TYPE_P (rhs_type))
293 create_convert_operand_from (&ops[opno], rhs_rtx,
294 TYPE_MODE (rhs_type),
295 TYPE_UNSIGNED (rhs_type));
296 else if (TREE_CODE (rhs) == SSA_NAME
297 && SSA_NAME_IS_DEFAULT_DEF (rhs)
298 && VAR_P (SSA_NAME_VAR (rhs)))
299 create_undefined_input_operand (&ops[opno], TYPE_MODE (rhs_type));
300 else if (VECTOR_BOOLEAN_TYPE_P (rhs_type)
301 && SCALAR_INT_MODE_P (TYPE_MODE (rhs_type))
302 && maybe_ne (GET_MODE_PRECISION (TYPE_MODE (rhs_type)),
303 TYPE_VECTOR_SUBPARTS (rhs_type).to_constant ()))
305 /* Ensure that the vector bitmasks do not have excess bits. */
306 int nunits = TYPE_VECTOR_SUBPARTS (rhs_type).to_constant ();
307 rtx tmp = expand_binop (TYPE_MODE (rhs_type), and_optab, rhs_rtx,
308 GEN_INT ((HOST_WIDE_INT_1U << nunits) - 1),
309 NULL_RTX, true, OPTAB_WIDEN);
310 create_input_operand (&ops[opno], tmp, TYPE_MODE (rhs_type));
312 else
313 create_input_operand (&ops[opno], rhs_rtx, TYPE_MODE (rhs_type));
314 opno += 1;
317 gcc_assert (opno == noutputs + ninputs);
318 expand_insn (icode, opno, ops);
319 if (lhs_rtx)
320 assign_call_lhs (lhs, lhs_rtx, &ops[0]);
323 /* ARRAY_TYPE is an array of vector modes. Return the associated insn
324 for load-lanes-style optab OPTAB, or CODE_FOR_nothing if none. */
326 static enum insn_code
327 get_multi_vector_move (tree array_type, convert_optab optab)
329 machine_mode imode;
330 machine_mode vmode;
332 gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE);
333 imode = TYPE_MODE (array_type);
334 vmode = TYPE_MODE (TREE_TYPE (array_type));
336 return convert_optab_handler (optab, imode, vmode);
339 /* Add mask, else, and len arguments according to the STMT. */
341 static unsigned int
342 add_mask_else_and_len_args (expand_operand *ops, unsigned int opno, gcall *stmt)
344 internal_fn ifn = gimple_call_internal_fn (stmt);
345 int len_index = internal_fn_len_index (ifn);
 346 /* BIAS always immediately follows LEN. */
347 int bias_index = len_index + 1;
348 int mask_index = internal_fn_mask_index (ifn);
350 /* The order of arguments is always {mask, else, len, bias}. */
351 if (mask_index >= 0)
353 tree mask = gimple_call_arg (stmt, mask_index);
354 rtx mask_rtx = expand_normal (mask);
356 tree mask_type = TREE_TYPE (mask);
357 if (VECTOR_BOOLEAN_TYPE_P (mask_type)
358 && SCALAR_INT_MODE_P (TYPE_MODE (mask_type))
359 && maybe_ne (GET_MODE_PRECISION (TYPE_MODE (mask_type)),
360 TYPE_VECTOR_SUBPARTS (mask_type).to_constant ()))
362 /* Ensure that the vector bitmasks do not have excess bits. */
363 int nunits = TYPE_VECTOR_SUBPARTS (mask_type).to_constant ();
364 mask_rtx = expand_binop (TYPE_MODE (mask_type), and_optab, mask_rtx,
365 GEN_INT ((HOST_WIDE_INT_1U << nunits) - 1),
366 NULL_RTX, true, OPTAB_WIDEN);
369 create_input_operand (&ops[opno++], mask_rtx,
370 TYPE_MODE (TREE_TYPE (mask)));
373 int els_index = internal_fn_else_index (ifn);
374 if (els_index >= 0)
376 tree els = gimple_call_arg (stmt, els_index);
377 tree els_type = TREE_TYPE (els);
378 if (TREE_CODE (els) == SSA_NAME
379 && SSA_NAME_IS_DEFAULT_DEF (els)
380 && VAR_P (SSA_NAME_VAR (els)))
381 create_undefined_input_operand (&ops[opno++], TYPE_MODE (els_type));
382 else
384 rtx els_rtx = expand_normal (els);
385 create_input_operand (&ops[opno++], els_rtx, TYPE_MODE (els_type));
388 if (len_index >= 0)
390 tree len = gimple_call_arg (stmt, len_index);
391 rtx len_rtx = expand_normal (len);
392 create_convert_operand_from (&ops[opno++], len_rtx,
393 TYPE_MODE (TREE_TYPE (len)),
394 TYPE_UNSIGNED (TREE_TYPE (len)));
395 tree biast = gimple_call_arg (stmt, bias_index);
396 rtx bias = expand_normal (biast);
397 create_input_operand (&ops[opno++], bias, QImode);
399 return opno;
402 /* Expand LOAD_LANES call STMT using optab OPTAB. */
404 static void
405 expand_load_lanes_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
407 class expand_operand ops[2];
408 tree type, lhs, rhs;
409 rtx target, mem;
411 lhs = gimple_call_lhs (stmt);
412 rhs = gimple_call_arg (stmt, 0);
413 type = TREE_TYPE (lhs);
415 target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
416 mem = expand_normal (rhs);
418 gcc_assert (MEM_P (mem));
419 PUT_MODE (mem, TYPE_MODE (type));
421 create_call_lhs_operand (&ops[0], target, TYPE_MODE (type));
422 create_fixed_operand (&ops[1], mem);
423 expand_insn (get_multi_vector_move (type, optab), 2, ops);
424 assign_call_lhs (lhs, target, &ops[0]);
427 /* Expand STORE_LANES call STMT using optab OPTAB. */
429 static void
430 expand_store_lanes_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
432 class expand_operand ops[2];
433 tree type, lhs, rhs;
434 rtx target, reg;
436 lhs = gimple_call_lhs (stmt);
437 rhs = gimple_call_arg (stmt, 0);
438 type = TREE_TYPE (rhs);
440 target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
441 reg = expand_normal (rhs);
443 gcc_assert (MEM_P (target));
444 PUT_MODE (target, TYPE_MODE (type));
446 create_fixed_operand (&ops[0], target);
447 create_input_operand (&ops[1], reg, TYPE_MODE (type));
448 expand_insn (get_multi_vector_move (type, optab), 2, ops);
451 static void
452 expand_ANNOTATE (internal_fn, gcall *)
454 gcc_unreachable ();
457 /* This should get expanded in omp_device_lower pass. */
459 static void
460 expand_GOMP_USE_SIMT (internal_fn, gcall *)
462 gcc_unreachable ();
465 /* This should get expanded in omp_device_lower pass. */
467 static void
468 expand_GOMP_SIMT_ENTER (internal_fn, gcall *)
470 gcc_unreachable ();
473 /* Allocate per-lane storage and begin non-uniform execution region. */
475 static void
476 expand_GOMP_SIMT_ENTER_ALLOC (internal_fn, gcall *stmt)
478 rtx target;
479 tree lhs = gimple_call_lhs (stmt);
480 if (lhs)
481 target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
482 else
483 target = gen_reg_rtx (Pmode);
484 rtx size = expand_normal (gimple_call_arg (stmt, 0));
485 rtx align = expand_normal (gimple_call_arg (stmt, 1));
486 class expand_operand ops[3];
487 create_call_lhs_operand (&ops[0], target, Pmode);
488 create_input_operand (&ops[1], size, Pmode);
489 create_input_operand (&ops[2], align, Pmode);
490 gcc_assert (targetm.have_omp_simt_enter ());
491 expand_insn (targetm.code_for_omp_simt_enter, 3, ops);
492 assign_call_lhs (lhs, target, &ops[0]);
495 /* Deallocate per-lane storage and leave non-uniform execution region. */
497 static void
498 expand_GOMP_SIMT_EXIT (internal_fn, gcall *stmt)
500 gcc_checking_assert (!gimple_call_lhs (stmt));
501 rtx arg = expand_normal (gimple_call_arg (stmt, 0));
502 class expand_operand ops[1];
503 create_input_operand (&ops[0], arg, Pmode);
504 gcc_assert (targetm.have_omp_simt_exit ());
505 expand_insn (targetm.code_for_omp_simt_exit, 1, ops);
508 /* Lane index on SIMT targets: thread index in the warp on NVPTX. On targets
509 without SIMT execution this should be expanded in omp_device_lower pass. */
511 static void
512 expand_GOMP_SIMT_LANE (internal_fn, gcall *stmt)
514 tree lhs = gimple_call_lhs (stmt);
515 if (!lhs)
516 return;
518 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
519 gcc_assert (targetm.have_omp_simt_lane ());
520 emit_insn (targetm.gen_omp_simt_lane (target));
523 /* This should get expanded in omp_device_lower pass. */
525 static void
526 expand_GOMP_SIMT_VF (internal_fn, gcall *)
528 gcc_unreachable ();
531 /* This should get expanded in omp_device_lower pass. */
533 static void
534 expand_GOMP_MAX_VF (internal_fn, gcall *)
536 gcc_unreachable ();
539 /* This should get expanded in omp_device_lower pass. */
541 static void
542 expand_GOMP_TARGET_REV (internal_fn, gcall *)
544 gcc_unreachable ();
547 /* Lane index of the first SIMT lane that supplies a non-zero argument.
548 This is a SIMT counterpart to GOMP_SIMD_LAST_LANE, used to represent the
549 lane that executed the last iteration for handling OpenMP lastprivate. */
551 static void
552 expand_GOMP_SIMT_LAST_LANE (internal_fn, gcall *stmt)
554 tree lhs = gimple_call_lhs (stmt);
555 if (!lhs)
556 return;
558 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
559 rtx cond = expand_normal (gimple_call_arg (stmt, 0));
560 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
561 class expand_operand ops[2];
562 create_call_lhs_operand (&ops[0], target, mode);
563 create_input_operand (&ops[1], cond, mode);
564 gcc_assert (targetm.have_omp_simt_last_lane ());
565 expand_insn (targetm.code_for_omp_simt_last_lane, 2, ops);
566 assign_call_lhs (lhs, target, &ops[0]);
569 /* Non-transparent predicate used in SIMT lowering of OpenMP "ordered". */
571 static void
572 expand_GOMP_SIMT_ORDERED_PRED (internal_fn, gcall *stmt)
574 tree lhs = gimple_call_lhs (stmt);
575 if (!lhs)
576 return;
578 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
579 rtx ctr = expand_normal (gimple_call_arg (stmt, 0));
580 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
581 class expand_operand ops[2];
582 create_call_lhs_operand (&ops[0], target, mode);
583 create_input_operand (&ops[1], ctr, mode);
584 gcc_assert (targetm.have_omp_simt_ordered ());
585 expand_insn (targetm.code_for_omp_simt_ordered, 2, ops);
586 assign_call_lhs (lhs, target, &ops[0]);
589 /* "Or" boolean reduction across SIMT lanes: return non-zero in all lanes if
590 any lane supplies a non-zero argument. */
592 static void
593 expand_GOMP_SIMT_VOTE_ANY (internal_fn, gcall *stmt)
595 tree lhs = gimple_call_lhs (stmt);
596 if (!lhs)
597 return;
599 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
600 rtx cond = expand_normal (gimple_call_arg (stmt, 0));
601 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
602 class expand_operand ops[2];
603 create_call_lhs_operand (&ops[0], target, mode);
604 create_input_operand (&ops[1], cond, mode);
605 gcc_assert (targetm.have_omp_simt_vote_any ());
606 expand_insn (targetm.code_for_omp_simt_vote_any, 2, ops);
607 assign_call_lhs (lhs, target, &ops[0]);
610 /* Exchange between SIMT lanes with a "butterfly" pattern: source lane index
611 is destination lane index XOR given offset. */
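/* For example (illustrative): with offset 1 the lanes exchange in pairs
   (0 <-> 1, 2 <-> 3, ...), and with offset 2 lane 0 reads from lane 2 and
   lane 1 from lane 3, since the source lane is destination ^ offset.  */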
613 static void
614 expand_GOMP_SIMT_XCHG_BFLY (internal_fn, gcall *stmt)
616 tree lhs = gimple_call_lhs (stmt);
617 if (!lhs)
618 return;
620 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
621 rtx src = expand_normal (gimple_call_arg (stmt, 0));
622 rtx idx = expand_normal (gimple_call_arg (stmt, 1));
623 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
624 class expand_operand ops[3];
625 create_call_lhs_operand (&ops[0], target, mode);
626 create_input_operand (&ops[1], src, mode);
627 create_input_operand (&ops[2], idx, SImode);
628 gcc_assert (targetm.have_omp_simt_xchg_bfly ());
629 expand_insn (targetm.code_for_omp_simt_xchg_bfly, 3, ops);
630 assign_call_lhs (lhs, target, &ops[0]);
633 /* Exchange between SIMT lanes according to given source lane index. */
635 static void
636 expand_GOMP_SIMT_XCHG_IDX (internal_fn, gcall *stmt)
638 tree lhs = gimple_call_lhs (stmt);
639 if (!lhs)
640 return;
642 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
643 rtx src = expand_normal (gimple_call_arg (stmt, 0));
644 rtx idx = expand_normal (gimple_call_arg (stmt, 1));
645 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
646 class expand_operand ops[3];
647 create_call_lhs_operand (&ops[0], target, mode);
648 create_input_operand (&ops[1], src, mode);
649 create_input_operand (&ops[2], idx, SImode);
650 gcc_assert (targetm.have_omp_simt_xchg_idx ());
651 expand_insn (targetm.code_for_omp_simt_xchg_idx, 3, ops);
652 assign_call_lhs (lhs, target, &ops[0]);
655 /* This should get expanded in adjust_simduid_builtins. */
657 static void
658 expand_GOMP_SIMD_LANE (internal_fn, gcall *)
660 gcc_unreachable ();
663 /* This should get expanded in adjust_simduid_builtins. */
665 static void
666 expand_GOMP_SIMD_VF (internal_fn, gcall *)
668 gcc_unreachable ();
671 /* This should get expanded in adjust_simduid_builtins. */
673 static void
674 expand_GOMP_SIMD_LAST_LANE (internal_fn, gcall *)
676 gcc_unreachable ();
679 /* This should get expanded in adjust_simduid_builtins. */
681 static void
682 expand_GOMP_SIMD_ORDERED_START (internal_fn, gcall *)
684 gcc_unreachable ();
687 /* This should get expanded in adjust_simduid_builtins. */
689 static void
690 expand_GOMP_SIMD_ORDERED_END (internal_fn, gcall *)
692 gcc_unreachable ();
695 /* This should get expanded in gimplify_omp_dispatch. */
697 static void
698 expand_GOMP_DISPATCH (internal_fn, gcall *)
700 gcc_unreachable ();
703 /* This should get expanded in the sanopt pass. */
705 static void
706 expand_UBSAN_NULL (internal_fn, gcall *)
708 gcc_unreachable ();
711 /* This should get expanded in the sanopt pass. */
713 static void
714 expand_UBSAN_BOUNDS (internal_fn, gcall *)
716 gcc_unreachable ();
719 /* This should get expanded in the sanopt pass. */
721 static void
722 expand_UBSAN_VPTR (internal_fn, gcall *)
724 gcc_unreachable ();
727 /* This should get expanded in the sanopt pass. */
729 static void
730 expand_UBSAN_PTR (internal_fn, gcall *)
732 gcc_unreachable ();
735 /* This should get expanded in the sanopt pass. */
737 static void
738 expand_UBSAN_OBJECT_SIZE (internal_fn, gcall *)
740 gcc_unreachable ();
743 /* This should get expanded in the sanopt pass. */
745 static void
746 expand_HWASAN_CHECK (internal_fn, gcall *)
748 gcc_unreachable ();
751 /* For hwasan stack tagging:
752 Clear tags on the dynamically allocated space.
753 For use after an object dynamically allocated on the stack goes out of
754 scope. */
755 static void
756 expand_HWASAN_ALLOCA_UNPOISON (internal_fn, gcall *gc)
758 gcc_assert (Pmode == ptr_mode);
759 tree restored_position = gimple_call_arg (gc, 0);
760 rtx restored_rtx = expand_expr (restored_position, NULL_RTX, VOIDmode,
761 EXPAND_NORMAL);
762 rtx func = init_one_libfunc ("__hwasan_tag_memory");
763 rtx off = expand_simple_binop (Pmode, MINUS, restored_rtx,
764 stack_pointer_rtx, NULL_RTX, 0,
765 OPTAB_WIDEN);
766 emit_library_call_value (func, NULL_RTX, LCT_NORMAL, VOIDmode,
767 virtual_stack_dynamic_rtx, Pmode,
768 HWASAN_STACK_BACKGROUND, QImode,
769 off, Pmode);
772 /* For hwasan stack tagging:
773 Return a tag to be used for a dynamic allocation. */
774 static void
775 expand_HWASAN_CHOOSE_TAG (internal_fn, gcall *gc)
777 tree tag = gimple_call_lhs (gc);
778 rtx target = expand_expr (tag, NULL_RTX, VOIDmode, EXPAND_NORMAL);
779 machine_mode mode = GET_MODE (target);
780 gcc_assert (mode == QImode);
782 rtx base_tag = targetm.memtag.extract_tag (hwasan_frame_base (), NULL_RTX);
783 gcc_assert (base_tag);
784 rtx tag_offset = gen_int_mode (hwasan_current_frame_tag (), QImode);
785 rtx chosen_tag = expand_simple_binop (QImode, PLUS, base_tag, tag_offset,
786 target, /* unsignedp = */1,
787 OPTAB_WIDEN);
788 chosen_tag = hwasan_truncate_to_tag_size (chosen_tag, target);
790 /* Really need to put the tag into the `target` RTX. */
791 if (chosen_tag != target)
793 rtx temp = chosen_tag;
794 gcc_assert (GET_MODE (chosen_tag) == mode);
795 emit_move_insn (target, temp);
798 hwasan_increment_frame_tag ();
801 /* For hwasan stack tagging:
802 Tag a region of space in the shadow stack according to the base pointer of
803 an object on the stack. N.b. the length provided in the internal call is
804 required to be aligned to HWASAN_TAG_GRANULE_SIZE. */
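/* Illustrative note based on the body below: when the flag argument is
   ASAN_MARK_POISON the region is retagged with HWASAN_STACK_BACKGROUND,
   otherwise it is retagged with the tag extracted from the object's base
   pointer; both cases go through the __hwasan_tag_memory libcall.  */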
805 static void
806 expand_HWASAN_MARK (internal_fn, gcall *gc)
808 gcc_assert (ptr_mode == Pmode);
809 HOST_WIDE_INT flag = tree_to_shwi (gimple_call_arg (gc, 0));
810 bool is_poison = ((asan_mark_flags)flag) == ASAN_MARK_POISON;
812 tree base = gimple_call_arg (gc, 1);
813 gcc_checking_assert (TREE_CODE (base) == ADDR_EXPR);
814 rtx base_rtx = expand_normal (base);
816 rtx tag = is_poison ? HWASAN_STACK_BACKGROUND
817 : targetm.memtag.extract_tag (base_rtx, NULL_RTX);
818 rtx address = targetm.memtag.untagged_pointer (base_rtx, NULL_RTX);
820 tree len = gimple_call_arg (gc, 2);
821 rtx r_len = expand_normal (len);
823 rtx func = init_one_libfunc ("__hwasan_tag_memory");
824 emit_library_call (func, LCT_NORMAL, VOIDmode, address, Pmode,
825 tag, QImode, r_len, Pmode);
828 /* For hwasan stack tagging:
829 Store a tag into a pointer. */
830 static void
831 expand_HWASAN_SET_TAG (internal_fn, gcall *gc)
833 gcc_assert (ptr_mode == Pmode);
834 tree g_target = gimple_call_lhs (gc);
835 tree g_ptr = gimple_call_arg (gc, 0);
836 tree g_tag = gimple_call_arg (gc, 1);
838 rtx ptr = expand_normal (g_ptr);
839 rtx tag = expand_expr (g_tag, NULL_RTX, QImode, EXPAND_NORMAL);
840 rtx target = expand_normal (g_target);
842 rtx untagged = targetm.memtag.untagged_pointer (ptr, target);
843 rtx tagged_value = targetm.memtag.set_tag (untagged, tag, target);
844 if (tagged_value != target)
845 emit_move_insn (target, tagged_value);
848 /* This should get expanded in the sanopt pass. */
850 static void
851 expand_ASAN_CHECK (internal_fn, gcall *)
853 gcc_unreachable ();
856 /* This should get expanded in the sanopt pass. */
858 static void
859 expand_ASAN_MARK (internal_fn, gcall *)
861 gcc_unreachable ();
864 /* This should get expanded in the sanopt pass. */
866 static void
867 expand_ASAN_POISON (internal_fn, gcall *)
869 gcc_unreachable ();
872 /* This should get expanded in the sanopt pass. */
874 static void
875 expand_ASAN_POISON_USE (internal_fn, gcall *)
877 gcc_unreachable ();
880 /* This should get expanded in the tsan pass. */
882 static void
883 expand_TSAN_FUNC_EXIT (internal_fn, gcall *)
885 gcc_unreachable ();
888 /* This should get expanded in the lower pass. */
890 static void
891 expand_FALLTHROUGH (internal_fn, gcall *call)
893 auto_urlify_attributes sentinel;
894 error_at (gimple_location (call),
895 "invalid use of attribute %<fallthrough%>");
898 /* Return minimum precision needed to represent all values
899 of ARG in SIGNed integral type. */
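/* Worked example (illustrative): for an int constant 200 this returns 8
   when SIGN is UNSIGNED but 9 when SIGN is SIGNED (one extra bit for the
   sign), in both cases capped at the precision of ARG's type.  */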
901 static int
902 get_min_precision (tree arg, signop sign)
904 int prec = TYPE_PRECISION (TREE_TYPE (arg));
905 int cnt = 0;
906 signop orig_sign = sign;
907 if (TREE_CODE (arg) == INTEGER_CST)
909 int p;
910 if (TYPE_SIGN (TREE_TYPE (arg)) != sign)
912 widest_int w = wi::to_widest (arg);
913 w = wi::ext (w, prec, sign);
914 p = wi::min_precision (w, sign);
916 else
917 p = wi::min_precision (wi::to_wide (arg), sign);
918 return MIN (p, prec);
920 while (CONVERT_EXPR_P (arg)
921 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0)))
922 && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) <= prec)
924 arg = TREE_OPERAND (arg, 0);
925 if (TYPE_PRECISION (TREE_TYPE (arg)) < prec)
927 if (TYPE_UNSIGNED (TREE_TYPE (arg)))
928 sign = UNSIGNED;
929 else if (sign == UNSIGNED && get_range_pos_neg (arg) != 1)
930 return prec + (orig_sign != sign);
931 prec = TYPE_PRECISION (TREE_TYPE (arg));
933 if (++cnt > 30)
934 return prec + (orig_sign != sign);
936 if (CONVERT_EXPR_P (arg)
937 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0)))
938 && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) > prec)
940 /* We have e.g. (unsigned short) y_2 where int y_2 = (int) x_1(D);
941 If y_2's min precision is smaller than prec, return that. */
942 int oprec = get_min_precision (TREE_OPERAND (arg, 0), sign);
943 if (oprec < prec)
944 return oprec + (orig_sign != sign);
946 if (TREE_CODE (arg) != SSA_NAME)
947 return prec + (orig_sign != sign);
948 int_range_max r;
949 while (!get_global_range_query ()->range_of_expr (r, arg)
950 || r.varying_p ()
951 || r.undefined_p ())
953 gimple *g = SSA_NAME_DEF_STMT (arg);
954 if (is_gimple_assign (g)
955 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (g)))
957 tree t = gimple_assign_rhs1 (g);
958 if (INTEGRAL_TYPE_P (TREE_TYPE (t))
959 && TYPE_PRECISION (TREE_TYPE (t)) <= prec)
961 arg = t;
962 if (TYPE_PRECISION (TREE_TYPE (arg)) < prec)
964 if (TYPE_UNSIGNED (TREE_TYPE (arg)))
965 sign = UNSIGNED;
966 else if (sign == UNSIGNED && get_range_pos_neg (arg) != 1)
967 return prec + (orig_sign != sign);
968 prec = TYPE_PRECISION (TREE_TYPE (arg));
970 if (++cnt > 30)
971 return prec + (orig_sign != sign);
972 continue;
975 return prec + (orig_sign != sign);
977 if (sign == TYPE_SIGN (TREE_TYPE (arg)))
979 int p1 = wi::min_precision (r.lower_bound (), sign);
980 int p2 = wi::min_precision (r.upper_bound (), sign);
981 p1 = MAX (p1, p2);
982 prec = MIN (prec, p1);
984 else if (sign == UNSIGNED && !wi::neg_p (r.lower_bound (), SIGNED))
986 int p = wi::min_precision (r.upper_bound (), UNSIGNED);
987 prec = MIN (prec, p);
989 return prec + (orig_sign != sign);
992 /* Helper for expand_*_overflow. Set the __imag__ part to true
993 (1 except for signed:1 type, in which case store -1). */
995 static void
996 expand_arith_set_overflow (tree lhs, rtx target)
998 if (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (lhs))) == 1
999 && !TYPE_UNSIGNED (TREE_TYPE (TREE_TYPE (lhs))))
1000 write_complex_part (target, constm1_rtx, true, false);
1001 else
1002 write_complex_part (target, const1_rtx, true, false);
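/* Illustrative note (assumption about the gimple form): the result of
   .ADD_OVERFLOW and related internal functions is a complex value whose
   __real__ part is the arithmetic result and whose __imag__ part is the
   overflow flag, e.g.

     _1 = .ADD_OVERFLOW (a_2, b_3);
     sum_4 = REALPART_EXPR <_1>;
     ovf_5 = IMAGPART_EXPR <_1>;

   which is why the helpers here write the two complex parts separately.  */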
1005 /* Helper for expand_*_overflow. Store RES into the __real__ part
1006 of TARGET. If RES has larger MODE than __real__ part of TARGET,
1007 set the __imag__ part to 1 if RES doesn't fit into it. Similarly
1008 if LHS has smaller precision than its mode. */
1010 static void
1011 expand_arith_overflow_result_store (tree lhs, rtx target,
1012 scalar_int_mode mode, rtx res)
1014 scalar_int_mode tgtmode
1015 = as_a <scalar_int_mode> (GET_MODE_INNER (GET_MODE (target)));
1016 rtx lres = res;
1017 if (tgtmode != mode)
1019 rtx_code_label *done_label = gen_label_rtx ();
1020 int uns = TYPE_UNSIGNED (TREE_TYPE (TREE_TYPE (lhs)));
1021 lres = convert_modes (tgtmode, mode, res, uns);
1022 gcc_assert (GET_MODE_PRECISION (tgtmode) < GET_MODE_PRECISION (mode));
1023 do_compare_rtx_and_jump (res, convert_modes (mode, tgtmode, lres, uns),
1024 EQ, true, mode, NULL_RTX, NULL, done_label,
1025 profile_probability::very_likely ());
1026 expand_arith_set_overflow (lhs, target);
1027 emit_label (done_label);
1029 int prec = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (lhs)));
1030 int tgtprec = GET_MODE_PRECISION (tgtmode);
1031 if (prec < tgtprec)
1033 rtx_code_label *done_label = gen_label_rtx ();
1034 int uns = TYPE_UNSIGNED (TREE_TYPE (TREE_TYPE (lhs)));
1035 res = lres;
1036 if (uns)
1038 rtx mask
1039 = immed_wide_int_const (wi::shifted_mask (0, prec, false, tgtprec),
1040 tgtmode);
1041 lres = expand_simple_binop (tgtmode, AND, res, mask, NULL_RTX,
1042 true, OPTAB_LIB_WIDEN);
1044 else
1046 lres = expand_shift (LSHIFT_EXPR, tgtmode, res, tgtprec - prec,
1047 NULL_RTX, 1);
1048 lres = expand_shift (RSHIFT_EXPR, tgtmode, lres, tgtprec - prec,
1049 NULL_RTX, 0);
1051 do_compare_rtx_and_jump (res, lres,
1052 EQ, true, tgtmode, NULL_RTX, NULL, done_label,
1053 profile_probability::very_likely ());
1054 expand_arith_set_overflow (lhs, target);
1055 emit_label (done_label);
1057 write_complex_part (target, lres, false, false);
1060 /* Helper for expand_*_overflow. Store RES into TARGET. */
1062 static void
1063 expand_ubsan_result_store (tree lhs, rtx target, scalar_int_mode mode,
1064 rtx res, rtx_code_label *do_error)
1066 if (TREE_CODE (TREE_TYPE (lhs)) == BITINT_TYPE
1067 && TYPE_PRECISION (TREE_TYPE (lhs)) < GET_MODE_PRECISION (mode))
1069 int uns = TYPE_UNSIGNED (TREE_TYPE (lhs));
1070 int prec = TYPE_PRECISION (TREE_TYPE (lhs));
1071 int tgtprec = GET_MODE_PRECISION (mode);
1072 rtx resc = gen_reg_rtx (mode), lres;
1073 emit_move_insn (resc, res);
1074 if (uns)
1076 rtx mask
1077 = immed_wide_int_const (wi::shifted_mask (0, prec, false, tgtprec),
1078 mode);
1079 lres = expand_simple_binop (mode, AND, res, mask, NULL_RTX,
1080 true, OPTAB_LIB_WIDEN);
1082 else
1084 lres = expand_shift (LSHIFT_EXPR, mode, res, tgtprec - prec,
1085 NULL_RTX, 1);
1086 lres = expand_shift (RSHIFT_EXPR, mode, lres, tgtprec - prec,
1087 NULL_RTX, 0);
1089 if (lres != res)
1090 emit_move_insn (res, lres);
1091 do_compare_rtx_and_jump (res, resc,
1092 NE, true, mode, NULL_RTX, NULL, do_error,
1093 profile_probability::very_unlikely ());
1095 if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
1096 /* If this is a scalar in a register that is stored in a wider mode
1097 than the declared mode, compute the result into its declared mode
1098 and then convert to the wider mode. Our value is the computed
1099 expression. */
1100 convert_move (SUBREG_REG (target), res, SUBREG_PROMOTED_SIGN (target));
1101 else
1102 emit_move_insn (target, res);
1105 /* Add sub/add overflow checking to the statement STMT.
1106 CODE says whether the operation is +, or -. */
1108 void
1109 expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
1110 tree arg0, tree arg1, bool unsr_p, bool uns0_p,
1111 bool uns1_p, bool is_ubsan, tree *datap)
1113 rtx res, target = NULL_RTX;
1114 tree fn;
1115 rtx_code_label *done_label = gen_label_rtx ();
1116 rtx_code_label *do_error = gen_label_rtx ();
1117 do_pending_stack_adjust ();
1118 rtx op0 = expand_normal (arg0);
1119 rtx op1 = expand_normal (arg1);
1120 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg0));
1121 int prec = GET_MODE_PRECISION (mode);
1122 rtx sgn = immed_wide_int_const (wi::min_value (prec, SIGNED), mode);
1123 bool do_xor = false;
1125 if (is_ubsan)
1126 gcc_assert (!unsr_p && !uns0_p && !uns1_p);
1128 if (lhs)
1130 target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
1131 if (!is_ubsan)
1132 write_complex_part (target, const0_rtx, true, false);
1135 /* We assume both operands and result have the same precision
1136 here (GET_MODE_BITSIZE (mode)), S stands for signed type
1137 with that precision, U for unsigned type with that precision,
1138 sgn for unsigned most significant bit in that precision.
1139 s1 is signed first operand, u1 is unsigned first operand,
1140 s2 is signed second operand, u2 is unsigned second operand,
1141 sr is signed result, ur is unsigned result and the following
1142 rules say how to compute result (which is always result of
1143 the operands as if both were unsigned, cast to the right
1144 signedness) and how to compute whether operation overflowed.
1146 s1 + s2 -> sr
1147 res = (S) ((U) s1 + (U) s2)
1148 ovf = s2 < 0 ? res > s1 : res < s1 (or jump on overflow)
1149 s1 - s2 -> sr
1150 res = (S) ((U) s1 - (U) s2)
 1151 ovf = s2 < 0 ? res < s1 : res > s1 (or jump on overflow)
1152 u1 + u2 -> ur
1153 res = u1 + u2
1154 ovf = res < u1 (or jump on carry, but RTL opts will handle it)
1155 u1 - u2 -> ur
1156 res = u1 - u2
1157 ovf = res > u1 (or jump on carry, but RTL opts will handle it)
1158 s1 + u2 -> sr
1159 res = (S) ((U) s1 + u2)
1160 ovf = ((U) res ^ sgn) < u2
1161 s1 + u2 -> ur
1162 t1 = (S) (u2 ^ sgn)
1163 t2 = s1 + t1
1164 res = (U) t2 ^ sgn
1165 ovf = t1 < 0 ? t2 > s1 : t2 < s1 (or jump on overflow)
1166 s1 - u2 -> sr
1167 res = (S) ((U) s1 - u2)
1168 ovf = u2 > ((U) s1 ^ sgn)
1169 s1 - u2 -> ur
1170 res = (U) s1 - u2
1171 ovf = s1 < 0 || u2 > (U) s1
1172 u1 - s2 -> sr
1173 res = u1 - (U) s2
1174 ovf = u1 >= ((U) s2 ^ sgn)
1175 u1 - s2 -> ur
1176 t1 = u1 ^ sgn
1177 t2 = t1 - (U) s2
1178 res = t2 ^ sgn
1179 ovf = s2 < 0 ? (S) t2 < (S) t1 : (S) t2 > (S) t1 (or jump on overflow)
1180 s1 + s2 -> ur
1181 res = (U) s1 + (U) s2
 1182 ovf = s2 < 0 ? ((s1 | (S) res) < 0) : ((s1 & (S) res) < 0)
1183 u1 + u2 -> sr
1184 res = (S) (u1 + u2)
1185 ovf = (U) res < u2 || res < 0
1186 u1 - u2 -> sr
1187 res = (S) (u1 - u2)
1188 ovf = u1 >= u2 ? res < 0 : res >= 0
1189 s1 - s2 -> ur
1190 res = (U) s1 - (U) s2
1191 ovf = s2 >= 0 ? ((s1 | (S) res) < 0) : ((s1 & (S) res) < 0) */
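/* Worked example of the first rule above (illustrative, 8-bit): for
   s1 + s2 -> sr with s1 = 100 and s2 = 50, res = (S) ((U) 100 + (U) 50)
   = (S) 150 = -106; s2 >= 0 yet res < s1, so the addition overflowed.  */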
1193 if (code == PLUS_EXPR && uns0_p && !uns1_p)
1195 /* PLUS_EXPR is commutative, if operand signedness differs,
1196 canonicalize to the first operand being signed and second
1197 unsigned to simplify following code. */
1198 std::swap (op0, op1);
1199 std::swap (arg0, arg1);
1200 uns0_p = false;
1201 uns1_p = true;
1204 /* u1 +- u2 -> ur */
1205 if (uns0_p && uns1_p && unsr_p)
1207 insn_code icode = optab_handler (code == PLUS_EXPR ? uaddv4_optab
1208 : usubv4_optab, mode);
1209 if (icode != CODE_FOR_nothing)
1211 class expand_operand ops[4];
1212 rtx_insn *last = get_last_insn ();
1214 res = gen_reg_rtx (mode);
1215 create_output_operand (&ops[0], res, mode);
1216 create_input_operand (&ops[1], op0, mode);
1217 create_input_operand (&ops[2], op1, mode);
1218 create_fixed_operand (&ops[3], do_error);
1219 if (maybe_expand_insn (icode, 4, ops))
1221 last = get_last_insn ();
1222 if (profile_status_for_fn (cfun) != PROFILE_ABSENT
1223 && JUMP_P (last)
1224 && any_condjump_p (last)
1225 && !find_reg_note (last, REG_BR_PROB, 0))
1226 add_reg_br_prob_note (last,
1227 profile_probability::very_unlikely ());
1228 emit_jump (done_label);
1229 goto do_error_label;
1232 delete_insns_since (last);
1235 /* Compute the operation. On RTL level, the addition is always
1236 unsigned. */
1237 res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
1238 op0, op1, NULL_RTX, false, OPTAB_LIB_WIDEN);
1239 rtx tem = op0;
1240 /* For PLUS_EXPR, the operation is commutative, so we can pick
1241 operand to compare against. For prec <= BITS_PER_WORD, I think
1242 preferring REG operand is better over CONST_INT, because
1243 the CONST_INT might enlarge the instruction or CSE would need
1244 to figure out we'd already loaded it into a register before.
1245 For prec > BITS_PER_WORD, I think CONST_INT might be more beneficial,
1246 as then the multi-word comparison can be perhaps simplified. */
1247 if (code == PLUS_EXPR
1248 && (prec <= BITS_PER_WORD
1249 ? (CONST_SCALAR_INT_P (op0) && REG_P (op1))
1250 : CONST_SCALAR_INT_P (op1)))
1251 tem = op1;
1252 do_compare_rtx_and_jump (res, tem, code == PLUS_EXPR ? GEU : LEU,
1253 true, mode, NULL_RTX, NULL, done_label,
1254 profile_probability::very_likely ());
1255 goto do_error_label;
1258 /* s1 +- u2 -> sr */
1259 if (!uns0_p && uns1_p && !unsr_p)
1261 /* Compute the operation. On RTL level, the addition is always
1262 unsigned. */
1263 res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
1264 op0, op1, NULL_RTX, false, OPTAB_LIB_WIDEN);
1265 rtx tem = expand_binop (mode, add_optab,
1266 code == PLUS_EXPR ? res : op0, sgn,
1267 NULL_RTX, false, OPTAB_LIB_WIDEN);
1268 do_compare_rtx_and_jump (tem, op1, GEU, true, mode, NULL_RTX, NULL,
1269 done_label, profile_probability::very_likely ());
1270 goto do_error_label;
1273 /* s1 + u2 -> ur */
1274 if (code == PLUS_EXPR && !uns0_p && uns1_p && unsr_p)
1276 op1 = expand_binop (mode, add_optab, op1, sgn, NULL_RTX, false,
1277 OPTAB_LIB_WIDEN);
1278 /* As we've changed op1, we have to avoid using the value range
1279 for the original argument. */
1280 arg1 = error_mark_node;
1281 do_xor = true;
1282 goto do_signed;
1285 /* u1 - s2 -> ur */
1286 if (code == MINUS_EXPR && uns0_p && !uns1_p && unsr_p)
1288 op0 = expand_binop (mode, add_optab, op0, sgn, NULL_RTX, false,
1289 OPTAB_LIB_WIDEN);
1290 /* As we've changed op0, we have to avoid using the value range
1291 for the original argument. */
1292 arg0 = error_mark_node;
1293 do_xor = true;
1294 goto do_signed;
1297 /* s1 - u2 -> ur */
1298 if (code == MINUS_EXPR && !uns0_p && uns1_p && unsr_p)
1300 /* Compute the operation. On RTL level, the addition is always
1301 unsigned. */
1302 res = expand_binop (mode, sub_optab, op0, op1, NULL_RTX, false,
1303 OPTAB_LIB_WIDEN);
1304 int pos_neg = get_range_pos_neg (arg0);
1305 if (pos_neg == 2)
1306 /* If ARG0 is known to be always negative, this is always overflow. */
1307 emit_jump (do_error);
1308 else if (pos_neg == 3)
1309 /* If ARG0 is not known to be always positive, check at runtime. */
1310 do_compare_rtx_and_jump (op0, const0_rtx, LT, false, mode, NULL_RTX,
1311 NULL, do_error, profile_probability::very_unlikely ());
1312 do_compare_rtx_and_jump (op1, op0, LEU, true, mode, NULL_RTX, NULL,
1313 done_label, profile_probability::very_likely ());
1314 goto do_error_label;
1317 /* u1 - s2 -> sr */
1318 if (code == MINUS_EXPR && uns0_p && !uns1_p && !unsr_p)
1320 /* Compute the operation. On RTL level, the addition is always
1321 unsigned. */
1322 res = expand_binop (mode, sub_optab, op0, op1, NULL_RTX, false,
1323 OPTAB_LIB_WIDEN);
1324 rtx tem = expand_binop (mode, add_optab, op1, sgn, NULL_RTX, false,
1325 OPTAB_LIB_WIDEN);
1326 do_compare_rtx_and_jump (op0, tem, LTU, true, mode, NULL_RTX, NULL,
1327 done_label, profile_probability::very_likely ());
1328 goto do_error_label;
1331 /* u1 + u2 -> sr */
1332 if (code == PLUS_EXPR && uns0_p && uns1_p && !unsr_p)
1334 /* Compute the operation. On RTL level, the addition is always
1335 unsigned. */
1336 res = expand_binop (mode, add_optab, op0, op1, NULL_RTX, false,
1337 OPTAB_LIB_WIDEN);
1338 do_compare_rtx_and_jump (res, const0_rtx, LT, false, mode, NULL_RTX,
1339 NULL, do_error, profile_probability::very_unlikely ());
1340 rtx tem = op1;
1341 /* The operation is commutative, so we can pick operand to compare
1342 against. For prec <= BITS_PER_WORD, I think preferring REG operand
1343 is better over CONST_INT, because the CONST_INT might enlarge the
1344 instruction or CSE would need to figure out we'd already loaded it
1345 into a register before. For prec > BITS_PER_WORD, I think CONST_INT
1346 might be more beneficial, as then the multi-word comparison can be
1347 perhaps simplified. */
1348 if (prec <= BITS_PER_WORD
1349 ? (CONST_SCALAR_INT_P (op1) && REG_P (op0))
1350 : CONST_SCALAR_INT_P (op0))
1351 tem = op0;
1352 do_compare_rtx_and_jump (res, tem, GEU, true, mode, NULL_RTX, NULL,
1353 done_label, profile_probability::very_likely ());
1354 goto do_error_label;
1357 /* s1 +- s2 -> ur */
1358 if (!uns0_p && !uns1_p && unsr_p)
1360 /* Compute the operation. On RTL level, the addition is always
1361 unsigned. */
1362 res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
1363 op0, op1, NULL_RTX, false, OPTAB_LIB_WIDEN);
1364 int pos_neg = get_range_pos_neg (arg1);
1365 if (code == PLUS_EXPR)
1367 int pos_neg0 = get_range_pos_neg (arg0);
1368 if (pos_neg0 != 3 && pos_neg == 3)
1370 std::swap (op0, op1);
1371 pos_neg = pos_neg0;
1374 rtx tem;
1375 if (pos_neg != 3)
1377 tem = expand_binop (mode, ((pos_neg == 1) ^ (code == MINUS_EXPR))
1378 ? and_optab : ior_optab,
1379 op0, res, NULL_RTX, false, OPTAB_LIB_WIDEN);
1380 do_compare_rtx_and_jump (tem, const0_rtx, GE, false, mode, NULL,
1381 NULL, done_label, profile_probability::very_likely ());
1383 else
1385 rtx_code_label *do_ior_label = gen_label_rtx ();
1386 do_compare_rtx_and_jump (op1, const0_rtx,
1387 code == MINUS_EXPR ? GE : LT, false, mode,
1388 NULL_RTX, NULL, do_ior_label,
1389 profile_probability::even ());
1390 tem = expand_binop (mode, and_optab, op0, res, NULL_RTX, false,
1391 OPTAB_LIB_WIDEN);
1392 do_compare_rtx_and_jump (tem, const0_rtx, GE, false, mode, NULL_RTX,
1393 NULL, done_label, profile_probability::very_likely ());
1394 emit_jump (do_error);
1395 emit_label (do_ior_label);
1396 tem = expand_binop (mode, ior_optab, op0, res, NULL_RTX, false,
1397 OPTAB_LIB_WIDEN);
1398 do_compare_rtx_and_jump (tem, const0_rtx, GE, false, mode, NULL_RTX,
1399 NULL, done_label, profile_probability::very_likely ());
1401 goto do_error_label;
1404 /* u1 - u2 -> sr */
1405 if (code == MINUS_EXPR && uns0_p && uns1_p && !unsr_p)
1407 /* Compute the operation. On RTL level, the addition is always
1408 unsigned. */
1409 res = expand_binop (mode, sub_optab, op0, op1, NULL_RTX, false,
1410 OPTAB_LIB_WIDEN);
1411 rtx_code_label *op0_geu_op1 = gen_label_rtx ();
1412 do_compare_rtx_and_jump (op0, op1, GEU, true, mode, NULL_RTX, NULL,
1413 op0_geu_op1, profile_probability::even ());
1414 do_compare_rtx_and_jump (res, const0_rtx, LT, false, mode, NULL_RTX,
1415 NULL, done_label, profile_probability::very_likely ());
1416 emit_jump (do_error);
1417 emit_label (op0_geu_op1);
1418 do_compare_rtx_and_jump (res, const0_rtx, GE, false, mode, NULL_RTX,
1419 NULL, done_label, profile_probability::very_likely ());
1420 goto do_error_label;
1423 gcc_assert (!uns0_p && !uns1_p && !unsr_p);
1425 /* s1 +- s2 -> sr */
1426 do_signed:
1428 insn_code icode = optab_handler (code == PLUS_EXPR ? addv4_optab
1429 : subv4_optab, mode);
1430 if (icode != CODE_FOR_nothing)
1432 class expand_operand ops[4];
1433 rtx_insn *last = get_last_insn ();
1435 res = gen_reg_rtx (mode);
1436 create_output_operand (&ops[0], res, mode);
1437 create_input_operand (&ops[1], op0, mode);
1438 create_input_operand (&ops[2], op1, mode);
1439 create_fixed_operand (&ops[3], do_error);
1440 if (maybe_expand_insn (icode, 4, ops))
1442 last = get_last_insn ();
1443 if (profile_status_for_fn (cfun) != PROFILE_ABSENT
1444 && JUMP_P (last)
1445 && any_condjump_p (last)
1446 && !find_reg_note (last, REG_BR_PROB, 0))
1447 add_reg_br_prob_note (last,
1448 profile_probability::very_unlikely ());
1449 emit_jump (done_label);
1450 goto do_error_label;
1453 delete_insns_since (last);
1456 /* Compute the operation. On RTL level, the addition is always
1457 unsigned. */
1458 res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
1459 op0, op1, NULL_RTX, false, OPTAB_LIB_WIDEN);
1461 /* If we can prove that one of the arguments (for MINUS_EXPR only
1462 the second operand, as subtraction is not commutative) is always
1463 non-negative or always negative, we can do just one comparison
1464 and conditional jump. */
1465 int pos_neg = get_range_pos_neg (arg1);
1466 if (code == PLUS_EXPR)
1468 int pos_neg0 = get_range_pos_neg (arg0);
1469 if (pos_neg0 != 3 && pos_neg == 3)
1471 std::swap (op0, op1);
1472 pos_neg = pos_neg0;
1476 /* Addition overflows if and only if the two operands have the same sign,
1477 and the result has the opposite sign. Subtraction overflows if and
1478 only if the two operands have opposite sign, and the subtrahend has
1479 the same sign as the result. Here 0 is counted as positive. */
1480 if (pos_neg == 3)
1482 /* Compute op0 ^ op1 (operands have opposite sign). */
1483 rtx op_xor = expand_binop (mode, xor_optab, op0, op1, NULL_RTX, false,
1484 OPTAB_LIB_WIDEN);
1486 /* Compute res ^ op1 (result and 2nd operand have opposite sign). */
1487 rtx res_xor = expand_binop (mode, xor_optab, res, op1, NULL_RTX, false,
1488 OPTAB_LIB_WIDEN);
1490 rtx tem;
1491 if (code == PLUS_EXPR)
1493 /* Compute (res ^ op1) & ~(op0 ^ op1). */
1494 tem = expand_unop (mode, one_cmpl_optab, op_xor, NULL_RTX, false);
1495 tem = expand_binop (mode, and_optab, res_xor, tem, NULL_RTX, false,
1496 OPTAB_LIB_WIDEN);
1498 else
1500 /* Compute (op0 ^ op1) & ~(res ^ op1). */
1501 tem = expand_unop (mode, one_cmpl_optab, res_xor, NULL_RTX, false);
1502 tem = expand_binop (mode, and_optab, op_xor, tem, NULL_RTX, false,
1503 OPTAB_LIB_WIDEN);
1506 /* No overflow if the result has bit sign cleared. */
1507 do_compare_rtx_and_jump (tem, const0_rtx, GE, false, mode, NULL_RTX,
1508 NULL, done_label, profile_probability::very_likely ());
1511 /* Compare the result of the operation with the first operand.
1512 No overflow for addition if second operand is positive and result
1513 is larger or second operand is negative and result is smaller.
1514 Likewise for subtraction with sign of second operand flipped. */
1515 else
1516 do_compare_rtx_and_jump (res, op0,
1517 (pos_neg == 1) ^ (code == MINUS_EXPR) ? GE : LE,
1518 false, mode, NULL_RTX, NULL, done_label,
1519 profile_probability::very_likely ());
1522 do_error_label:
1523 emit_label (do_error);
1524 if (is_ubsan)
1526 /* Expand the ubsan builtin call. */
1527 push_temp_slots ();
1528 fn = ubsan_build_overflow_builtin (code, loc, TREE_TYPE (arg0),
1529 arg0, arg1, datap);
1530 expand_normal (fn);
1531 pop_temp_slots ();
1532 do_pending_stack_adjust ();
1534 else if (lhs)
1535 expand_arith_set_overflow (lhs, target);
1537 /* We're done. */
1538 emit_label (done_label);
1540 if (lhs)
1542 if (is_ubsan)
1543 expand_ubsan_result_store (lhs, target, mode, res, do_error);
1544 else
1546 if (do_xor)
1547 res = expand_binop (mode, add_optab, res, sgn, NULL_RTX, false,
1548 OPTAB_LIB_WIDEN);
1550 expand_arith_overflow_result_store (lhs, target, mode, res);
1555 /* Add negate overflow checking to the statement STMT. */
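/* Illustrative note: for a signed type the only negation that overflows is
   that of the minimum value (e.g. -INT_MIN), which is why the fallback
   path below simply compares the operand against TYPE_MIN_VALUE.  */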
1557 static void
1558 expand_neg_overflow (location_t loc, tree lhs, tree arg1, bool is_ubsan,
1559 tree *datap)
1561 rtx res, op1;
1562 tree fn;
1563 rtx_code_label *done_label, *do_error;
1564 rtx target = NULL_RTX;
1566 done_label = gen_label_rtx ();
1567 do_error = gen_label_rtx ();
1569 do_pending_stack_adjust ();
1570 op1 = expand_normal (arg1);
1572 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg1));
1573 if (lhs)
1575 target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
1576 if (!is_ubsan)
1577 write_complex_part (target, const0_rtx, true, false);
1580 enum insn_code icode = optab_handler (negv3_optab, mode);
1581 if (icode != CODE_FOR_nothing)
1583 class expand_operand ops[3];
1584 rtx_insn *last = get_last_insn ();
1586 res = gen_reg_rtx (mode);
1587 create_output_operand (&ops[0], res, mode);
1588 create_input_operand (&ops[1], op1, mode);
1589 create_fixed_operand (&ops[2], do_error);
1590 if (maybe_expand_insn (icode, 3, ops))
1592 last = get_last_insn ();
1593 if (profile_status_for_fn (cfun) != PROFILE_ABSENT
1594 && JUMP_P (last)
1595 && any_condjump_p (last)
1596 && !find_reg_note (last, REG_BR_PROB, 0))
1597 add_reg_br_prob_note (last,
1598 profile_probability::very_unlikely ());
1599 emit_jump (done_label);
1601 else
1603 delete_insns_since (last);
1604 icode = CODE_FOR_nothing;
1608 if (icode == CODE_FOR_nothing)
1610 /* Compute the operation. On RTL level, the addition is always
1611 unsigned. */
1612 res = expand_unop (mode, neg_optab, op1, NULL_RTX, false);
1614 /* Compare the operand with the most negative value. */
1615 rtx minv = expand_normal (TYPE_MIN_VALUE (TREE_TYPE (arg1)));
1616 do_compare_rtx_and_jump (op1, minv, NE, true, mode, NULL_RTX, NULL,
1617 done_label, profile_probability::very_likely ());
1620 emit_label (do_error);
1621 if (is_ubsan)
1623 /* Expand the ubsan builtin call. */
1624 push_temp_slots ();
1625 fn = ubsan_build_overflow_builtin (NEGATE_EXPR, loc, TREE_TYPE (arg1),
1626 arg1, NULL_TREE, datap);
1627 expand_normal (fn);
1628 pop_temp_slots ();
1629 do_pending_stack_adjust ();
1631 else if (lhs)
1632 expand_arith_set_overflow (lhs, target);
1634 /* We're done. */
1635 emit_label (done_label);
1637 if (lhs)
1639 if (is_ubsan)
1640 expand_ubsan_result_store (lhs, target, mode, res, do_error);
1641 else
1642 expand_arith_overflow_result_store (lhs, target, mode, res);
1646 /* Return true if UNS WIDEN_MULT_EXPR with result mode WMODE and operand
1647 mode MODE can be expanded without using a libcall. */
1649 static bool
1650 can_widen_mult_without_libcall (scalar_int_mode wmode, scalar_int_mode mode,
1651 rtx op0, rtx op1, bool uns)
1653 if (find_widening_optab_handler (umul_widen_optab, wmode, mode)
1654 != CODE_FOR_nothing)
1655 return true;
1657 if (find_widening_optab_handler (smul_widen_optab, wmode, mode)
1658 != CODE_FOR_nothing)
1659 return true;
1661 rtx_insn *last = get_last_insn ();
1662 if (CONSTANT_P (op0))
1663 op0 = convert_modes (wmode, mode, op0, uns);
1664 else
1665 op0 = gen_raw_REG (wmode, LAST_VIRTUAL_REGISTER + 1);
1666 if (CONSTANT_P (op1))
1667 op1 = convert_modes (wmode, mode, op1, uns);
1668 else
1669 op1 = gen_raw_REG (wmode, LAST_VIRTUAL_REGISTER + 2);
1670 rtx ret = expand_mult (wmode, op0, op1, NULL_RTX, uns, true);
1671 delete_insns_since (last);
1672 return ret != NULL_RTX;
1675 /* Add mul overflow checking to the statement STMT. */
1677 static void
1678 expand_mul_overflow (location_t loc, tree lhs, tree arg0, tree arg1,
1679 bool unsr_p, bool uns0_p, bool uns1_p, bool is_ubsan,
1680 tree *datap)
1682 rtx res, op0, op1;
1683 tree fn, type;
1684 rtx_code_label *done_label, *do_error;
1685 rtx target = NULL_RTX;
1686 signop sign;
1687 enum insn_code icode;
1688 int save_flag_trapv = flag_trapv;
1690 /* We don't want any __mulv?i3 etc. calls from the expansion of
1691 these internal functions, so disable -ftrapv temporarily. */
1692 flag_trapv = 0;
1693 done_label = gen_label_rtx ();
1694 do_error = gen_label_rtx ();
1696 do_pending_stack_adjust ();
1697 op0 = expand_normal (arg0);
1698 op1 = expand_normal (arg1);
1700 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg0));
1701 bool uns = unsr_p;
1702 if (lhs)
1704 target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
1705 if (!is_ubsan)
1706 write_complex_part (target, const0_rtx, true, false);
1709 if (is_ubsan)
1710 gcc_assert (!unsr_p && !uns0_p && !uns1_p);
1712 /* We assume both operands and result have the same precision
1713 here (GET_MODE_BITSIZE (mode)), S stands for signed type
1714 with that precision, U for unsigned type with that precision,
1715 sgn for unsigned most significant bit in that precision.
1716 s1 is signed first operand, u1 is unsigned first operand,
1717 s2 is signed second operand, u2 is unsigned second operand,
1718 sr is signed result, ur is unsigned result and the following
1719 rules say how to compute result (which is always result of
1720 the operands as if both were unsigned, cast to the right
1721 signedness) and how to compute whether operation overflowed.
1722 main_ovf (false) stands for jump on signed multiplication
1723 overflow or the main algorithm with uns == false.
1724 main_ovf (true) stands for jump on unsigned multiplication
1725 overflow or the main algorithm with uns == true.
1727 s1 * s2 -> sr
1728 res = (S) ((U) s1 * (U) s2)
1729 ovf = main_ovf (false)
1730 u1 * u2 -> ur
1731 res = u1 * u2
1732 ovf = main_ovf (true)
1733 s1 * u2 -> ur
1734 res = (U) s1 * u2
1735 ovf = (s1 < 0 && u2) || main_ovf (true)
1736 u1 * u2 -> sr
1737 res = (S) (u1 * u2)
1738 ovf = res < 0 || main_ovf (true)
1739 s1 * u2 -> sr
1740 res = (S) ((U) s1 * u2)
1741 ovf = (S) u2 >= 0 ? main_ovf (false)
1742 : (s1 != 0 && (s1 != -1 || u2 != (U) res))
1743 s1 * s2 -> ur
1744 t1 = (s1 & s2) < 0 ? (-(U) s1) : ((U) s1)
1745 t2 = (s1 & s2) < 0 ? (-(U) s2) : ((U) s2)
1746 res = t1 * t2
1747 ovf = (s1 ^ s2) < 0 ? (s1 && s2) : main_ovf (true) */
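/* Worked example of the u1 * u2 -> sr rule above (illustrative, 8-bit):
   u1 = 16 and u2 = 12 give u1 * u2 = 192 with no unsigned overflow, but
   res = (S) 192 = -64 is negative, so the signed result overflows.  */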
1749 if (uns0_p && !uns1_p)
1751 /* Multiplication is commutative, if operand signedness differs,
1752 canonicalize to the first operand being signed and second
1753 unsigned to simplify following code. */
1754 std::swap (op0, op1);
1755 std::swap (arg0, arg1);
1756 uns0_p = false;
1757 uns1_p = true;
1760 int pos_neg0 = get_range_pos_neg (arg0);
1761 int pos_neg1 = get_range_pos_neg (arg1);
 1762 /* Unsigned types whose precision is smaller than the mode's are still
 1763 zero-extended, even if their most significant bit is set. */
1764 if (uns0_p && TYPE_PRECISION (TREE_TYPE (arg0)) < GET_MODE_PRECISION (mode))
1765 pos_neg0 = 1;
1766 if (uns1_p && TYPE_PRECISION (TREE_TYPE (arg1)) < GET_MODE_PRECISION (mode))
1767 pos_neg1 = 1;
1769 /* s1 * u2 -> ur */
1770 if (!uns0_p && uns1_p && unsr_p)
1772 switch (pos_neg0)
1774 case 1:
1775 /* If s1 is non-negative, just perform normal u1 * u2 -> ur. */
1776 goto do_main;
1777 case 2:
1778 /* If s1 is negative, avoid the main code, just multiply and
1779 signal overflow if op1 is not 0. */
1780 struct separate_ops ops;
1781 ops.code = MULT_EXPR;
1782 ops.type = TREE_TYPE (arg1);
1783 ops.op0 = make_tree (ops.type, op0);
1784 ops.op1 = make_tree (ops.type, op1);
1785 ops.op2 = NULL_TREE;
1786 ops.location = loc;
1787 res = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
1788 do_compare_rtx_and_jump (op1, const0_rtx, EQ, true, mode, NULL_RTX,
1789 NULL, done_label, profile_probability::very_likely ());
1790 goto do_error_label;
1791 case 3:
1792 if (get_min_precision (arg1, UNSIGNED)
1793 + get_min_precision (arg0, SIGNED) <= GET_MODE_PRECISION (mode))
1795 /* If the first operand is sign extended from narrower type, the
1796 second operand is zero extended from narrower type and
1797 the sum of the two precisions is smaller or equal to the
1798 result precision: if the first argument is at runtime
1799 non-negative, maximum result will be 0x7e81 or 0x7f..fe80..01
1800 and there will be no overflow, if the first argument is
1801 negative and the second argument zero, the result will be
1802 0 and there will be no overflow, if the first argument is
1803 negative and the second argument positive, the result when
1804 treated as signed will be negative (minimum -0x7f80 or
1805 -0x7f..f80..0) and there will always be overflow. So, do
1806 res = (U) (s1 * u2)
1807 ovf = (S) res < 0 */
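/* For example with 8-bit arguments in a 16-bit mode: the largest
   non-negative product is 127 * 255 = 32385 = 0x7e81 and the most
   negative one is -128 * 255 = -32640 = -0x7f80, so every negative
   product stays above -0x8000 and shows up with the sign bit set,
   while every non-negative product fits and has the sign bit clear. */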
1808 struct separate_ops ops;
1809 ops.code = MULT_EXPR;
1810 ops.type
1811 = build_nonstandard_integer_type (GET_MODE_PRECISION (mode),
1812 1);
1813 ops.op0 = make_tree (ops.type, op0);
1814 ops.op1 = make_tree (ops.type, op1);
1815 ops.op2 = NULL_TREE;
1816 ops.location = loc;
1817 res = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
1818 do_compare_rtx_and_jump (res, const0_rtx, GE, false,
1819 mode, NULL_RTX, NULL, done_label,
1820 profile_probability::very_likely ());
1821 goto do_error_label;
1823 rtx_code_label *do_main_label;
1824 do_main_label = gen_label_rtx ();
1825 do_compare_rtx_and_jump (op0, const0_rtx, GE, false, mode, NULL_RTX,
1826 NULL, do_main_label, profile_probability::very_likely ());
1827 do_compare_rtx_and_jump (op1, const0_rtx, EQ, true, mode, NULL_RTX,
1828 NULL, do_main_label, profile_probability::very_likely ());
1829 expand_arith_set_overflow (lhs, target);
1830 emit_label (do_main_label);
1831 goto do_main;
1832 default:
1833 gcc_unreachable ();
1837 /* u1 * u2 -> sr */
1838 if (uns0_p && uns1_p && !unsr_p)
1840 if ((pos_neg0 | pos_neg1) == 1)
1842 /* If both arguments are zero extended from narrower types,
1843 the MSB will be clear on both and so we can pretend it is
1844 a normal s1 * s2 -> sr multiplication. */
1845 uns0_p = false;
1846 uns1_p = false;
1848 else
1849 uns = true;
1850 /* Rest of handling of this case after res is computed. */
1851 goto do_main;
1854 /* s1 * u2 -> sr */
1855 if (!uns0_p && uns1_p && !unsr_p)
1857 switch (pos_neg1)
1859 case 1:
1860 goto do_main;
1861 case 2:
1862 /* If (S) u2 is negative (i.e. u2 is larger than maximum of S),
1863 avoid the main code, just multiply and signal overflow
1864 unless 0 * u2 or -1 * ((U) Smin). */
1865 struct separate_ops ops;
1866 ops.code = MULT_EXPR;
1867 ops.type = TREE_TYPE (arg1);
1868 ops.op0 = make_tree (ops.type, op0);
1869 ops.op1 = make_tree (ops.type, op1);
1870 ops.op2 = NULL_TREE;
1871 ops.location = loc;
1872 res = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
1873 do_compare_rtx_and_jump (op0, const0_rtx, EQ, true, mode, NULL_RTX,
1874 NULL, done_label, profile_probability::very_likely ());
1875 do_compare_rtx_and_jump (op0, constm1_rtx, NE, true, mode, NULL_RTX,
1876 NULL, do_error, profile_probability::very_unlikely ());
1877 int prec;
1878 prec = GET_MODE_PRECISION (mode);
1879 rtx sgn;
1880 sgn = immed_wide_int_const (wi::min_value (prec, SIGNED), mode);
1881 do_compare_rtx_and_jump (op1, sgn, EQ, true, mode, NULL_RTX,
1882 NULL, done_label, profile_probability::very_likely ());
1883 goto do_error_label;
1884 case 3:
1885 /* Rest of handling of this case after res is computed. */
1886 goto do_main;
1887 default:
1888 gcc_unreachable ();
1892 /* s1 * s2 -> ur */
1893 if (!uns0_p && !uns1_p && unsr_p)
1895 rtx tem;
1896 switch (pos_neg0 | pos_neg1)
1898 case 1: /* Both operands known to be non-negative. */
1899 goto do_main;
1900 case 2: /* Both operands known to be negative. */
1901 op0 = expand_unop (mode, neg_optab, op0, NULL_RTX, false);
1902 op1 = expand_unop (mode, neg_optab, op1, NULL_RTX, false);
1903 /* Avoid looking at arg0/arg1 ranges, as we've changed
1904 the arguments. */
1905 arg0 = error_mark_node;
1906 arg1 = error_mark_node;
1907 goto do_main;
1908 case 3:
1909 if ((pos_neg0 ^ pos_neg1) == 3)
1911 /* If one operand is known to be negative and the other
1912 non-negative, this always overflows, unless the non-negative
1913 one is 0. Just do normal multiply and set overflow
1914 unless one of the operands is 0. */
1915 struct separate_ops ops;
1916 ops.code = MULT_EXPR;
1917 ops.type
1918 = build_nonstandard_integer_type (GET_MODE_PRECISION (mode),
1919 1);
1920 ops.op0 = make_tree (ops.type, op0);
1921 ops.op1 = make_tree (ops.type, op1);
1922 ops.op2 = NULL_TREE;
1923 ops.location = loc;
1924 res = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
1925 do_compare_rtx_and_jump (pos_neg0 == 1 ? op0 : op1, const0_rtx, EQ,
1926 true, mode, NULL_RTX, NULL, done_label,
1927 profile_probability::very_likely ());
1928 goto do_error_label;
1930 if (get_min_precision (arg0, SIGNED)
1931 + get_min_precision (arg1, SIGNED) <= GET_MODE_PRECISION (mode))
1933 /* If both operands are sign extended from narrower types and
1934 the sum of the two precisions is smaller or equal to the
1935 result precision: if both arguments are at runtime
1936 non-negative, maximum result will be 0x3f01 or 0x3f..f0..01
1937 and there will be no overflow, if both arguments are negative,
1938 maximum result will be 0x40..00 and there will be no overflow
1939 either, if one argument is positive and the other argument
1940 negative, the result when treated as signed will be negative
1941 and there will always be overflow, and if one argument is
1942 zero and the other negative the result will be zero and no
1943 overflow. So, do
1944 res = (U) (s1 * s2)
1945 ovf = (S) res < 0 */
1946 struct separate_ops ops;
1947 ops.code = MULT_EXPR;
1948 ops.type
1949 = build_nonstandard_integer_type (GET_MODE_PRECISION (mode),
1950 1);
1951 ops.op0 = make_tree (ops.type, op0);
1952 ops.op1 = make_tree (ops.type, op1);
1953 ops.op2 = NULL_TREE;
1954 ops.location = loc;
1955 res = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
1956 do_compare_rtx_and_jump (res, const0_rtx, GE, false,
1957 mode, NULL_RTX, NULL, done_label,
1958 profile_probability::very_likely ());
1959 goto do_error_label;
1961 /* The general case: do all the needed comparisons at runtime. */
1962 rtx_code_label *do_main_label, *after_negate_label;
1963 rtx rop0, rop1;
1964 rop0 = gen_reg_rtx (mode);
1965 rop1 = gen_reg_rtx (mode);
1966 emit_move_insn (rop0, op0);
1967 emit_move_insn (rop1, op1);
1968 op0 = rop0;
1969 op1 = rop1;
1970 do_main_label = gen_label_rtx ();
1971 after_negate_label = gen_label_rtx ();
1972 tem = expand_binop (mode, and_optab, op0, op1, NULL_RTX, false,
1973 OPTAB_LIB_WIDEN);
1974 do_compare_rtx_and_jump (tem, const0_rtx, GE, false, mode, NULL_RTX,
1975 NULL, after_negate_label, profile_probability::very_likely ());
1976 /* Both arguments negative here, negate them and continue with
1977 normal unsigned overflow checking multiplication. */
1978 emit_move_insn (op0, expand_unop (mode, neg_optab, op0,
1979 NULL_RTX, false));
1980 emit_move_insn (op1, expand_unop (mode, neg_optab, op1,
1981 NULL_RTX, false));
1982 /* Avoid looking at arg0/arg1 ranges, as we might have changed
1983 the arguments. */
1984 arg0 = error_mark_node;
1985 arg1 = error_mark_node;
1986 emit_jump (do_main_label);
1987 emit_label (after_negate_label);
1988 tem = expand_binop (mode, xor_optab, op0, op1, NULL_RTX, false,
1989 OPTAB_LIB_WIDEN);
1990 do_compare_rtx_and_jump (tem, const0_rtx, GE, false, mode, NULL_RTX,
1991 NULL, do_main_label,
1992 profile_probability::very_likely ());
1993 /* One argument is negative here, the other positive. This
1994 always overflows, unless one of the arguments is 0. But
1995 if e.g. s2 is 0, (U) s1 * 0 doesn't overflow, whatever s1
1996 is, thus we can keep the do_main code ORing in overflow as is. */
1997 if (pos_neg0 != 2)
1998 do_compare_rtx_and_jump (op0, const0_rtx, EQ, true, mode, NULL_RTX,
1999 NULL, do_main_label,
2000 profile_probability::very_unlikely ());
2001 if (pos_neg1 != 2)
2002 do_compare_rtx_and_jump (op1, const0_rtx, EQ, true, mode, NULL_RTX,
2003 NULL, do_main_label,
2004 profile_probability::very_unlikely ());
2005 expand_arith_set_overflow (lhs, target);
2006 emit_label (do_main_label);
2007 goto do_main;
2008 default:
2009 gcc_unreachable ();
2013 do_main:
2014 type = build_nonstandard_integer_type (GET_MODE_PRECISION (mode), uns);
2015 sign = uns ? UNSIGNED : SIGNED;
2016 icode = optab_handler (uns ? umulv4_optab : mulv4_optab, mode);
2017 if (uns
2018 && (integer_pow2p (arg0) || integer_pow2p (arg1))
2019 && (optimize_insn_for_speed_p () || icode == CODE_FOR_nothing))
2021 /* Optimize unsigned multiplication by power of 2 constant
2022 using 2 shifts, one for result, one to extract the shifted
2023 out bits to see if they are all zero.
2024 Don't do this if optimizing for size and we have umulv4_optab,
2025 in that case assume multiplication will be shorter.
2026 This heuristic is based on the single target that provides
2027 umulv4 right now (i?86/x86_64), if further targets add it, this
2028 might need to be revisited.
2029 Cases where both operands are constant should be folded already
2030 during GIMPLE, and cases where one operand is constant but not
2031 power of 2 are questionable: either the WIDEN_MULT_EXPR case
2032 below can be done without multiplication, just by shifts and adds,
2033 or we'd need to divide the result (and hope it actually doesn't
2034 really divide nor multiply) and compare the result of the division
2035 with the original operand. */
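/* E.g. for an unsigned 8-bit multiplication by 8 this emits
   res = x << 3 and checks that upper = x >> 5 is zero; the shifted
   out bits are non-zero exactly when x * 8 does not fit in 8 bits. */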
2036 rtx opn0 = op0;
2037 rtx opn1 = op1;
2038 tree argn0 = arg0;
2039 tree argn1 = arg1;
2040 if (integer_pow2p (arg0))
2042 std::swap (opn0, opn1);
2043 std::swap (argn0, argn1);
2045 int cnt = tree_log2 (argn1);
2046 if (cnt >= 0 && cnt < GET_MODE_PRECISION (mode))
2048 rtx upper = const0_rtx;
2049 res = expand_shift (LSHIFT_EXPR, mode, opn0, cnt, NULL_RTX, uns);
2050 if (cnt != 0)
2051 upper = expand_shift (RSHIFT_EXPR, mode, opn0,
2052 GET_MODE_PRECISION (mode) - cnt,
2053 NULL_RTX, uns);
2054 do_compare_rtx_and_jump (upper, const0_rtx, EQ, true, mode,
2055 NULL_RTX, NULL, done_label,
2056 profile_probability::very_likely ());
2057 goto do_error_label;
2060 if (icode != CODE_FOR_nothing)
2062 class expand_operand ops[4];
2063 rtx_insn *last = get_last_insn ();
2065 res = gen_reg_rtx (mode);
2066 create_output_operand (&ops[0], res, mode);
2067 create_input_operand (&ops[1], op0, mode);
2068 create_input_operand (&ops[2], op1, mode);
2069 create_fixed_operand (&ops[3], do_error);
2070 if (maybe_expand_insn (icode, 4, ops))
2072 last = get_last_insn ();
2073 if (profile_status_for_fn (cfun) != PROFILE_ABSENT
2074 && JUMP_P (last)
2075 && any_condjump_p (last)
2076 && !find_reg_note (last, REG_BR_PROB, 0))
2077 add_reg_br_prob_note (last,
2078 profile_probability::very_unlikely ());
2079 emit_jump (done_label);
2081 else
2083 delete_insns_since (last);
2084 icode = CODE_FOR_nothing;
2088 if (icode == CODE_FOR_nothing)
2090 struct separate_ops ops;
2091 int prec = GET_MODE_PRECISION (mode);
2092 scalar_int_mode hmode, wmode;
2093 ops.op0 = make_tree (type, op0);
2094 ops.op1 = make_tree (type, op1);
2095 ops.op2 = NULL_TREE;
2096 ops.location = loc;
2098 /* Optimize unsigned overflow check where we don't use the
2099 multiplication result, just whether overflow happened.
2100 If we can do MULT_HIGHPART_EXPR, that followed by
2101 comparison of the result against zero is cheapest.
2102 We'll still compute res, but it should be DCEd later. */
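/* For instance, for an 8-bit unsigned multiplication 200 * 2 = 400
   = 0x190 the high part is 0x01, so a non-zero high part signals
   overflow, while 100 * 2 = 200 = 0xc8 has a zero high part. */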
2103 use_operand_p use;
2104 gimple *use_stmt;
2105 if (!is_ubsan
2106 && lhs
2107 && uns
2108 && !(uns0_p && uns1_p && !unsr_p)
2109 && can_mult_highpart_p (mode, uns) == 1
2110 && single_imm_use (lhs, &use, &use_stmt)
2111 && is_gimple_assign (use_stmt)
2112 && gimple_assign_rhs_code (use_stmt) == IMAGPART_EXPR)
2113 goto highpart;
2115 if (GET_MODE_2XWIDER_MODE (mode).exists (&wmode)
2116 && targetm.scalar_mode_supported_p (wmode)
2117 && can_widen_mult_without_libcall (wmode, mode, op0, op1, uns))
2119 twoxwider:
2120 ops.code = WIDEN_MULT_EXPR;
2121 ops.type
2122 = build_nonstandard_integer_type (GET_MODE_PRECISION (wmode), uns);
2124 res = expand_expr_real_2 (&ops, NULL_RTX, wmode, EXPAND_NORMAL);
2125 rtx hipart = expand_shift (RSHIFT_EXPR, wmode, res, prec,
2126 NULL_RTX, uns);
2127 hipart = convert_modes (mode, wmode, hipart, uns);
2128 res = convert_modes (mode, wmode, res, uns);
2129 if (uns)
2130 /* For the unsigned multiplication, there was overflow if
2131 HIPART is non-zero. */
2132 do_compare_rtx_and_jump (hipart, const0_rtx, EQ, true, mode,
2133 NULL_RTX, NULL, done_label,
2134 profile_probability::very_likely ());
2135 else
2137 /* RES is used more than once, place it in a pseudo. */
2138 res = force_reg (mode, res);
2140 rtx signbit = expand_shift (RSHIFT_EXPR, mode, res, prec - 1,
2141 NULL_RTX, 0);
2142 /* RES is low half of the double width result, HIPART
2143 the high half. There was overflow if
2144 HIPART is different from RES < 0 ? -1 : 0. */
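/* E.g. for 8-bit operands: -100 * 2 = -200 = 0xff38 as a 16-bit
   value, so RES = 0x38 (non-negative) but HIPART = 0xff; the
   mismatch correctly flags that -200 does not fit in 8 bits.
   10 * -5 = -50 = 0xffce gives RES = 0xce and HIPART = 0xff, the
   sign copy of RES, so no overflow is reported. */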
2145 do_compare_rtx_and_jump (signbit, hipart, EQ, true, mode,
2146 NULL_RTX, NULL, done_label,
2147 profile_probability::very_likely ());
2150 else if (can_mult_highpart_p (mode, uns) == 1)
2152 highpart:
2153 ops.code = MULT_HIGHPART_EXPR;
2154 ops.type = type;
2156 rtx hipart = expand_expr_real_2 (&ops, NULL_RTX, mode,
2157 EXPAND_NORMAL);
2158 ops.code = MULT_EXPR;
2159 res = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
2160 if (uns)
2161 /* For the unsigned multiplication, there was overflow if
2162 HIPART is non-zero. */
2163 do_compare_rtx_and_jump (hipart, const0_rtx, EQ, true, mode,
2164 NULL_RTX, NULL, done_label,
2165 profile_probability::very_likely ());
2166 else
2168 rtx signbit = expand_shift (RSHIFT_EXPR, mode, res, prec - 1,
2169 NULL_RTX, 0);
2170 /* RES is low half of the double width result, HIPART
2171 the high half. There was overflow if
2172 HIPART is different from RES < 0 ? -1 : 0. */
2173 do_compare_rtx_and_jump (signbit, hipart, EQ, true, mode,
2174 NULL_RTX, NULL, done_label,
2175 profile_probability::very_likely ());
2179 else if (int_mode_for_size (prec / 2, 1).exists (&hmode)
2180 && 2 * GET_MODE_PRECISION (hmode) == prec)
2182 rtx_code_label *large_op0 = gen_label_rtx ();
2183 rtx_code_label *small_op0_large_op1 = gen_label_rtx ();
2184 rtx_code_label *one_small_one_large = gen_label_rtx ();
2185 rtx_code_label *both_ops_large = gen_label_rtx ();
2186 rtx_code_label *after_hipart_neg = uns ? NULL : gen_label_rtx ();
2187 rtx_code_label *after_lopart_neg = uns ? NULL : gen_label_rtx ();
2188 rtx_code_label *do_overflow = gen_label_rtx ();
2189 rtx_code_label *hipart_different = uns ? NULL : gen_label_rtx ();
2191 unsigned int hprec = GET_MODE_PRECISION (hmode);
2192 rtx hipart0 = expand_shift (RSHIFT_EXPR, mode, op0, hprec,
2193 NULL_RTX, uns);
2194 hipart0 = convert_modes (hmode, mode, hipart0, uns);
2195 rtx lopart0 = convert_modes (hmode, mode, op0, uns);
2196 rtx signbit0 = const0_rtx;
2197 if (!uns)
2198 signbit0 = expand_shift (RSHIFT_EXPR, hmode, lopart0, hprec - 1,
2199 NULL_RTX, 0);
2200 rtx hipart1 = expand_shift (RSHIFT_EXPR, mode, op1, hprec,
2201 NULL_RTX, uns);
2202 hipart1 = convert_modes (hmode, mode, hipart1, uns);
2203 rtx lopart1 = convert_modes (hmode, mode, op1, uns);
2204 rtx signbit1 = const0_rtx;
2205 if (!uns)
2206 signbit1 = expand_shift (RSHIFT_EXPR, hmode, lopart1, hprec - 1,
2207 NULL_RTX, 0);
2209 res = gen_reg_rtx (mode);
2211 /* True if op0 resp. op1 are known to be in the range of
2212 halfstype. */
2213 bool op0_small_p = false;
2214 bool op1_small_p = false;
2215 /* True if op0 resp. op1 are known to have all zeros or all ones
2216 in the upper half of bits, but are not known to be
2217 op{0,1}_small_p. */
2218 bool op0_medium_p = false;
2219 bool op1_medium_p = false;
2220 /* -1 if op{0,1} is known to be negative, 0 if it is known to be
2221 nonnegative, 1 if unknown. */
2222 int op0_sign = 1;
2223 int op1_sign = 1;
2225 if (pos_neg0 == 1)
2226 op0_sign = 0;
2227 else if (pos_neg0 == 2)
2228 op0_sign = -1;
2229 if (pos_neg1 == 1)
2230 op1_sign = 0;
2231 else if (pos_neg1 == 2)
2232 op1_sign = -1;
2234 unsigned int mprec0 = prec;
2235 if (arg0 != error_mark_node)
2236 mprec0 = get_min_precision (arg0, sign);
2237 if (mprec0 <= hprec)
2238 op0_small_p = true;
2239 else if (!uns && mprec0 <= hprec + 1)
2240 op0_medium_p = true;
2241 unsigned int mprec1 = prec;
2242 if (arg1 != error_mark_node)
2243 mprec1 = get_min_precision (arg1, sign);
2244 if (mprec1 <= hprec)
2245 op1_small_p = true;
2246 else if (!uns && mprec1 <= hprec + 1)
2247 op1_medium_p = true;
2249 int smaller_sign = 1;
2250 int larger_sign = 1;
2251 if (op0_small_p)
2253 smaller_sign = op0_sign;
2254 larger_sign = op1_sign;
2256 else if (op1_small_p)
2258 smaller_sign = op1_sign;
2259 larger_sign = op0_sign;
2261 else if (op0_sign == op1_sign)
2263 smaller_sign = op0_sign;
2264 larger_sign = op0_sign;
2267 if (!op0_small_p)
2268 do_compare_rtx_and_jump (signbit0, hipart0, NE, true, hmode,
2269 NULL_RTX, NULL, large_op0,
2270 profile_probability::unlikely ());
2272 if (!op1_small_p)
2273 do_compare_rtx_and_jump (signbit1, hipart1, NE, true, hmode,
2274 NULL_RTX, NULL, small_op0_large_op1,
2275 profile_probability::unlikely ());
2277 /* If both op0 and op1 are sign (!uns) or zero (uns) extended from
2278 hmode to mode, the multiplication will never overflow. We can
2279 do just one hmode x hmode => mode widening multiplication. */
2280 tree halfstype = build_nonstandard_integer_type (hprec, uns);
2281 ops.op0 = make_tree (halfstype, lopart0);
2282 ops.op1 = make_tree (halfstype, lopart1);
2283 ops.code = WIDEN_MULT_EXPR;
2284 ops.type = type;
2285 rtx thisres
2286 = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
2287 emit_move_insn (res, thisres);
2288 emit_jump (done_label);
2290 emit_label (small_op0_large_op1);
2292 /* If op0 is sign (!uns) or zero (uns) extended from hmode to mode,
2293 but op1 is not, just swap the arguments and handle it as op1
2294 sign/zero extended, op0 not. */
2295 rtx larger = gen_reg_rtx (mode);
2296 rtx hipart = gen_reg_rtx (hmode);
2297 rtx lopart = gen_reg_rtx (hmode);
2298 emit_move_insn (larger, op1);
2299 emit_move_insn (hipart, hipart1);
2300 emit_move_insn (lopart, lopart0);
2301 emit_jump (one_small_one_large);
2303 emit_label (large_op0);
2305 if (!op1_small_p)
2306 do_compare_rtx_and_jump (signbit1, hipart1, NE, true, hmode,
2307 NULL_RTX, NULL, both_ops_large,
2308 profile_probability::unlikely ());
2310 /* If op1 is sign (!uns) or zero (uns) extended from hmode to mode,
2311 but op0 is not, prepare larger, hipart and lopart pseudos and
2312 handle it together with small_op0_large_op1. */
2313 emit_move_insn (larger, op0);
2314 emit_move_insn (hipart, hipart0);
2315 emit_move_insn (lopart, lopart1);
2317 emit_label (one_small_one_large);
2319 /* lopart is the low part of the operand that is sign extended
2320 to mode, larger is the other operand, hipart is the
2321 high part of larger and lopart0 and lopart1 are the low parts
2322 of both operands.
2323 We perform lopart0 * lopart1 and lopart * hipart widening
2324 multiplications. */
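/* The identity behind this (unsigned view, with h = hprec): writing
   larger = hipart * 2^h + lolarger, where lolarger is the low half
   of larger, the product is
     lopart * larger = lopart * hipart * 2^h + lopart * lolarger
                     = lopart * hipart * 2^h + lo0xlo1,
   so loxhi below accumulates lopart * hipart plus the high half of
   lo0xlo1, the low half of lo0xlo1 supplies the low h bits of the
   result, and the product fits into mode iff loxhi fits into h bits
   (resp. is a proper sign extension thereof in the signed case). */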
2325 tree halfutype = build_nonstandard_integer_type (hprec, 1);
2326 ops.op0 = make_tree (halfutype, lopart0);
2327 ops.op1 = make_tree (halfutype, lopart1);
2328 rtx lo0xlo1
2329 = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
2331 ops.op0 = make_tree (halfutype, lopart);
2332 ops.op1 = make_tree (halfutype, hipart);
2333 rtx loxhi = gen_reg_rtx (mode);
2334 rtx tem = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
2335 emit_move_insn (loxhi, tem);
2337 if (!uns)
2339 /* if (hipart < 0) loxhi -= lopart << (bitsize / 2); */
2340 if (larger_sign == 0)
2341 emit_jump (after_hipart_neg);
2342 else if (larger_sign != -1)
2343 do_compare_rtx_and_jump (hipart, const0_rtx, GE, false, hmode,
2344 NULL_RTX, NULL, after_hipart_neg,
2345 profile_probability::even ());
2347 tem = convert_modes (mode, hmode, lopart, 1);
2348 tem = expand_shift (LSHIFT_EXPR, mode, tem, hprec, NULL_RTX, 1);
2349 tem = expand_simple_binop (mode, MINUS, loxhi, tem, NULL_RTX,
2350 1, OPTAB_WIDEN);
2351 emit_move_insn (loxhi, tem);
2353 emit_label (after_hipart_neg);
2355 /* if (lopart < 0) loxhi -= larger; */
2356 if (smaller_sign == 0)
2357 emit_jump (after_lopart_neg);
2358 else if (smaller_sign != -1)
2359 do_compare_rtx_and_jump (lopart, const0_rtx, GE, false, hmode,
2360 NULL_RTX, NULL, after_lopart_neg,
2361 profile_probability::even ());
2363 tem = expand_simple_binop (mode, MINUS, loxhi, larger, NULL_RTX,
2364 1, OPTAB_WIDEN);
2365 emit_move_insn (loxhi, tem);
2367 emit_label (after_lopart_neg);
2370 /* loxhi += (uns) lo0xlo1 >> (bitsize / 2); */
2371 tem = expand_shift (RSHIFT_EXPR, mode, lo0xlo1, hprec, NULL_RTX, 1);
2372 tem = expand_simple_binop (mode, PLUS, loxhi, tem, NULL_RTX,
2373 1, OPTAB_WIDEN);
2374 emit_move_insn (loxhi, tem);
2376 /* if (loxhi >> (bitsize / 2)
2377 == (hmode) loxhi >> (bitsize / 2 - 1)) (if !uns)
2378 if (loxhi >> (bitsize / 2) == 0) (if uns). */
2379 rtx hipartloxhi = expand_shift (RSHIFT_EXPR, mode, loxhi, hprec,
2380 NULL_RTX, 0);
2381 hipartloxhi = convert_modes (hmode, mode, hipartloxhi, 0);
2382 rtx signbitloxhi = const0_rtx;
2383 if (!uns)
2384 signbitloxhi = expand_shift (RSHIFT_EXPR, hmode,
2385 convert_modes (hmode, mode,
2386 loxhi, 0),
2387 hprec - 1, NULL_RTX, 0);
2389 do_compare_rtx_and_jump (signbitloxhi, hipartloxhi, NE, true, hmode,
2390 NULL_RTX, NULL, do_overflow,
2391 profile_probability::very_unlikely ());
2393 /* res = (loxhi << (bitsize / 2)) | (hmode) lo0xlo1; */
2394 rtx loxhishifted = expand_shift (LSHIFT_EXPR, mode, loxhi, hprec,
2395 NULL_RTX, 1);
2396 tem = convert_modes (mode, hmode,
2397 convert_modes (hmode, mode, lo0xlo1, 1), 1);
2399 tem = expand_simple_binop (mode, IOR, loxhishifted, tem, res,
2400 1, OPTAB_WIDEN);
2401 if (tem != res)
2402 emit_move_insn (res, tem);
2403 emit_jump (done_label);
2405 emit_label (both_ops_large);
2407 /* If both operands are large (not sign (!uns) or zero (uns)
2408 extended from hmode), then perform the full multiplication
2409 which will be the result of the operation.
2410 For signed multiplication, the only cases which don't overflow are
2411 some cases where both hipart0 and hipart1 are 0 or -1.
2412 For unsigned multiplication when high parts are both non-zero
2413 this always overflows. */
2414 ops.code = MULT_EXPR;
2415 ops.op0 = make_tree (type, op0);
2416 ops.op1 = make_tree (type, op1);
2417 tem = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
2418 emit_move_insn (res, tem);
2420 if (!uns)
2422 if (!op0_medium_p)
2424 tem = expand_simple_binop (hmode, PLUS, hipart0, const1_rtx,
2425 NULL_RTX, 1, OPTAB_WIDEN);
2426 do_compare_rtx_and_jump (tem, const1_rtx, GTU, true, hmode,
2427 NULL_RTX, NULL, do_error,
2428 profile_probability::very_unlikely ());
2431 if (!op1_medium_p)
2433 tem = expand_simple_binop (hmode, PLUS, hipart1, const1_rtx,
2434 NULL_RTX, 1, OPTAB_WIDEN);
2435 do_compare_rtx_and_jump (tem, const1_rtx, GTU, true, hmode,
2436 NULL_RTX, NULL, do_error,
2437 profile_probability::very_unlikely ());
2440 /* At this point hipart{0,1} are both in [-1, 0]. If they are
2441 the same, overflow happened if res is non-positive, if they
2442 are different, overflow happened if res is positive. */
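/* E.g. with a 16-bit mode and hprec == 8, both operands in
   [128, 255] (hipart 0): 150 * 150 = 22500 is positive and fits,
   while 200 * 200 = 40000 wraps to -25536, so a non-positive res
   correctly indicates overflow of the signed 16-bit result. */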
2443 if (op0_sign != 1 && op1_sign != 1 && op0_sign != op1_sign)
2444 emit_jump (hipart_different);
2445 else if (op0_sign == 1 || op1_sign == 1)
2446 do_compare_rtx_and_jump (hipart0, hipart1, NE, true, hmode,
2447 NULL_RTX, NULL, hipart_different,
2448 profile_probability::even ());
2450 do_compare_rtx_and_jump (res, const0_rtx, LE, false, mode,
2451 NULL_RTX, NULL, do_error,
2452 profile_probability::very_unlikely ());
2453 emit_jump (done_label);
2455 emit_label (hipart_different);
2457 do_compare_rtx_and_jump (res, const0_rtx, GE, false, mode,
2458 NULL_RTX, NULL, do_error,
2459 profile_probability::very_unlikely ());
2460 emit_jump (done_label);
2463 emit_label (do_overflow);
2465 /* Overflow, do full multiplication and fallthru into do_error. */
2466 ops.op0 = make_tree (type, op0);
2467 ops.op1 = make_tree (type, op1);
2468 tem = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
2469 emit_move_insn (res, tem);
2471 else if (GET_MODE_2XWIDER_MODE (mode).exists (&wmode)
2472 && targetm.scalar_mode_supported_p (wmode))
2473 /* Even emitting a libcall is better than not detecting overflow
2474 at all. */
2475 goto twoxwider;
2476 else
2478 gcc_assert (!is_ubsan);
2479 ops.code = MULT_EXPR;
2480 ops.type = type;
2481 res = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
2482 emit_jump (done_label);
2486 do_error_label:
2487 emit_label (do_error);
2488 if (is_ubsan)
2490 /* Expand the ubsan builtin call. */
2491 push_temp_slots ();
2492 fn = ubsan_build_overflow_builtin (MULT_EXPR, loc, TREE_TYPE (arg0),
2493 arg0, arg1, datap);
2494 expand_normal (fn);
2495 pop_temp_slots ();
2496 do_pending_stack_adjust ();
2498 else if (lhs)
2499 expand_arith_set_overflow (lhs, target);
2501 /* We're done. */
2502 emit_label (done_label);
2504 /* u1 * u2 -> sr */
2505 if (uns0_p && uns1_p && !unsr_p)
2507 rtx_code_label *all_done_label = gen_label_rtx ();
2508 do_compare_rtx_and_jump (res, const0_rtx, GE, false, mode, NULL_RTX,
2509 NULL, all_done_label, profile_probability::very_likely ());
2510 expand_arith_set_overflow (lhs, target);
2511 emit_label (all_done_label);
2514 /* s1 * u2 -> sr */
2515 if (!uns0_p && uns1_p && !unsr_p && pos_neg1 == 3)
2517 rtx_code_label *all_done_label = gen_label_rtx ();
2518 rtx_code_label *set_noovf = gen_label_rtx ();
2519 do_compare_rtx_and_jump (op1, const0_rtx, GE, false, mode, NULL_RTX,
2520 NULL, all_done_label, profile_probability::very_likely ());
2521 expand_arith_set_overflow (lhs, target);
2522 do_compare_rtx_and_jump (op0, const0_rtx, EQ, true, mode, NULL_RTX,
2523 NULL, set_noovf, profile_probability::very_likely ());
2524 do_compare_rtx_and_jump (op0, constm1_rtx, NE, true, mode, NULL_RTX,
2525 NULL, all_done_label, profile_probability::very_unlikely ());
2526 do_compare_rtx_and_jump (op1, res, NE, true, mode, NULL_RTX, NULL,
2527 all_done_label, profile_probability::very_unlikely ());
2528 emit_label (set_noovf);
2529 write_complex_part (target, const0_rtx, true, false);
2530 emit_label (all_done_label);
2533 if (lhs)
2535 if (is_ubsan)
2536 expand_ubsan_result_store (lhs, target, mode, res, do_error);
2537 else
2538 expand_arith_overflow_result_store (lhs, target, mode, res);
2540 flag_trapv = save_flag_trapv;
2543 /* Expand UBSAN_CHECK_* internal function if it has vector operands. */
2545 static void
2546 expand_vector_ubsan_overflow (location_t loc, enum tree_code code, tree lhs,
2547 tree arg0, tree arg1)
2549 poly_uint64 cnt = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
2550 rtx_code_label *loop_lab = NULL;
2551 rtx cntvar = NULL_RTX;
2552 tree cntv = NULL_TREE;
2553 tree eltype = TREE_TYPE (TREE_TYPE (arg0));
2554 tree sz = TYPE_SIZE (eltype);
2555 tree data = NULL_TREE;
2556 tree resv = NULL_TREE;
2557 rtx lhsr = NULL_RTX;
2558 rtx resvr = NULL_RTX;
2559 unsigned HOST_WIDE_INT const_cnt = 0;
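/* For more than four elements, or a variable number of elements,
   emit a runtime loop over the element index instead of unrolling
   the per-element overflow checks. */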
2560 bool use_loop_p = (!cnt.is_constant (&const_cnt) || const_cnt > 4);
2561 int save_flag_trapv = flag_trapv;
2563 /* We don't want any __mulv?i3 etc. calls from the expansion of
2564 these internal functions, so disable -ftrapv temporarily. */
2565 flag_trapv = 0;
2566 if (lhs)
2568 optab op;
2569 lhsr = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
2570 if (!VECTOR_MODE_P (GET_MODE (lhsr))
2571 || (op = optab_for_tree_code (code, TREE_TYPE (arg0),
2572 optab_default)) == unknown_optab
2573 || (optab_handler (op, TYPE_MODE (TREE_TYPE (arg0)))
2574 == CODE_FOR_nothing))
2576 if (MEM_P (lhsr))
2577 resv = make_tree (TREE_TYPE (lhs), lhsr);
2578 else
2580 resvr = assign_temp (TREE_TYPE (lhs), 1, 1);
2581 resv = make_tree (TREE_TYPE (lhs), resvr);
2585 if (use_loop_p)
2587 do_pending_stack_adjust ();
2588 loop_lab = gen_label_rtx ();
2589 cntvar = gen_reg_rtx (TYPE_MODE (sizetype));
2590 cntv = make_tree (sizetype, cntvar);
2591 emit_move_insn (cntvar, const0_rtx);
2592 emit_label (loop_lab);
2594 if (TREE_CODE (arg0) != VECTOR_CST)
2596 rtx arg0r = expand_normal (arg0);
2597 arg0 = make_tree (TREE_TYPE (arg0), arg0r);
2599 if (TREE_CODE (arg1) != VECTOR_CST)
2601 rtx arg1r = expand_normal (arg1);
2602 arg1 = make_tree (TREE_TYPE (arg1), arg1r);
2604 for (unsigned int i = 0; i < (use_loop_p ? 1 : const_cnt); i++)
2606 tree op0, op1, res = NULL_TREE;
2607 if (use_loop_p)
2609 tree atype = build_array_type_nelts (eltype, cnt);
2610 op0 = uniform_vector_p (arg0);
2611 if (op0 == NULL_TREE)
2613 op0 = fold_build1_loc (loc, VIEW_CONVERT_EXPR, atype, arg0);
2614 op0 = build4_loc (loc, ARRAY_REF, eltype, op0, cntv,
2615 NULL_TREE, NULL_TREE);
2617 op1 = uniform_vector_p (arg1);
2618 if (op1 == NULL_TREE)
2620 op1 = fold_build1_loc (loc, VIEW_CONVERT_EXPR, atype, arg1);
2621 op1 = build4_loc (loc, ARRAY_REF, eltype, op1, cntv,
2622 NULL_TREE, NULL_TREE);
2624 if (resv)
2626 res = fold_build1_loc (loc, VIEW_CONVERT_EXPR, atype, resv);
2627 res = build4_loc (loc, ARRAY_REF, eltype, res, cntv,
2628 NULL_TREE, NULL_TREE);
2631 else
2633 tree bitpos = bitsize_int (tree_to_uhwi (sz) * i);
2634 op0 = fold_build3_loc (loc, BIT_FIELD_REF, eltype, arg0, sz, bitpos);
2635 op1 = fold_build3_loc (loc, BIT_FIELD_REF, eltype, arg1, sz, bitpos);
2636 if (resv)
2637 res = fold_build3_loc (loc, BIT_FIELD_REF, eltype, resv, sz,
2638 bitpos);
2640 switch (code)
2642 case PLUS_EXPR:
2643 expand_addsub_overflow (loc, PLUS_EXPR, res, op0, op1,
2644 false, false, false, true, &data);
2645 break;
2646 case MINUS_EXPR:
2647 if (use_loop_p ? integer_zerop (arg0) : integer_zerop (op0))
2648 expand_neg_overflow (loc, res, op1, true, &data);
2649 else
2650 expand_addsub_overflow (loc, MINUS_EXPR, res, op0, op1,
2651 false, false, false, true, &data);
2652 break;
2653 case MULT_EXPR:
2654 expand_mul_overflow (loc, res, op0, op1, false, false, false,
2655 true, &data);
2656 break;
2657 default:
2658 gcc_unreachable ();
2661 if (use_loop_p)
2663 struct separate_ops ops;
2664 ops.code = PLUS_EXPR;
2665 ops.type = TREE_TYPE (cntv);
2666 ops.op0 = cntv;
2667 ops.op1 = build_int_cst (TREE_TYPE (cntv), 1);
2668 ops.op2 = NULL_TREE;
2669 ops.location = loc;
2670 rtx ret = expand_expr_real_2 (&ops, cntvar, TYPE_MODE (sizetype),
2671 EXPAND_NORMAL);
2672 if (ret != cntvar)
2673 emit_move_insn (cntvar, ret);
2674 rtx cntrtx = gen_int_mode (cnt, TYPE_MODE (sizetype));
2675 do_compare_rtx_and_jump (cntvar, cntrtx, NE, false,
2676 TYPE_MODE (sizetype), NULL_RTX, NULL, loop_lab,
2677 profile_probability::very_likely ());
2679 if (lhs && resv == NULL_TREE)
2681 struct separate_ops ops;
2682 ops.code = code;
2683 ops.type = TREE_TYPE (arg0);
2684 ops.op0 = arg0;
2685 ops.op1 = arg1;
2686 ops.op2 = NULL_TREE;
2687 ops.location = loc;
2688 rtx ret = expand_expr_real_2 (&ops, lhsr, TYPE_MODE (TREE_TYPE (arg0)),
2689 EXPAND_NORMAL);
2690 if (ret != lhsr)
2691 emit_move_insn (lhsr, ret);
2693 else if (resvr)
2694 emit_move_insn (lhsr, resvr);
2695 flag_trapv = save_flag_trapv;
2698 /* Expand UBSAN_CHECK_ADD call STMT. */
2700 static void
2701 expand_UBSAN_CHECK_ADD (internal_fn, gcall *stmt)
2703 location_t loc = gimple_location (stmt);
2704 tree lhs = gimple_call_lhs (stmt);
2705 tree arg0 = gimple_call_arg (stmt, 0);
2706 tree arg1 = gimple_call_arg (stmt, 1);
2707 if (VECTOR_TYPE_P (TREE_TYPE (arg0)))
2708 expand_vector_ubsan_overflow (loc, PLUS_EXPR, lhs, arg0, arg1);
2709 else
2710 expand_addsub_overflow (loc, PLUS_EXPR, lhs, arg0, arg1,
2711 false, false, false, true, NULL);
2714 /* Expand UBSAN_CHECK_SUB call STMT. */
2716 static void
2717 expand_UBSAN_CHECK_SUB (internal_fn, gcall *stmt)
2719 location_t loc = gimple_location (stmt);
2720 tree lhs = gimple_call_lhs (stmt);
2721 tree arg0 = gimple_call_arg (stmt, 0);
2722 tree arg1 = gimple_call_arg (stmt, 1);
2723 if (VECTOR_TYPE_P (TREE_TYPE (arg0)))
2724 expand_vector_ubsan_overflow (loc, MINUS_EXPR, lhs, arg0, arg1);
2725 else if (integer_zerop (arg0))
2726 expand_neg_overflow (loc, lhs, arg1, true, NULL);
2727 else
2728 expand_addsub_overflow (loc, MINUS_EXPR, lhs, arg0, arg1,
2729 false, false, false, true, NULL);
2732 /* Expand UBSAN_CHECK_MUL call STMT. */
2734 static void
2735 expand_UBSAN_CHECK_MUL (internal_fn, gcall *stmt)
2737 location_t loc = gimple_location (stmt);
2738 tree lhs = gimple_call_lhs (stmt);
2739 tree arg0 = gimple_call_arg (stmt, 0);
2740 tree arg1 = gimple_call_arg (stmt, 1);
2741 if (VECTOR_TYPE_P (TREE_TYPE (arg0)))
2742 expand_vector_ubsan_overflow (loc, MULT_EXPR, lhs, arg0, arg1);
2743 else
2744 expand_mul_overflow (loc, lhs, arg0, arg1, false, false, false, true,
2745 NULL);
2748 /* Helper function for {ADD,SUB,MUL}_OVERFLOW call stmt expansion. */
2750 static void
2751 expand_arith_overflow (enum tree_code code, gimple *stmt)
2753 tree lhs = gimple_call_lhs (stmt);
2754 if (lhs == NULL_TREE)
2755 return;
2756 tree arg0 = gimple_call_arg (stmt, 0);
2757 tree arg1 = gimple_call_arg (stmt, 1);
2758 tree type = TREE_TYPE (TREE_TYPE (lhs));
2759 int uns0_p = TYPE_UNSIGNED (TREE_TYPE (arg0));
2760 int uns1_p = TYPE_UNSIGNED (TREE_TYPE (arg1));
2761 int unsr_p = TYPE_UNSIGNED (type);
2762 int prec0 = TYPE_PRECISION (TREE_TYPE (arg0));
2763 int prec1 = TYPE_PRECISION (TREE_TYPE (arg1));
2764 int precres = TYPE_PRECISION (type);
2765 location_t loc = gimple_location (stmt);
2766 if (!uns0_p && get_range_pos_neg (arg0) == 1)
2767 uns0_p = true;
2768 if (!uns1_p && get_range_pos_neg (arg1) == 1)
2769 uns1_p = true;
2770 int pr = get_min_precision (arg0, uns0_p ? UNSIGNED : SIGNED);
2771 prec0 = MIN (prec0, pr);
2772 pr = get_min_precision (arg1, uns1_p ? UNSIGNED : SIGNED);
2773 prec1 = MIN (prec1, pr);
2774 int save_flag_trapv = flag_trapv;
2776 /* We don't want any __mulv?i3 etc. calls from the expansion of
2777 these internal functions, so disable -ftrapv temporarily. */
2778 flag_trapv = 0;
2779 /* If uns0_p && uns1_p, precop is minimum needed precision
2780 of unsigned type to hold the exact result, otherwise
2781 precop is minimum needed precision of signed type to
2782 hold the exact result. */
2783 int precop;
2784 if (code == MULT_EXPR)
2785 precop = prec0 + prec1 + (uns0_p != uns1_p);
2786 else
2788 if (uns0_p == uns1_p)
2789 precop = MAX (prec0, prec1) + 1;
2790 else if (uns0_p)
2791 precop = MAX (prec0 + 1, prec1) + 1;
2792 else
2793 precop = MAX (prec0, prec1 + 1) + 1;
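/* For example, adding two unsigned 8-bit values needs at most 9 bits
   (255 + 255 = 510), adding two signed 8-bit values needs a signed
   9-bit type ([-256, 254]), and multiplying two unsigned 8-bit
   values needs 8 + 8 = 16 bits (255 * 255 = 65025). */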
2795 int orig_precres = precres;
2799 if ((uns0_p && uns1_p)
2800 ? ((precop + !unsr_p) <= precres
2801 /* u1 - u2 -> ur can overflow, no matter what precision
2802 the result has. */
2803 && (code != MINUS_EXPR || !unsr_p))
2804 : (!unsr_p && precop <= precres))
2806 /* The infinite precision result will always fit into the result. */
2807 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
2808 write_complex_part (target, const0_rtx, true, false);
2809 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2810 struct separate_ops ops;
2811 ops.code = code;
2812 ops.type = type;
2813 ops.op0 = fold_convert_loc (loc, type, arg0);
2814 ops.op1 = fold_convert_loc (loc, type, arg1);
2815 ops.op2 = NULL_TREE;
2816 ops.location = loc;
2817 rtx tem = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
2818 expand_arith_overflow_result_store (lhs, target, mode, tem);
2819 flag_trapv = save_flag_trapv;
2820 return;
2823 /* For operations with low precision, if the target doesn't have them, start
2824 with precres widening right away, otherwise do it only if the
2825 simplest cases can't be used. */
2826 const int min_precision = targetm.min_arithmetic_precision ();
2827 if (orig_precres == precres && precres < min_precision)
2829 else if ((uns0_p && uns1_p && unsr_p && prec0 <= precres
2830 && prec1 <= precres)
2831 || ((!uns0_p || !uns1_p) && !unsr_p
2832 && prec0 + uns0_p <= precres
2833 && prec1 + uns1_p <= precres))
2835 arg0 = fold_convert_loc (loc, type, arg0);
2836 arg1 = fold_convert_loc (loc, type, arg1);
2837 switch (code)
2839 case MINUS_EXPR:
2840 if (integer_zerop (arg0) && !unsr_p)
2842 expand_neg_overflow (loc, lhs, arg1, false, NULL);
2843 flag_trapv = save_flag_trapv;
2844 return;
2846 /* FALLTHRU */
2847 case PLUS_EXPR:
2848 expand_addsub_overflow (loc, code, lhs, arg0, arg1, unsr_p,
2849 unsr_p, unsr_p, false, NULL);
2850 flag_trapv = save_flag_trapv;
2851 return;
2852 case MULT_EXPR:
2853 expand_mul_overflow (loc, lhs, arg0, arg1, unsr_p,
2854 unsr_p, unsr_p, false, NULL);
2855 flag_trapv = save_flag_trapv;
2856 return;
2857 default:
2858 gcc_unreachable ();
2862 /* For sub-word operations, retry with a wider type first. */
2863 if (orig_precres == precres && precop <= BITS_PER_WORD)
2865 int p = MAX (min_precision, precop);
2866 scalar_int_mode m = smallest_int_mode_for_size (p).require ();
2867 tree optype = build_nonstandard_integer_type (GET_MODE_PRECISION (m),
2868 uns0_p && uns1_p
2869 && unsr_p);
2870 p = TYPE_PRECISION (optype);
2871 if (p > precres)
2873 precres = p;
2874 unsr_p = TYPE_UNSIGNED (optype);
2875 type = optype;
2876 continue;
2880 if (prec0 <= precres && prec1 <= precres)
2882 tree types[2];
2883 if (unsr_p)
2885 types[0] = build_nonstandard_integer_type (precres, 0);
2886 types[1] = type;
2888 else
2890 types[0] = type;
2891 types[1] = build_nonstandard_integer_type (precres, 1);
2893 arg0 = fold_convert_loc (loc, types[uns0_p], arg0);
2894 arg1 = fold_convert_loc (loc, types[uns1_p], arg1);
2895 if (code != MULT_EXPR)
2896 expand_addsub_overflow (loc, code, lhs, arg0, arg1, unsr_p,
2897 uns0_p, uns1_p, false, NULL);
2898 else
2899 expand_mul_overflow (loc, lhs, arg0, arg1, unsr_p,
2900 uns0_p, uns1_p, false, NULL);
2901 flag_trapv = save_flag_trapv;
2902 return;
2905 /* Retry with a wider type. */
2906 if (orig_precres == precres)
2908 int p = MAX (prec0, prec1);
2909 scalar_int_mode m = smallest_int_mode_for_size (p).require ();
2910 tree optype = build_nonstandard_integer_type (GET_MODE_PRECISION (m),
2911 uns0_p && uns1_p
2912 && unsr_p);
2913 p = TYPE_PRECISION (optype);
2914 if (p > precres)
2916 precres = p;
2917 unsr_p = TYPE_UNSIGNED (optype);
2918 type = optype;
2919 continue;
2923 gcc_unreachable ();
2925 while (1);
2928 /* Expand ADD_OVERFLOW STMT. */
2930 static void
2931 expand_ADD_OVERFLOW (internal_fn, gcall *stmt)
2933 expand_arith_overflow (PLUS_EXPR, stmt);
2936 /* Expand SUB_OVERFLOW STMT. */
2938 static void
2939 expand_SUB_OVERFLOW (internal_fn, gcall *stmt)
2941 expand_arith_overflow (MINUS_EXPR, stmt);
2944 /* Expand MUL_OVERFLOW STMT. */
2946 static void
2947 expand_MUL_OVERFLOW (internal_fn, gcall *stmt)
2949 expand_arith_overflow (MULT_EXPR, stmt);
2952 /* Expand UADDC STMT. */
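/* .UADDC (A, B, CARRY_IN) adds the three operands; as expanded below,
   the real part of the complex result receives the wrapping sum and
   the imaginary part the carry out, so e.g. for 8-bit operands
   .UADDC (250, 10, 1) yields {5, 1} since 250 + 10 + 1 = 261 = 256 + 5.
   .USUBC is handled the same way, with a borrow instead of a carry. */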
2954 static void
2955 expand_UADDC (internal_fn ifn, gcall *stmt)
2957 tree lhs = gimple_call_lhs (stmt);
2958 tree arg1 = gimple_call_arg (stmt, 0);
2959 tree arg2 = gimple_call_arg (stmt, 1);
2960 tree arg3 = gimple_call_arg (stmt, 2);
2961 tree type = TREE_TYPE (arg1);
2962 machine_mode mode = TYPE_MODE (type);
2963 insn_code icode = optab_handler (ifn == IFN_UADDC
2964 ? uaddc5_optab : usubc5_optab, mode);
2965 rtx op1 = expand_normal (arg1);
2966 rtx op2 = expand_normal (arg2);
2967 rtx op3 = expand_normal (arg3);
2968 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
2969 rtx re = gen_reg_rtx (mode);
2970 rtx im = gen_reg_rtx (mode);
2971 class expand_operand ops[5];
2972 create_output_operand (&ops[0], re, mode);
2973 create_output_operand (&ops[1], im, mode);
2974 create_input_operand (&ops[2], op1, mode);
2975 create_input_operand (&ops[3], op2, mode);
2976 create_input_operand (&ops[4], op3, mode);
2977 expand_insn (icode, 5, ops);
2978 write_complex_part (target, re, false, false);
2979 write_complex_part (target, im, true, false);
2982 /* Expand USUBC STMT. */
2984 static void
2985 expand_USUBC (internal_fn ifn, gcall *stmt)
2987 expand_UADDC (ifn, stmt);
2990 /* This should get folded in tree-vectorizer.cc. */
2992 static void
2993 expand_LOOP_VECTORIZED (internal_fn, gcall *)
2995 gcc_unreachable ();
2998 /* This should get folded in tree-vectorizer.cc. */
3000 static void
3001 expand_LOOP_DIST_ALIAS (internal_fn, gcall *)
3003 gcc_unreachable ();
3006 /* Return a memory reference of type TYPE for argument INDEX of STMT.
3007 Use argument INDEX + 1 to derive the second (TBAA) operand. */
3009 static tree
3010 expand_call_mem_ref (tree type, gcall *stmt, int index)
3012 tree addr = gimple_call_arg (stmt, index);
3013 tree alias_ptr_type = TREE_TYPE (gimple_call_arg (stmt, index + 1));
3014 unsigned int align = tree_to_shwi (gimple_call_arg (stmt, index + 1));
3015 if (TYPE_ALIGN (type) != align)
3016 type = build_aligned_type (type, align);
3018 tree tmp = addr;
3019 if (TREE_CODE (tmp) == SSA_NAME)
3021 gimple *def = get_gimple_for_ssa_name (tmp);
3022 if (def && gimple_assign_single_p (def))
3023 tmp = gimple_assign_rhs1 (def);
3026 if (TREE_CODE (tmp) == ADDR_EXPR)
3028 tree mem = TREE_OPERAND (tmp, 0);
3029 if (TREE_CODE (mem) == TARGET_MEM_REF
3030 && types_compatible_p (TREE_TYPE (mem), type))
3032 tree offset = TMR_OFFSET (mem);
3033 if (type != TREE_TYPE (mem)
3034 || alias_ptr_type != TREE_TYPE (offset)
3035 || !integer_zerop (offset))
3037 mem = copy_node (mem);
3038 TMR_OFFSET (mem) = wide_int_to_tree (alias_ptr_type,
3039 wi::to_poly_wide (offset));
3040 TREE_TYPE (mem) = type;
3042 return mem;
3046 return fold_build2 (MEM_REF, type, addr, build_int_cst (alias_ptr_type, 0));
3049 /* Expand MASK_LOAD{,_LANES}, MASK_LEN_LOAD or LEN_LOAD call STMT using optab
3050 * OPTAB. */
3052 static void
3053 expand_partial_load_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab)
3055 int i = 0;
3056 class expand_operand ops[6];
3057 tree type, lhs, rhs, maskt;
3058 rtx mem, target;
3059 insn_code icode;
3061 maskt = gimple_call_arg (stmt, internal_fn_mask_index (ifn));
3062 lhs = gimple_call_lhs (stmt);
3063 if (lhs == NULL_TREE)
3064 return;
3065 type = TREE_TYPE (lhs);
3066 rhs = expand_call_mem_ref (type, stmt, 0);
3068 if (optab == vec_mask_load_lanes_optab
3069 || optab == vec_mask_len_load_lanes_optab)
3070 icode = get_multi_vector_move (type, optab);
3071 else if (optab == len_load_optab)
3072 icode = direct_optab_handler (optab, TYPE_MODE (type));
3073 else
3074 icode = convert_optab_handler (optab, TYPE_MODE (type),
3075 TYPE_MODE (TREE_TYPE (maskt)));
3077 mem = expand_expr (rhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3078 gcc_assert (MEM_P (mem));
3079 /* The built MEM_REF does not accurately reflect that the load
3080 is only partial. Clear it. */
3081 set_mem_expr (mem, NULL_TREE);
3082 clear_mem_offset (mem);
3083 target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3084 create_call_lhs_operand (&ops[i++], target, TYPE_MODE (type));
3085 create_fixed_operand (&ops[i++], mem);
3086 i = add_mask_else_and_len_args (ops, i, stmt);
3087 expand_insn (icode, i, ops);
3089 assign_call_lhs (lhs, target, &ops[0]);
3092 #define expand_mask_load_optab_fn expand_partial_load_optab_fn
3093 #define expand_mask_load_lanes_optab_fn expand_mask_load_optab_fn
3094 #define expand_len_load_optab_fn expand_partial_load_optab_fn
3095 #define expand_mask_len_load_optab_fn expand_partial_load_optab_fn
3097 /* Expand MASK_STORE{,_LANES}, MASK_LEN_STORE or LEN_STORE call STMT using optab
3098 * OPTAB. */
3100 static void
3101 expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab)
3103 int i = 0;
3104 class expand_operand ops[5];
3105 tree type, lhs, rhs, maskt;
3106 rtx mem, reg;
3107 insn_code icode;
3109 maskt = gimple_call_arg (stmt, internal_fn_mask_index (ifn));
3110 rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn));
3111 type = TREE_TYPE (rhs);
3112 lhs = expand_call_mem_ref (type, stmt, 0);
3114 if (optab == vec_mask_store_lanes_optab
3115 || optab == vec_mask_len_store_lanes_optab)
3116 icode = get_multi_vector_move (type, optab);
3117 else if (optab == len_store_optab)
3118 icode = direct_optab_handler (optab, TYPE_MODE (type));
3119 else
3120 icode = convert_optab_handler (optab, TYPE_MODE (type),
3121 TYPE_MODE (TREE_TYPE (maskt)));
3123 mem = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3124 gcc_assert (MEM_P (mem));
3125 /* The built MEM_REF does not accurately reflect that the store
3126 is only partial. Clear it. */
3127 set_mem_expr (mem, NULL_TREE);
3128 clear_mem_offset (mem);
3129 reg = expand_normal (rhs);
3130 create_fixed_operand (&ops[i++], mem);
3131 create_input_operand (&ops[i++], reg, TYPE_MODE (type));
3132 i = add_mask_else_and_len_args (ops, i, stmt);
3133 expand_insn (icode, i, ops);
3136 #define expand_mask_store_optab_fn expand_partial_store_optab_fn
3137 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
3138 #define expand_len_store_optab_fn expand_partial_store_optab_fn
3139 #define expand_mask_len_store_optab_fn expand_partial_store_optab_fn
3141 /* Expand VCOND_MASK optab internal function.
3142 STMT is expanded based on the associated OPTAB. */
3144 static void
3145 expand_vec_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
3147 class expand_operand ops[4];
3149 tree lhs = gimple_call_lhs (stmt);
3150 tree op0 = gimple_call_arg (stmt, 0);
3151 tree op1 = gimple_call_arg (stmt, 1);
3152 tree op2 = gimple_call_arg (stmt, 2);
3153 tree vec_cond_type = TREE_TYPE (lhs);
3155 machine_mode mode = TYPE_MODE (vec_cond_type);
3156 machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
3157 enum insn_code icode = convert_optab_handler (optab, mode, mask_mode);
3158 rtx mask, rtx_op1, rtx_op2;
3160 gcc_assert (icode != CODE_FOR_nothing);
3162 mask = expand_normal (op0);
3163 rtx_op1 = expand_normal (op1);
3164 rtx_op2 = expand_normal (op2);
3166 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3167 create_call_lhs_operand (&ops[0], target, mode);
3168 create_input_operand (&ops[1], rtx_op1, mode);
3169 create_input_operand (&ops[2], rtx_op2, mode);
3170 create_input_operand (&ops[3], mask, mask_mode);
3171 expand_insn (icode, 4, ops);
3172 assign_call_lhs (lhs, target, &ops[0]);
3175 /* Expand VEC_SET internal functions. */
3177 static void
3178 expand_vec_set_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
3180 tree lhs = gimple_call_lhs (stmt);
3181 tree op0 = gimple_call_arg (stmt, 0);
3182 tree op1 = gimple_call_arg (stmt, 1);
3183 tree op2 = gimple_call_arg (stmt, 2);
3184 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3185 rtx src = expand_normal (op0);
3187 machine_mode outermode = TYPE_MODE (TREE_TYPE (op0));
3188 scalar_mode innermode = GET_MODE_INNER (outermode);
3190 rtx value = expand_normal (op1);
3191 rtx pos = expand_normal (op2);
3193 class expand_operand ops[3];
3194 enum insn_code icode = optab_handler (optab, outermode);
3196 if (icode != CODE_FOR_nothing)
3198 rtx temp = gen_reg_rtx (outermode);
3199 emit_move_insn (temp, src);
3201 create_fixed_operand (&ops[0], temp);
3202 create_input_operand (&ops[1], value, innermode);
3203 create_convert_operand_from (&ops[2], pos, TYPE_MODE (TREE_TYPE (op2)),
3204 true);
3205 if (maybe_expand_insn (icode, 3, ops))
3207 emit_move_insn (target, temp);
3208 return;
3211 gcc_unreachable ();
3214 static void
3215 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
3219 static void
3220 expand_BUILTIN_EXPECT (internal_fn, gcall *stmt)
3222 /* When guessing was done, the hints should be already stripped away. */
3223 gcc_assert (!flag_guess_branch_prob || optimize == 0 || seen_error ());
3225 rtx target;
3226 tree lhs = gimple_call_lhs (stmt);
3227 if (lhs)
3228 target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3229 else
3230 target = const0_rtx;
3231 rtx val = expand_expr (gimple_call_arg (stmt, 0), target, VOIDmode, EXPAND_NORMAL);
3232 if (lhs && val != target)
3233 emit_move_insn (target, val);
3236 /* IFN_VA_ARG is supposed to be expanded at pass_stdarg. So this dummy function
3237 should never be called. */
3239 static void
3240 expand_VA_ARG (internal_fn, gcall *)
3242 gcc_unreachable ();
3245 /* IFN_VEC_CONVERT is supposed to be expanded at pass_lower_vector. So this
3246 dummy function should never be called. */
3248 static void
3249 expand_VEC_CONVERT (internal_fn, gcall *)
3251 gcc_unreachable ();
3254 /* Expand IFN_RAWMEMCHR internal function. */
3256 void
3257 expand_RAWMEMCHR (internal_fn, gcall *stmt)
3259 expand_operand ops[3];
3261 tree lhs = gimple_call_lhs (stmt);
3262 if (!lhs)
3263 return;
3264 machine_mode lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
3265 rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3266 create_call_lhs_operand (&ops[0], lhs_rtx, lhs_mode);
3268 tree mem = gimple_call_arg (stmt, 0);
3269 rtx mem_rtx = get_memory_rtx (mem, NULL);
3270 create_fixed_operand (&ops[1], mem_rtx);
3272 tree pattern = gimple_call_arg (stmt, 1);
3273 machine_mode mode = TYPE_MODE (TREE_TYPE (pattern));
3274 rtx pattern_rtx = expand_normal (pattern);
3275 create_input_operand (&ops[2], pattern_rtx, mode);
3277 insn_code icode = direct_optab_handler (rawmemchr_optab, mode);
3279 expand_insn (icode, 3, ops);
3280 assign_call_lhs (lhs, lhs_rtx, &ops[0]);
3283 /* Expand the IFN_UNIQUE function according to its first argument. */
3285 static void
3286 expand_UNIQUE (internal_fn, gcall *stmt)
3288 rtx pattern = NULL_RTX;
3289 enum ifn_unique_kind kind
3290 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
3292 switch (kind)
3294 default:
3295 gcc_unreachable ();
3297 case IFN_UNIQUE_UNSPEC:
3298 if (targetm.have_unique ())
3299 pattern = targetm.gen_unique ();
3300 break;
3302 case IFN_UNIQUE_OACC_FORK:
3303 case IFN_UNIQUE_OACC_JOIN:
3304 if (targetm.have_oacc_fork () && targetm.have_oacc_join ())
3306 tree lhs = gimple_call_lhs (stmt);
3307 rtx target = const0_rtx;
3309 if (lhs)
3310 target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3312 rtx data_dep = expand_normal (gimple_call_arg (stmt, 1));
3313 rtx axis = expand_normal (gimple_call_arg (stmt, 2));
3315 if (kind == IFN_UNIQUE_OACC_FORK)
3316 pattern = targetm.gen_oacc_fork (target, data_dep, axis);
3317 else
3318 pattern = targetm.gen_oacc_join (target, data_dep, axis);
3320 else
3321 gcc_unreachable ();
3322 break;
3325 if (pattern)
3326 emit_insn (pattern);
3329 /* Expand the IFN_DEFERRED_INIT function:
3330 LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, NAME of the DECL);
3332 Initialize the LHS with zero/pattern according to its second argument
3333 INIT_TYPE:
3334 if INIT_TYPE is AUTO_INIT_ZERO, use zeroes to initialize;
3335 if INIT_TYPE is AUTO_INIT_PATTERN, use a repeated 0xFE byte pattern
3336 to initialize.
3337 The LHS variable is initialized including padding.
3338 The reasons to choose 0xFE for pattern initialization are:
3339 1. It is a non-canonical virtual address on x86_64, and at the
3340 high end of the i386 kernel address space.
3341 2. It is a very large float value (-1.694739530317379e+38).
3342 3. It is also an unusual number for integers. */
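/* With that pattern a 32-bit integer, for instance, reads back as
   0xfefefefe == -16843010. */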
3343 #define INIT_PATTERN_VALUE 0xFE
3344 static void
3345 expand_DEFERRED_INIT (internal_fn, gcall *stmt)
3347 tree lhs = gimple_call_lhs (stmt);
3348 tree var_size = gimple_call_arg (stmt, 0);
3349 enum auto_init_type init_type
3350 = (enum auto_init_type) TREE_INT_CST_LOW (gimple_call_arg (stmt, 1));
3351 bool reg_lhs = true;
3353 tree var_type = TREE_TYPE (lhs);
3354 gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
3356 if (TREE_CODE (lhs) == SSA_NAME)
3357 reg_lhs = true;
3358 else
3360 tree lhs_base = lhs;
3361 while (handled_component_p (lhs_base))
3362 lhs_base = TREE_OPERAND (lhs_base, 0);
3363 reg_lhs = (mem_ref_refers_to_non_mem_p (lhs_base)
3364 || non_mem_decl_p (lhs_base));
3365 /* If this expands to a register and the underlying decl is wrapped in
3366 a MEM_REF that just serves as an access type change expose the decl
3367 if it is of correct size. This avoids a situation as in PR103271
3368 if the target does not support a direct move to the registers mode. */
3369 if (reg_lhs
3370 && TREE_CODE (lhs_base) == MEM_REF
3371 && TREE_CODE (TREE_OPERAND (lhs_base, 0)) == ADDR_EXPR
3372 && DECL_P (TREE_OPERAND (TREE_OPERAND (lhs_base, 0), 0))
3373 && integer_zerop (TREE_OPERAND (lhs_base, 1))
3374 && tree_fits_uhwi_p (var_size)
3375 && tree_int_cst_equal
3376 (var_size,
3377 DECL_SIZE_UNIT (TREE_OPERAND (TREE_OPERAND (lhs_base, 0), 0))))
3379 lhs = TREE_OPERAND (TREE_OPERAND (lhs_base, 0), 0);
3380 var_type = TREE_TYPE (lhs);
3384 if (!reg_lhs)
3386 /* If the variable is not in register, expand to a memset
3387 to initialize it. */
3388 mark_addressable (lhs);
3389 tree var_addr = build_fold_addr_expr (lhs);
3391 tree value = (init_type == AUTO_INIT_PATTERN)
3392 ? build_int_cst (integer_type_node,
3393 INIT_PATTERN_VALUE)
3394 : integer_zero_node;
3395 tree m_call = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMSET),
3396 3, var_addr, value, var_size);
3397 /* Expand this memset call. */
3398 expand_builtin_memset (m_call, NULL_RTX, TYPE_MODE (var_type));
3400 else
3402 /* If this variable is in a register use expand_assignment.
3403 For boolean scalars force zero-init. */
3404 tree init;
3405 scalar_int_mode var_mode;
3406 if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE
3407 && tree_fits_uhwi_p (var_size)
3408 && (init_type == AUTO_INIT_PATTERN
3409 || !is_gimple_reg_type (var_type))
3410 && int_mode_for_size (tree_to_uhwi (var_size) * BITS_PER_UNIT,
3411 0).exists (&var_mode)
3412 && have_insn_for (SET, var_mode))
3414 unsigned HOST_WIDE_INT total_bytes = tree_to_uhwi (var_size);
3415 unsigned char *buf = XALLOCAVEC (unsigned char, total_bytes);
3416 memset (buf, (init_type == AUTO_INIT_PATTERN
3417 ? INIT_PATTERN_VALUE : 0), total_bytes);
3418 tree itype = build_nonstandard_integer_type
3419 (total_bytes * BITS_PER_UNIT, 1);
3420 wide_int w = wi::from_buffer (buf, total_bytes);
3421 init = wide_int_to_tree (itype, w);
3422 /* Pun the LHS to make sure its type has constant size
3423 unless it is an SSA name where that's already known. */
3424 if (TREE_CODE (lhs) != SSA_NAME)
3425 lhs = build1 (VIEW_CONVERT_EXPR, itype, lhs);
3426 else
3427 init = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), init);
3429 else
3430 /* Use zero-init also for variable-length sizes. */
3431 init = build_zero_cst (var_type);
3433 expand_assignment (lhs, init, false);
3437 /* Expand the IFN_ACCESS_WITH_SIZE function:
3438 ACCESS_WITH_SIZE (REF_TO_OBJ, REF_TO_SIZE, CLASS_OF_SIZE,
3439 TYPE_OF_SIZE, ACCESS_MODE)
3440 which returns the REF_TO_OBJ same as the 1st argument;
3442 1st argument REF_TO_OBJ: The reference to the object;
3443 2nd argument REF_TO_SIZE: The reference to the size of the object,
3444 3rd argument CLASS_OF_SIZE: The size referenced by the REF_TO_SIZE represents
3445 0: the number of bytes;
3446 1: the number of elements of the object type;
3447 4th argument TYPE_OF_SIZE: A constant 0 with its TYPE being the same as the TYPE
3448 of the object referenced by REF_TO_SIZE;
3449 5th argument ACCESS_MODE:
3450 -1: Unknown access semantics
3451 0: none
3452 1: read_only
3453 2: write_only
3454 3: read_write
3455 6th argument: A constant 0 with the pointer TYPE to the original flexible
3456 array type.
3458 Both the return type and the type of the first argument of this
3459 function have been converted from the incomplete array type to
3460 the corresponding pointer type.
3462 For each call to a .ACCESS_WITH_SIZE, replace it with its 1st argument. */
3464 static void
3465 expand_ACCESS_WITH_SIZE (internal_fn, gcall *stmt)
3467 tree lhs = gimple_call_lhs (stmt);
3468 tree ref_to_obj = gimple_call_arg (stmt, 0);
3469 if (lhs)
3470 expand_assignment (lhs, ref_to_obj, false);
3473 /* The size of an OpenACC compute dimension. */
3475 static void
3476 expand_GOACC_DIM_SIZE (internal_fn, gcall *stmt)
3478 tree lhs = gimple_call_lhs (stmt);
3480 if (!lhs)
3481 return;
3483 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3484 if (targetm.have_oacc_dim_size ())
3486 rtx dim = expand_expr (gimple_call_arg (stmt, 0), NULL_RTX,
3487 VOIDmode, EXPAND_NORMAL);
3488 emit_insn (targetm.gen_oacc_dim_size (target, dim));
3490 else
3491 emit_move_insn (target, GEN_INT (1));
3494 /* The position of an OpenACC execution engine along one compute axis. */
3496 static void
3497 expand_GOACC_DIM_POS (internal_fn, gcall *stmt)
3499 tree lhs = gimple_call_lhs (stmt);
3501 if (!lhs)
3502 return;
3504 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3505 if (targetm.have_oacc_dim_pos ())
3507 rtx dim = expand_expr (gimple_call_arg (stmt, 0), NULL_RTX,
3508 VOIDmode, EXPAND_NORMAL);
3509 emit_insn (targetm.gen_oacc_dim_pos (target, dim));
3511 else
3512 emit_move_insn (target, const0_rtx);
3515 /* This is expanded by oacc_device_lower pass. */
3517 static void
3518 expand_GOACC_LOOP (internal_fn, gcall *)
3520 gcc_unreachable ();
3523 /* This is expanded by oacc_device_lower pass. */
3525 static void
3526 expand_GOACC_REDUCTION (internal_fn, gcall *)
3528 gcc_unreachable ();
3531 /* This is expanded by oacc_device_lower pass. */
3533 static void
3534 expand_GOACC_TILE (internal_fn, gcall *)
3536 gcc_unreachable ();
3539 /* Set errno to EDOM. */
3541 static void
3542 expand_SET_EDOM (internal_fn, gcall *)
3544 #ifdef TARGET_EDOM
3545 #ifdef GEN_ERRNO_RTX
3546 rtx errno_rtx = GEN_ERRNO_RTX;
3547 #else
3548 rtx errno_rtx = gen_rtx_MEM (word_mode, gen_rtx_SYMBOL_REF (Pmode, "errno"));
3549 #endif
3550 emit_move_insn (errno_rtx,
3551 gen_int_mode (TARGET_EDOM, GET_MODE (errno_rtx)));
3552 #else
3553 gcc_unreachable ();
3554 #endif
3557 /* Expand atomic bit test and set. */
3559 static void
3560 expand_ATOMIC_BIT_TEST_AND_SET (internal_fn, gcall *call)
3562 expand_ifn_atomic_bit_test_and (call);
3565 /* Expand atomic bit test and complement. */
3567 static void
3568 expand_ATOMIC_BIT_TEST_AND_COMPLEMENT (internal_fn, gcall *call)
3570 expand_ifn_atomic_bit_test_and (call);
3573 /* Expand atomic bit test and reset. */
3575 static void
3576 expand_ATOMIC_BIT_TEST_AND_RESET (internal_fn, gcall *call)
3578 expand_ifn_atomic_bit_test_and (call);
3581 /* Expand atomic compare and exchange. */
3583 static void
3584 expand_ATOMIC_COMPARE_EXCHANGE (internal_fn, gcall *call)
3586 expand_ifn_atomic_compare_exchange (call);
3589 /* Expand atomic add fetch and cmp with 0. */
3591 static void
3592 expand_ATOMIC_ADD_FETCH_CMP_0 (internal_fn, gcall *call)
3594 expand_ifn_atomic_op_fetch_cmp_0 (call);
3597 /* Expand atomic sub fetch and cmp with 0. */
3599 static void
3600 expand_ATOMIC_SUB_FETCH_CMP_0 (internal_fn, gcall *call)
3602 expand_ifn_atomic_op_fetch_cmp_0 (call);
3605 /* Expand atomic and fetch and cmp with 0. */
3607 static void
3608 expand_ATOMIC_AND_FETCH_CMP_0 (internal_fn, gcall *call)
3610 expand_ifn_atomic_op_fetch_cmp_0 (call);
3613 /* Expand atomic or fetch and cmp with 0. */
3615 static void
3616 expand_ATOMIC_OR_FETCH_CMP_0 (internal_fn, gcall *call)
3618 expand_ifn_atomic_op_fetch_cmp_0 (call);
3621 /* Expand atomic xor fetch and cmp with 0. */
3623 static void
3624 expand_ATOMIC_XOR_FETCH_CMP_0 (internal_fn, gcall *call)
3626 expand_ifn_atomic_op_fetch_cmp_0 (call);
3629 /* Expand LAUNDER to assignment, lhs = arg0. */
3631 static void
3632 expand_LAUNDER (internal_fn, gcall *call)
3634 tree lhs = gimple_call_lhs (call);
3636 if (!lhs)
3637 return;
3639 expand_assignment (lhs, gimple_call_arg (call, 0), false);
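/* E.g. __builtin_launder (p) (used by std::launder) reaches expansion as
   p_2 = .LAUNDER (p_1); by this point it only needs to behave as a plain
   copy, since its real job is to act as an optimization barrier during the
   gimple passes.  */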
3642 /* Expand {MASK_,}SCATTER_STORE{S,U} call CALL using optab OPTAB. */
3644 static void
3645 expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
3647 internal_fn ifn = gimple_call_internal_fn (stmt);
3648 int rhs_index = internal_fn_stored_value_index (ifn);
3649 tree base = gimple_call_arg (stmt, 0);
3650 tree offset = gimple_call_arg (stmt, 1);
3651 tree scale = gimple_call_arg (stmt, 2);
3652 tree rhs = gimple_call_arg (stmt, rhs_index);
3654 rtx base_rtx = expand_normal (base);
3655 rtx offset_rtx = expand_normal (offset);
3656 HOST_WIDE_INT scale_int = tree_to_shwi (scale);
3657 rtx rhs_rtx = expand_normal (rhs);
3659 class expand_operand ops[8];
3660 int i = 0;
3661 create_address_operand (&ops[i++], base_rtx);
3662 create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
3663 create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
3664 create_integer_operand (&ops[i++], scale_int);
3665 create_input_operand (&ops[i++], rhs_rtx, TYPE_MODE (TREE_TYPE (rhs)));
3666 i = add_mask_else_and_len_args (ops, i, stmt);
3668 insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
3669 TYPE_MODE (TREE_TYPE (offset)));
3670 expand_insn (icode, i, ops);
3673 /* Expand {MASK_,}GATHER_LOAD call CALL using optab OPTAB. */
3675 static void
3676 expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
3678 tree lhs = gimple_call_lhs (stmt);
3679 tree base = gimple_call_arg (stmt, 0);
3680 tree offset = gimple_call_arg (stmt, 1);
3681 tree scale = gimple_call_arg (stmt, 2);
3683 rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3684 rtx base_rtx = expand_normal (base);
3685 rtx offset_rtx = expand_normal (offset);
3686 HOST_WIDE_INT scale_int = tree_to_shwi (scale);
3688 int i = 0;
3689 class expand_operand ops[9];
3690 create_call_lhs_operand (&ops[i++], lhs_rtx, TYPE_MODE (TREE_TYPE (lhs)));
3691 create_address_operand (&ops[i++], base_rtx);
3692 create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
3693 create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
3694 create_integer_operand (&ops[i++], scale_int);
3695 i = add_mask_else_and_len_args (ops, i, stmt);
3696 insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
3697 TYPE_MODE (TREE_TYPE (offset)));
3698 expand_insn (icode, i, ops);
3699 assign_call_lhs (lhs, lhs_rtx, &ops[0]);
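/* Taken together, both expanders above present the target pattern with its
   operands in a fixed order: (the destination, for gathers,) the base
   address, the offset vector, the signedness of the offsets, the scale and
   (for scatters) the stored value, followed by whatever mask/else/len
   operands add_mask_else_and_len_args appends for the MASK_ and MASK_LEN_
   variants.  */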
3702 /* Expand MASK_LEN_STRIDED_LOAD call CALL using optab OPTAB. */
3704 static void
3705 expand_strided_load_optab_fn (ATTRIBUTE_UNUSED internal_fn, gcall *stmt,
3706 direct_optab optab)
3708 tree lhs = gimple_call_lhs (stmt);
3709 tree base = gimple_call_arg (stmt, 0);
3710 tree stride = gimple_call_arg (stmt, 1);
3712 rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3713 rtx base_rtx = expand_normal (base);
3714 rtx stride_rtx = expand_normal (stride);
3716 unsigned i = 0;
3717 class expand_operand ops[7];
3718 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
3720 create_output_operand (&ops[i++], lhs_rtx, mode);
3721 create_address_operand (&ops[i++], base_rtx);
3722 create_address_operand (&ops[i++], stride_rtx);
3724 i = add_mask_else_and_len_args (ops, i, stmt);
3725 expand_insn (direct_optab_handler (optab, mode), i, ops);
3727 if (!rtx_equal_p (lhs_rtx, ops[0].value))
3728 emit_move_insn (lhs_rtx, ops[0].value);
3731 /* Expand MASK_LEN_STRIDED_STORE call CALL using optab OPTAB. */
3733 static void
3734 expand_strided_store_optab_fn (ATTRIBUTE_UNUSED internal_fn, gcall *stmt,
3735 direct_optab optab)
3737 internal_fn fn = gimple_call_internal_fn (stmt);
3738 int rhs_index = internal_fn_stored_value_index (fn);
3740 tree base = gimple_call_arg (stmt, 0);
3741 tree stride = gimple_call_arg (stmt, 1);
3742 tree rhs = gimple_call_arg (stmt, rhs_index);
3744 rtx base_rtx = expand_normal (base);
3745 rtx stride_rtx = expand_normal (stride);
3746 rtx rhs_rtx = expand_normal (rhs);
3748 unsigned i = 0;
3749 class expand_operand ops[6];
3750 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs));
3752 create_address_operand (&ops[i++], base_rtx);
3753 create_address_operand (&ops[i++], stride_rtx);
3754 create_input_operand (&ops[i++], rhs_rtx, mode);
3756 i = add_mask_else_and_len_args (ops, i, stmt);
3757 expand_insn (direct_optab_handler (optab, mode), i, ops);
3760 /* Helper for expand_DIVMOD. Return true if the sequence starting with
3761 INSN contains any call insns or insns with {,U}{DIV,MOD} rtxes. */
3763 static bool
3764 contains_call_div_mod (rtx_insn *insn)
3766 subrtx_iterator::array_type array;
3767 for (; insn; insn = NEXT_INSN (insn))
3768 if (CALL_P (insn))
3769 return true;
3770 else if (INSN_P (insn))
3771 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
3772 switch (GET_CODE (*iter))
3774 case CALL:
3775 case DIV:
3776 case UDIV:
3777 case MOD:
3778 case UMOD:
3779 return true;
3780 default:
3781 break;
3783 return false;
3786 /* Expand DIVMOD() using:
3787 a) optab handler for udivmod/sdivmod if it is available.
3788 b) If optab_handler doesn't exist, generate call to
3789 target-specific divmod libfunc. */
3791 static void
3792 expand_DIVMOD (internal_fn, gcall *call_stmt)
3794 tree lhs = gimple_call_lhs (call_stmt);
3795 tree arg0 = gimple_call_arg (call_stmt, 0);
3796 tree arg1 = gimple_call_arg (call_stmt, 1);
3798 gcc_assert (TREE_CODE (TREE_TYPE (lhs)) == COMPLEX_TYPE);
3799 tree type = TREE_TYPE (TREE_TYPE (lhs));
3800 machine_mode mode = TYPE_MODE (type);
3801 bool unsignedp = TYPE_UNSIGNED (type);
3802 optab tab = (unsignedp) ? udivmod_optab : sdivmod_optab;
3804 rtx op0 = expand_normal (arg0);
3805 rtx op1 = expand_normal (arg1);
3806 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3808 rtx quotient = NULL_RTX, remainder = NULL_RTX;
3809 rtx_insn *insns = NULL;
3811 if (TREE_CODE (arg1) == INTEGER_CST)
3813 /* For DIVMOD by integral constants, there could be efficient code
3814 expanded inline, e.g. using shifts and plus/minus. Try to expand
3815 the division and modulo and, if it emits any library calls or any
3816 {,U}{DIV,MOD} rtxes, throw it away and use a divmod optab or
3817 divmod libcall. */
3818 scalar_int_mode int_mode;
3819 if (remainder == NULL_RTX
3820 && optimize
3821 && CONST_INT_P (op1)
3822 && !pow2p_hwi (INTVAL (op1))
3823 && is_int_mode (TYPE_MODE (type), &int_mode)
3824 && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
3825 && optab_handler (and_optab, word_mode) != CODE_FOR_nothing
3826 && optab_handler (add_optab, word_mode) != CODE_FOR_nothing
3827 && optimize_insn_for_speed_p ())
3829 rtx_insn *last = get_last_insn ();
3830 remainder = NULL_RTX;
3831 quotient = expand_doubleword_divmod (int_mode, op0, op1, &remainder,
3832 TYPE_UNSIGNED (type));
3833 if (quotient != NULL_RTX)
3835 if (optab_handler (mov_optab, int_mode) != CODE_FOR_nothing)
3837 rtx_insn *move = emit_move_insn (quotient, quotient);
3838 set_dst_reg_note (move, REG_EQUAL,
3839 gen_rtx_fmt_ee (TYPE_UNSIGNED (type)
3840 ? UDIV : DIV, int_mode,
3841 copy_rtx (op0), op1),
3842 quotient);
3843 move = emit_move_insn (remainder, remainder);
3844 set_dst_reg_note (move, REG_EQUAL,
3845 gen_rtx_fmt_ee (TYPE_UNSIGNED (type)
3846 ? UMOD : MOD, int_mode,
3847 copy_rtx (op0), op1),
3848 quotient);
3851 else
3852 delete_insns_since (last);
3855 if (remainder == NULL_RTX)
3857 struct separate_ops ops;
3858 ops.code = TRUNC_DIV_EXPR;
3859 ops.type = type;
3860 ops.op0 = make_tree (ops.type, op0);
3861 ops.op1 = arg1;
3862 ops.op2 = NULL_TREE;
3863 ops.location = gimple_location (call_stmt);
3864 start_sequence ();
3865 quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
3866 if (contains_call_div_mod (get_insns ()))
3867 quotient = NULL_RTX;
3868 else
3870 ops.code = TRUNC_MOD_EXPR;
3871 remainder = expand_expr_real_2 (&ops, NULL_RTX, mode,
3872 EXPAND_NORMAL);
3873 if (contains_call_div_mod (get_insns ()))
3874 remainder = NULL_RTX;
3876 if (remainder)
3877 insns = get_insns ();
3878 end_sequence ();
3882 if (remainder)
3883 emit_insn (insns);
3885 /* Check if an optab handler exists for divmod_optab in the given mode. */
3886 else if (optab_handler (tab, mode) != CODE_FOR_nothing)
3888 quotient = gen_reg_rtx (mode);
3889 remainder = gen_reg_rtx (mode);
3890 expand_twoval_binop (tab, op0, op1, quotient, remainder, unsignedp);
3893 /* Generate call to divmod libfunc if it exists. */
3894 else if (rtx libfunc = optab_libfunc (tab, mode))
3895 targetm.expand_divmod_libfunc (libfunc, mode, op0, op1,
3896 &quotient, &remainder);
3898 else
3899 gcc_unreachable ();
3901 /* Wrap the return value (quotient, remainder) within COMPLEX_EXPR. */
3902 expand_expr (build2 (COMPLEX_EXPR, TREE_TYPE (lhs),
3903 make_tree (TREE_TYPE (arg0), quotient),
3904 make_tree (TREE_TYPE (arg1), remainder)),
3905 target, VOIDmode, EXPAND_NORMAL);
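/* As a usage sketch, when both x / y and x % y are needed the pair is merged
   into a single call such as

     _1 = .DIVMOD (x_2, y_3);
     q_4 = REALPART_EXPR <_1>;
     r_5 = IMAGPART_EXPR <_1>;

   and the COMPLEX_EXPR built above supplies the quotient as the real part of
   _1 and the remainder as its imaginary part.  */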
3908 /* Expand a NOP. */
3910 static void
3911 expand_NOP (internal_fn, gcall *)
3913 /* Nothing to do; such calls should not normally survive to expansion. */
3916 /* Coroutine IFNs; all should have been processed by this stage. */
3918 static void
3919 expand_CO_FRAME (internal_fn, gcall *)
3921 gcc_unreachable ();
3924 static void
3925 expand_CO_YIELD (internal_fn, gcall *)
3927 gcc_unreachable ();
3930 static void
3931 expand_CO_SUSPN (internal_fn, gcall *)
3933 gcc_unreachable ();
3936 static void
3937 expand_CO_ACTOR (internal_fn, gcall *)
3939 gcc_unreachable ();
3942 /* Expand a call to FN using the operands in STMT. FN has a single
3943 output operand and NARGS input operands. */
3945 static void
3946 expand_direct_optab_fn (internal_fn fn, gcall *stmt, direct_optab optab,
3947 unsigned int nargs)
3949 tree_pair types = direct_internal_fn_types (fn, stmt);
3950 insn_code icode = direct_optab_handler (optab, TYPE_MODE (types.first));
3951 expand_fn_using_insn (stmt, icode, 1, nargs);
3954 /* Expand WHILE_ULT call STMT using optab OPTAB. */
3956 static void
3957 expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
3959 expand_operand ops[4];
3960 tree rhs_type[2];
3962 tree lhs = gimple_call_lhs (stmt);
3963 tree lhs_type = TREE_TYPE (lhs);
3964 rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
3965 create_call_lhs_operand (&ops[0], lhs_rtx, TYPE_MODE (lhs_type));
3967 for (unsigned int i = 0; i < 2; ++i)
3969 tree rhs = gimple_call_arg (stmt, i);
3970 rhs_type[i] = TREE_TYPE (rhs);
3971 rtx rhs_rtx = expand_normal (rhs);
3972 create_input_operand (&ops[i + 1], rhs_rtx, TYPE_MODE (rhs_type[i]));
3975 int opcnt;
3976 if (!VECTOR_MODE_P (TYPE_MODE (lhs_type)))
3978 /* When the mask is an integer mode the exact vector length may not
3979 be clear to the backend, so we pass it in operand[3].
3980 Use the vector in arg2 for the most reliable intended size. */
3981 tree type = TREE_TYPE (gimple_call_arg (stmt, 2));
3982 create_integer_operand (&ops[3], TYPE_VECTOR_SUBPARTS (type));
3983 opcnt = 4;
3985 else
3986 /* The mask has a vector type so the length operand is unnecessary. */
3987 opcnt = 3;
3989 insn_code icode = convert_optab_handler (optab, TYPE_MODE (rhs_type[0]),
3990 TYPE_MODE (lhs_type));
3992 expand_insn (icode, opcnt, ops);
3993 assign_call_lhs (lhs, lhs_rtx, &ops[0]);
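/* A typical use (operand names illustrative) is the vectorizer's loop mask:

     mask_7 = .WHILE_ULT (_1, niters_2, { 0, ... });

   where lane I of the result is true iff _1 + I < niters_2.  The dummy
   vector argument only conveys the intended mask type; it is also where the
   subpart count passed in operand 3 above comes from when the mask mode is
   an integer mode.  */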
3996 /* Expand a call to a convert-like optab using the operands in STMT.
3997 FN has a single output operand and NARGS input operands. */
3999 static void
4000 expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
4001 unsigned int nargs)
4003 tree_pair types = direct_internal_fn_types (fn, stmt);
4004 insn_code icode = convert_optab_handler (optab, TYPE_MODE (types.first),
4005 TYPE_MODE (types.second));
4006 expand_fn_using_insn (stmt, icode, 1, nargs);
4009 /* Expand CRC call STMT. */
4011 static void
4012 expand_crc_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab)
4014 tree lhs = gimple_call_lhs (stmt);
4015 tree rhs1 = gimple_call_arg (stmt, 0); // crc
4016 tree rhs2 = gimple_call_arg (stmt, 1); // data
4017 tree rhs3 = gimple_call_arg (stmt, 2); // polynomial
4019 tree result_type = TREE_TYPE (lhs);
4020 tree data_type = TREE_TYPE (rhs2);
4022 gcc_assert (TYPE_MODE (result_type) >= TYPE_MODE (data_type));
4024 rtx dest = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
4025 rtx crc = expand_normal (rhs1);
4026 rtx data = expand_normal (rhs2);
4027 gcc_assert (TREE_CODE (rhs3) == INTEGER_CST);
4028 rtx polynomial = gen_rtx_CONST_INT (TYPE_MODE (result_type),
4029 TREE_INT_CST_LOW (rhs3));
4031 /* Use target-specific expansion if it exists.
4032 Otherwise, generate table-based CRC. */
4033 if (direct_internal_fn_supported_p (fn, tree_pair (data_type, result_type),
4034 OPTIMIZE_FOR_SPEED))
4036 class expand_operand ops[4];
4038 if (dump_file && (dump_flags & TDF_DETAILS))
4040 fprintf (dump_file,
4041 ";; using optab for crc_%u_polynomial_"
4042 HOST_WIDE_INT_PRINT_HEX "\n",
4043 GET_MODE_BITSIZE (GET_MODE (dest)).to_constant (),
4044 TREE_INT_CST_LOW (rhs3));
4047 create_call_lhs_operand (&ops[0], dest, TYPE_MODE (result_type));
4048 create_input_operand (&ops[1], crc, TYPE_MODE (result_type));
4049 create_input_operand (&ops[2], data, TYPE_MODE (data_type));
4050 create_input_operand (&ops[3], polynomial, TYPE_MODE (result_type));
4051 insn_code icode = convert_optab_handler (optab, TYPE_MODE (data_type),
4052 TYPE_MODE (result_type));
4053 expand_insn (icode, 4, ops);
4054 assign_call_lhs (lhs, dest, &ops[0]);
4056 else
4058 /* We're bypassing all the operand conversions that are done in the
4059 case when we get an icode, operands and pass that off to expand_insn.
4061 That path has special case handling for promoted return values which
4062 we must emulate here (is the same kind of special treatment ever
4063 needed for input arguments here?).
4065 In particular we do not want to store directly into a promoted
4066 SUBREG destination, instead store into a suitably sized pseudo. */
4067 rtx orig_dest = dest;
4068 if (SUBREG_P (dest) && SUBREG_PROMOTED_VAR_P (dest))
4069 dest = gen_reg_rtx (GET_MODE (dest));
4071 /* If it's IFN_CRC generate bit-forward CRC. */
4072 if (fn == IFN_CRC)
4073 expand_crc_table_based (dest, crc, data, polynomial,
4074 TYPE_MODE (data_type));
4075 else
4076 /* If it's IFN_CRC_REV generate bit-reversed CRC. */
4077 expand_reversed_crc_table_based (dest, crc, data, polynomial,
4078 TYPE_MODE (data_type),
4079 generate_reflecting_code_standard);
4081 /* Now get the return value where it needs to be, taking care to
4082 ensure it's promoted appropriately if the ABI demands it.
4084 Re-use assign_call_lhs to handle the details. */
4085 class expand_operand ops[4];
4086 create_call_lhs_operand (&ops[0], dest, TYPE_MODE (result_type));
4087 ops[0].value = dest;
4088 assign_call_lhs (lhs, orig_dest, &ops[0]);
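/* Sketch of the two paths above: when the target provides a matching CRC
   pattern for this (data mode, result mode) pair, a call such as
   crc_6 = .CRC (crc_4, data_5, 0x1021) becomes that single insn; otherwise
   the generic helpers emit an equivalent table-based computation, using the
   bit-reversed variant for IFN_CRC_REV.  */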
4092 /* Expanders for optabs that can use expand_direct_optab_fn. */
4094 #define expand_unary_optab_fn(FN, STMT, OPTAB) \
4095 expand_direct_optab_fn (FN, STMT, OPTAB, 1)
4097 #define expand_binary_optab_fn(FN, STMT, OPTAB) \
4098 expand_direct_optab_fn (FN, STMT, OPTAB, 2)
4100 #define expand_ternary_optab_fn(FN, STMT, OPTAB) \
4101 expand_direct_optab_fn (FN, STMT, OPTAB, 3)
4103 #define expand_cond_unary_optab_fn(FN, STMT, OPTAB) \
4104 expand_direct_optab_fn (FN, STMT, OPTAB, 3)
4106 #define expand_cond_binary_optab_fn(FN, STMT, OPTAB) \
4107 expand_direct_optab_fn (FN, STMT, OPTAB, 4)
4109 #define expand_cond_ternary_optab_fn(FN, STMT, OPTAB) \
4110 expand_direct_optab_fn (FN, STMT, OPTAB, 5)
4112 #define expand_cond_len_unary_optab_fn(FN, STMT, OPTAB) \
4113 expand_direct_optab_fn (FN, STMT, OPTAB, 5)
4115 #define expand_cond_len_binary_optab_fn(FN, STMT, OPTAB) \
4116 expand_direct_optab_fn (FN, STMT, OPTAB, 6)
4118 #define expand_cond_len_ternary_optab_fn(FN, STMT, OPTAB) \
4119 expand_direct_optab_fn (FN, STMT, OPTAB, 7)
4121 #define expand_fold_extract_optab_fn(FN, STMT, OPTAB) \
4122 expand_direct_optab_fn (FN, STMT, OPTAB, 3)
4124 #define expand_fold_len_extract_optab_fn(FN, STMT, OPTAB) \
4125 expand_direct_optab_fn (FN, STMT, OPTAB, 5)
4127 #define expand_fold_left_optab_fn(FN, STMT, OPTAB) \
4128 expand_direct_optab_fn (FN, STMT, OPTAB, 2)
4130 #define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \
4131 expand_direct_optab_fn (FN, STMT, OPTAB, 3)
4133 #define expand_mask_len_fold_left_optab_fn(FN, STMT, OPTAB) \
4134 expand_direct_optab_fn (FN, STMT, OPTAB, 5)
4136 #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
4137 expand_direct_optab_fn (FN, STMT, OPTAB, 4)
4139 /* Expanders for optabs that can use expand_convert_optab_fn. */
4141 #define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \
4142 expand_convert_optab_fn (FN, STMT, OPTAB, 1)
4144 #define expand_vec_extract_optab_fn(FN, STMT, OPTAB) \
4145 expand_convert_optab_fn (FN, STMT, OPTAB, 2)
4147 /* RETURN_TYPE and ARGS are a return type and argument list that are
4148 in principle compatible with FN (which satisfies direct_internal_fn_p).
4149 Return the types that should be used to determine whether the
4150 target supports FN. */
4152 tree_pair
4153 direct_internal_fn_types (internal_fn fn, tree return_type, tree *args)
4155 const direct_internal_fn_info &info = direct_internal_fn (fn);
4156 tree type0 = (info.type0 < 0 ? return_type : TREE_TYPE (args[info.type0]));
4157 tree type1 = (info.type1 < 0 ? return_type : TREE_TYPE (args[info.type1]));
4158 return tree_pair (type0, type1);
4161 /* CALL is a call whose return type and arguments are in principle
4162 compatible with FN (which satisfies direct_internal_fn_p). Return the
4163 types that should be used to determine whether the target supports FN. */
4165 tree_pair
4166 direct_internal_fn_types (internal_fn fn, gcall *call)
4168 const direct_internal_fn_info &info = direct_internal_fn (fn);
4169 tree op0 = (info.type0 < 0
4170 ? gimple_call_lhs (call)
4171 : gimple_call_arg (call, info.type0));
4172 tree op1 = (info.type1 < 0
4173 ? gimple_call_lhs (call)
4174 : gimple_call_arg (call, info.type1));
4175 return tree_pair (TREE_TYPE (op0), TREE_TYPE (op1));
4178 /* Return true if OPTAB is supported for TYPES (whose modes should be
4179 the same) when the optimization type is OPT_TYPE. Used for simple
4180 direct optabs. */
4182 static bool
4183 direct_optab_supported_p (direct_optab optab, tree_pair types,
4184 optimization_type opt_type)
4186 machine_mode mode = TYPE_MODE (types.first);
4187 gcc_checking_assert (mode == TYPE_MODE (types.second));
4188 return direct_optab_handler (optab, mode, opt_type) != CODE_FOR_nothing;
4191 /* Return true if OPTAB is supported for TYPES, where the first type
4192 is the destination and the second type is the source. Used for
4193 convert optabs. */
4195 static bool
4196 convert_optab_supported_p (convert_optab optab, tree_pair types,
4197 optimization_type opt_type)
4199 return (convert_optab_handler (optab, TYPE_MODE (types.first),
4200 TYPE_MODE (types.second), opt_type)
4201 != CODE_FOR_nothing);
4204 /* Return true if load/store lanes optab OPTAB is supported for
4205 array type TYPES.first when the optimization type is OPT_TYPE. */
4207 static bool
4208 multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
4209 optimization_type opt_type)
4211 gcc_assert (TREE_CODE (types.first) == ARRAY_TYPE);
4212 machine_mode imode = TYPE_MODE (types.first);
4213 machine_mode vmode = TYPE_MODE (TREE_TYPE (types.first));
4214 return (convert_optab_handler (optab, imode, vmode, opt_type)
4215 != CODE_FOR_nothing);
4218 #define direct_unary_optab_supported_p direct_optab_supported_p
4219 #define direct_unary_convert_optab_supported_p convert_optab_supported_p
4220 #define direct_binary_optab_supported_p direct_optab_supported_p
4221 #define direct_ternary_optab_supported_p direct_optab_supported_p
4222 #define direct_cond_unary_optab_supported_p direct_optab_supported_p
4223 #define direct_cond_binary_optab_supported_p direct_optab_supported_p
4224 #define direct_cond_ternary_optab_supported_p direct_optab_supported_p
4225 #define direct_cond_len_unary_optab_supported_p direct_optab_supported_p
4226 #define direct_cond_len_binary_optab_supported_p direct_optab_supported_p
4227 #define direct_cond_len_ternary_optab_supported_p direct_optab_supported_p
4228 #define direct_crc_optab_supported_p convert_optab_supported_p
4229 #define direct_mask_load_optab_supported_p convert_optab_supported_p
4230 #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
4231 #define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
4232 #define direct_gather_load_optab_supported_p convert_optab_supported_p
4233 #define direct_strided_load_optab_supported_p direct_optab_supported_p
4234 #define direct_len_load_optab_supported_p direct_optab_supported_p
4235 #define direct_mask_len_load_optab_supported_p convert_optab_supported_p
4236 #define direct_mask_store_optab_supported_p convert_optab_supported_p
4237 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
4238 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
4239 #define direct_vec_cond_mask_optab_supported_p convert_optab_supported_p
4240 #define direct_vec_cond_optab_supported_p convert_optab_supported_p
4241 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
4242 #define direct_strided_store_optab_supported_p direct_optab_supported_p
4243 #define direct_len_store_optab_supported_p direct_optab_supported_p
4244 #define direct_mask_len_store_optab_supported_p convert_optab_supported_p
4245 #define direct_while_optab_supported_p convert_optab_supported_p
4246 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
4247 #define direct_fold_len_extract_optab_supported_p direct_optab_supported_p
4248 #define direct_fold_left_optab_supported_p direct_optab_supported_p
4249 #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
4250 #define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p
4251 #define direct_check_ptrs_optab_supported_p direct_optab_supported_p
4252 #define direct_vec_set_optab_supported_p direct_optab_supported_p
4253 #define direct_vec_extract_optab_supported_p convert_optab_supported_p
4255 /* Return the optab used by internal function FN. */
4257 optab
4258 direct_internal_fn_optab (internal_fn fn, tree_pair types)
4260 switch (fn)
4262 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
4263 case IFN_##CODE: break;
4264 #define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) \
4265 case IFN_##CODE: return OPTAB##_optab;
4266 #define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
4267 UNSIGNED_OPTAB, TYPE) \
4268 case IFN_##CODE: return (TYPE_UNSIGNED (types.SELECTOR) \
4269 ? UNSIGNED_OPTAB ## _optab \
4270 : SIGNED_OPTAB ## _optab);
4271 #include "internal-fn.def"
4273 case IFN_LAST:
4274 break;
4276 gcc_unreachable ();
4279 /* Return the optab used by internal function FN. */
4281 static optab
4282 direct_internal_fn_optab (internal_fn fn)
4284 switch (fn)
4286 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
4287 case IFN_##CODE: break;
4288 #define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) \
4289 case IFN_##CODE: return OPTAB##_optab;
4290 #include "internal-fn.def"
4292 case IFN_LAST:
4293 break;
4295 gcc_unreachable ();
4298 /* Return true if TYPE's mode has the same format as TYPE, and if there is
4299 a 1:1 correspondence between the values that the mode can store and the
4300 values that the type can store. */
4302 static bool
4303 type_strictly_matches_mode_p (const_tree type)
4305 /* The masked vector operations have both vector data operands and vector
4306 boolean operands. The vector data operands are expected to have a vector
4307 mode, but the vector boolean operands can be an integer mode rather than
4308 a vector mode, depending on how TARGET_VECTORIZE_GET_MASK_MODE is
4309 defined. PR116103. */
4310 if (VECTOR_BOOLEAN_TYPE_P (type)
4311 && SCALAR_INT_MODE_P (TYPE_MODE (type))
4312 && TYPE_PRECISION (TREE_TYPE (type)) == 1)
4313 return true;
4315 if (VECTOR_TYPE_P (type))
4316 return VECTOR_MODE_P (TYPE_MODE (type));
4318 if (INTEGRAL_TYPE_P (type))
4319 return type_has_mode_precision_p (type);
4321 if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type))
4322 return true;
4324 return false;
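/* For example, an integral type of precision 3 typically still has QImode,
   so it fails the type_has_mode_precision_p test above (the mode can hold
   values the type cannot) and is not treated as strictly matching its mode,
   whereas a 32-bit int with SImode is.  */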
4327 /* Returns true if both types of TYPE_PAIR strictly match their modes,
4328 else returns false. */
4330 static bool
4331 type_pair_strictly_matches_mode_p (tree_pair type_pair)
4333 return type_strictly_matches_mode_p (type_pair.first)
4334 && type_strictly_matches_mode_p (type_pair.second);
4337 /* Return true if FN is supported for the types in TYPES when the
4338 optimization type is OPT_TYPE. The types are those associated with
4339 the "type0" and "type1" fields of FN's direct_internal_fn_info
4340 structure. */
4342 bool
4343 direct_internal_fn_supported_p (internal_fn fn, tree_pair types,
4344 optimization_type opt_type)
4346 if (!type_pair_strictly_matches_mode_p (types))
4347 return false;
4349 switch (fn)
4351 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
4352 case IFN_##CODE: break;
4353 #define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) \
4354 case IFN_##CODE: \
4355 return direct_##TYPE##_optab_supported_p (OPTAB##_optab, types, \
4356 opt_type);
4357 #define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
4358 UNSIGNED_OPTAB, TYPE) \
4359 case IFN_##CODE: \
4361 optab which_optab = (TYPE_UNSIGNED (types.SELECTOR) \
4362 ? UNSIGNED_OPTAB ## _optab \
4363 : SIGNED_OPTAB ## _optab); \
4364 return direct_##TYPE##_optab_supported_p (which_optab, types, \
4365 opt_type); \
4367 #include "internal-fn.def"
4369 case IFN_LAST:
4370 break;
4372 gcc_unreachable ();
4375 /* Return true if FN is supported for type TYPE when the optimization
4376 type is OPT_TYPE. The caller knows that the "type0" and "type1"
4377 fields of FN's direct_internal_fn_info structure are the same. */
4379 bool
4380 direct_internal_fn_supported_p (internal_fn fn, tree type,
4381 optimization_type opt_type)
4383 const direct_internal_fn_info &info = direct_internal_fn (fn);
4384 gcc_checking_assert (info.type0 == info.type1);
4385 return direct_internal_fn_supported_p (fn, tree_pair (type, type), opt_type);
4388 /* Return true if the STMT is supported when the optimization type is OPT_TYPE,
4389 given that STMT is a call to a direct internal function. */
4391 bool
4392 direct_internal_fn_supported_p (gcall *stmt, optimization_type opt_type)
4394 internal_fn fn = gimple_call_internal_fn (stmt);
4395 tree_pair types = direct_internal_fn_types (fn, stmt);
4396 return direct_internal_fn_supported_p (fn, types, opt_type);
4399 /* Return true if FN is a commutative binary operation. */
4401 bool
4402 commutative_binary_fn_p (internal_fn fn)
4404 switch (fn)
4406 case IFN_AVG_FLOOR:
4407 case IFN_AVG_CEIL:
4408 case IFN_MULH:
4409 case IFN_MULHS:
4410 case IFN_MULHRS:
4411 case IFN_FMIN:
4412 case IFN_FMAX:
4413 case IFN_COMPLEX_MUL:
4414 case IFN_UBSAN_CHECK_ADD:
4415 case IFN_UBSAN_CHECK_MUL:
4416 case IFN_ADD_OVERFLOW:
4417 case IFN_MUL_OVERFLOW:
4418 case IFN_SAT_ADD:
4419 case IFN_VEC_WIDEN_PLUS:
4420 case IFN_VEC_WIDEN_PLUS_LO:
4421 case IFN_VEC_WIDEN_PLUS_HI:
4422 case IFN_VEC_WIDEN_PLUS_EVEN:
4423 case IFN_VEC_WIDEN_PLUS_ODD:
4424 return true;
4426 default:
4427 return false;
4431 /* Return true if FN is a ternary operation and if its first two arguments
4432 are commutative. */
4434 bool
4435 commutative_ternary_fn_p (internal_fn fn)
4437 switch (fn)
4439 case IFN_FMA:
4440 case IFN_FMS:
4441 case IFN_FNMA:
4442 case IFN_FNMS:
4443 case IFN_UADDC:
4444 return true;
4446 default:
4447 return false;
4451 /* Return true if FN is an associative binary operation. */
4453 bool
4454 associative_binary_fn_p (internal_fn fn)
4456 switch (fn)
4458 case IFN_FMIN:
4459 case IFN_FMAX:
4460 return true;
4462 default:
4463 return false;
4467 /* If FN is commutative in two consecutive arguments, return the
4468 index of the first, otherwise return -1. */
4471 first_commutative_argument (internal_fn fn)
4473 switch (fn)
4475 case IFN_COND_ADD:
4476 case IFN_COND_MUL:
4477 case IFN_COND_MIN:
4478 case IFN_COND_MAX:
4479 case IFN_COND_FMIN:
4480 case IFN_COND_FMAX:
4481 case IFN_COND_AND:
4482 case IFN_COND_IOR:
4483 case IFN_COND_XOR:
4484 case IFN_COND_FMA:
4485 case IFN_COND_FMS:
4486 case IFN_COND_FNMA:
4487 case IFN_COND_FNMS:
4488 case IFN_COND_LEN_ADD:
4489 case IFN_COND_LEN_MUL:
4490 case IFN_COND_LEN_MIN:
4491 case IFN_COND_LEN_MAX:
4492 case IFN_COND_LEN_FMIN:
4493 case IFN_COND_LEN_FMAX:
4494 case IFN_COND_LEN_AND:
4495 case IFN_COND_LEN_IOR:
4496 case IFN_COND_LEN_XOR:
4497 case IFN_COND_LEN_FMA:
4498 case IFN_COND_LEN_FMS:
4499 case IFN_COND_LEN_FNMA:
4500 case IFN_COND_LEN_FNMS:
4501 return 1;
4503 default:
4504 if (commutative_binary_fn_p (fn)
4505 || commutative_ternary_fn_p (fn))
4506 return 0;
4507 return -1;
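/* E.g. first_commutative_argument (IFN_COND_ADD) is 1, since argument 0 is
   the mask and arguments 1 and 2 may be swapped, while a plain commutative
   binary function such as IFN_ADD_OVERFLOW yields 0.  */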
4511 /* Return true if this CODE describes an internal_fn that returns a vector with
4512 elements twice as wide as those of the input vectors. */
4514 bool
4515 widening_fn_p (code_helper code)
4517 if (!code.is_fn_code ())
4518 return false;
4520 if (!internal_fn_p ((combined_fn) code))
4521 return false;
4523 internal_fn fn = as_internal_fn ((combined_fn) code);
4524 switch (fn)
4526 #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T) \
4527 case IFN_##NAME: \
4528 case IFN_##NAME##_HI: \
4529 case IFN_##NAME##_LO: \
4530 case IFN_##NAME##_EVEN: \
4531 case IFN_##NAME##_ODD: \
4532 return true;
4533 #include "internal-fn.def"
4535 default:
4536 return false;
4540 /* Return true if IFN_SET_EDOM is supported. */
4542 bool
4543 set_edom_supported_p (void)
4545 #ifdef TARGET_EDOM
4546 return true;
4547 #else
4548 return false;
4549 #endif
4552 #define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) \
4553 static void \
4554 expand_##CODE (internal_fn fn, gcall *stmt) \
4556 expand_##TYPE##_optab_fn (fn, stmt, OPTAB##_optab); \
4558 #define DEF_INTERNAL_INT_EXT_FN(CODE, FLAGS, OPTAB, TYPE)
4559 #define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
4560 UNSIGNED_OPTAB, TYPE) \
4561 static void \
4562 expand_##CODE (internal_fn fn, gcall *stmt) \
4564 tree_pair types = direct_internal_fn_types (fn, stmt); \
4565 optab which_optab = direct_internal_fn_optab (fn, types); \
4566 expand_##TYPE##_optab_fn (fn, stmt, which_optab); \
4568 #include "internal-fn.def"
4570 /* Routines to expand each internal function, indexed by function number.
4571 Each routine has the prototype:
4573 expand_<NAME> (internal_fn fn, gcall *stmt)
4575 where FN is the function being expanded and STMT is the call statement. */
4576 static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
4578 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) expand_##CODE,
4579 #include "internal-fn.def"
4583 /* Invoke T(CODE, SUFFIX) for each conditional function IFN_COND_##SUFFIX
4584 that maps to a tree code CODE. There is also an IFN_COND_LEN_##SUFFIX
4585 for each such IFN_COND_##SUFFIX. */
4586 #define FOR_EACH_CODE_MAPPING(T) \
4587 T (PLUS_EXPR, ADD) \
4588 T (MINUS_EXPR, SUB) \
4589 T (MULT_EXPR, MUL) \
4590 T (TRUNC_DIV_EXPR, DIV) \
4591 T (TRUNC_MOD_EXPR, MOD) \
4592 T (RDIV_EXPR, RDIV) \
4593 T (MIN_EXPR, MIN) \
4594 T (MAX_EXPR, MAX) \
4595 T (BIT_AND_EXPR, AND) \
4596 T (BIT_IOR_EXPR, IOR) \
4597 T (BIT_XOR_EXPR, XOR) \
4598 T (LSHIFT_EXPR, SHL) \
4599 T (RSHIFT_EXPR, SHR) \
4600 T (NEGATE_EXPR, NEG)
4602 /* Return a function that only performs CODE when a certain condition is met
4603 and that uses a given fallback value otherwise. For example, if CODE is
4604 a binary operation associated with conditional function FN:
4606 LHS = FN (COND, A, B, ELSE)
4608 is equivalent to the C expression:
4610 LHS = COND ? A CODE B : ELSE;
4612 operating elementwise if the operands are vectors.
4614 Return IFN_LAST if no such function exists. */
4616 internal_fn
4617 get_conditional_internal_fn (tree_code code)
4619 switch (code)
4621 #define CASE(CODE, IFN) case CODE: return IFN_COND_##IFN;
4622 FOR_EACH_CODE_MAPPING(CASE)
4623 #undef CASE
4624 default:
4625 return IFN_LAST;
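/* E.g. get_conditional_internal_fn (PLUS_EXPR) is IFN_COND_ADD, allowing a
   masked addition to be rewritten as

     lhs_1 = .COND_ADD (mask_2, a_3, b_4, else_5);  */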
4629 /* If IFN implements the conditional form of a tree code, return that
4630 tree code, otherwise return ERROR_MARK. */
4632 tree_code
4633 conditional_internal_fn_code (internal_fn ifn)
4635 switch (ifn)
4637 #define CASE(CODE, IFN) \
4638 case IFN_COND_##IFN: \
4639 case IFN_COND_LEN_##IFN: \
4640 return CODE;
4641 FOR_EACH_CODE_MAPPING (CASE)
4642 #undef CASE
4643 default:
4644 return ERROR_MARK;
4648 /* Like get_conditional_internal_fn, but return a function that
4649 additionally restricts the operation to the leading elements
4650 of a vector. The number of elements to process is given by a length
4651 and bias pair, as for IFN_LEN_LOAD. The values of the remaining
4652 elements are taken from the fallback ("else") argument.
4654 For example, if CODE is a binary operation associated with FN:
4656 LHS = FN (COND, A, B, ELSE, LEN, BIAS)
4658 is equivalent to the C code:
4660 for (int i = 0; i < NUNITS; i++)
4662 if (i < LEN + BIAS && COND[i])
4663 LHS[i] = A[i] CODE B[i];
4664 else
4665 LHS[i] = ELSE[i];
4669 internal_fn
4670 get_conditional_len_internal_fn (tree_code code)
4672 switch (code)
4674 #define CASE(CODE, IFN) case CODE: return IFN_COND_LEN_##IFN;
4675 FOR_EACH_CODE_MAPPING(CASE)
4676 #undef CASE
4677 default:
4678 return IFN_LAST;
4682 /* Invoke T(IFN) for each internal function IFN that also has an
4683 IFN_COND_* form. */
4684 #define FOR_EACH_COND_FN_PAIR(T) \
4685 T (FMAX) \
4686 T (FMIN) \
4687 T (FMA) \
4688 T (FMS) \
4689 T (FNMA) \
4690 T (FNMS)
4692 /* Return a function that only performs internal function FN when a
4693 certain condition is met and that uses a given fallback value otherwise.
4694 In other words, the returned function FN' is such that:
4696 LHS = FN' (COND, A1, ... An, ELSE)
4698 is equivalent to the C expression:
4700 LHS = COND ? FN (A1, ..., An) : ELSE;
4702 operating elementwise if the operands are vectors.
4704 Return IFN_LAST if no such function exists. */
4706 internal_fn
4707 get_conditional_internal_fn (internal_fn fn)
4709 switch (fn)
4711 #define CASE(NAME) case IFN_##NAME: return IFN_COND_##NAME;
4712 FOR_EACH_COND_FN_PAIR(CASE)
4713 #undef CASE
4714 default:
4715 return IFN_LAST;
4719 /* If there exists an internal function like IFN that operates on vectors,
4720 but with additional length and bias parameters, return the internal_fn
4721 for that function, otherwise return IFN_LAST. */
4722 internal_fn
4723 get_len_internal_fn (internal_fn fn)
4725 switch (fn)
4727 #define DEF_INTERNAL_COND_FN(NAME, ...) \
4728 case IFN_COND_##NAME: \
4729 return IFN_COND_LEN_##NAME;
4730 #define DEF_INTERNAL_SIGNED_COND_FN(NAME, ...) \
4731 case IFN_COND_##NAME: \
4732 return IFN_COND_LEN_##NAME;
4733 #include "internal-fn.def"
4734 default:
4735 break;
4738 switch (fn)
4740 case IFN_MASK_LOAD:
4741 return IFN_MASK_LEN_LOAD;
4742 case IFN_MASK_LOAD_LANES:
4743 return IFN_MASK_LEN_LOAD_LANES;
4744 case IFN_MASK_GATHER_LOAD:
4745 return IFN_MASK_LEN_GATHER_LOAD;
4746 default:
4747 return IFN_LAST;
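/* E.g. get_len_internal_fn (IFN_COND_ADD) is IFN_COND_LEN_ADD and
   get_len_internal_fn (IFN_MASK_LOAD) is IFN_MASK_LEN_LOAD; functions with
   no length-controlled counterpart map to IFN_LAST.  */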
4751 /* If IFN implements the conditional form of an unconditional internal
4752 function, return that unconditional function, otherwise return IFN_LAST. */
4754 internal_fn
4755 get_unconditional_internal_fn (internal_fn ifn)
4757 switch (ifn)
4759 #define CASE(NAME) \
4760 case IFN_COND_##NAME: \
4761 case IFN_COND_LEN_##NAME: \
4762 return IFN_##NAME;
4763 FOR_EACH_COND_FN_PAIR (CASE)
4764 #undef CASE
4765 default:
4766 return IFN_LAST;
4770 /* Return true if STMT can be interpreted as a conditional tree code
4771 operation of the form:
4773 LHS = COND ? OP (RHS1, ...) : ELSE;
4775 operating elementwise if the operands are vectors. This includes
4776 the case of an all-true COND, so that the operation always happens.
4778 Alternatively, when the operands are vectors, STMT can be interpreted as
4779 the operation predicated by both a conditional mask and a loop control
4780 length, equivalent to the C code:
4782 for (int i = 0; i < NUNITS; i++)
4784 if (i < LEN + BIAS && COND[i])
4785 LHS[i] = A[i] CODE B[i];
4786 else
4787 LHS[i] = ELSE[i];
4790 When returning true, set:
4792 - *COND_OUT to the condition COND, or to NULL_TREE if the condition
4793 is known to be all-true
4794 - *CODE_OUT to the tree code
4795 - OPS[I] to operand I of *CODE_OUT
4796 - *ELSE_OUT to the fallback value ELSE, or to NULL_TREE if the
4797 condition is known to be all true.
4798 - *LEN to the len argument if it is a COND_LEN_* operation, or to NULL_TREE.
4799 - *BIAS to the bias argument if it is a COND_LEN_* operation, or to NULL_TREE. */
4801 bool
4802 can_interpret_as_conditional_op_p (gimple *stmt, tree *cond_out,
4803 tree_code *code_out,
4804 tree (&ops)[3], tree *else_out,
4805 tree *len, tree *bias)
4807 *len = NULL_TREE;
4808 *bias = NULL_TREE;
4809 if (gassign *assign = dyn_cast <gassign *> (stmt))
4811 *cond_out = NULL_TREE;
4812 *code_out = gimple_assign_rhs_code (assign);
4813 ops[0] = gimple_assign_rhs1 (assign);
4814 ops[1] = gimple_assign_rhs2 (assign);
4815 ops[2] = gimple_assign_rhs3 (assign);
4816 *else_out = NULL_TREE;
4817 return true;
4819 if (gcall *call = dyn_cast <gcall *> (stmt))
4820 if (gimple_call_internal_p (call))
4822 internal_fn ifn = gimple_call_internal_fn (call);
4823 tree_code code = conditional_internal_fn_code (ifn);
4824 int len_index = internal_fn_len_index (ifn);
4825 int cond_nargs = len_index >= 0 ? 4 : 2;
4826 if (code != ERROR_MARK)
4828 *cond_out = gimple_call_arg (call, 0);
4829 *code_out = code;
4830 unsigned int nops = gimple_call_num_args (call) - cond_nargs;
4831 for (unsigned int i = 0; i < 3; ++i)
4832 ops[i] = i < nops ? gimple_call_arg (call, i + 1) : NULL_TREE;
4833 *else_out = gimple_call_arg (call, nops + 1);
4834 if (len_index < 0)
4836 if (integer_truep (*cond_out))
4838 *cond_out = NULL_TREE;
4839 *else_out = NULL_TREE;
4842 else
4844 *len = gimple_call_arg (call, len_index);
4845 *bias = gimple_call_arg (call, len_index + 1);
4847 return true;
4850 return false;
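/* For instance, given the call

     lhs_1 = .COND_LEN_ADD (mask_2, a_3, b_4, else_5, len_6, bias_7);

   this returns true with *cond_out = mask_2, *code_out = PLUS_EXPR,
   ops = { a_3, b_4, NULL_TREE }, *else_out = else_5, *len = len_6 and
   *bias = bias_7; for a plain gassign the condition and else value come back
   as NULL_TREE.  */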
4853 /* Return true if IFN is some form of load from memory. */
4855 bool
4856 internal_load_fn_p (internal_fn fn)
4858 switch (fn)
4860 case IFN_MASK_LOAD:
4861 case IFN_LOAD_LANES:
4862 case IFN_MASK_LOAD_LANES:
4863 case IFN_MASK_LEN_LOAD_LANES:
4864 case IFN_GATHER_LOAD:
4865 case IFN_MASK_GATHER_LOAD:
4866 case IFN_MASK_LEN_GATHER_LOAD:
4867 case IFN_LEN_LOAD:
4868 case IFN_MASK_LEN_LOAD:
4869 return true;
4871 default:
4872 return false;
4876 /* Return true if IFN is some form of store to memory. */
4878 bool
4879 internal_store_fn_p (internal_fn fn)
4881 switch (fn)
4883 case IFN_MASK_STORE:
4884 case IFN_STORE_LANES:
4885 case IFN_MASK_STORE_LANES:
4886 case IFN_MASK_LEN_STORE_LANES:
4887 case IFN_SCATTER_STORE:
4888 case IFN_MASK_SCATTER_STORE:
4889 case IFN_MASK_LEN_SCATTER_STORE:
4890 case IFN_LEN_STORE:
4891 case IFN_MASK_LEN_STORE:
4892 return true;
4894 default:
4895 return false;
4899 /* Return true if IFN is some form of gather load or scatter store. */
4901 bool
4902 internal_gather_scatter_fn_p (internal_fn fn)
4904 switch (fn)
4906 case IFN_GATHER_LOAD:
4907 case IFN_MASK_GATHER_LOAD:
4908 case IFN_MASK_LEN_GATHER_LOAD:
4909 case IFN_SCATTER_STORE:
4910 case IFN_MASK_SCATTER_STORE:
4911 case IFN_MASK_LEN_SCATTER_STORE:
4912 return true;
4914 default:
4915 return false;
4919 /* If FN takes a vector len argument, return the index of that argument,
4920 otherwise return -1. */
4923 internal_fn_len_index (internal_fn fn)
4925 switch (fn)
4927 case IFN_LEN_LOAD:
4928 case IFN_LEN_STORE:
4929 return 2;
4931 case IFN_MASK_LEN_SCATTER_STORE:
4932 case IFN_MASK_LEN_STRIDED_LOAD:
4933 return 5;
4935 case IFN_MASK_LEN_GATHER_LOAD:
4936 return 6;
4938 case IFN_COND_LEN_FMA:
4939 case IFN_COND_LEN_FMS:
4940 case IFN_COND_LEN_FNMA:
4941 case IFN_COND_LEN_FNMS:
4942 return 5;
4944 case IFN_COND_LEN_ADD:
4945 case IFN_COND_LEN_SUB:
4946 case IFN_COND_LEN_MUL:
4947 case IFN_COND_LEN_DIV:
4948 case IFN_COND_LEN_MOD:
4949 case IFN_COND_LEN_RDIV:
4950 case IFN_COND_LEN_MIN:
4951 case IFN_COND_LEN_MAX:
4952 case IFN_COND_LEN_FMIN:
4953 case IFN_COND_LEN_FMAX:
4954 case IFN_COND_LEN_AND:
4955 case IFN_COND_LEN_IOR:
4956 case IFN_COND_LEN_XOR:
4957 case IFN_COND_LEN_SHL:
4958 case IFN_COND_LEN_SHR:
4959 case IFN_MASK_LEN_STRIDED_STORE:
4960 return 4;
4962 case IFN_COND_LEN_NEG:
4963 case IFN_MASK_LEN_STORE:
4964 case IFN_MASK_LEN_STORE_LANES:
4965 case IFN_VCOND_MASK_LEN:
4966 return 3;
4968 case IFN_MASK_LEN_LOAD:
4969 case IFN_MASK_LEN_LOAD_LANES:
4970 return 4;
4972 default:
4973 return -1;
4977 /* If FN is an IFN_COND_* or IFN_COND_LEN_* function, return the index of the
4978 argument that is used when the condition is false. Return -1 otherwise. */
4981 internal_fn_else_index (internal_fn fn)
4983 switch (fn)
4985 case IFN_COND_NEG:
4986 case IFN_COND_NOT:
4987 case IFN_COND_LEN_NEG:
4988 case IFN_COND_LEN_NOT:
4989 return 2;
4991 case IFN_COND_ADD:
4992 case IFN_COND_SUB:
4993 case IFN_COND_MUL:
4994 case IFN_COND_DIV:
4995 case IFN_COND_MOD:
4996 case IFN_COND_MIN:
4997 case IFN_COND_MAX:
4998 case IFN_COND_FMIN:
4999 case IFN_COND_FMAX:
5000 case IFN_COND_AND:
5001 case IFN_COND_IOR:
5002 case IFN_COND_XOR:
5003 case IFN_COND_SHL:
5004 case IFN_COND_SHR:
5005 case IFN_COND_LEN_ADD:
5006 case IFN_COND_LEN_SUB:
5007 case IFN_COND_LEN_MUL:
5008 case IFN_COND_LEN_DIV:
5009 case IFN_COND_LEN_MOD:
5010 case IFN_COND_LEN_MIN:
5011 case IFN_COND_LEN_MAX:
5012 case IFN_COND_LEN_FMIN:
5013 case IFN_COND_LEN_FMAX:
5014 case IFN_COND_LEN_AND:
5015 case IFN_COND_LEN_IOR:
5016 case IFN_COND_LEN_XOR:
5017 case IFN_COND_LEN_SHL:
5018 case IFN_COND_LEN_SHR:
5019 return 3;
5021 case IFN_MASK_LOAD:
5022 case IFN_MASK_LEN_LOAD:
5023 case IFN_MASK_LOAD_LANES:
5024 case IFN_MASK_LEN_LOAD_LANES:
5025 return 3;
5027 case IFN_COND_FMA:
5028 case IFN_COND_FMS:
5029 case IFN_COND_FNMA:
5030 case IFN_COND_FNMS:
5031 case IFN_COND_LEN_FMA:
5032 case IFN_COND_LEN_FMS:
5033 case IFN_COND_LEN_FNMA:
5034 case IFN_COND_LEN_FNMS:
5035 case IFN_MASK_LEN_STRIDED_LOAD:
5036 return 4;
5038 case IFN_MASK_GATHER_LOAD:
5039 case IFN_MASK_LEN_GATHER_LOAD:
5040 return 5;
5042 default:
5043 return -1;
5046 return -1;
5049 /* If FN takes a vector mask argument, return the index of that argument,
5050 otherwise return -1. */
5053 internal_fn_mask_index (internal_fn fn)
5055 switch (fn)
5057 case IFN_MASK_LOAD:
5058 case IFN_MASK_LOAD_LANES:
5059 case IFN_MASK_LEN_LOAD_LANES:
5060 case IFN_MASK_STORE:
5061 case IFN_MASK_STORE_LANES:
5062 case IFN_MASK_LEN_STORE_LANES:
5063 case IFN_MASK_LEN_LOAD:
5064 case IFN_MASK_LEN_STORE:
5065 return 2;
5067 case IFN_MASK_LEN_STRIDED_LOAD:
5068 case IFN_MASK_LEN_STRIDED_STORE:
5069 return 3;
5071 case IFN_MASK_GATHER_LOAD:
5072 case IFN_MASK_SCATTER_STORE:
5073 case IFN_MASK_LEN_GATHER_LOAD:
5074 case IFN_MASK_LEN_SCATTER_STORE:
5075 return 4;
5077 case IFN_VCOND_MASK:
5078 case IFN_VCOND_MASK_LEN:
5079 return 0;
5081 default:
5082 return (conditional_internal_fn_code (fn) != ERROR_MARK
5083 || get_unconditional_internal_fn (fn) != IFN_LAST ? 0 : -1);
5087 /* If FN takes a value that should be stored to memory, return the index
5088 of that argument, otherwise return -1. */
5091 internal_fn_stored_value_index (internal_fn fn)
5093 switch (fn)
5095 case IFN_MASK_LEN_STRIDED_STORE:
5096 return 2;
5098 case IFN_MASK_STORE:
5099 case IFN_MASK_STORE_LANES:
5100 case IFN_SCATTER_STORE:
5101 case IFN_MASK_SCATTER_STORE:
5102 case IFN_MASK_LEN_SCATTER_STORE:
5103 return 3;
5105 case IFN_LEN_STORE:
5106 return 4;
5108 case IFN_MASK_LEN_STORE:
5109 case IFN_MASK_LEN_STORE_LANES:
5110 return 5;
5112 default:
5113 return -1;
5118 /* Store all supported else values for the optab referred to by ICODE
5119 in ELSE_VALS. The index of the else operand must be specified in
5120 ELSE_INDEX. */
5122 void
5123 get_supported_else_vals (enum insn_code icode, unsigned else_index,
5124 vec<int> &else_vals)
5126 const struct insn_data_d *data = &insn_data[icode];
5127 if ((char)else_index >= data->n_operands)
5128 return;
5130 machine_mode else_mode = data->operand[else_index].mode;
5132 else_vals.truncate (0);
5134 /* For now we only support else values of 0, -1, and "undefined". */
5135 if (insn_operand_matches (icode, else_index, CONST0_RTX (else_mode)))
5136 else_vals.safe_push (MASK_LOAD_ELSE_ZERO);
5138 if (insn_operand_matches (icode, else_index, gen_rtx_SCRATCH (else_mode)))
5139 else_vals.safe_push (MASK_LOAD_ELSE_UNDEFINED);
5141 if (GET_MODE_CLASS (else_mode) == MODE_VECTOR_INT
5142 && insn_operand_matches (icode, else_index, CONSTM1_RTX (else_mode)))
5143 else_vals.safe_push (MASK_LOAD_ELSE_M1);
5146 /* Return true if the else value ELSE_VAL (one of MASK_LOAD_ELSE_ZERO,
5147 MASK_LOAD_ELSE_M1, and MASK_LOAD_ELSE_UNDEFINED) is valid for the optab
5148 referred to by ICODE. The index of the else operand must be specified
5149 in ELSE_INDEX. */
5151 bool
5152 supported_else_val_p (enum insn_code icode, unsigned else_index, int else_val)
5154 if (else_val != MASK_LOAD_ELSE_ZERO && else_val != MASK_LOAD_ELSE_M1
5155 && else_val != MASK_LOAD_ELSE_UNDEFINED)
5156 gcc_unreachable ();
5158 auto_vec<int> else_vals;
5159 get_supported_else_vals (icode, else_index, else_vals);
5160 return else_vals.contains (else_val);
5163 /* Return true if the target supports gather load or scatter store function
5164 IFN. For loads, VECTOR_TYPE is the vector type of the load result,
5165 while for stores it is the vector type of the stored data argument.
5166 MEMORY_ELEMENT_TYPE is the type of the memory elements being loaded
5167 or stored. OFFSET_VECTOR_TYPE is the vector type that holds the
5168 offset from the shared base address of each loaded or stored element.
5169 SCALE is the amount by which these offsets should be multiplied
5170 *after* they have been extended to address width.
5171 If the target supports the gather load, the supported else values
5172 will be added to the vector ELSVALS points to if it is nonnull. */
5174 bool
5175 internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
5176 tree memory_element_type,
5177 tree offset_vector_type, int scale,
5178 vec<int> *elsvals)
5180 if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)),
5181 TYPE_SIZE (memory_element_type)))
5182 return false;
5183 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vector_type),
5184 TYPE_VECTOR_SUBPARTS (offset_vector_type)))
5185 return false;
5186 optab optab = direct_internal_fn_optab (ifn);
5187 insn_code icode = convert_optab_handler (optab, TYPE_MODE (vector_type),
5188 TYPE_MODE (offset_vector_type));
5189 int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
5190 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (offset_vector_type));
5191 bool ok = icode != CODE_FOR_nothing
5192 && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
5193 && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale));
5195 /* For gather the optab's operand indices do not match the IFN's because
5196 the latter does not have the extension operand (operand 3). It is
5197 implicitly added during expansion so we use the IFN's else index + 1. */
5199 if (ok && elsvals)
5200 get_supported_else_vals
5201 (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
5203 return ok;
5206 /* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
5207 for pointers of type TYPE when the accesses have LENGTH bytes and their
5208 common byte alignment is ALIGN. */
5210 bool
5211 internal_check_ptrs_fn_supported_p (internal_fn ifn, tree type,
5212 poly_uint64 length, unsigned int align)
5214 machine_mode mode = TYPE_MODE (type);
5215 optab optab = direct_internal_fn_optab (ifn);
5216 insn_code icode = direct_optab_handler (optab, mode);
5217 if (icode == CODE_FOR_nothing)
5218 return false;
5219 rtx length_rtx = immed_wide_int_const (length, mode);
5220 return (insn_operand_matches (icode, 3, length_rtx)
5221 && insn_operand_matches (icode, 4, GEN_INT (align)));
5224 /* Return the supported bias for IFN which is either IFN_{LEN_,MASK_LEN_}LOAD
5225 or IFN_{LEN_,MASK_LEN_}STORE. For now we only support the biases of 0 and
5226 -1 (in case 0 is not an allowable length for {len_,mask_len_}load or
5227 {len_,mask_len_}store). If none of the biases match what the backend
5228 provides, return VECT_PARTIAL_BIAS_UNSUPPORTED. */
5230 signed char
5231 internal_len_load_store_bias (internal_fn ifn, machine_mode mode)
5233 optab optab = direct_internal_fn_optab (ifn);
5234 insn_code icode = direct_optab_handler (optab, mode);
5235 int bias_no = 3;
5237 if (icode == CODE_FOR_nothing)
5239 machine_mode mask_mode;
5240 if (!targetm.vectorize.get_mask_mode (mode).exists (&mask_mode))
5241 return VECT_PARTIAL_BIAS_UNSUPPORTED;
5242 if (ifn == IFN_LEN_LOAD)
5244 /* Try MASK_LEN_LOAD. */
5245 optab = direct_internal_fn_optab (IFN_MASK_LEN_LOAD);
5247 else
5249 /* Try MASK_LEN_STORE. */
5250 optab = direct_internal_fn_optab (IFN_MASK_LEN_STORE);
5252 icode = convert_optab_handler (optab, mode, mask_mode);
5253 bias_no = 4;
5256 if (icode != CODE_FOR_nothing)
5258 /* For now we only support biases of 0 or -1. Try both of them. */
5259 if (insn_operand_matches (icode, bias_no, GEN_INT (0)))
5260 return 0;
5261 if (insn_operand_matches (icode, bias_no, GEN_INT (-1)))
5262 return -1;
5265 return VECT_PARTIAL_BIAS_UNSUPPORTED;
5268 /* Expand STMT as though it were a call to internal function FN. */
5270 void
5271 expand_internal_call (internal_fn fn, gcall *stmt)
5273 internal_fn_expanders[fn] (fn, stmt);
5276 /* Expand STMT, which is a call to internal function FN. */
5278 void
5279 expand_internal_call (gcall *stmt)
5281 expand_internal_call (gimple_call_internal_fn (stmt), stmt);
5284 /* If TYPE is a vector type, return true if IFN is a direct internal
5285 function that is supported for that type. If TYPE is a scalar type,
5286 return true if IFN is a direct internal function that is supported for
5287 the target's preferred vector version of TYPE. */
5289 bool
5290 vectorized_internal_fn_supported_p (internal_fn ifn, tree type)
5292 if (VECTOR_MODE_P (TYPE_MODE (type)))
5293 return direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED);
5295 scalar_mode smode;
5296 if (VECTOR_TYPE_P (type)
5297 || !is_a <scalar_mode> (TYPE_MODE (type), &smode))
5298 return false;
5300 machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
5301 if (VECTOR_MODE_P (vmode))
5303 tree vectype = build_vector_type_for_mode (type, vmode);
5304 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
5305 return true;
5308 auto_vector_modes vector_modes;
5309 targetm.vectorize.autovectorize_vector_modes (&vector_modes, true);
5310 for (machine_mode base_mode : vector_modes)
5311 if (related_vector_mode (base_mode, smode).exists (&vmode))
5313 tree vectype = build_vector_type_for_mode (type, vmode);
5314 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
5315 return true;
5318 return false;
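/* E.g. vectorized_internal_fn_supported_p (IFN_SQRT, double_type_node) asks
   whether a vector square root is available in the target's preferred SIMD
   mode for double or, failing that, in any of its other autovectorization
   modes.  */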
5321 void
5322 expand_SHUFFLEVECTOR (internal_fn, gcall *)
5324 gcc_unreachable ();
5327 void
5328 expand_PHI (internal_fn, gcall *)
5330 gcc_unreachable ();
5333 void
5334 expand_SPACESHIP (internal_fn, gcall *stmt)
5336 tree lhs = gimple_call_lhs (stmt);
5337 tree rhs1 = gimple_call_arg (stmt, 0);
5338 tree rhs2 = gimple_call_arg (stmt, 1);
5339 tree rhs3 = gimple_call_arg (stmt, 2);
5340 tree type = TREE_TYPE (rhs1);
5342 do_pending_stack_adjust ();
5344 rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
5345 rtx op1 = expand_normal (rhs1);
5346 rtx op2 = expand_normal (rhs2);
5347 rtx op3 = expand_normal (rhs3);
5349 class expand_operand ops[4];
5350 create_call_lhs_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (lhs)));
5351 create_input_operand (&ops[1], op1, TYPE_MODE (type));
5352 create_input_operand (&ops[2], op2, TYPE_MODE (type));
5353 create_input_operand (&ops[3], op3, TYPE_MODE (TREE_TYPE (rhs3)));
5354 insn_code icode = optab_handler (spaceship_optab, TYPE_MODE (type));
5355 expand_insn (icode, 4, ops);
5356 assign_call_lhs (lhs, target, &ops[0]);
5359 void
5360 expand_ASSUME (internal_fn, gcall *)
5364 void
5365 expand_MASK_CALL (internal_fn, gcall *)
5367 /* This IFN should only exist between ifcvt and vect passes. */
5368 gcc_unreachable ();
5371 void
5372 expand_MULBITINT (internal_fn, gcall *stmt)
5374 rtx_mode_t args[6];
5375 for (int i = 0; i < 6; i++)
5376 args[i] = rtx_mode_t (expand_normal (gimple_call_arg (stmt, i)),
5377 (i & 1) ? SImode : ptr_mode);
5378 rtx fun = init_one_libfunc ("__mulbitint3");
5379 emit_library_call_value_1 (0, fun, NULL_RTX, LCT_NORMAL, VOIDmode, 6, args);
5382 void
5383 expand_DIVMODBITINT (internal_fn, gcall *stmt)
5385 rtx_mode_t args[8];
5386 for (int i = 0; i < 8; i++)
5387 args[i] = rtx_mode_t (expand_normal (gimple_call_arg (stmt, i)),
5388 (i & 1) ? SImode : ptr_mode);
5389 rtx fun = init_one_libfunc ("__divmodbitint4");
5390 emit_library_call_value_1 (0, fun, NULL_RTX, LCT_NORMAL, VOIDmode, 8, args);
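/* Both expanders above simply forward to libgcc's _BitInt helpers: three
   pointer/precision argument pairs for __mulbitint3 and four for
   __divmodbitint4, with every limb pointer passed in ptr_mode and every
   precision in SImode, exactly as the loops above set up.  */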
5393 void
5394 expand_FLOATTOBITINT (internal_fn, gcall *stmt)
5396 machine_mode mode = TYPE_MODE (TREE_TYPE (gimple_call_arg (stmt, 2)));
5397 rtx arg0 = expand_normal (gimple_call_arg (stmt, 0));
5398 rtx arg1 = expand_normal (gimple_call_arg (stmt, 1));
5399 rtx arg2 = expand_normal (gimple_call_arg (stmt, 2));
5400 const char *mname = GET_MODE_NAME (mode);
5401 unsigned mname_len = strlen (mname);
5402 int len = 12 + mname_len;
5403 if (DECIMAL_FLOAT_MODE_P (mode))
5404 len += 4;
5405 char *libfunc_name = XALLOCAVEC (char, len);
5406 char *p = libfunc_name;
5407 const char *q;
5408 if (DECIMAL_FLOAT_MODE_P (mode))
5410 #if ENABLE_DECIMAL_BID_FORMAT
5411 memcpy (p, "__bid_fix", 9);
5412 #else
5413 memcpy (p, "__dpd_fix", 9);
5414 #endif
5415 p += 9;
5417 else
5419 memcpy (p, "__fix", 5);
5420 p += 5;
5422 for (q = mname; *q; q++)
5423 *p++ = TOLOWER (*q);
5424 memcpy (p, "bitint", 7);
5425 rtx fun = init_one_libfunc (libfunc_name);
5426 emit_library_call (fun, LCT_NORMAL, VOIDmode, arg0, ptr_mode, arg1,
5427 SImode, arg2, mode);

/* Expand a .BITINTTOFLOAT call STMT by calling a mode-specific libgcc
   routine (__floatbitint<mode>, or __bid_floatbitint<mode> /
   __dpd_floatbitint<mode> for decimal float modes) and assigning the
   returned value to the lhs.  */

void
expand_BITINTTOFLOAT (internal_fn, gcall *stmt)
{
  tree lhs = gimple_call_lhs (stmt);
  if (!lhs)
    return;
  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
  rtx arg0 = expand_normal (gimple_call_arg (stmt, 0));
  rtx arg1 = expand_normal (gimple_call_arg (stmt, 1));
  const char *mname = GET_MODE_NAME (mode);
  unsigned mname_len = strlen (mname);
  int len = 14 + mname_len;
  if (DECIMAL_FLOAT_MODE_P (mode))
    len += 4;
  char *libfunc_name = XALLOCAVEC (char, len);
  char *p = libfunc_name;
  const char *q;
  if (DECIMAL_FLOAT_MODE_P (mode))
    {
#if ENABLE_DECIMAL_BID_FORMAT
      memcpy (p, "__bid_floatbitint", 17);
#else
      memcpy (p, "__dpd_floatbitint", 17);
#endif
      p += 17;
    }
  else
    {
      memcpy (p, "__floatbitint", 13);
      p += 13;
    }
  for (q = mname; *q; q++)
    *p++ = TOLOWER (*q);
  *p = '\0';
  rtx fun = init_one_libfunc (libfunc_name);
  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
  rtx val = emit_library_call_value (fun, target, LCT_PURE, mode,
				     arg0, ptr_mode, arg1, SImode);
  if (val != target)
    emit_move_insn (target, val);
}
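
/* Similarly, the conversion in the other direction uses e.g.
   "__floatbitintsf" for SFmode, "__floatbitintdf" for DFmode, and
   "__bid_floatbitintsd" or "__dpd_floatbitintsd" for SDmode, again
   depending on the configured decimal float format.  */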

/* Common helper for the bit query expanders below.  Return true if FN
   should be expanded through its optab; return false if there is nothing
   left to do, either because the call has no lhs or because the constant
   argument has already been folded and the result assigned here.  */

static bool
expand_bitquery (internal_fn fn, gcall *stmt)
{
  tree lhs = gimple_call_lhs (stmt);
  if (lhs == NULL_TREE)
    return false;
  tree arg = gimple_call_arg (stmt, 0);
  if (TREE_CODE (arg) == INTEGER_CST)
    {
      tree ret = fold_const_call (as_combined_fn (fn), TREE_TYPE (arg), arg);
      gcc_checking_assert (ret && TREE_CODE (ret) == INTEGER_CST);
      expand_assignment (lhs, ret, false);
      return false;
    }
  return true;
}

void
expand_CLRSB (internal_fn fn, gcall *stmt)
{
  if (expand_bitquery (fn, stmt))
    expand_unary_optab_fn (fn, stmt, clrsb_optab);
}

void
expand_CLZ (internal_fn fn, gcall *stmt)
{
  if (expand_bitquery (fn, stmt))
    expand_unary_optab_fn (fn, stmt, clz_optab);
}

void
expand_CTZ (internal_fn fn, gcall *stmt)
{
  if (expand_bitquery (fn, stmt))
    expand_unary_optab_fn (fn, stmt, ctz_optab);
}

void
expand_FFS (internal_fn fn, gcall *stmt)
{
  if (expand_bitquery (fn, stmt))
    expand_unary_optab_fn (fn, stmt, ffs_optab);
}

void
expand_PARITY (internal_fn fn, gcall *stmt)
{
  if (expand_bitquery (fn, stmt))
    expand_unary_optab_fn (fn, stmt, parity_optab);
}

void
expand_POPCOUNT (internal_fn fn, gcall *stmt)
{
  if (!expand_bitquery (fn, stmt))
    return;
  if (gimple_call_num_args (stmt) == 1)
    {
      expand_unary_optab_fn (fn, stmt, popcount_optab);
      return;
    }
  /* If .POPCOUNT call has 2 arguments, match_single_bit_test marked it
     because the result is only used in an equality comparison against 1.
     Use rtx costs in that case to determine if .POPCOUNT (arg) == 1
     or (arg ^ (arg - 1)) > arg - 1 is cheaper.
     If .POPCOUNT second argument is 0, we additionally know that arg
     is non-zero, so use arg & (arg - 1) == 0 instead.
     If .POPCOUNT second argument is -1, the comparison was either `<= 1`
     or `> 1`.  */
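  /* Illustration of the identities used below (popcount (arg) == 1 iff arg
     is a power of two, for unsigned arg): arg = 4: 4 ^ 3 = 7 > 3 and
     (4 & 3) == 0, both true; arg = 6: 6 ^ 5 = 3 <= 5 and (6 & 5) != 0,
     both false; arg = 0: 0 ^ (0 - 1) = ~0U which is not > ~0U, so the XOR
     form rejects zero as well, while the AND form is only valid when the
     second argument says arg is known to be non-zero.  */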
  bool speed_p = optimize_insn_for_speed_p ();
  tree lhs = gimple_call_lhs (stmt);
  tree arg = gimple_call_arg (stmt, 0);
  bool nonzero_arg = integer_zerop (gimple_call_arg (stmt, 1));
  bool was_le = integer_minus_onep (gimple_call_arg (stmt, 1));
  if (was_le)
    nonzero_arg = true;
  tree type = TREE_TYPE (arg);
  machine_mode mode = TYPE_MODE (type);
  machine_mode lhsmode = TYPE_MODE (TREE_TYPE (lhs));
  do_pending_stack_adjust ();
  start_sequence ();
  expand_unary_optab_fn (fn, stmt, popcount_optab);
  rtx_insn *popcount_insns = get_insns ();
  end_sequence ();
  start_sequence ();
  rtx plhs = expand_normal (lhs);
  rtx pcmp = emit_store_flag (NULL_RTX, EQ, plhs, const1_rtx, lhsmode, 0, 0);
  if (pcmp == NULL_RTX)
    {
    fail:
      end_sequence ();
      emit_insn (popcount_insns);
      return;
    }
  rtx_insn *popcount_cmp_insns = get_insns ();
  end_sequence ();
  start_sequence ();
  rtx op0 = expand_normal (arg);
  rtx argm1 = expand_simple_binop (mode, PLUS, op0, constm1_rtx, NULL_RTX,
				   1, OPTAB_WIDEN);
  if (argm1 == NULL_RTX)
    goto fail;
  rtx argxorargm1 = expand_simple_binop (mode, nonzero_arg ? AND : XOR, op0,
					 argm1, NULL_RTX, 1, OPTAB_WIDEN);
  if (argxorargm1 == NULL_RTX)
    goto fail;
  rtx cmp;
  if (nonzero_arg)
    cmp = emit_store_flag (NULL_RTX, EQ, argxorargm1, const0_rtx, mode, 1, 1);
  else
    cmp = emit_store_flag (NULL_RTX, GTU, argxorargm1, argm1, mode, 1, 1);
  if (cmp == NULL_RTX)
    goto fail;
  rtx_insn *cmp_insns = get_insns ();
  end_sequence ();
  unsigned popcount_cost = (seq_cost (popcount_insns, speed_p)
			    + seq_cost (popcount_cmp_insns, speed_p));
  unsigned cmp_cost = seq_cost (cmp_insns, speed_p);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "popcount == 1: popcount cost: %u; cmp cost: %u\n",
	     popcount_cost, cmp_cost);

  if (popcount_cost <= cmp_cost)
    emit_insn (popcount_insns);
  else
    {
      start_sequence ();
      emit_insn (cmp_insns);
      plhs = expand_normal (lhs);
      if (GET_MODE (cmp) != GET_MODE (plhs))
	cmp = convert_to_mode (GET_MODE (plhs), cmp, 1);
      /* For `<= 1`, we need to produce `2 - cmp` or `cmp ? 1 : 2` as that
	 then gets compared against 1 and we need the false case to be 2.  */
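      /* Concretely: cmp == 1 (popcount <= 1) yields 2 - 1 == 1, which the
	 later == 1 test accepts; cmp == 0 yields 2, which it rejects.  */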
      if (was_le)
	{
	  cmp = expand_simple_binop (GET_MODE (cmp), MINUS, const2_rtx,
				     cmp, NULL_RTX, 1, OPTAB_WIDEN);
	  if (!cmp)
	    goto fail;
	}
      emit_move_insn (plhs, cmp);
      rtx_insn *all_insns = get_insns ();
      end_sequence ();
      emit_insn (all_insns);
    }
}