tree-optimization/117080 - Add SLP_TREE_MEMORY_ACCESS_TYPE
[official-gcc.git] gcc/config/i386/i386.cc
blob a1f0ae7a7e167e4318a563bbc895835ce54fa3a2
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define INCLUDE_MEMORY
21 #define INCLUDE_STRING
22 #define IN_TARGET_CODE 1
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "emit-rtl.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic.h"
44 #include "cfgbuild.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "attribs.h"
48 #include "calls.h"
49 #include "stor-layout.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "except.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "reload.h"
61 #include "gimplify.h"
62 #include "dwarf2.h"
63 #include "tm-constrs.h"
64 #include "cselib.h"
65 #include "sched-int.h"
66 #include "opts.h"
67 #include "tree-pass.h"
68 #include "context.h"
69 #include "pass_manager.h"
70 #include "target-globals.h"
71 #include "gimple-iterator.h"
72 #include "gimple-fold.h"
73 #include "tree-vectorizer.h"
74 #include "shrink-wrap.h"
75 #include "builtins.h"
76 #include "rtl-iter.h"
77 #include "tree-iterator.h"
78 #include "dbgcnt.h"
79 #include "case-cfn-macros.h"
80 #include "dojump.h"
81 #include "fold-const-call.h"
82 #include "tree-vrp.h"
83 #include "tree-ssanames.h"
84 #include "selftest.h"
85 #include "selftest-rtl.h"
86 #include "print-rtl.h"
87 #include "intl.h"
88 #include "ifcvt.h"
89 #include "symbol-summary.h"
90 #include "sreal.h"
91 #include "ipa-cp.h"
92 #include "ipa-prop.h"
93 #include "ipa-fnsummary.h"
94 #include "wide-int-bitmask.h"
95 #include "tree-vector-builder.h"
96 #include "debug.h"
97 #include "dwarf2out.h"
98 #include "i386-options.h"
99 #include "i386-builtins.h"
100 #include "i386-expand.h"
101 #include "i386-features.h"
102 #include "function-abi.h"
103 #include "rtl-error.h"
105 /* This file should be included last. */
106 #include "target-def.h"
108 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
109 static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
112 #ifndef CHECK_STACK_LIMIT
113 #define CHECK_STACK_LIMIT (-1)
114 #endif
116 /* Return index of given mode in mult and division cost tables. */
117 #define MODE_INDEX(mode) \
118 ((mode) == QImode ? 0 \
119 : (mode) == HImode ? 1 \
120 : (mode) == SImode ? 2 \
121 : (mode) == DImode ? 3 \
122 : 4)
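/* A minimal sketch of how MODE_INDEX is used to index the per-mode cost
   tables (the mult_init, mult_bit and divide members of struct
   processor_costs are assumed here; see i386.h and ix86_rtx_costs for the
   real users):

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)]
                    + bits_set * ix86_cost->mult_bit;
     int div_cost = ix86_cost->divide[MODE_INDEX (DImode)];
*/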
125 /* Set by -mtune. */
126 const struct processor_costs *ix86_tune_cost = NULL;
128 /* Set by -mtune or -Os. */
129 const struct processor_costs *ix86_cost = NULL;
131 /* In case the average insn count for a single function invocation is
132 lower than this constant, emit fast (but longer) prologue and
133 epilogue code. */
134 #define FAST_PROLOGUE_INSN_COUNT 20
136 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
137 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
138 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
139 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
141 /* Array of the smallest class containing reg number REGNO, indexed by
142 REGNO. Used by REGNO_REG_CLASS in i386.h. */
144 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
146 /* ax, dx, cx, bx */
147 AREG, DREG, CREG, BREG,
148 /* si, di, bp, sp */
149 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
150 /* FP registers */
151 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
152 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
153 /* arg pointer, flags, fpsr, frame */
154 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
155 /* SSE registers */
156 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
157 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
158 /* MMX registers */
159 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
160 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
161 /* REX registers */
162 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
163 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
164 /* SSE REX registers */
165 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
166 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
167 /* AVX-512 SSE registers */
168 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
169 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
170 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
171 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
172 /* Mask registers. */
173 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
174 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
175 /* REX2 registers */
176 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
177 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
178 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
179 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
182 /* The "default" register map used in 32bit mode. */
184 unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
186 /* general regs */
187 0, 2, 1, 3, 6, 7, 4, 5,
188 /* fp regs */
189 12, 13, 14, 15, 16, 17, 18, 19,
190 /* arg, flags, fpsr, frame */
191 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
192 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
193 /* SSE */
194 21, 22, 23, 24, 25, 26, 27, 28,
195 /* MMX */
196 29, 30, 31, 32, 33, 34, 35, 36,
197 /* extended integer registers */
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 /* extended sse registers */
201 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
202 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
203 /* AVX-512 registers 16-23 */
204 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
205 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
206 /* AVX-512 registers 24-31 */
207 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
208 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
209 /* Mask registers */
210 93, 94, 95, 96, 97, 98, 99, 100
213 /* The "default" register map used in 64bit mode. */
215 unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
217 /* general regs */
218 0, 1, 2, 3, 4, 5, 6, 7,
219 /* fp regs */
220 33, 34, 35, 36, 37, 38, 39, 40,
221 /* arg, flags, fpsr, frame */
222 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
223 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
224 /* SSE */
225 17, 18, 19, 20, 21, 22, 23, 24,
226 /* MMX */
227 41, 42, 43, 44, 45, 46, 47, 48,
228 /* extended integer registers */
229 8, 9, 10, 11, 12, 13, 14, 15,
230 /* extended SSE registers */
231 25, 26, 27, 28, 29, 30, 31, 32,
232 /* AVX-512 registers 16-23 */
233 67, 68, 69, 70, 71, 72, 73, 74,
234 /* AVX-512 registers 24-31 */
235 75, 76, 77, 78, 79, 80, 81, 82,
236 /* Mask registers */
237 118, 119, 120, 121, 122, 123, 124, 125,
238 /* REX2 extended integer registers */
239 130, 131, 132, 133, 134, 135, 136, 137,
240 138, 139, 140, 141, 142, 143, 144, 145
243 /* Define the register numbers to be used in Dwarf debugging information.
244 The SVR4 reference port C compiler uses the following register numbers
245 in its Dwarf output code:
246 0 for %eax (gcc regno = 0)
247 1 for %ecx (gcc regno = 2)
248 2 for %edx (gcc regno = 1)
249 3 for %ebx (gcc regno = 3)
250 4 for %esp (gcc regno = 7)
251 5 for %ebp (gcc regno = 6)
252 6 for %esi (gcc regno = 4)
253 7 for %edi (gcc regno = 5)
254 The following three DWARF register numbers are never generated by
255 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
256 believed these numbers have these meanings.
257 8 for %eip (no gcc equivalent)
258 9 for %eflags (gcc regno = 17)
259 10 for %trapno (no gcc equivalent)
260 It is not at all clear how we should number the FP stack registers
261 for the x86 architecture. If the version of SDB on x86/svr4 were
262 a bit less brain dead with respect to floating-point then we would
263 have a precedent to follow with respect to DWARF register numbers
264 for x86 FP registers, but the SDB on x86/svr4 was so completely
265 broken with respect to FP registers that it is hardly worth thinking
266 of it as something to strive for compatibility with.
267 The version of x86/svr4 SDB I had does (partially)
268 seem to believe that DWARF register number 11 is associated with
269 the x86 register %st(0), but that's about all. Higher DWARF
270 register numbers don't seem to be associated with anything in
271 particular, and even for DWARF regno 11, SDB only seemed to under-
272 stand that it should say that a variable lives in %st(0) (when
273 asked via an `=' command) if we said it was in DWARF regno 11,
274 but SDB still printed garbage when asked for the value of the
275 variable in question (via a `/' command).
276 (Also note that the labels SDB printed for various FP stack regs
277 when doing an `x' command were all wrong.)
278 Note that these problems generally don't affect the native SVR4
279 C compiler because it doesn't allow the use of -O with -g and
280 because when it is *not* optimizing, it allocates a memory
281 location for each floating-point variable, and the memory
282 location is what gets described in the DWARF AT_location
283 attribute for the variable in question.
284 Regardless of the severe mental illness of the x86/svr4 SDB, we
285 do something sensible here and we use the following DWARF
286 register numbers. Note that these are all stack-top-relative
287 numbers.
288 11 for %st(0) (gcc regno = 8)
289 12 for %st(1) (gcc regno = 9)
290 13 for %st(2) (gcc regno = 10)
291 14 for %st(3) (gcc regno = 11)
292 15 for %st(4) (gcc regno = 12)
293 16 for %st(5) (gcc regno = 13)
294 17 for %st(6) (gcc regno = 14)
295 18 for %st(7) (gcc regno = 15)
297 unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
299 /* general regs */
300 0, 2, 1, 3, 6, 7, 5, 4,
301 /* fp regs */
302 11, 12, 13, 14, 15, 16, 17, 18,
303 /* arg, flags, fpsr, frame */
304 IGNORED_DWARF_REGNUM, 9,
305 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
306 /* SSE registers */
307 21, 22, 23, 24, 25, 26, 27, 28,
308 /* MMX registers */
309 29, 30, 31, 32, 33, 34, 35, 36,
310 /* extended integer registers */
311 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
312 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
313 /* extended sse registers */
314 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
315 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
316 /* AVX-512 registers 16-23 */
317 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
318 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
319 /* AVX-512 registers 24-31 */
320 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
321 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
322 /* Mask registers */
323 93, 94, 95, 96, 97, 98, 99, 100
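/* Worked example of the SVR4 mapping above: gcc regno 1 is %edx, and
   svr4_debugger_register_map[1] == 2, matching the "2 for %edx" entry in
   the comment; likewise gcc regno 7 (%esp) maps to DWARF register 4.  */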
326 /* Define parameter passing and return registers. */
328 static int const x86_64_int_parameter_registers[6] =
330 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
333 static int const x86_64_ms_abi_int_parameter_registers[4] =
335 CX_REG, DX_REG, R8_REG, R9_REG
338 static int const x86_64_int_return_registers[4] =
340 AX_REG, DX_REG, DI_REG, SI_REG
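/* Worked example of the SysV argument ordering encoded above (illustrative
   declaration only): for  extern long f (long a, long b, long c, long d,
   long e, long g);  a->%rdi, b->%rsi, c->%rdx, d->%rcx, e->%r8, g->%r9,
   while the MS ABI passes only the first four integer arguments, in
   %rcx, %rdx, %r8 and %r9.  */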
343 /* Define the structure for the machine field in struct function. */
345 struct GTY(()) stack_local_entry {
346 unsigned short mode;
347 unsigned short n;
348 rtx rtl;
349 struct stack_local_entry *next;
352 /* Which cpu are we scheduling for. */
353 enum attr_cpu ix86_schedule;
355 /* Which cpu are we optimizing for. */
356 enum processor_type ix86_tune;
358 /* Which instruction set architecture to use. */
359 enum processor_type ix86_arch;
361 /* True if processor has SSE prefetch instruction. */
362 unsigned char ix86_prefetch_sse;
364 /* Preferred alignment for stack boundary in bits. */
365 unsigned int ix86_preferred_stack_boundary;
367 /* Alignment for incoming stack boundary in bits specified at
368 command line. */
369 unsigned int ix86_user_incoming_stack_boundary;
371 /* Default alignment for incoming stack boundary in bits. */
372 unsigned int ix86_default_incoming_stack_boundary;
374 /* Alignment for incoming stack boundary in bits. */
375 unsigned int ix86_incoming_stack_boundary;
377 /* True if there is no direct access to extern symbols. */
378 bool ix86_has_no_direct_extern_access;
380 /* Calling abi specific va_list type nodes. */
381 tree sysv_va_list_type_node;
382 tree ms_va_list_type_node;
384 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
385 char internal_label_prefix[16];
386 int internal_label_prefix_len;
388 /* Fence to use after loop using movnt. */
389 tree x86_mfence;
391 /* Register class used for passing a given 64-bit part of the argument.
392 These represent classes as documented by the psABI, with the exception
393 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
394 uses an SFmode or DFmode move instead of a DImode one to avoid reformatting penalties.
396 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
397 whenever possible (the upper half contains padding). */
398 enum x86_64_reg_class
400 X86_64_NO_CLASS,
401 X86_64_INTEGER_CLASS,
402 X86_64_INTEGERSI_CLASS,
403 X86_64_SSE_CLASS,
404 X86_64_SSEHF_CLASS,
405 X86_64_SSESF_CLASS,
406 X86_64_SSEDF_CLASS,
407 X86_64_SSEUP_CLASS,
408 X86_64_X87_CLASS,
409 X86_64_X87UP_CLASS,
410 X86_64_COMPLEX_X87_CLASS,
411 X86_64_MEMORY_CLASS
414 #define MAX_CLASSES 8
416 /* Table of constants used by fldpi, fldln2, etc.... */
417 static REAL_VALUE_TYPE ext_80387_constants_table [5];
418 static bool ext_80387_constants_init;
421 static rtx ix86_function_value (const_tree, const_tree, bool);
422 static bool ix86_function_value_regno_p (const unsigned int);
423 static unsigned int ix86_function_arg_boundary (machine_mode,
424 const_tree);
425 static rtx ix86_static_chain (const_tree, bool);
426 static int ix86_function_regparm (const_tree, const_tree);
427 static void ix86_compute_frame_layout (void);
428 static tree ix86_canonical_va_list_type (tree);
429 static unsigned int split_stack_prologue_scratch_regno (void);
430 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
432 static bool ix86_can_inline_p (tree, tree);
433 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
435 typedef enum ix86_flags_cc
437 X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
438 X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
439 X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
440 X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
441 } ix86_cc;
443 static const char *ix86_ccmp_dfv_mapping[] =
445 "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
446 "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
447 "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
448 "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
452 /* Whether -mtune= or -march= were specified */
453 int ix86_tune_defaulted;
454 int ix86_arch_specified;
456 /* Return true if a red zone is in use. We can't use the red zone when
457 there are local indirect jumps, like "indirect_jump" or "tablejump",
458 which jump to another place in the function, since "call" in the
459 indirect thunk pushes the return address onto the stack, destroying
460 the red zone.
462 TODO: If we can reserve the first 2 WORDs of the red zone, one for PUSH
463 and another for CALL, we can allow local indirect jumps with the
464 indirect thunk. */
466 bool
467 ix86_using_red_zone (void)
469 return (TARGET_RED_ZONE
470 && !TARGET_64BIT_MS_ABI
471 && (!cfun->machine->has_local_indirect_jump
472 || cfun->machine->indirect_branch_type == indirect_branch_keep));
475 /* Return true if profiling code should be emitted before the
476 prologue, false otherwise.
477 Note: for x86 with "hotfix" (hotpatching) it is sorried, i.e. unsupported. */
478 static bool
479 ix86_profile_before_prologue (void)
481 return flag_fentry != 0;
484 /* Update register usage after having seen the compiler flags. */
486 static void
487 ix86_conditional_register_usage (void)
489 int i, c_mask;
491 /* If there are no caller-saved registers, preserve all registers,
492 except fixed_regs and registers used for the function return value,
493 since aggregate_value_p checks call_used_regs[regno] on the return
494 value. */
495 if (cfun
496 && (cfun->machine->call_saved_registers
497 == TYPE_NO_CALLER_SAVED_REGISTERS))
498 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
499 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
500 call_used_regs[i] = 0;
502 /* For 32-bit targets, disable the REX registers. */
503 if (! TARGET_64BIT)
505 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
506 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
507 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
508 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
509 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
510 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
513 /* See the definition of CALL_USED_REGISTERS in i386.h. */
514 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
516 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
518 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
520 /* Set/reset conditionally defined registers from
521 CALL_USED_REGISTERS initializer. */
522 if (call_used_regs[i] > 1)
523 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
525 /* Compute the CLOBBERED_REGS register set as the call-used
526 registers from the GENERAL_REGS register set. */
527 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
528 && call_used_regs[i])
529 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
532 /* If MMX is disabled, disable the registers. */
533 if (! TARGET_MMX)
534 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
536 /* If SSE is disabled, disable the registers. */
537 if (! TARGET_SSE)
538 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
540 /* If the FPU is disabled, disable the registers. */
541 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
542 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
544 /* If AVX512F is disabled, disable the registers. */
545 if (! TARGET_AVX512F)
547 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
548 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
550 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
553 /* If APX is disabled, disable the registers. */
554 if (! (TARGET_APX_EGPR && TARGET_64BIT))
556 for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
557 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
561 /* Canonicalize a comparison from one we don't have to one we do have. */
563 static void
564 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
565 bool op0_preserve_value)
567 /* The order of operands in an x87 ficom compare is forced by combine in
568 the simplify_comparison () function. The float operator is treated as RTX_OBJ
569 with precedence over other operators and is always put in the first
570 place. Swap the condition and operands to match the ficom instruction. */
571 if (!op0_preserve_value
572 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
574 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
576 /* We are called only for compares that are split to SAHF instruction.
577 Ensure that we have setcc/jcc insn for the swapped condition. */
578 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
580 std::swap (*op0, *op1);
581 *code = (int) scode;
587 /* Hook to determine if one function can safely inline another. */
589 static bool
590 ix86_can_inline_p (tree caller, tree callee)
592 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
593 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
595 /* Changes of these flags can be tolerated for always_inline functions. Let's
596 hope the user knows what they are doing. */
597 unsigned HOST_WIDE_INT always_inline_safe_mask
598 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
599 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
600 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
601 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
602 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
603 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
604 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
607 if (!callee_tree)
608 callee_tree = target_option_default_node;
609 if (!caller_tree)
610 caller_tree = target_option_default_node;
611 if (callee_tree == caller_tree)
612 return true;
614 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
615 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
616 bool ret = false;
617 bool always_inline
618 = (DECL_DISREGARD_INLINE_LIMITS (callee)
619 && lookup_attribute ("always_inline",
620 DECL_ATTRIBUTES (callee)));
622 /* If callee only uses GPRs, ignore MASK_80387. */
623 if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
624 always_inline_safe_mask |= MASK_80387;
626 cgraph_node *callee_node = cgraph_node::get (callee);
627 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
628 function can inline an SSE2 function, but an SSE2 function can't inline
629 an SSE4 function. */
630 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
631 != callee_opts->x_ix86_isa_flags)
632 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
633 != callee_opts->x_ix86_isa_flags2))
634 ret = false;
636 /* See if we have the same non-isa options. */
637 else if ((!always_inline
638 && caller_opts->x_target_flags != callee_opts->x_target_flags)
639 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
640 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
641 ret = false;
643 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
644 /* If the callee doesn't use FP expressions, differences in
645 ix86_fpmath can be ignored. We are called from the front ends
646 for multi-versioning call optimization, so beware of
647 ipa_fn_summaries not being available. */
648 && (! ipa_fn_summaries
649 || ipa_fn_summaries->get (callee_node) == NULL
650 || ipa_fn_summaries->get (callee_node)->fp_expressions))
651 ret = false;
653 /* At this point we cannot identify whether the arch or tune setting
654 comes from a target attribute or not. So the most conservative way
655 is to allow a callee that uses the default arch and tune strings to
656 be inlined. */
657 else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
658 && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
659 ret = true;
661 /* See if arch, tune, etc. are the same. As the ISA check above already
662 ensures that the callee's ISA is a subset of the caller's, do not block
663 the always_inline attribute for the callee even if it has a different arch. */
664 else if (!always_inline && caller_opts->arch != callee_opts->arch)
665 ret = false;
667 else if (!always_inline && caller_opts->tune != callee_opts->tune)
668 ret = false;
670 else if (!always_inline
671 && caller_opts->branch_cost != callee_opts->branch_cost)
672 ret = false;
674 else
675 ret = true;
677 return ret;
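/* Illustration of the ISA-subset rule checked above (function names are
   made up for the sketch): an AVX2 caller may inline an SSE2 callee, but
   an SSE2 caller may not inline an AVX2 callee.

     __attribute__((target ("sse2"))) static inline int f (int x) { return x + 1; }
     __attribute__((target ("avx2"))) int g (int x) { return f (x); }  // inlining f is allowed
*/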
680 /* Return true if this goes in large data/bss. */
682 static bool
683 ix86_in_large_data_p (tree exp)
685 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
686 && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
687 return false;
689 if (exp == NULL_TREE)
690 return false;
692 /* Functions are never large data. */
693 if (TREE_CODE (exp) == FUNCTION_DECL)
694 return false;
696 /* Automatic variables are never large data. */
697 if (VAR_P (exp) && !is_global_var (exp))
698 return false;
700 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
702 const char *section = DECL_SECTION_NAME (exp);
703 if (strcmp (section, ".ldata") == 0
704 || strcmp (section, ".lbss") == 0)
705 return true;
706 return false;
708 else
710 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
712 /* If this is an incomplete type with size 0, then we can't put it
713 in data because it might be too big when completed. Also,
714 int_size_in_bytes returns -1 if the size can vary or is larger than
715 an integer, in which case it is also safer to assume that it goes in
716 large data. */
717 if (size <= 0 || size > ix86_section_threshold)
718 return true;
721 return false;
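/* Illustration (medium code model; names are made up): objects larger than
   -mlarge-data-threshold (ix86_section_threshold) are treated as large data,
   and an explicit ".ldata"/".lbss" section attribute forces it regardless
   of size.

     static char big_buf[4 * 1024 * 1024];                      // goes to .lbss
     static int small_obj __attribute__((section (".ldata")));  // forced large data
*/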
724 /* i386-specific section flag to mark large sections. */
725 #define SECTION_LARGE SECTION_MACH_DEP
727 /* Switch to the appropriate section for output of DECL.
728 DECL is either a `VAR_DECL' node or a constant of some sort.
729 RELOC indicates whether forming the initial value of DECL requires
730 link-time relocations. */
732 ATTRIBUTE_UNUSED static section *
733 x86_64_elf_select_section (tree decl, int reloc,
734 unsigned HOST_WIDE_INT align)
736 if (ix86_in_large_data_p (decl))
738 const char *sname = NULL;
739 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
740 switch (categorize_decl_for_section (decl, reloc))
742 case SECCAT_DATA:
743 sname = ".ldata";
744 break;
745 case SECCAT_DATA_REL:
746 sname = ".ldata.rel";
747 break;
748 case SECCAT_DATA_REL_LOCAL:
749 sname = ".ldata.rel.local";
750 break;
751 case SECCAT_DATA_REL_RO:
752 sname = ".ldata.rel.ro";
753 break;
754 case SECCAT_DATA_REL_RO_LOCAL:
755 sname = ".ldata.rel.ro.local";
756 break;
757 case SECCAT_BSS:
758 sname = ".lbss";
759 flags |= SECTION_BSS;
760 break;
761 case SECCAT_RODATA:
762 case SECCAT_RODATA_MERGE_STR:
763 case SECCAT_RODATA_MERGE_STR_INIT:
764 case SECCAT_RODATA_MERGE_CONST:
765 sname = ".lrodata";
766 flags &= ~SECTION_WRITE;
767 break;
768 case SECCAT_SRODATA:
769 case SECCAT_SDATA:
770 case SECCAT_SBSS:
771 gcc_unreachable ();
772 case SECCAT_TEXT:
773 case SECCAT_TDATA:
774 case SECCAT_TBSS:
775 /* We don't split these for the medium model. Place them into
776 default sections and hope for the best. */
777 break;
779 if (sname)
781 /* We might get called with string constants, but get_named_section
782 doesn't like them as they are not DECLs. Also, we need to set
783 flags in that case. */
784 if (!DECL_P (decl))
785 return get_section (sname, flags, NULL);
786 return get_named_section (decl, sname, reloc);
789 return default_elf_select_section (decl, reloc, align);
792 /* Select a set of attributes for section NAME based on the properties
793 of DECL and whether or not RELOC indicates that DECL's initializer
794 might contain runtime relocations. */
796 static unsigned int ATTRIBUTE_UNUSED
797 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
799 unsigned int flags = default_section_type_flags (decl, name, reloc);
801 if (ix86_in_large_data_p (decl))
802 flags |= SECTION_LARGE;
804 if (decl == NULL_TREE
805 && (strcmp (name, ".ldata.rel.ro") == 0
806 || strcmp (name, ".ldata.rel.ro.local") == 0))
807 flags |= SECTION_RELRO;
809 if (strcmp (name, ".lbss") == 0
810 || startswith (name, ".lbss.")
811 || startswith (name, ".gnu.linkonce.lb."))
812 flags |= SECTION_BSS;
814 return flags;
817 /* Build up a unique section name, expressed as a
818 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
819 RELOC indicates whether the initial value of EXP requires
820 link-time relocations. */
822 static void ATTRIBUTE_UNUSED
823 x86_64_elf_unique_section (tree decl, int reloc)
825 if (ix86_in_large_data_p (decl))
827 const char *prefix = NULL;
828 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
829 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
831 switch (categorize_decl_for_section (decl, reloc))
833 case SECCAT_DATA:
834 case SECCAT_DATA_REL:
835 case SECCAT_DATA_REL_LOCAL:
836 case SECCAT_DATA_REL_RO:
837 case SECCAT_DATA_REL_RO_LOCAL:
838 prefix = one_only ? ".ld" : ".ldata";
839 break;
840 case SECCAT_BSS:
841 prefix = one_only ? ".lb" : ".lbss";
842 break;
843 case SECCAT_RODATA:
844 case SECCAT_RODATA_MERGE_STR:
845 case SECCAT_RODATA_MERGE_STR_INIT:
846 case SECCAT_RODATA_MERGE_CONST:
847 prefix = one_only ? ".lr" : ".lrodata";
848 break;
849 case SECCAT_SRODATA:
850 case SECCAT_SDATA:
851 case SECCAT_SBSS:
852 gcc_unreachable ();
853 case SECCAT_TEXT:
854 case SECCAT_TDATA:
855 case SECCAT_TBSS:
856 /* We don't split these for the medium model. Place them into
857 default sections and hope for the best. */
858 break;
860 if (prefix)
862 const char *name, *linkonce;
863 char *string;
865 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
866 name = targetm.strip_name_encoding (name);
868 /* If we're using one_only, then there needs to be a .gnu.linkonce
869 prefix to the section name. */
870 linkonce = one_only ? ".gnu.linkonce" : "";
872 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
874 set_decl_section_name (decl, string);
875 return;
878 default_unique_section (decl, reloc);
881 #ifdef COMMON_ASM_OP
883 #ifndef LARGECOMM_SECTION_ASM_OP
884 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
885 #endif
887 /* This says how to output assembler code to declare an
888 uninitialized external linkage data object.
890 For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
891 directive for large objects. */
892 void
893 x86_elf_aligned_decl_common (FILE *file, tree decl,
894 const char *name, unsigned HOST_WIDE_INT size,
895 unsigned align)
897 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
898 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
899 && size > (unsigned int)ix86_section_threshold)
901 switch_to_section (get_named_section (decl, ".lbss", 0));
902 fputs (LARGECOMM_SECTION_ASM_OP, file);
904 else
905 fputs (COMMON_ASM_OP, file);
906 assemble_name (file, name);
907 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
908 size, align / BITS_PER_UNIT);
910 #endif
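/* For example, under -mcmodel=medium a buffer above the threshold makes the
   code above emit something like
	.largecomm	big_buf,1048576,32
   while a small object gets the usual
	.comm	small_buf,16,16
   (assembler output sketched from the logic above; the exact size and
   alignment values depend on the object).  */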
912 /* Utility function for targets to use in implementing
913 ASM_OUTPUT_ALIGNED_BSS. */
915 void
916 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
917 unsigned HOST_WIDE_INT size, unsigned align)
919 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
920 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
921 && size > (unsigned int)ix86_section_threshold)
922 switch_to_section (get_named_section (decl, ".lbss", 0));
923 else
924 switch_to_section (bss_section);
925 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
926 #ifdef ASM_DECLARE_OBJECT_NAME
927 last_assemble_variable_decl = decl;
928 ASM_DECLARE_OBJECT_NAME (file, name, decl);
929 #else
930 /* The standard thing is just to output a label for the object. */
931 ASM_OUTPUT_LABEL (file, name);
932 #endif /* ASM_DECLARE_OBJECT_NAME */
933 ASM_OUTPUT_SKIP (file, size ? size : 1);
936 /* Decide whether we must probe the stack before any space allocation
937 on this target. It's essentially TARGET_STACK_PROBE except when
938 -fstack-check causes the stack to be already probed differently. */
940 bool
941 ix86_target_stack_probe (void)
943 /* Do not probe the stack twice if static stack checking is enabled. */
944 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
945 return false;
947 return TARGET_STACK_PROBE;
950 /* Decide whether we can make a sibling call to a function. DECL is the
951 declaration of the function being targeted by the call and EXP is the
952 CALL_EXPR representing the call. */
954 static bool
955 ix86_function_ok_for_sibcall (tree decl, tree exp)
957 tree type, decl_or_type;
958 rtx a, b;
959 bool bind_global = decl && !targetm.binds_local_p (decl);
961 if (ix86_function_naked (current_function_decl))
962 return false;
964 /* Sibling call isn't OK if there are no caller-saved registers
965 since all registers must be preserved before return. */
966 if (cfun->machine->call_saved_registers
967 == TYPE_NO_CALLER_SAVED_REGISTERS)
968 return false;
970 /* If we are generating position-independent code, we cannot sibcall
971 optimize direct calls to global functions, as the PLT requires
972 %ebx be live. (Darwin does not have a PLT.) */
973 if (!TARGET_MACHO
974 && !TARGET_64BIT
975 && flag_pic
976 && flag_plt
977 && bind_global)
978 return false;
980 /* If we need to align the outgoing stack, then sibcalling would
981 unalign the stack, which may break the called function. */
982 if (ix86_minimum_incoming_stack_boundary (true)
983 < PREFERRED_STACK_BOUNDARY)
984 return false;
986 if (decl)
988 decl_or_type = decl;
989 type = TREE_TYPE (decl);
991 else
993 /* We're looking at the CALL_EXPR, we need the type of the function. */
994 type = CALL_EXPR_FN (exp); /* pointer expression */
995 type = TREE_TYPE (type); /* pointer type */
996 type = TREE_TYPE (type); /* function type */
997 decl_or_type = type;
1000 /* Sibling call isn't OK if callee has no callee-saved registers
1001 and the calling function has callee-saved registers. */
1002 if (cfun->machine->call_saved_registers != TYPE_NO_CALLEE_SAVED_REGISTERS
1003 && (cfun->machine->call_saved_registers
1004 != TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP)
1005 && lookup_attribute ("no_callee_saved_registers",
1006 TYPE_ATTRIBUTES (type)))
1007 return false;
1009 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
1010 if ((OUTGOING_REG_PARM_STACK_SPACE (type)
1011 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
1012 || (REG_PARM_STACK_SPACE (decl_or_type)
1013 != REG_PARM_STACK_SPACE (current_function_decl)))
1015 maybe_complain_about_tail_call (exp,
1016 "inconsistent size of stack space"
1017 " allocated for arguments which are"
1018 " passed in registers");
1019 return false;
1022 /* Check that the return value locations are the same. For example,
1023 if we are returning floats on the 80387 register stack, we cannot
1024 make a sibcall from a function that doesn't return a float to a
1025 function that does or, conversely, from a function that does return
1026 a float to a function that doesn't; the necessary stack adjustment
1027 would not be executed. This is also the place we notice
1028 differences in the return value ABI. Note that it is ok for one
1029 of the functions to have void return type as long as the return
1030 value of the other is passed in a register. */
1031 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
1032 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1033 cfun->decl, false);
1034 if (STACK_REG_P (a) || STACK_REG_P (b))
1036 if (!rtx_equal_p (a, b))
1037 return false;
1039 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1041 else if (!rtx_equal_p (a, b))
1042 return false;
1044 if (TARGET_64BIT)
1046 /* The SYSV ABI has more call-clobbered registers;
1047 disallow sibcalls from MS to SYSV. */
1048 if (cfun->machine->call_abi == MS_ABI
1049 && ix86_function_type_abi (type) == SYSV_ABI)
1050 return false;
1052 else
1054 /* If this call is indirect, we'll need to be able to use a
1055 call-clobbered register for the address of the target function.
1056 Make sure that all such registers are not used for passing
1057 parameters. Note that DLLIMPORT functions and call to global
1058 function via GOT slot are indirect. */
1059 if (!decl
1060 || (bind_global && flag_pic && !flag_plt)
1061 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1062 || flag_force_indirect_call)
1064 /* Check if regparm >= 3 since arg_reg_available is set to
1065 false if regparm == 0. If regparm is 1 or 2, there is
1066 always a call-clobbered register available.
1068 ??? The symbol indirect call doesn't need a call-clobbered
1069 register. But we don't know if this is a symbol indirect
1070 call or not here. */
1071 if (ix86_function_regparm (type, decl) >= 3
1072 && !cfun->machine->arg_reg_available)
1073 return false;
1077 if (decl && ix86_use_pseudo_pic_reg ())
1079 /* When PIC register is used, it must be restored after ifunc
1080 function returns. */
1081 cgraph_node *node = cgraph_node::get (decl);
1082 if (node && node->ifunc_resolver)
1083 return false;
1086 /* Disable sibcall if the callee has the indirect_return attribute and
1087 the caller doesn't, since the callee will return to the caller's caller
1088 via an indirect jump. */
1089 if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
1090 == (CF_RETURN | CF_BRANCH))
1091 && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
1092 && !lookup_attribute ("indirect_return",
1093 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
1094 return false;
1096 /* Otherwise okay. That also includes certain types of indirect calls. */
1097 return true;
1100 /* This function determines the calling convention from TYPE. */
1102 unsigned int
1103 ix86_get_callcvt (const_tree type)
1105 unsigned int ret = 0;
1106 bool is_stdarg;
1107 tree attrs;
1109 if (TARGET_64BIT)
1110 return IX86_CALLCVT_CDECL;
1112 attrs = TYPE_ATTRIBUTES (type);
1113 if (attrs != NULL_TREE)
1115 if (lookup_attribute ("cdecl", attrs))
1116 ret |= IX86_CALLCVT_CDECL;
1117 else if (lookup_attribute ("stdcall", attrs))
1118 ret |= IX86_CALLCVT_STDCALL;
1119 else if (lookup_attribute ("fastcall", attrs))
1120 ret |= IX86_CALLCVT_FASTCALL;
1121 else if (lookup_attribute ("thiscall", attrs))
1122 ret |= IX86_CALLCVT_THISCALL;
1124 /* Regparm isn't allowed for thiscall and fastcall. */
1125 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1127 if (lookup_attribute ("regparm", attrs))
1128 ret |= IX86_CALLCVT_REGPARM;
1129 if (lookup_attribute ("sseregparm", attrs))
1130 ret |= IX86_CALLCVT_SSEREGPARM;
1133 if (IX86_BASE_CALLCVT(ret) != 0)
1134 return ret;
1137 is_stdarg = stdarg_p (type);
1138 if (TARGET_RTD && !is_stdarg)
1139 return IX86_CALLCVT_STDCALL | ret;
1141 if (ret != 0
1142 || is_stdarg
1143 || TREE_CODE (type) != METHOD_TYPE
1144 || ix86_function_type_abi (type) != MS_ABI)
1145 return IX86_CALLCVT_CDECL | ret;
1147 return IX86_CALLCVT_THISCALL;
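/* Illustrative 32-bit declarations and the calling-convention bits the
   logic above derives for them (sketch; the declarations are made up):

     int __attribute__((cdecl))       f1 (int);       // IX86_CALLCVT_CDECL
     int __attribute__((stdcall))     f2 (int);       // IX86_CALLCVT_STDCALL
     int __attribute__((fastcall))    f3 (int, int);  // IX86_CALLCVT_FASTCALL
     int __attribute__((regparm (3))) f4 (int, int);  // IX86_CALLCVT_CDECL | IX86_CALLCVT_REGPARM
*/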
1150 /* Return 0 if the attributes for two types are incompatible, 1 if they
1151 are compatible, and 2 if they are nearly compatible (which causes a
1152 warning to be generated). */
1154 static int
1155 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1157 unsigned int ccvt1, ccvt2;
1159 if (TREE_CODE (type1) != FUNCTION_TYPE
1160 && TREE_CODE (type1) != METHOD_TYPE)
1161 return 1;
1163 ccvt1 = ix86_get_callcvt (type1);
1164 ccvt2 = ix86_get_callcvt (type2);
1165 if (ccvt1 != ccvt2)
1166 return 0;
1167 if (ix86_function_regparm (type1, NULL)
1168 != ix86_function_regparm (type2, NULL))
1169 return 0;
1171 if (lookup_attribute ("no_callee_saved_registers",
1172 TYPE_ATTRIBUTES (type1))
1173 != lookup_attribute ("no_callee_saved_registers",
1174 TYPE_ATTRIBUTES (type2)))
1175 return 0;
1177 return 1;
1180 /* Return the regparm value for a function with the indicated TYPE and DECL.
1181 DECL may be NULL when calling a function indirectly
1182 or when considering a libcall. */
1184 static int
1185 ix86_function_regparm (const_tree type, const_tree decl)
1187 tree attr;
1188 int regparm;
1189 unsigned int ccvt;
1191 if (TARGET_64BIT)
1192 return (ix86_function_type_abi (type) == SYSV_ABI
1193 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1194 ccvt = ix86_get_callcvt (type);
1195 regparm = ix86_regparm;
1197 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1199 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1200 if (attr)
1202 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1203 return regparm;
1206 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1207 return 2;
1208 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1209 return 1;
1211 /* Use register calling convention for local functions when possible. */
1212 if (decl
1213 && TREE_CODE (decl) == FUNCTION_DECL)
1215 cgraph_node *target = cgraph_node::get (decl);
1216 if (target)
1217 target = target->function_symbol ();
1219 /* Caller and callee must agree on the calling convention, so
1220 checking just 'optimize' here would mean that with
1221 __attribute__((optimize (...))) the caller could use the regparm convention
1222 and the callee not, or vice versa. Instead look at whether the callee
1223 is optimized or not. */
1224 if (target && opt_for_fn (target->decl, optimize)
1225 && !(profile_flag && !flag_fentry))
1227 if (target->local && target->can_change_signature)
1229 int local_regparm, globals = 0, regno;
1231 /* Make sure no regparm register is taken by a
1232 fixed register variable. */
1233 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1234 local_regparm++)
1235 if (fixed_regs[local_regparm])
1236 break;
1238 /* We don't want to use regparm(3) for nested functions as
1239 these use a static chain pointer in the third argument. */
1240 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1241 local_regparm = 2;
1243 /* Save a register for the split stack. */
1244 if (flag_split_stack)
1246 if (local_regparm == 3)
1247 local_regparm = 2;
1248 else if (local_regparm == 2
1249 && DECL_STATIC_CHAIN (target->decl))
1250 local_regparm = 1;
1253 /* Each fixed register usage increases register pressure,
1254 so fewer registers should be used for argument passing.
1255 This functionality can be overridden by an explicit
1256 regparm value. */
1257 for (regno = AX_REG; regno <= DI_REG; regno++)
1258 if (fixed_regs[regno])
1259 globals++;
1261 local_regparm
1262 = globals < local_regparm ? local_regparm - globals : 0;
1264 if (local_regparm > regparm)
1265 regparm = local_regparm;
1270 return regparm;
1273 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1274 DFmode (2) arguments in SSE registers for a function with the
1275 indicated TYPE and DECL. DECL may be NULL when calling a function
1276 indirectly or when considering a libcall. Return -1 if any FP parameter
1277 should be rejected with an error. This is used in the situation where we
1278 imply the SSE calling convention but the function is called from another
1279 function with SSE disabled. Otherwise return 0. */
1281 static int
1282 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1284 gcc_assert (!TARGET_64BIT);
1286 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1287 by the sseregparm attribute. */
1288 if (TARGET_SSEREGPARM
1289 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1291 if (!TARGET_SSE)
1293 if (warn)
1295 if (decl)
1296 error ("calling %qD with attribute sseregparm without "
1297 "SSE/SSE2 enabled", decl);
1298 else
1299 error ("calling %qT with attribute sseregparm without "
1300 "SSE/SSE2 enabled", type);
1302 return 0;
1305 return 2;
1308 if (!decl)
1309 return 0;
1311 cgraph_node *target = cgraph_node::get (decl);
1312 if (target)
1313 target = target->function_symbol ();
1315 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1316 (and DFmode for SSE2) arguments in SSE registers. */
1317 if (target
1318 /* TARGET_SSE_MATH */
1319 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1320 && opt_for_fn (target->decl, optimize)
1321 && !(profile_flag && !flag_fentry))
1323 if (target->local && target->can_change_signature)
1325 /* Refuse to produce wrong code when a local function with SSE enabled
1326 is called from an SSE-disabled function.
1327 FIXME: We need a way to detect these cases across ltrans partitions
1328 and avoid using SSE calling conventions on local functions called
1329 from functions with SSE disabled. For now at least delay the
1330 warning until we know we are going to produce wrong code.
1331 See PR66047. */
1332 if (!TARGET_SSE && warn)
1333 return -1;
1334 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1335 ->x_ix86_isa_flags) ? 2 : 1;
1339 return 0;
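/* 32-bit illustration of the sseregparm handling above (the declaration is
   made up): with SSE enabled, SFmode/DFmode arguments of such a function
   are passed in SSE registers; compiling a call to it without SSE enabled
   triggers the error paths above.

     double __attribute__((sseregparm)) scale (double x, double y);
*/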
1342 /* Return true if EAX is live at the start of the function. Used by
1343 ix86_expand_prologue to determine if we need special help before
1344 calling allocate_stack_worker. */
1346 static bool
1347 ix86_eax_live_at_start_p (void)
1349 /* Cheat. Don't bother working forward from ix86_function_regparm
1350 to the function type to whether an actual argument is located in
1351 eax. Instead just look at cfg info, which is still close enough
1352 to correct at this point. This gives false positives for broken
1353 functions that might use uninitialized data that happens to be
1354 allocated in eax, but who cares? */
1355 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1358 static bool
1359 ix86_keep_aggregate_return_pointer (tree fntype)
1361 tree attr;
1363 if (!TARGET_64BIT)
1365 attr = lookup_attribute ("callee_pop_aggregate_return",
1366 TYPE_ATTRIBUTES (fntype));
1367 if (attr)
1368 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1370 /* For 32-bit MS-ABI the default is to keep aggregate
1371 return pointer. */
1372 if (ix86_function_type_abi (fntype) == MS_ABI)
1373 return true;
1375 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1378 /* Value is the number of bytes of arguments automatically
1379 popped when returning from a subroutine call.
1380 FUNDECL is the declaration node of the function (as a tree),
1381 FUNTYPE is the data type of the function (as a tree),
1382 or for a library call it is an identifier node for the subroutine name.
1383 SIZE is the number of bytes of arguments passed on the stack.
1385 On the 80386, the RTD insn may be used to pop them if the number
1386 of args is fixed, but if the number is variable then the caller
1387 must pop them all. RTD can't be used for library calls now
1388 because the library is compiled with the Unix compiler.
1389 Use of RTD is a selectable option, since it is incompatible with
1390 standard Unix calling sequences. If the option is not selected,
1391 the caller must always pop the args.
1393 The attribute stdcall is equivalent to RTD on a per module basis. */
1395 static poly_int64
1396 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1398 unsigned int ccvt;
1400 /* None of the 64-bit ABIs pop arguments. */
1401 if (TARGET_64BIT)
1402 return 0;
1404 ccvt = ix86_get_callcvt (funtype);
1406 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1407 | IX86_CALLCVT_THISCALL)) != 0
1408 && ! stdarg_p (funtype))
1409 return size;
1411 /* Lose any fake structure return argument if it is passed on the stack. */
1412 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1413 && !ix86_keep_aggregate_return_pointer (funtype))
1415 int nregs = ix86_function_regparm (funtype, fundecl);
1416 if (nregs == 0)
1417 return GET_MODE_SIZE (Pmode);
1420 return 0;
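/* Example (32-bit; the declaration is made up): a stdcall function taking
   two ints has SIZE == 8, so the callee pops its own arguments ("ret $8"),
   while the default cdecl convention returns 0 here and leaves the pop to
   the caller.

     void __attribute__((stdcall)) callback (int a, int b);
*/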
1423 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1425 static bool
1426 ix86_legitimate_combined_insn (rtx_insn *insn)
1428 int i;
1430 /* Check operand constraints in case hard registers were propagated
1431 into the insn pattern. This check prevents the combine pass from
1432 generating insn patterns with invalid hard register operands.
1433 These invalid insns can eventually confuse reload into erroring out
1434 with a spill failure. See also PRs 46829 and 46843. */
1436 gcc_assert (INSN_CODE (insn) >= 0);
1438 extract_insn (insn);
1439 preprocess_constraints (insn);
1441 int n_operands = recog_data.n_operands;
1442 int n_alternatives = recog_data.n_alternatives;
1443 for (i = 0; i < n_operands; i++)
1445 rtx op = recog_data.operand[i];
1446 machine_mode mode = GET_MODE (op);
1447 const operand_alternative *op_alt;
1448 int offset = 0;
1449 bool win;
1450 int j;
1452 /* A unary operator may be accepted by the predicate, but it
1453 is irrelevant for matching constraints. */
1454 if (UNARY_P (op))
1455 op = XEXP (op, 0);
1457 if (SUBREG_P (op))
1459 if (REG_P (SUBREG_REG (op))
1460 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1461 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1462 GET_MODE (SUBREG_REG (op)),
1463 SUBREG_BYTE (op),
1464 GET_MODE (op));
1465 op = SUBREG_REG (op);
1468 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1469 continue;
1471 op_alt = recog_op_alt;
1473 /* Operand has no constraints, anything is OK. */
1474 win = !n_alternatives;
1476 alternative_mask preferred = get_preferred_alternatives (insn);
1477 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1479 if (!TEST_BIT (preferred, j))
1480 continue;
1481 if (op_alt[i].anything_ok
1482 || (op_alt[i].matches != -1
1483 && operands_match_p
1484 (recog_data.operand[i],
1485 recog_data.operand[op_alt[i].matches]))
1486 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1488 win = true;
1489 break;
1493 if (!win)
1494 return false;
1497 return true;
1500 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1502 static unsigned HOST_WIDE_INT
1503 ix86_asan_shadow_offset (void)
1505 return SUBTARGET_SHADOW_OFFSET;
1508 /* Argument support functions. */
1510 /* Return true when register may be used to pass function parameters. */
1511 bool
1512 ix86_function_arg_regno_p (int regno)
1514 int i;
1515 enum calling_abi call_abi;
1516 const int *parm_regs;
1518 if (TARGET_SSE && SSE_REGNO_P (regno)
1519 && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1520 return true;
1522 if (!TARGET_64BIT)
1523 return (regno < REGPARM_MAX
1524 || (TARGET_MMX && MMX_REGNO_P (regno)
1525 && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1527 /* TODO: The function should depend on current function ABI but
1528 builtins.cc would need updating then. Therefore we use the
1529 default ABI. */
1530 call_abi = ix86_cfun_abi ();
1532 /* RAX is used as hidden argument to va_arg functions. */
1533 if (call_abi == SYSV_ABI && regno == AX_REG)
1534 return true;
1536 if (call_abi == MS_ABI)
1537 parm_regs = x86_64_ms_abi_int_parameter_registers;
1538 else
1539 parm_regs = x86_64_int_parameter_registers;
1541 for (i = 0; i < (call_abi == MS_ABI
1542 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1543 if (regno == parm_regs[i])
1544 return true;
1545 return false;
1548 /* Return true if we do not know how to pass ARG solely in registers. */
1550 static bool
1551 ix86_must_pass_in_stack (const function_arg_info &arg)
1553 if (must_pass_in_stack_var_size_or_pad (arg))
1554 return true;
1556 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1557 The layout_type routine is crafty and tries to trick us into passing
1558 currently unsupported vector types on the stack by using TImode. */
1559 return (!TARGET_64BIT && arg.mode == TImode
1560 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1563 /* Return the size, in bytes, of the area reserved for arguments passed
1564 in registers for the function represented by FNDECL, depending on the
1565 ABI format used. */
1566 int
1567 ix86_reg_parm_stack_space (const_tree fndecl)
1569 enum calling_abi call_abi = SYSV_ABI;
1570 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1571 call_abi = ix86_function_abi (fndecl);
1572 else
1573 call_abi = ix86_function_type_abi (fndecl);
1574 if (TARGET_64BIT && call_abi == MS_ABI)
1575 return 32;
1576 return 0;
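/* The 32 bytes returned for the 64-bit MS ABI correspond to the "shadow
   space" (home area) the caller reserves on the stack for the four register
   arguments %rcx, %rdx, %r8 and %r9; the SysV ABI reserves no such area,
   hence 0.  */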
1579 /* We add this as a workaround in order to use libc_has_function
1580 hook in i386.md. */
1581 bool
1582 ix86_libc_has_function (enum function_class fn_class)
1584 return targetm.libc_has_function (fn_class, NULL_TREE);
1587 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
1588 specifying the calling ABI used. */
1589 enum calling_abi
1590 ix86_function_type_abi (const_tree fntype)
1592 enum calling_abi abi = ix86_abi;
1594 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1595 return abi;
1597 if (abi == SYSV_ABI
1598 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1600 static int warned;
1601 if (TARGET_X32 && !warned)
1603 error ("X32 does not support %<ms_abi%> attribute");
1604 warned = 1;
1607 abi = MS_ABI;
1609 else if (abi == MS_ABI
1610 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1611 abi = SYSV_ABI;
1613 return abi;
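/* Illustration of overriding the default ABI per function type, as handled
   above (the declarations are made up):

     int __attribute__((ms_abi))   wincb  (int, int);  // forced MS_ABI
     int __attribute__((sysv_abi)) unixcb (int, int);  // forced SYSV_ABI
*/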
1616 enum calling_abi
1617 ix86_function_abi (const_tree fndecl)
1619 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1622 /* Return SYSV_ABI or MS_ABI, depending on cfun,
1623 specifying the calling ABI used. */
1624 enum calling_abi
1625 ix86_cfun_abi (void)
1627 return cfun ? cfun->machine->call_abi : ix86_abi;
1630 bool
1631 ix86_function_ms_hook_prologue (const_tree fn)
1633 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1635 if (decl_function_context (fn) != NULL_TREE)
1636 error_at (DECL_SOURCE_LOCATION (fn),
1637 "%<ms_hook_prologue%> attribute is not compatible "
1638 "with nested function");
1639 else
1640 return true;
1642 return false;
1645 bool
1646 ix86_function_naked (const_tree fn)
1648 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1649 return true;
1651 return false;
1654 /* Write the extra assembler code needed to declare a function properly. */
1656 void
1657 ix86_asm_output_function_label (FILE *out_file, const char *fname,
1658 tree decl)
1660 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1662 if (cfun)
1663 cfun->machine->function_label_emitted = true;
1665 if (is_ms_hook)
1667 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1668 unsigned int filler_cc = 0xcccccccc;
1670 for (i = 0; i < filler_count; i += 4)
1671 fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
1674 #ifdef SUBTARGET_ASM_UNWIND_INIT
1675 SUBTARGET_ASM_UNWIND_INIT (out_file);
1676 #endif
1678 assemble_function_label_raw (out_file, fname);
1680 /* Output magic byte marker, if hot-patch attribute is set. */
1681 if (is_ms_hook)
1683 if (TARGET_64BIT)
1685 /* leaq [%rsp + 0], %rsp */
1686 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1687 out_file);
1689 else
1691 /* movl.s %edi, %edi
1692 push %ebp
1693 movl.s %esp, %ebp */
1694 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
1699 /* Implementation of the call ABI switching target hook. The call
1700 register sets specific to FNDECL are set up here. See also
1701 ix86_conditional_register_usage for more details. */
1702 void
1703 ix86_call_abi_override (const_tree fndecl)
1705 cfun->machine->call_abi = ix86_function_abi (fndecl);
1708 /* Return true if a pseudo register should be created and used to hold
1709 the GOT address for PIC code. */
1710 bool
1711 ix86_use_pseudo_pic_reg (void)
1713 if ((TARGET_64BIT
1714 && (ix86_cmodel == CM_SMALL_PIC
1715 || TARGET_PECOFF))
1716 || !flag_pic)
1717 return false;
1718 return true;
1721 /* Initialize large model PIC register. */
1723 static void
1724 ix86_init_large_pic_reg (unsigned int tmp_regno)
1726 rtx_code_label *label;
1727 rtx tmp_reg;
1729 gcc_assert (Pmode == DImode);
1730 label = gen_label_rtx ();
1731 emit_label (label);
1732 LABEL_PRESERVE_P (label) = 1;
1733 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1734 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1735 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1736 label));
1737 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1738 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1739 const char *name = LABEL_NAME (label);
1740 PUT_CODE (label, NOTE);
1741 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1742 NOTE_DELETED_LABEL_NAME (label) = name;
1745 /* Create and initialize PIC register if required. */
1746 static void
1747 ix86_init_pic_reg (void)
1749 edge entry_edge;
1750 rtx_insn *seq;
1752 if (!ix86_use_pseudo_pic_reg ())
1753 return;
1755 start_sequence ();
1757 if (TARGET_64BIT)
1759 if (ix86_cmodel == CM_LARGE_PIC)
1760 ix86_init_large_pic_reg (R11_REG);
1761 else
1762 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1764 else
1766 /* If there is a future mcount call in the function, it is more profitable
1767 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1768 rtx reg = crtl->profile
1769 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1770 : pic_offset_table_rtx;
1771 rtx_insn *insn = emit_insn (gen_set_got (reg));
1772 RTX_FRAME_RELATED_P (insn) = 1;
1773 if (crtl->profile)
1774 emit_move_insn (pic_offset_table_rtx, reg);
1775 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1778 seq = get_insns ();
1779 end_sequence ();
1781 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1782 insert_insn_on_edge (seq, entry_edge);
1783 commit_one_edge_insertion (entry_edge);
1786 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1787 for a call to a function whose data type is FNTYPE.
1788 For a library call, FNTYPE is 0. */
1790 void
1791 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1792 tree fntype, /* tree ptr for function decl */
1793 rtx libname, /* SYMBOL_REF of library name or 0 */
1794 tree fndecl,
1795 int caller)
1797 struct cgraph_node *local_info_node = NULL;
1798 struct cgraph_node *target = NULL;
1800 /* Set silent_p to false to raise an error for invalid calls when
1801 expanding function body. */
1802 cfun->machine->silent_p = false;
1804 memset (cum, 0, sizeof (*cum));
1806 if (fndecl)
1808 target = cgraph_node::get (fndecl);
1809 if (target)
1811 target = target->function_symbol ();
1812 local_info_node = cgraph_node::local_info_node (target->decl);
1813 cum->call_abi = ix86_function_abi (target->decl);
1815 else
1816 cum->call_abi = ix86_function_abi (fndecl);
1818 else
1819 cum->call_abi = ix86_function_type_abi (fntype);
1821 cum->caller = caller;
1823 /* Set up the number of registers to use for passing arguments. */
1824 cum->nregs = ix86_regparm;
1825 if (TARGET_64BIT)
1827 cum->nregs = (cum->call_abi == SYSV_ABI
1828 ? X86_64_REGPARM_MAX
1829 : X86_64_MS_REGPARM_MAX);
1831 if (TARGET_SSE)
1833 cum->sse_nregs = SSE_REGPARM_MAX;
1834 if (TARGET_64BIT)
1836 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1837 ? X86_64_SSE_REGPARM_MAX
1838 : X86_64_MS_SSE_REGPARM_MAX);
1841 if (TARGET_MMX)
1842 cum->mmx_nregs = MMX_REGPARM_MAX;
1843 cum->warn_avx512f = true;
1844 cum->warn_avx = true;
1845 cum->warn_sse = true;
1846 cum->warn_mmx = true;
1848 /* Because the type might mismatch between caller and callee, we need to
1849 use the actual type of the function for local calls.
1850 FIXME: cgraph_analyze can be told to actually record if a function uses
1851 va_start, so for local functions maybe_vaarg can be made aggressive,
1852 helping K&R code.
1853 FIXME: once the typesystem is fixed, we won't need this code anymore. */
1854 if (local_info_node && local_info_node->local
1855 && local_info_node->can_change_signature)
1856 fntype = TREE_TYPE (target->decl);
1857 cum->stdarg = stdarg_p (fntype);
1858 cum->maybe_vaarg = (fntype
1859 ? (!prototype_p (fntype) || stdarg_p (fntype))
1860 : !libname);
1862 cum->decl = fndecl;
1864 cum->warn_empty = !warn_abi || cum->stdarg;
1865 if (!cum->warn_empty && fntype)
1867 function_args_iterator iter;
1868 tree argtype;
1869 bool seen_empty_type = false;
1870 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1872 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1873 break;
1874 if (TYPE_EMPTY_P (argtype))
1875 seen_empty_type = true;
1876 else if (seen_empty_type)
1878 cum->warn_empty = true;
1879 break;
1884 if (!TARGET_64BIT)
1886 /* If there are variable arguments, then we won't pass anything
1887 in registers in 32-bit mode. */
1888 if (stdarg_p (fntype))
1890 cum->nregs = 0;
1891 /* Since in 32-bit mode variable arguments are always passed on
1892 the stack, there is a scratch register available for an indirect
1893 sibcall. */
1894 cfun->machine->arg_reg_available = true;
1895 cum->sse_nregs = 0;
1896 cum->mmx_nregs = 0;
1897 cum->warn_avx512f = false;
1898 cum->warn_avx = false;
1899 cum->warn_sse = false;
1900 cum->warn_mmx = false;
1901 return;
1904 /* Use ecx and edx registers if function has fastcall attribute,
1905 else look for regparm information. */
1906 if (fntype)
1908 unsigned int ccvt = ix86_get_callcvt (fntype);
1909 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1911 cum->nregs = 1;
1912 cum->fastcall = 1; /* Same first register as in fastcall. */
1914 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1916 cum->nregs = 2;
1917 cum->fastcall = 1;
1919 else
1920 cum->nregs = ix86_function_regparm (fntype, fndecl);
1923 /* Set up the number of SSE registers used for passing SFmode
1924 and DFmode arguments. Warn for mismatching ABI. */
1925 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1928 cfun->machine->arg_reg_available = (cum->nregs > 0);
1931 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1932 But in the case of vector types, it is some vector mode.
1934 When we have only some of our vector isa extensions enabled, then there
1935 are some modes for which vector_mode_supported_p is false. For these
1936 modes, the generic vector support in gcc will choose some non-vector mode
1937 in order to implement the type. By computing the natural mode, we'll
1938 select the proper ABI location for the operand and not depend on whatever
1939 the middle-end decides to do with these vector types.
1941 The middle-end can't deal with vector types > 16 bytes. In this
1942 case, we return the original mode and warn about the ABI change if CUM
1943 isn't NULL.
1945 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
1946 available for the function return value. */
1948 static machine_mode
1949 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1950 bool in_return)
1952 machine_mode mode = TYPE_MODE (type);
1954 if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
1956 HOST_WIDE_INT size = int_size_in_bytes (type);
1957 if ((size == 8 || size == 16 || size == 32 || size == 64)
1958 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1959 && TYPE_VECTOR_SUBPARTS (type) > 1)
1961 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1963 /* There are no XFmode vector modes ... */
1964 if (innermode == XFmode)
1965 return mode;
1967 /* ... and no decimal float vector modes. */
1968 if (DECIMAL_FLOAT_MODE_P (innermode))
1969 return mode;
1971 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
1972 mode = MIN_MODE_VECTOR_FLOAT;
1973 else
1974 mode = MIN_MODE_VECTOR_INT;
1976 /* Get the mode which has this inner mode and number of units. */
1977 FOR_EACH_MODE_FROM (mode, mode)
1978 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1979 && GET_MODE_INNER (mode) == innermode)
1981 if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
1982 && !TARGET_IAMCU)
1984 static bool warnedavx512f;
1985 static bool warnedavx512f_ret;
1987 if (cum && cum->warn_avx512f && !warnedavx512f)
1989 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1990 "without AVX512F enabled changes the ABI"))
1991 warnedavx512f = true;
1993 else if (in_return && !warnedavx512f_ret)
1995 if (warning (OPT_Wpsabi, "AVX512F vector return "
1996 "without AVX512F enabled changes the ABI"))
1997 warnedavx512f_ret = true;
2000 return TYPE_MODE (type);
2002 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
2004 static bool warnedavx;
2005 static bool warnedavx_ret;
2007 if (cum && cum->warn_avx && !warnedavx)
2009 if (warning (OPT_Wpsabi, "AVX vector argument "
2010 "without AVX enabled changes the ABI"))
2011 warnedavx = true;
2013 else if (in_return && !warnedavx_ret)
2015 if (warning (OPT_Wpsabi, "AVX vector return "
2016 "without AVX enabled changes the ABI"))
2017 warnedavx_ret = true;
2020 return TYPE_MODE (type);
2022 else if (((size == 8 && TARGET_64BIT) || size == 16)
2023 && !TARGET_SSE
2024 && !TARGET_IAMCU)
2026 static bool warnedsse;
2027 static bool warnedsse_ret;
2029 if (cum && cum->warn_sse && !warnedsse)
2031 if (warning (OPT_Wpsabi, "SSE vector argument "
2032 "without SSE enabled changes the ABI"))
2033 warnedsse = true;
2035 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
2037 if (warning (OPT_Wpsabi, "SSE vector return "
2038 "without SSE enabled changes the ABI"))
2039 warnedsse_ret = true;
2042 else if ((size == 8 && !TARGET_64BIT)
2043 && (!cfun
2044 || cfun->machine->func_type == TYPE_NORMAL)
2045 && !TARGET_MMX
2046 && !TARGET_IAMCU)
2048 static bool warnedmmx;
2049 static bool warnedmmx_ret;
2051 if (cum && cum->warn_mmx && !warnedmmx)
2053 if (warning (OPT_Wpsabi, "MMX vector argument "
2054 "without MMX enabled changes the ABI"))
2055 warnedmmx = true;
2057 else if (in_return && !warnedmmx_ret)
2059 if (warning (OPT_Wpsabi, "MMX vector return "
2060 "without MMX enabled changes the ABI"))
2061 warnedmmx_ret = true;
2064 return mode;
2067 gcc_unreachable ();
2071 return mode;
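/* For illustration: for a GNU vector type such as
       typedef int v4si __attribute__ ((vector_size (16)));
   this returns V4SImode even when SSE is disabled, so the argument is still
   classified as an SSE-class value; the -Wpsabi note above records that the
   resulting ABI differs from what a compiler without vector support would
   use.  */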
2074 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2075 this may not agree with the mode that the type system has chosen for the
2076 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2077 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2079 static rtx
2080 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2081 unsigned int regno)
2083 rtx tmp;
2085 if (orig_mode != BLKmode)
2086 tmp = gen_rtx_REG (orig_mode, regno);
2087 else
2089 tmp = gen_rtx_REG (mode, regno);
2090 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2091 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2094 return tmp;
2097 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2098 of this code is to classify each 8bytes of incoming argument by the register
2099 class and assign registers accordingly. */
2101 /* Return the union class of CLASS1 and CLASS2.
2102 See the x86-64 PS ABI for details. */
2104 static enum x86_64_reg_class
2105 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2107 /* Rule #1: If both classes are equal, this is the resulting class. */
2108 if (class1 == class2)
2109 return class1;
2111 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2112 the other class. */
2113 if (class1 == X86_64_NO_CLASS)
2114 return class2;
2115 if (class2 == X86_64_NO_CLASS)
2116 return class1;
2118 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2119 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2120 return X86_64_MEMORY_CLASS;
2122 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2123 if ((class1 == X86_64_INTEGERSI_CLASS
2124 && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2125 || (class2 == X86_64_INTEGERSI_CLASS
2126 && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2127 return X86_64_INTEGERSI_CLASS;
2128 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2129 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2130 return X86_64_INTEGER_CLASS;
2132 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2133 MEMORY is used. */
2134 if (class1 == X86_64_X87_CLASS
2135 || class1 == X86_64_X87UP_CLASS
2136 || class1 == X86_64_COMPLEX_X87_CLASS
2137 || class2 == X86_64_X87_CLASS
2138 || class2 == X86_64_X87UP_CLASS
2139 || class2 == X86_64_COMPLEX_X87_CLASS)
2140 return X86_64_MEMORY_CLASS;
2142 /* Rule #6: Otherwise class SSE is used. */
2143 return X86_64_SSE_CLASS;
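/* For illustration, a few concrete merges:
     merge_classes (X86_64_NO_CLASS, X86_64_SSEDF_CLASS)         -> SSEDF     (rule #2)
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)  -> INTEGERSI (rule #4)
     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS)    -> INTEGER   (rule #4)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)          -> MEMORY    (rule #5)
     merge_classes (X86_64_SSESF_CLASS, X86_64_SSEDF_CLASS)      -> SSE       (rule #6)  */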
2146 /* Classify the argument of type TYPE and mode MODE.
2147 CLASSES will be filled by the register class used to pass each word
2148 of the operand. The number of words is returned. In case the parameter
2149 should be passed in memory, 0 is returned. As a special case for zero
2150 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2152 BIT_OFFSET is used internally for handling records and specifies the
2153 offset in bits modulo 512 to avoid overflow cases.
2155 See the x86-64 PS ABI for details.
2158 static int
2159 classify_argument (machine_mode mode, const_tree type,
2160 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2161 int &zero_width_bitfields)
2163 HOST_WIDE_INT bytes
2164 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2165 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2167 /* Variable sized entities are always passed/returned in memory. */
2168 if (bytes < 0)
2169 return 0;
2171 if (mode != VOIDmode)
2173 /* The value of "named" doesn't matter. */
2174 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2175 if (targetm.calls.must_pass_in_stack (arg))
2176 return 0;
2179 if (type && (AGGREGATE_TYPE_P (type)
2180 || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
2182 int i;
2183 tree field;
2184 enum x86_64_reg_class subclasses[MAX_CLASSES];
2186 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2187 if (bytes > 64)
2188 return 0;
2190 for (i = 0; i < words; i++)
2191 classes[i] = X86_64_NO_CLASS;
2193 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2194 signal the memory class, so handle this as a special case. */
2195 if (!words)
2197 classes[0] = X86_64_NO_CLASS;
2198 return 1;
2201 /* Classify each field of record and merge classes. */
2202 switch (TREE_CODE (type))
2204 case RECORD_TYPE:
2205 /* And now merge the fields of structure. */
2206 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2208 if (TREE_CODE (field) == FIELD_DECL)
2210 int num;
2212 if (TREE_TYPE (field) == error_mark_node)
2213 continue;
2215 /* Bitfields are always classified as integer. Handle them
2216 early, since later code would consider them to be
2217 misaligned integers. */
2218 if (DECL_BIT_FIELD (field))
2220 if (integer_zerop (DECL_SIZE (field)))
2222 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2223 continue;
2224 if (zero_width_bitfields != 2)
2226 zero_width_bitfields = 1;
2227 continue;
2230 for (i = (int_bit_position (field)
2231 + (bit_offset % 64)) / 8 / 8;
2232 i < ((int_bit_position (field) + (bit_offset % 64))
2233 + tree_to_shwi (DECL_SIZE (field))
2234 + 63) / 8 / 8; i++)
2235 classes[i]
2236 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2238 else
2240 int pos;
2242 type = TREE_TYPE (field);
2244 /* Flexible array member is ignored. */
2245 if (TYPE_MODE (type) == BLKmode
2246 && TREE_CODE (type) == ARRAY_TYPE
2247 && TYPE_SIZE (type) == NULL_TREE
2248 && TYPE_DOMAIN (type) != NULL_TREE
2249 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2250 == NULL_TREE))
2252 static bool warned;
2254 if (!warned && warn_psabi)
2256 warned = true;
2257 inform (input_location,
2258 "the ABI of passing struct with"
2259 " a flexible array member has"
2260 " changed in GCC 4.4");
2262 continue;
2264 num = classify_argument (TYPE_MODE (type), type,
2265 subclasses,
2266 (int_bit_position (field)
2267 + bit_offset) % 512,
2268 zero_width_bitfields);
2269 if (!num)
2270 return 0;
2271 pos = (int_bit_position (field)
2272 + (bit_offset % 64)) / 8 / 8;
2273 for (i = 0; i < num && (i + pos) < words; i++)
2274 classes[i + pos]
2275 = merge_classes (subclasses[i], classes[i + pos]);
2279 break;
2281 case ARRAY_TYPE:
2282 /* Arrays are handled as small records. */
2284 int num;
2285 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2286 TREE_TYPE (type), subclasses, bit_offset,
2287 zero_width_bitfields);
2288 if (!num)
2289 return 0;
2291 /* The partial classes are now full classes. */
2292 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2293 subclasses[0] = X86_64_SSE_CLASS;
2294 if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2295 subclasses[0] = X86_64_SSE_CLASS;
2296 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2297 && !((bit_offset % 64) == 0 && bytes == 4))
2298 subclasses[0] = X86_64_INTEGER_CLASS;
2300 for (i = 0; i < words; i++)
2301 classes[i] = subclasses[i % num];
2303 break;
2305 case UNION_TYPE:
2306 case QUAL_UNION_TYPE:
2307 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
2309 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2311 if (TREE_CODE (field) == FIELD_DECL)
2313 int num;
2315 if (TREE_TYPE (field) == error_mark_node)
2316 continue;
2318 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2319 TREE_TYPE (field), subclasses,
2320 bit_offset, zero_width_bitfields);
2321 if (!num)
2322 return 0;
2323 for (i = 0; i < num && i < words; i++)
2324 classes[i] = merge_classes (subclasses[i], classes[i]);
2327 break;
2329 case BITINT_TYPE:
2330 /* _BitInt(N) for N > 64 is passed as structure containing
2331 (N + 63) / 64 64-bit elements. */
2332 if (words > 2)
2333 return 0;
2334 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2335 return 2;
2337 default:
2338 gcc_unreachable ();
2341 if (words > 2)
2343 /* When size > 16 bytes, if the first one isn't
2344 X86_64_SSE_CLASS or any other ones aren't
2345 X86_64_SSEUP_CLASS, everything should be passed in
2346 memory. */
2347 if (classes[0] != X86_64_SSE_CLASS)
2348 return 0;
2350 for (i = 1; i < words; i++)
2351 if (classes[i] != X86_64_SSEUP_CLASS)
2352 return 0;
2355 /* Final merger cleanup. */
2356 for (i = 0; i < words; i++)
2358 /* If one class is MEMORY, everything should be passed in
2359 memory. */
2360 if (classes[i] == X86_64_MEMORY_CLASS)
2361 return 0;
2363 /* The X86_64_SSEUP_CLASS should be always preceded by
2364 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2365 if (classes[i] == X86_64_SSEUP_CLASS
2366 && classes[i - 1] != X86_64_SSE_CLASS
2367 && classes[i - 1] != X86_64_SSEUP_CLASS)
2369 /* The first one should never be X86_64_SSEUP_CLASS. */
2370 gcc_assert (i != 0);
2371 classes[i] = X86_64_SSE_CLASS;
2374 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2375 everything should be passed in memory. */
2376 if (classes[i] == X86_64_X87UP_CLASS
2377 && (classes[i - 1] != X86_64_X87_CLASS))
2379 static bool warned;
2381 /* The first one should never be X86_64_X87UP_CLASS. */
2382 gcc_assert (i != 0);
2383 if (!warned && warn_psabi)
2385 warned = true;
2386 inform (input_location,
2387 "the ABI of passing union with %<long double%>"
2388 " has changed in GCC 4.4");
2390 return 0;
2393 return words;
2396 /* Compute the alignment needed. We align all types to their natural
2397 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
2398 if (mode != VOIDmode && mode != BLKmode)
2400 int mode_alignment = GET_MODE_BITSIZE (mode);
2402 if (mode == XFmode)
2403 mode_alignment = 128;
2404 else if (mode == XCmode)
2405 mode_alignment = 256;
2406 if (COMPLEX_MODE_P (mode))
2407 mode_alignment /= 2;
2408 /* Misaligned fields are always returned in memory. */
2409 if (bit_offset % mode_alignment)
2410 return 0;
2413 /* For V1xx modes, just use the base mode.  */
2414 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2415 && GET_MODE_UNIT_SIZE (mode) == bytes)
2416 mode = GET_MODE_INNER (mode);
2418 /* Classification of atomic types. */
2419 switch (mode)
2421 case E_SDmode:
2422 case E_DDmode:
2423 classes[0] = X86_64_SSE_CLASS;
2424 return 1;
2425 case E_TDmode:
2426 classes[0] = X86_64_SSE_CLASS;
2427 classes[1] = X86_64_SSEUP_CLASS;
2428 return 2;
2429 case E_DImode:
2430 case E_SImode:
2431 case E_HImode:
2432 case E_QImode:
2433 case E_CSImode:
2434 case E_CHImode:
2435 case E_CQImode:
2437 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2439 /* Analyze last 128 bits only. */
2440 size = (size - 1) & 0x7f;
2442 if (size < 32)
2444 classes[0] = X86_64_INTEGERSI_CLASS;
2445 return 1;
2447 else if (size < 64)
2449 classes[0] = X86_64_INTEGER_CLASS;
2450 return 1;
2452 else if (size < 64+32)
2454 classes[0] = X86_64_INTEGER_CLASS;
2455 classes[1] = X86_64_INTEGERSI_CLASS;
2456 return 2;
2458 else if (size < 64+64)
2460 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2461 return 2;
2463 else
2464 gcc_unreachable ();
2466 case E_CDImode:
2467 case E_TImode:
2468 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2469 return 2;
2470 case E_COImode:
2471 case E_OImode:
2472 /* OImode shouldn't be used directly. */
2473 gcc_unreachable ();
2474 case E_CTImode:
2475 return 0;
2476 case E_HFmode:
2477 case E_BFmode:
2478 if (!(bit_offset % 64))
2479 classes[0] = X86_64_SSEHF_CLASS;
2480 else
2481 classes[0] = X86_64_SSE_CLASS;
2482 return 1;
2483 case E_SFmode:
2484 if (!(bit_offset % 64))
2485 classes[0] = X86_64_SSESF_CLASS;
2486 else
2487 classes[0] = X86_64_SSE_CLASS;
2488 return 1;
2489 case E_DFmode:
2490 classes[0] = X86_64_SSEDF_CLASS;
2491 return 1;
2492 case E_XFmode:
2493 classes[0] = X86_64_X87_CLASS;
2494 classes[1] = X86_64_X87UP_CLASS;
2495 return 2;
2496 case E_TFmode:
2497 classes[0] = X86_64_SSE_CLASS;
2498 classes[1] = X86_64_SSEUP_CLASS;
2499 return 2;
2500 case E_HCmode:
2501 case E_BCmode:
2502 classes[0] = X86_64_SSE_CLASS;
2503 if (!(bit_offset % 64))
2504 return 1;
2505 else
2507 classes[1] = X86_64_SSEHF_CLASS;
2508 return 2;
2510 case E_SCmode:
2511 classes[0] = X86_64_SSE_CLASS;
2512 if (!(bit_offset % 64))
2513 return 1;
2514 else
2516 static bool warned;
2518 if (!warned && warn_psabi)
2520 warned = true;
2521 inform (input_location,
2522 "the ABI of passing structure with %<complex float%>"
2523 " member has changed in GCC 4.4");
2525 classes[1] = X86_64_SSESF_CLASS;
2526 return 2;
2528 case E_DCmode:
2529 classes[0] = X86_64_SSEDF_CLASS;
2530 classes[1] = X86_64_SSEDF_CLASS;
2531 return 2;
2532 case E_XCmode:
2533 classes[0] = X86_64_COMPLEX_X87_CLASS;
2534 return 1;
2535 case E_TCmode:
2536 /* This mode is larger than 16 bytes. */
2537 return 0;
2538 case E_V8SFmode:
2539 case E_V8SImode:
2540 case E_V32QImode:
2541 case E_V16HFmode:
2542 case E_V16BFmode:
2543 case E_V16HImode:
2544 case E_V4DFmode:
2545 case E_V4DImode:
2546 classes[0] = X86_64_SSE_CLASS;
2547 classes[1] = X86_64_SSEUP_CLASS;
2548 classes[2] = X86_64_SSEUP_CLASS;
2549 classes[3] = X86_64_SSEUP_CLASS;
2550 return 4;
2551 case E_V8DFmode:
2552 case E_V16SFmode:
2553 case E_V32HFmode:
2554 case E_V32BFmode:
2555 case E_V8DImode:
2556 case E_V16SImode:
2557 case E_V32HImode:
2558 case E_V64QImode:
2559 classes[0] = X86_64_SSE_CLASS;
2560 classes[1] = X86_64_SSEUP_CLASS;
2561 classes[2] = X86_64_SSEUP_CLASS;
2562 classes[3] = X86_64_SSEUP_CLASS;
2563 classes[4] = X86_64_SSEUP_CLASS;
2564 classes[5] = X86_64_SSEUP_CLASS;
2565 classes[6] = X86_64_SSEUP_CLASS;
2566 classes[7] = X86_64_SSEUP_CLASS;
2567 return 8;
2568 case E_V4SFmode:
2569 case E_V4SImode:
2570 case E_V16QImode:
2571 case E_V8HImode:
2572 case E_V8HFmode:
2573 case E_V8BFmode:
2574 case E_V2DFmode:
2575 case E_V2DImode:
2576 classes[0] = X86_64_SSE_CLASS;
2577 classes[1] = X86_64_SSEUP_CLASS;
2578 return 2;
2579 case E_V1TImode:
2580 case E_V1DImode:
2581 case E_V2SFmode:
2582 case E_V2SImode:
2583 case E_V4HImode:
2584 case E_V4HFmode:
2585 case E_V4BFmode:
2586 case E_V2HFmode:
2587 case E_V2BFmode:
2588 case E_V8QImode:
2589 classes[0] = X86_64_SSE_CLASS;
2590 return 1;
2591 case E_BLKmode:
2592 case E_VOIDmode:
2593 return 0;
2594 default:
2595 gcc_assert (VECTOR_MODE_P (mode));
2597 if (bytes > 16)
2598 return 0;
2600 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2602 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2603 classes[0] = X86_64_INTEGERSI_CLASS;
2604 else
2605 classes[0] = X86_64_INTEGER_CLASS;
2606 classes[1] = X86_64_INTEGER_CLASS;
2607 return 1 + (bytes > 8);
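/* For illustration, some example classifications under the rules above:
     struct { double d; long l; }  -> { SSEDF, INTEGER }, returns 2
                                      (one SSE plus one integer eightbyte);
     struct { float f[5]; }        -> 20 bytes, three eightbytes of class SSE,
                                      rejected by the size > 16 SSE/SSEUP rule,
                                      so 0 is returned (passed in memory);
     __int128                      -> { INTEGER, INTEGER }, returns 2.  */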
2611 /* Wrapper around classify_argument with the extra zero_width_bitfields
2612 argument, to diagnose GCC 12.1 ABI differences for C. */
2614 static int
2615 classify_argument (machine_mode mode, const_tree type,
2616 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2618 int zero_width_bitfields = 0;
2619 static bool warned = false;
2620 int n = classify_argument (mode, type, classes, bit_offset,
2621 zero_width_bitfields);
2622 if (!zero_width_bitfields || warned || !warn_psabi)
2623 return n;
2624 enum x86_64_reg_class alt_classes[MAX_CLASSES];
2625 zero_width_bitfields = 2;
2626 if (classify_argument (mode, type, alt_classes, bit_offset,
2627 zero_width_bitfields) != n)
2628 zero_width_bitfields = 3;
2629 else
2630 for (int i = 0; i < n; i++)
2631 if (classes[i] != alt_classes[i])
2633 zero_width_bitfields = 3;
2634 break;
2636 if (zero_width_bitfields == 3)
2638 warned = true;
2639 const char *url
2640 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2642 inform (input_location,
2643 "the ABI of passing C structures with zero-width bit-fields"
2644 " has changed in GCC %{12.1%}", url);
2646 return n;
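/* For illustration (a hypothetical C example): a type such as
       struct s { float a; int : 0; float b; };
   is classified as SSE when the zero-width bit-field is ignored but as
   INTEGER when it is counted, which is the GCC 12.1 C ABI difference the
   wrapper above diagnoses with -Wpsabi.  */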
2649 /* Examine the argument and return set number of register required in each
2650 class. Return true iff parameter should be passed in memory. */
2652 static bool
2653 examine_argument (machine_mode mode, const_tree type, int in_return,
2654 int *int_nregs, int *sse_nregs)
2656 enum x86_64_reg_class regclass[MAX_CLASSES];
2657 int n = classify_argument (mode, type, regclass, 0);
2659 *int_nregs = 0;
2660 *sse_nregs = 0;
2662 if (!n)
2663 return true;
2664 for (n--; n >= 0; n--)
2665 switch (regclass[n])
2667 case X86_64_INTEGER_CLASS:
2668 case X86_64_INTEGERSI_CLASS:
2669 (*int_nregs)++;
2670 break;
2671 case X86_64_SSE_CLASS:
2672 case X86_64_SSEHF_CLASS:
2673 case X86_64_SSESF_CLASS:
2674 case X86_64_SSEDF_CLASS:
2675 (*sse_nregs)++;
2676 break;
2677 case X86_64_NO_CLASS:
2678 case X86_64_SSEUP_CLASS:
2679 break;
2680 case X86_64_X87_CLASS:
2681 case X86_64_X87UP_CLASS:
2682 case X86_64_COMPLEX_X87_CLASS:
2683 if (!in_return)
2684 return true;
2685 break;
2686 case X86_64_MEMORY_CLASS:
2687 gcc_unreachable ();
2690 return false;
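/* For illustration: for struct { double d; long l; } the classes are
   { SSEDF, INTEGER }, so *sse_nregs = 1, *int_nregs = 1 and false is
   returned (register passing is possible); for struct { long double x; }
   the X87/X87UP classes force a return of true ("pass in memory") unless
   IN_RETURN is set.  */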
2693 /* Construct container for the argument used by GCC interface. See
2694 FUNCTION_ARG for the detailed description. */
2696 static rtx
2697 construct_container (machine_mode mode, machine_mode orig_mode,
2698 const_tree type, int in_return, int nintregs, int nsseregs,
2699 const int *intreg, int sse_regno)
2701 /* The following variables hold the static issued_error state. */
2702 static bool issued_sse_arg_error;
2703 static bool issued_sse_ret_error;
2704 static bool issued_x87_ret_error;
2706 machine_mode tmpmode;
2707 int bytes
2708 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2709 enum x86_64_reg_class regclass[MAX_CLASSES];
2710 int n;
2711 int i;
2712 int nexps = 0;
2713 int needed_sseregs, needed_intregs;
2714 rtx exp[MAX_CLASSES];
2715 rtx ret;
2717 n = classify_argument (mode, type, regclass, 0);
2718 if (!n)
2719 return NULL;
2720 if (examine_argument (mode, type, in_return, &needed_intregs,
2721 &needed_sseregs))
2722 return NULL;
2723 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2724 return NULL;
2726 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2727 some less clueful developer tries to use floating-point anyway. */
2728 if (needed_sseregs
2729 && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
2731 /* Return early if we shouldn't raise an error for invalid
2732 calls. */
2733 if (cfun != NULL && cfun->machine->silent_p)
2734 return NULL;
2735 if (in_return)
2737 if (!issued_sse_ret_error)
2739 if (VALID_SSE2_TYPE_MODE (mode))
2740 error ("SSE register return with SSE2 disabled");
2741 else
2742 error ("SSE register return with SSE disabled");
2743 issued_sse_ret_error = true;
2746 else if (!issued_sse_arg_error)
2748 if (VALID_SSE2_TYPE_MODE (mode))
2749 error ("SSE register argument with SSE2 disabled");
2750 else
2751 error ("SSE register argument with SSE disabled");
2752 issued_sse_arg_error = true;
2754 return NULL;
2757 /* Likewise, error if the ABI requires us to return values in the
2758 x87 registers and the user specified -mno-80387. */
2759 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2760 for (i = 0; i < n; i++)
2761 if (regclass[i] == X86_64_X87_CLASS
2762 || regclass[i] == X86_64_X87UP_CLASS
2763 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2765 /* Return early if we shouldn't raise an error for invalid
2766 calls. */
2767 if (cfun != NULL && cfun->machine->silent_p)
2768 return NULL;
2769 if (!issued_x87_ret_error)
2771 error ("x87 register return with x87 disabled");
2772 issued_x87_ret_error = true;
2774 return NULL;
2777 /* First construct the simple cases. Avoid SCmode, since we want to use
2778 a single register to pass this type. */
2779 if (n == 1 && mode != SCmode && mode != HCmode)
2780 switch (regclass[0])
2782 case X86_64_INTEGER_CLASS:
2783 case X86_64_INTEGERSI_CLASS:
2784 return gen_rtx_REG (mode, intreg[0]);
2785 case X86_64_SSE_CLASS:
2786 case X86_64_SSEHF_CLASS:
2787 case X86_64_SSESF_CLASS:
2788 case X86_64_SSEDF_CLASS:
2789 if (mode != BLKmode)
2790 return gen_reg_or_parallel (mode, orig_mode,
2791 GET_SSE_REGNO (sse_regno));
2792 break;
2793 case X86_64_X87_CLASS:
2794 case X86_64_COMPLEX_X87_CLASS:
2795 return gen_rtx_REG (mode, FIRST_STACK_REG);
2796 case X86_64_NO_CLASS:
2797 /* Zero sized array, struct or class. */
2798 return NULL;
2799 default:
2800 gcc_unreachable ();
2802 if (n == 2
2803 && regclass[0] == X86_64_SSE_CLASS
2804 && regclass[1] == X86_64_SSEUP_CLASS
2805 && mode != BLKmode)
2806 return gen_reg_or_parallel (mode, orig_mode,
2807 GET_SSE_REGNO (sse_regno));
2808 if (n == 4
2809 && regclass[0] == X86_64_SSE_CLASS
2810 && regclass[1] == X86_64_SSEUP_CLASS
2811 && regclass[2] == X86_64_SSEUP_CLASS
2812 && regclass[3] == X86_64_SSEUP_CLASS
2813 && mode != BLKmode)
2814 return gen_reg_or_parallel (mode, orig_mode,
2815 GET_SSE_REGNO (sse_regno));
2816 if (n == 8
2817 && regclass[0] == X86_64_SSE_CLASS
2818 && regclass[1] == X86_64_SSEUP_CLASS
2819 && regclass[2] == X86_64_SSEUP_CLASS
2820 && regclass[3] == X86_64_SSEUP_CLASS
2821 && regclass[4] == X86_64_SSEUP_CLASS
2822 && regclass[5] == X86_64_SSEUP_CLASS
2823 && regclass[6] == X86_64_SSEUP_CLASS
2824 && regclass[7] == X86_64_SSEUP_CLASS
2825 && mode != BLKmode)
2826 return gen_reg_or_parallel (mode, orig_mode,
2827 GET_SSE_REGNO (sse_regno));
2828 if (n == 2
2829 && regclass[0] == X86_64_X87_CLASS
2830 && regclass[1] == X86_64_X87UP_CLASS)
2831 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2833 if (n == 2
2834 && regclass[0] == X86_64_INTEGER_CLASS
2835 && regclass[1] == X86_64_INTEGER_CLASS
2836 && (mode == CDImode || mode == TImode || mode == BLKmode)
2837 && intreg[0] + 1 == intreg[1])
2839 if (mode == BLKmode)
2841 /* Use TImode for BLKmode values in 2 integer registers. */
2842 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2843 gen_rtx_REG (TImode, intreg[0]),
2844 GEN_INT (0));
2845 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2846 XVECEXP (ret, 0, 0) = exp[0];
2847 return ret;
2849 else
2850 return gen_rtx_REG (mode, intreg[0]);
2853 /* Otherwise figure out the entries of the PARALLEL. */
2854 for (i = 0; i < n; i++)
2856 int pos;
2858 switch (regclass[i])
2860 case X86_64_NO_CLASS:
2861 break;
2862 case X86_64_INTEGER_CLASS:
2863 case X86_64_INTEGERSI_CLASS:
2864 /* Merge TImodes on aligned occasions here too. */
2865 if (i * 8 + 8 > bytes)
2867 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2868 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2869 /* We've requested a width (e.g. 24 bits) for which
2870 there is no integer mode. Use DImode. */
2871 tmpmode = DImode;
2873 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2874 tmpmode = SImode;
2875 else
2876 tmpmode = DImode;
2877 exp [nexps++]
2878 = gen_rtx_EXPR_LIST (VOIDmode,
2879 gen_rtx_REG (tmpmode, *intreg),
2880 GEN_INT (i*8));
2881 intreg++;
2882 break;
2883 case X86_64_SSEHF_CLASS:
2884 tmpmode = (mode == BFmode ? BFmode : HFmode);
2885 exp [nexps++]
2886 = gen_rtx_EXPR_LIST (VOIDmode,
2887 gen_rtx_REG (tmpmode,
2888 GET_SSE_REGNO (sse_regno)),
2889 GEN_INT (i*8));
2890 sse_regno++;
2891 break;
2892 case X86_64_SSESF_CLASS:
2893 exp [nexps++]
2894 = gen_rtx_EXPR_LIST (VOIDmode,
2895 gen_rtx_REG (SFmode,
2896 GET_SSE_REGNO (sse_regno)),
2897 GEN_INT (i*8));
2898 sse_regno++;
2899 break;
2900 case X86_64_SSEDF_CLASS:
2901 exp [nexps++]
2902 = gen_rtx_EXPR_LIST (VOIDmode,
2903 gen_rtx_REG (DFmode,
2904 GET_SSE_REGNO (sse_regno)),
2905 GEN_INT (i*8));
2906 sse_regno++;
2907 break;
2908 case X86_64_SSE_CLASS:
2909 pos = i;
2910 switch (n)
2912 case 1:
2913 tmpmode = DImode;
2914 break;
2915 case 2:
2916 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2918 tmpmode = TImode;
2919 i++;
2921 else
2922 tmpmode = DImode;
2923 break;
2924 case 4:
2925 gcc_assert (i == 0
2926 && regclass[1] == X86_64_SSEUP_CLASS
2927 && regclass[2] == X86_64_SSEUP_CLASS
2928 && regclass[3] == X86_64_SSEUP_CLASS);
2929 tmpmode = OImode;
2930 i += 3;
2931 break;
2932 case 8:
2933 gcc_assert (i == 0
2934 && regclass[1] == X86_64_SSEUP_CLASS
2935 && regclass[2] == X86_64_SSEUP_CLASS
2936 && regclass[3] == X86_64_SSEUP_CLASS
2937 && regclass[4] == X86_64_SSEUP_CLASS
2938 && regclass[5] == X86_64_SSEUP_CLASS
2939 && regclass[6] == X86_64_SSEUP_CLASS
2940 && regclass[7] == X86_64_SSEUP_CLASS);
2941 tmpmode = XImode;
2942 i += 7;
2943 break;
2944 default:
2945 gcc_unreachable ();
2947 exp [nexps++]
2948 = gen_rtx_EXPR_LIST (VOIDmode,
2949 gen_rtx_REG (tmpmode,
2950 GET_SSE_REGNO (sse_regno)),
2951 GEN_INT (pos*8));
2952 sse_regno++;
2953 break;
2954 default:
2955 gcc_unreachable ();
2959 /* Empty aligned struct, union or class. */
2960 if (nexps == 0)
2961 return NULL;
2963 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2964 for (i = 0; i < nexps; i++)
2965 XVECEXP (ret, 0, i) = exp [i];
2966 return ret;
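/* For illustration (a sketch; register numbers assume the first argument
   slot of a SysV x86-64 call): for struct { double d; long l; } the result
   is roughly
       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])
   i.e. the first eightbyte travels in an SSE register and the second in an
   integer register.  */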
2969 /* Update the data in CUM to advance over an argument of mode MODE
2970 and data type TYPE. (TYPE is null for libcalls where that information
2971 may not be available.)
2973 Return the number of integer registers advanced over. */
2975 static int
2976 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2977 const_tree type, HOST_WIDE_INT bytes,
2978 HOST_WIDE_INT words)
2980 int res = 0;
2981 bool error_p = false;
2983 if (TARGET_IAMCU)
2985 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2986 bytes in registers. */
2987 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2988 goto pass_in_reg;
2989 return res;
2992 switch (mode)
2994 default:
2995 break;
2997 case E_BLKmode:
2998 if (bytes < 0)
2999 break;
3000 /* FALLTHRU */
3002 case E_DImode:
3003 case E_SImode:
3004 case E_HImode:
3005 case E_QImode:
3006 pass_in_reg:
3007 cum->words += words;
3008 cum->nregs -= words;
3009 cum->regno += words;
3010 if (cum->nregs >= 0)
3011 res = words;
3012 if (cum->nregs <= 0)
3014 cum->nregs = 0;
3015 cfun->machine->arg_reg_available = false;
3016 cum->regno = 0;
3018 break;
3020 case E_OImode:
3021 /* OImode shouldn't be used directly. */
3022 gcc_unreachable ();
3024 case E_DFmode:
3025 if (cum->float_in_sse == -1)
3026 error_p = true;
3027 if (cum->float_in_sse < 2)
3028 break;
3029 /* FALLTHRU */
3030 case E_SFmode:
3031 if (cum->float_in_sse == -1)
3032 error_p = true;
3033 if (cum->float_in_sse < 1)
3034 break;
3035 /* FALLTHRU */
3037 case E_V16HFmode:
3038 case E_V16BFmode:
3039 case E_V8SFmode:
3040 case E_V8SImode:
3041 case E_V64QImode:
3042 case E_V32HImode:
3043 case E_V16SImode:
3044 case E_V8DImode:
3045 case E_V32HFmode:
3046 case E_V32BFmode:
3047 case E_V16SFmode:
3048 case E_V8DFmode:
3049 case E_V32QImode:
3050 case E_V16HImode:
3051 case E_V4DFmode:
3052 case E_V4DImode:
3053 case E_TImode:
3054 case E_V16QImode:
3055 case E_V8HImode:
3056 case E_V4SImode:
3057 case E_V2DImode:
3058 case E_V8HFmode:
3059 case E_V8BFmode:
3060 case E_V4SFmode:
3061 case E_V2DFmode:
3062 if (!type || !AGGREGATE_TYPE_P (type))
3064 cum->sse_words += words;
3065 cum->sse_nregs -= 1;
3066 cum->sse_regno += 1;
3067 if (cum->sse_nregs <= 0)
3069 cum->sse_nregs = 0;
3070 cum->sse_regno = 0;
3073 break;
3075 case E_V8QImode:
3076 case E_V4HImode:
3077 case E_V4HFmode:
3078 case E_V4BFmode:
3079 case E_V2SImode:
3080 case E_V2SFmode:
3081 case E_V1TImode:
3082 case E_V1DImode:
3083 if (!type || !AGGREGATE_TYPE_P (type))
3085 cum->mmx_words += words;
3086 cum->mmx_nregs -= 1;
3087 cum->mmx_regno += 1;
3088 if (cum->mmx_nregs <= 0)
3090 cum->mmx_nregs = 0;
3091 cum->mmx_regno = 0;
3094 break;
3096 if (error_p)
3098 cum->float_in_sse = 0;
3099 error ("calling %qD with SSE calling convention without "
3100 "SSE/SSE2 enabled", cum->decl);
3101 sorry ("this is a GCC bug that can be worked around by adding "
3102 "attribute used to function called");
3105 return res;
3108 static int
3109 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3110 const_tree type, HOST_WIDE_INT words, bool named)
3112 int int_nregs, sse_nregs;
3114 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
3115 if (!named && (VALID_AVX512F_REG_MODE (mode)
3116 || VALID_AVX256_REG_MODE (mode)))
3117 return 0;
3119 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
3120 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3122 cum->nregs -= int_nregs;
3123 cum->sse_nregs -= sse_nregs;
3124 cum->regno += int_nregs;
3125 cum->sse_regno += sse_nregs;
3126 return int_nregs;
3128 else
3130 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3131 cum->words = ROUND_UP (cum->words, align);
3132 cum->words += words;
3133 return 0;
3137 static int
3138 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3139 HOST_WIDE_INT words)
3141 /* Otherwise, this should be passed indirectly. */
3142 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3144 cum->words += words;
3145 if (cum->nregs > 0)
3147 cum->nregs -= 1;
3148 cum->regno += 1;
3149 return 1;
3151 return 0;
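/* For illustration: in the MS x64 ABI each of the first four arguments
   consumes one register slot regardless of its type, so after advancing
   over (int, double, int) the next argument occupies the fourth slot
   (r9 or xmm3); anything that is not 1, 2, 4 or 8 bytes wide has already
   been forced to be passed by reference by ix86_pass_by_reference.  */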
3154 /* Update the data in CUM to advance over argument ARG. */
3156 static void
3157 ix86_function_arg_advance (cumulative_args_t cum_v,
3158 const function_arg_info &arg)
3160 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3161 machine_mode mode = arg.mode;
3162 HOST_WIDE_INT bytes, words;
3163 int nregs;
3165 /* The argument of an interrupt handler is a special case and is
3166 handled in ix86_function_arg. */
3167 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3168 return;
3170 bytes = arg.promoted_size_in_bytes ();
3171 words = CEIL (bytes, UNITS_PER_WORD);
3173 if (arg.type)
3174 mode = type_natural_mode (arg.type, NULL, false);
3176 if (TARGET_64BIT)
3178 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3180 if (call_abi == MS_ABI)
3181 nregs = function_arg_advance_ms_64 (cum, bytes, words);
3182 else
3183 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
3184 arg.named);
3186 else
3187 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3189 if (!nregs)
3191 /* Track if there are outgoing arguments on stack. */
3192 if (cum->caller)
3193 cfun->machine->outgoing_args_on_stack = true;
3197 /* Define where to put the arguments to a function.
3198 Value is zero to push the argument on the stack,
3199 or a hard register in which to store the argument.
3201 MODE is the argument's machine mode.
3202 TYPE is the data type of the argument (as a tree).
3203 This is null for libcalls where that information may
3204 not be available.
3205 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3206 the preceding args and about the function being called.
3207 NAMED is nonzero if this argument is a named parameter
3208 (otherwise it is an extra parameter matching an ellipsis). */
3210 static rtx
3211 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3212 machine_mode orig_mode, const_tree type,
3213 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3215 bool error_p = false;
3217 /* Avoid the AL settings for the Unix64 ABI. */
3218 if (mode == VOIDmode)
3219 return constm1_rtx;
3221 if (TARGET_IAMCU)
3223 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3224 bytes in registers. */
3225 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3226 goto pass_in_reg;
3227 return NULL_RTX;
3230 switch (mode)
3232 default:
3233 break;
3235 case E_BLKmode:
3236 if (bytes < 0)
3237 break;
3238 /* FALLTHRU */
3239 case E_DImode:
3240 case E_SImode:
3241 case E_HImode:
3242 case E_QImode:
3243 pass_in_reg:
3244 if (words <= cum->nregs)
3246 int regno = cum->regno;
3248 /* Fastcall allocates the first two DWORD (SImode) or
3249 smaller arguments to ECX and EDX if they aren't
3250 aggregate types. */
3251 if (cum->fastcall)
3253 if (mode == BLKmode
3254 || mode == DImode
3255 || (type && AGGREGATE_TYPE_P (type)))
3256 break;
3258 /* ECX, not EAX, is the first allocated register. */
3259 if (regno == AX_REG)
3260 regno = CX_REG;
3262 return gen_rtx_REG (mode, regno);
3264 break;
3266 case E_DFmode:
3267 if (cum->float_in_sse == -1)
3268 error_p = true;
3269 if (cum->float_in_sse < 2)
3270 break;
3271 /* FALLTHRU */
3272 case E_SFmode:
3273 if (cum->float_in_sse == -1)
3274 error_p = true;
3275 if (cum->float_in_sse < 1)
3276 break;
3277 /* FALLTHRU */
3278 case E_TImode:
3279 /* In 32bit, we pass TImode in xmm registers. */
3280 case E_V16QImode:
3281 case E_V8HImode:
3282 case E_V4SImode:
3283 case E_V2DImode:
3284 case E_V8HFmode:
3285 case E_V8BFmode:
3286 case E_V4SFmode:
3287 case E_V2DFmode:
3288 if (!type || !AGGREGATE_TYPE_P (type))
3290 if (cum->sse_nregs)
3291 return gen_reg_or_parallel (mode, orig_mode,
3292 cum->sse_regno + FIRST_SSE_REG);
3294 break;
3296 case E_OImode:
3297 case E_XImode:
3298 /* OImode and XImode shouldn't be used directly. */
3299 gcc_unreachable ();
3301 case E_V64QImode:
3302 case E_V32HImode:
3303 case E_V16SImode:
3304 case E_V8DImode:
3305 case E_V32HFmode:
3306 case E_V32BFmode:
3307 case E_V16SFmode:
3308 case E_V8DFmode:
3309 case E_V16HFmode:
3310 case E_V16BFmode:
3311 case E_V8SFmode:
3312 case E_V8SImode:
3313 case E_V32QImode:
3314 case E_V16HImode:
3315 case E_V4DFmode:
3316 case E_V4DImode:
3317 if (!type || !AGGREGATE_TYPE_P (type))
3319 if (cum->sse_nregs)
3320 return gen_reg_or_parallel (mode, orig_mode,
3321 cum->sse_regno + FIRST_SSE_REG);
3323 break;
3325 case E_V8QImode:
3326 case E_V4HImode:
3327 case E_V4HFmode:
3328 case E_V4BFmode:
3329 case E_V2SImode:
3330 case E_V2SFmode:
3331 case E_V1TImode:
3332 case E_V1DImode:
3333 if (!type || !AGGREGATE_TYPE_P (type))
3335 if (cum->mmx_nregs)
3336 return gen_reg_or_parallel (mode, orig_mode,
3337 cum->mmx_regno + FIRST_MMX_REG);
3339 break;
3341 if (error_p)
3343 cum->float_in_sse = 0;
3344 error ("calling %qD with SSE calling convention without "
3345 "SSE/SSE2 enabled", cum->decl);
3346 sorry ("this is a GCC bug that can be worked around by adding "
3347 "attribute used to function called");
3350 return NULL_RTX;
3353 static rtx
3354 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3355 machine_mode orig_mode, const_tree type, bool named)
3357 /* Handle a hidden AL argument containing the number of vector registers
3358 used, for varargs x86-64 functions. */
3359 if (mode == VOIDmode)
3360 return GEN_INT (cum->maybe_vaarg
3361 ? (cum->sse_nregs < 0
3362 ? X86_64_SSE_REGPARM_MAX
3363 : cum->sse_regno)
3364 : -1);
3366 switch (mode)
3368 default:
3369 break;
3371 case E_V16HFmode:
3372 case E_V16BFmode:
3373 case E_V8SFmode:
3374 case E_V8SImode:
3375 case E_V32QImode:
3376 case E_V16HImode:
3377 case E_V4DFmode:
3378 case E_V4DImode:
3379 case E_V32HFmode:
3380 case E_V32BFmode:
3381 case E_V16SFmode:
3382 case E_V16SImode:
3383 case E_V64QImode:
3384 case E_V32HImode:
3385 case E_V8DFmode:
3386 case E_V8DImode:
3387 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
3388 if (!named)
3389 return NULL;
3390 break;
3393 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3394 cum->sse_nregs,
3395 &x86_64_int_parameter_registers [cum->regno],
3396 cum->sse_regno);
3399 static rtx
3400 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3401 machine_mode orig_mode, bool named, const_tree type,
3402 HOST_WIDE_INT bytes)
3404 unsigned int regno;
3406 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
3407 We use the value -2 to specify that the current function call is MS ABI. */
3408 if (mode == VOIDmode)
3409 return GEN_INT (-2);
3411 /* If we've run out of registers, it goes on the stack. */
3412 if (cum->nregs == 0)
3413 return NULL_RTX;
3415 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3417 /* Only floating point modes are passed in anything but integer regs. */
3418 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3420 if (named)
3422 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3423 regno = cum->regno + FIRST_SSE_REG;
3425 else
3427 rtx t1, t2;
3429 /* Unnamed floating parameters are passed in both the
3430 SSE and integer registers. */
3431 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3432 t2 = gen_rtx_REG (mode, regno);
3433 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3434 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3435 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3438 /* Handle aggregate types passed in a register. */
3439 if (orig_mode == BLKmode)
3441 if (bytes > 0 && bytes <= 8)
3442 mode = (bytes > 4 ? DImode : SImode);
3443 if (mode == BLKmode)
3444 mode = DImode;
3447 return gen_reg_or_parallel (mode, orig_mode, regno);
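/* For illustration: the MS x64 ABI selects the register by argument
   position rather than by how many of each kind were used, so a named
   double in the third slot lands in xmm2 (cum->regno + FIRST_SSE_REG),
   while an unnamed double in that slot is made available in both xmm2 and
   r8 via the PARALLEL above.  */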
3450 /* Return where to put the arguments to a function.
3451 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3453 ARG describes the argument while CUM gives information about the
3454 preceding args and about the function being called. */
3456 static rtx
3457 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3459 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3460 machine_mode mode = arg.mode;
3461 HOST_WIDE_INT bytes, words;
3462 rtx reg;
3464 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3466 gcc_assert (arg.type != NULL_TREE);
3467 if (POINTER_TYPE_P (arg.type))
3469 /* This is the pointer argument. */
3470 gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3471 /* It is at -WORD(AP) in the current frame in interrupt and
3472 exception handlers. */
3473 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3475 else
3477 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3478 && TREE_CODE (arg.type) == INTEGER_TYPE
3479 && TYPE_MODE (arg.type) == word_mode);
3480 /* The error code is the word-mode integer argument at
3481 -2 * WORD(AP) in the current frame of the exception
3482 handler. */
3483 reg = gen_rtx_MEM (word_mode,
3484 plus_constant (Pmode,
3485 arg_pointer_rtx,
3486 -2 * UNITS_PER_WORD));
3488 return reg;
3491 bytes = arg.promoted_size_in_bytes ();
3492 words = CEIL (bytes, UNITS_PER_WORD);
3494 /* To simplify the code below, represent vector types with a vector mode
3495 even if MMX/SSE are not active. */
3496 if (arg.type && VECTOR_TYPE_P (arg.type))
3497 mode = type_natural_mode (arg.type, cum, false);
3499 if (TARGET_64BIT)
3501 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3503 if (call_abi == MS_ABI)
3504 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3505 arg.type, bytes);
3506 else
3507 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3509 else
3510 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3512 /* Track if there are outgoing arguments on stack. */
3513 if (reg == NULL_RTX && cum->caller)
3514 cfun->machine->outgoing_args_on_stack = true;
3516 return reg;
3519 /* A C expression that indicates when an argument must be passed by
3520 reference. If nonzero for an argument, a copy of that argument is
3521 made in memory and a pointer to the argument is passed instead of
3522 the argument itself. The pointer is passed in whatever way is
3523 appropriate for passing a pointer to that type. */
3525 static bool
3526 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3528 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3530 if (TARGET_64BIT)
3532 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3534 /* See Windows x64 Software Convention. */
3535 if (call_abi == MS_ABI)
3537 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3539 if (tree type = arg.type)
3541 /* Arrays are passed by reference. */
3542 if (TREE_CODE (type) == ARRAY_TYPE)
3543 return true;
3545 if (RECORD_OR_UNION_TYPE_P (type))
3547 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3548 are passed by reference. */
3549 msize = int_size_in_bytes (type);
3553 /* __m128 is passed by reference. */
3554 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3556 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3557 return true;
3560 return false;
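/* For illustration: under the MS x64 ABI a __m128 value (16 bytes) or a
   12-byte struct is passed by reference, while an 8-byte struct travels by
   value in a register; on the SysV side this hook only forces by-reference
   passing for variably sized types.  */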
3563 /* Return true when TYPE should be 128bit aligned for 32bit argument
3564 passing ABI. XXX: This function is obsolete and is only used for
3565 checking psABI compatibility with previous versions of GCC. */
3567 static bool
3568 ix86_compat_aligned_value_p (const_tree type)
3570 machine_mode mode = TYPE_MODE (type);
3571 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3572 || mode == TDmode
3573 || mode == TFmode
3574 || mode == TCmode)
3575 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3576 return true;
3577 if (TYPE_ALIGN (type) < 128)
3578 return false;
3580 if (AGGREGATE_TYPE_P (type))
3582 /* Walk the aggregates recursively. */
3583 switch (TREE_CODE (type))
3585 case RECORD_TYPE:
3586 case UNION_TYPE:
3587 case QUAL_UNION_TYPE:
3589 tree field;
3591 /* Walk all the structure fields. */
3592 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3594 if (TREE_CODE (field) == FIELD_DECL
3595 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3596 return true;
3598 break;
3601 case ARRAY_TYPE:
3602 /* Just for use if some language passes arrays by value. */
3603 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3604 return true;
3605 break;
3607 default:
3608 gcc_unreachable ();
3611 return false;
3614 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3615 XXX: This function is obsolete and is only used for checking psABI
3616 compatibility with previous versions of GCC. */
3618 static unsigned int
3619 ix86_compat_function_arg_boundary (machine_mode mode,
3620 const_tree type, unsigned int align)
3622 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3623 natural boundaries. */
3624 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3626 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3627 make an exception for SSE modes since these require 128bit
3628 alignment.
3630 The handling here differs from field_alignment. ICC aligns MMX
3631 arguments to 4 byte boundaries, while structure fields are aligned
3632 to 8 byte boundaries. */
3633 if (!type)
3635 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3636 align = PARM_BOUNDARY;
3638 else
3640 if (!ix86_compat_aligned_value_p (type))
3641 align = PARM_BOUNDARY;
3644 if (align > BIGGEST_ALIGNMENT)
3645 align = BIGGEST_ALIGNMENT;
3646 return align;
3649 /* Return true when TYPE should be 128bit aligned for 32bit argument
3650 passing ABI. */
3652 static bool
3653 ix86_contains_aligned_value_p (const_tree type)
3655 machine_mode mode = TYPE_MODE (type);
3657 if (mode == XFmode || mode == XCmode)
3658 return false;
3660 if (TYPE_ALIGN (type) < 128)
3661 return false;
3663 if (AGGREGATE_TYPE_P (type))
3665 /* Walk the aggregates recursively. */
3666 switch (TREE_CODE (type))
3668 case RECORD_TYPE:
3669 case UNION_TYPE:
3670 case QUAL_UNION_TYPE:
3672 tree field;
3674 /* Walk all the structure fields. */
3675 for (field = TYPE_FIELDS (type);
3676 field;
3677 field = DECL_CHAIN (field))
3679 if (TREE_CODE (field) == FIELD_DECL
3680 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3681 return true;
3683 break;
3686 case ARRAY_TYPE:
3687 /* Just for use if some language passes arrays by value. */
3688 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3689 return true;
3690 break;
3692 default:
3693 gcc_unreachable ();
3696 else
3697 return TYPE_ALIGN (type) >= 128;
3699 return false;
3702 /* Gives the alignment boundary, in bits, of an argument with the
3703 specified mode and type. */
3705 static unsigned int
3706 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3708 unsigned int align;
3709 if (type)
3711 /* Since the main variant type is used for the call, convert the
3712 type to its main variant. */
3713 type = TYPE_MAIN_VARIANT (type);
3714 align = TYPE_ALIGN (type);
3715 if (TYPE_EMPTY_P (type))
3716 return PARM_BOUNDARY;
3718 else
3719 align = GET_MODE_ALIGNMENT (mode);
3720 if (align < PARM_BOUNDARY)
3721 align = PARM_BOUNDARY;
3722 else
3724 static bool warned;
3725 unsigned int saved_align = align;
3727 if (!TARGET_64BIT)
3729 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3730 if (!type)
3732 if (mode == XFmode || mode == XCmode)
3733 align = PARM_BOUNDARY;
3735 else if (!ix86_contains_aligned_value_p (type))
3736 align = PARM_BOUNDARY;
3738 if (align < 128)
3739 align = PARM_BOUNDARY;
3742 if (warn_psabi
3743 && !warned
3744 && align != ix86_compat_function_arg_boundary (mode, type,
3745 saved_align))
3747 warned = true;
3748 inform (input_location,
3749 "the ABI for passing parameters with %d-byte"
3750 " alignment has changed in GCC 4.6",
3751 align / BITS_PER_UNIT);
3755 return align;
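/* For illustration: on a 32-bit target a plain int or double argument gets
   PARM_BOUNDARY (32-bit) alignment, whereas an __m128 or another
   16-byte-aligned aggregate keeps its 128-bit alignment; the -Wpsabi note
   above fires when this result differs from the pre-GCC 4.6 computation.  */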
3758 /* Return true if N is a possible register number of function value. */
3760 static bool
3761 ix86_function_value_regno_p (const unsigned int regno)
3763 switch (regno)
3765 case AX_REG:
3766 return true;
3767 case DX_REG:
3768 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3769 case DI_REG:
3770 case SI_REG:
3771 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3773 /* Complex values are returned in %st(0)/%st(1) pair. */
3774 case ST0_REG:
3775 case ST1_REG:
3776 /* TODO: The function should depend on current function ABI but
3777 builtins.cc would need updating then. Therefore we use the
3778 default ABI. */
3779 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3780 return false;
3781 return TARGET_FLOAT_RETURNS_IN_80387;
3783 /* Complex values are returned in %xmm0/%xmm1 pair. */
3784 case XMM0_REG:
3785 case XMM1_REG:
3786 return TARGET_SSE;
3788 case MM0_REG:
3789 if (TARGET_MACHO || TARGET_64BIT)
3790 return false;
3791 return TARGET_MMX;
3794 return false;
3797 /* Check whether the register REGNO should be zeroed on X86.
3798 When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3799 together, so there is no need to zero them again.
3800 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3802 static bool
3803 zero_call_used_regno_p (const unsigned int regno,
3804 bool all_sse_zeroed,
3805 bool need_zero_mmx)
3807 return GENERAL_REGNO_P (regno)
3808 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3809 || MASK_REGNO_P (regno)
3810 || (need_zero_mmx && MMX_REGNO_P (regno));
3813 /* Return the machine_mode that is used to zero register REGNO. */
3815 static machine_mode
3816 zero_call_used_regno_mode (const unsigned int regno)
3818 /* NB: We only need to zero the lower 32 bits for integer registers
3819 and the lower 128 bits for vector registers since the destination is
3820 zero-extended to the full register width. */
3821 if (GENERAL_REGNO_P (regno))
3822 return SImode;
3823 else if (SSE_REGNO_P (regno))
3824 return V4SFmode;
3825 else if (MASK_REGNO_P (regno))
3826 return HImode;
3827 else if (MMX_REGNO_P (regno))
3828 return V2SImode;
3829 else
3830 gcc_unreachable ();
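/* For illustration, zeroing in the narrower mode is sufficient because of
   the architectural zero extension, e.g. (a sketch of the resulting asm):
       xorl    %eax, %eax             # clears all 64 bits of %rax
       vxorps  %xmm0, %xmm0, %xmm0    # the VEX encoding also clears the
                                      # upper bits of %ymm0/%zmm0
   (when AVX is available, ix86_zero_call_used_regs below prefers a single
   vzeroall for the vector registers).  */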
3833 /* Generate an rtx to zero all vector registers together if possible;
3834 otherwise, return NULL. */
3836 static rtx
3837 zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3839 if (!TARGET_AVX)
3840 return NULL;
3842 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3843 if ((LEGACY_SSE_REGNO_P (regno)
3844 || (TARGET_64BIT
3845 && (REX_SSE_REGNO_P (regno)
3846 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3847 && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3848 return NULL;
3850 return gen_avx_vzeroall ();
3853 /* Generate insns to zero all st registers together.
3854 Return the number of st registers zeroed, or 0 when none are.
3855 Assume the number of st registers that are zeroed is num_of_st,
3856 we will emit the following sequence to zero them together:
3857 fldz; \
3858 fldz; \
3860 fldz; \
3861 fstp %%st(0); \
3862 fstp %%st(0); \
3864 fstp %%st(0);
3865 i.e., num_of_st fldz instructions followed by num_of_st fstp instructions
3866 to clear the stack and mark the stack slots empty.
3868 How to compute the num_of_st:
3869 There is no direct mapping from stack registers to hard register
3870 numbers. If one stack register needs to be cleared, we don't know
3871 where in the stack the value remains. So, if any stack register
3872 needs to be cleared, the whole stack should be cleared. However,
3873 x87 stack registers that hold the return value should be excluded.
3874 x87 returns in the top (two for complex values) register, so
3875 num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
3876 Return the value of num_of_st. */
3879 static int
3880 zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3883 /* If the FPU is disabled, no need to zero all st registers. */
3884 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3885 return 0;
3887 unsigned int num_of_st = 0;
3888 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3889 if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3890 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3892 num_of_st++;
3893 break;
3896 if (num_of_st == 0)
3897 return 0;
3899 bool return_with_x87 = false;
3900 return_with_x87 = (crtl->return_rtx
3901 && (STACK_REG_P (crtl->return_rtx)));
3903 bool complex_return = false;
3904 complex_return = (crtl->return_rtx
3905 && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
3907 if (return_with_x87)
3908 if (complex_return)
3909 num_of_st = 6;
3910 else
3911 num_of_st = 7;
3912 else
3913 num_of_st = 8;
3915 rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
3916 for (unsigned int i = 0; i < num_of_st; i++)
3917 emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
3919 for (unsigned int i = 0; i < num_of_st; i++)
3921 rtx insn;
3922 insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
3923 add_reg_note (insn, REG_DEAD, st_reg);
3925 return num_of_st;
3929 /* When the routine exits in MMX mode, if any ST register needs
3930 to be zeroed, we should clear all MMX registers except the
3931 RET_MMX_REGNO that holds the return value. */
3932 static bool
3933 zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
3934 unsigned int ret_mmx_regno)
3936 bool need_zero_all_mm = false;
3937 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3938 if (STACK_REGNO_P (regno)
3939 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3941 need_zero_all_mm = true;
3942 break;
3945 if (!need_zero_all_mm)
3946 return false;
3948 machine_mode mode = V2SImode;
3949 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3950 if (regno != ret_mmx_regno)
3952 rtx reg = gen_rtx_REG (mode, regno);
3953 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
3955 return true;
3958 /* TARGET_ZERO_CALL_USED_REGS. */
3959 /* Generate a sequence of instructions that zero registers specified by
3960 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3961 zeroed. */
3962 static HARD_REG_SET
3963 ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
3965 HARD_REG_SET zeroed_hardregs;
3966 bool all_sse_zeroed = false;
3967 int all_st_zeroed_num = 0;
3968 bool all_mm_zeroed = false;
3970 CLEAR_HARD_REG_SET (zeroed_hardregs);
3972 /* First, see whether we can zero all vector registers together. */
3973 rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
3974 if (zero_all_vec_insn)
3976 emit_insn (zero_all_vec_insn);
3977 all_sse_zeroed = true;
3980 /* MM and ST registers share the same register set, so follow these
3981 rules to clear them:
3982 MMX exit mode x87 exit mode
3983 -------------|----------------------|---------------
3984 uses x87 reg | clear all MMX | clear all x87
3985 uses MMX reg | clear individual MMX | clear all x87
3986 x87 + MMX | clear all MMX | clear all x87
3988 First, decide which mode (MMX mode or x87 mode) the function
3989 exits with. */
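  /* For example, if the function returns a __m64 value in %mm0 (MMX exit
     mode) and an x87 stack register was requested to be cleared, the table
     above says to clear all MMX registers; zero_all_mm_registers does so
     while leaving the return register %mm0 untouched.  */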
3991 bool exit_with_mmx_mode = (crtl->return_rtx
3992 && (MMX_REG_P (crtl->return_rtx)));
3994 if (!exit_with_mmx_mode)
3995 /* x87 exit mode, we should zero all st registers together. */
3997 all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
3999 if (all_st_zeroed_num > 0)
4000 for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
4001 /* x87 stack registers that hold the return value should be excluded.
4002 x87 returns in the top (two for complex values) register. */
4003 if (all_st_zeroed_num == 8
4004 || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
4005 || (all_st_zeroed_num == 6
4006 && (regno == (REGNO (crtl->return_rtx) + 1)))))
4007 SET_HARD_REG_BIT (zeroed_hardregs, regno);
4009 else
4010 /* MMX exit mode, check whether we can zero all mm registers. */
4012 unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
4013 all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
4014 exit_mmx_regno);
4015 if (all_mm_zeroed)
4016 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
4017 if (regno != exit_mmx_regno)
4018 SET_HARD_REG_BIT (zeroed_hardregs, regno);
4021 /* Now, generate instructions to zero all the other registers. */
4023 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4025 if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
4026 continue;
4027 if (!zero_call_used_regno_p (regno, all_sse_zeroed,
4028 exit_with_mmx_mode && !all_mm_zeroed))
4029 continue;
4031 SET_HARD_REG_BIT (zeroed_hardregs, regno);
4033 machine_mode mode = zero_call_used_regno_mode (regno);
4035 rtx reg = gen_rtx_REG (mode, regno);
4036 rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
4038 switch (mode)
4040 case E_SImode:
4041 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
4043 rtx clob = gen_rtx_CLOBBER (VOIDmode,
4044 gen_rtx_REG (CCmode,
4045 FLAGS_REG));
4046 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
4047 tmp,
4048 clob));
4050 /* FALLTHRU. */
4052 case E_V4SFmode:
4053 case E_HImode:
4054 case E_V2SImode:
4055 emit_insn (tmp);
4056 break;
4058 default:
4059 gcc_unreachable ();
4062 return zeroed_hardregs;
4065 /* Define how to find the value returned by a function.
4066 VALTYPE is the data type of the value (as a tree).
4067 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4068 otherwise, FUNC is 0. */
4070 static rtx
4071 function_value_32 (machine_mode orig_mode, machine_mode mode,
4072 const_tree fntype, const_tree fn)
4074 unsigned int regno;
4076 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4077 we normally prevent this case when mmx is not available. However
4078 some ABIs may require the result to be returned like DImode. */
4079 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4080 regno = FIRST_MMX_REG;
4082 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4083 we prevent this case when sse is not available. However some ABIs
4084 may require the result to be returned like integer TImode. */
4085 else if (mode == TImode
4086 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4087 regno = FIRST_SSE_REG;
4089 /* 32-byte vector modes in %ymm0. */
4090 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
4091 regno = FIRST_SSE_REG;
4093 /* 64-byte vector modes in %zmm0. */
4094 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
4095 regno = FIRST_SSE_REG;
4097 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4098 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4099 regno = FIRST_FLOAT_REG;
4100 else
4101 /* Most things go in %eax. */
4102 regno = AX_REG;
4104 /* Return __bf16/_Float16/_Complex _Float16 in an SSE register. */
4105 if (mode == HFmode || mode == BFmode)
4107 if (!TARGET_SSE2)
4109 error ("SSE register return with SSE2 disabled");
4110 regno = AX_REG;
4112 else
4113 regno = FIRST_SSE_REG;
4116 if (mode == HCmode)
4118 if (!TARGET_SSE2)
4119 error ("SSE register return with SSE2 disabled");
4121 rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
4122 XVECEXP (ret, 0, 0)
4123 = gen_rtx_EXPR_LIST (VOIDmode,
4124 gen_rtx_REG (SImode,
4125 TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
4126 GEN_INT (0));
4127 return ret;
4130 /* Override FP return register with %xmm0 for local functions when
4131 SSE math is enabled or for functions with sseregparm attribute. */
4132 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4134 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4135 if (sse_level == -1)
4137 error ("calling %qD with SSE calling convention without "
4138 "SSE/SSE2 enabled", fn);
4139 sorry ("this is a GCC bug that can be worked around by adding "
4140 "attribute used to function called");
4142 else if ((sse_level >= 1 && mode == SFmode)
4143 || (sse_level == 2 && mode == DFmode))
4144 regno = FIRST_SSE_REG;
4147 /* OImode shouldn't be used directly. */
4148 gcc_assert (mode != OImode);
4150 return gen_rtx_REG (orig_mode, regno);
4153 static rtx
4154 function_value_64 (machine_mode orig_mode, machine_mode mode,
4155 const_tree valtype)
4157 rtx ret;
4159 /* Handle libcalls, which don't provide a type node. */
4160 if (valtype == NULL)
4162 unsigned int regno;
4164 switch (mode)
4166 case E_BFmode:
4167 case E_HFmode:
4168 case E_HCmode:
4169 case E_SFmode:
4170 case E_SCmode:
4171 case E_DFmode:
4172 case E_DCmode:
4173 case E_TFmode:
4174 case E_SDmode:
4175 case E_DDmode:
4176 case E_TDmode:
4177 regno = FIRST_SSE_REG;
4178 break;
4179 case E_XFmode:
4180 case E_XCmode:
4181 regno = FIRST_FLOAT_REG;
4182 break;
4183 case E_TCmode:
4184 return NULL;
4185 default:
4186 regno = AX_REG;
4189 return gen_rtx_REG (mode, regno);
4191 else if (POINTER_TYPE_P (valtype))
4193 /* Pointers are always returned in word_mode. */
4194 mode = word_mode;
4197 ret = construct_container (mode, orig_mode, valtype, 1,
4198 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4199 x86_64_int_return_registers, 0);
4201 /* For zero-sized structures, construct_container returns NULL, but we
4202 need to keep the rest of the compiler happy by returning a meaningful value. */
4203 if (!ret)
4204 ret = gen_rtx_REG (orig_mode, AX_REG);
4206 return ret;
4209 static rtx
4210 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4211 const_tree fntype, const_tree fn, const_tree valtype)
4213 unsigned int regno;
4215 /* Floating point return values in %st(0)
4216 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4217 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4218 && (GET_MODE_SIZE (mode) > 8
4219 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4221 regno = FIRST_FLOAT_REG;
4222 return gen_rtx_REG (orig_mode, regno);
4224 else
4225 return function_value_32 (orig_mode, mode, fntype, fn);
4228 static rtx
4229 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4230 const_tree valtype)
4232 unsigned int regno = AX_REG;
4234 if (TARGET_SSE)
4236 switch (GET_MODE_SIZE (mode))
4238 case 16:
4239 if (valtype != NULL_TREE
4240 && !VECTOR_INTEGER_TYPE_P (valtype)
4242 && !INTEGRAL_TYPE_P (valtype)
4243 && !VECTOR_FLOAT_TYPE_P (valtype))
4244 break;
4245 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4246 && !COMPLEX_MODE_P (mode))
4247 regno = FIRST_SSE_REG;
4248 break;
4249 case 8:
4250 case 4:
4251 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4252 break;
4253 if (mode == SFmode || mode == DFmode)
4254 regno = FIRST_SSE_REG;
4255 break;
4256 default:
4257 break;
4260 return gen_rtx_REG (orig_mode, regno);
4263 static rtx
4264 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4265 machine_mode orig_mode, machine_mode mode)
4267 const_tree fn, fntype;
4269 fn = NULL_TREE;
4270 if (fntype_or_decl && DECL_P (fntype_or_decl))
4271 fn = fntype_or_decl;
4272 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4274 if (ix86_function_type_abi (fntype) == MS_ABI)
4276 if (TARGET_64BIT)
4277 return function_value_ms_64 (orig_mode, mode, valtype);
4278 else
4279 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4281 else if (TARGET_64BIT)
4282 return function_value_64 (orig_mode, mode, valtype);
4283 else
4284 return function_value_32 (orig_mode, mode, fntype, fn);
4287 static rtx
4288 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4290 machine_mode mode, orig_mode;
4292 orig_mode = TYPE_MODE (valtype);
4293 mode = type_natural_mode (valtype, NULL, true);
4294 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4297 /* Pointer function arguments and return values are promoted to
4298 word_mode for normal functions. */
4300 static machine_mode
4301 ix86_promote_function_mode (const_tree type, machine_mode mode,
4302 int *punsignedp, const_tree fntype,
4303 int for_return)
4305 if (cfun->machine->func_type == TYPE_NORMAL
4306 && type != NULL_TREE
4307 && POINTER_TYPE_P (type))
4309 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4310 return word_mode;
4312 return default_promote_function_mode (type, mode, punsignedp, fntype,
4313 for_return);
4316 /* Return true if a structure, union or array with MODE containing FIELD
4317 should be accessed using BLKmode. */
4319 static bool
4320 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4322 /* Union with XFmode must be in BLKmode. */
4323 return (mode == XFmode
4324 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4325 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4329 ix86_libcall_value (machine_mode mode)
4331 return ix86_function_value_1 (NULL, NULL, mode, mode);
4334 /* Return true iff type is returned in memory. */
4336 static bool
4337 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4339 const machine_mode mode = type_natural_mode (type, NULL, true);
4340 HOST_WIDE_INT size;
4342 if (TARGET_64BIT)
4344 if (ix86_function_type_abi (fntype) == MS_ABI)
4346 size = int_size_in_bytes (type);
4348 /* __m128 is returned in xmm0. */
4349 if ((!type || VECTOR_INTEGER_TYPE_P (type)
4350 || INTEGRAL_TYPE_P (type)
4351 || VECTOR_FLOAT_TYPE_P (type))
4352 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4353 && !COMPLEX_MODE_P (mode)
4354 && (GET_MODE_SIZE (mode) == 16 || size == 16))
4355 return false;
4357 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4358 return size != 1 && size != 2 && size != 4 && size != 8;
4360 else
4362 int needed_intregs, needed_sseregs;
4364 return examine_argument (mode, type, 1,
4365 &needed_intregs, &needed_sseregs);
4368 else
4370 size = int_size_in_bytes (type);
4372 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4373 bytes in registers. */
4374 if (TARGET_IAMCU)
4375 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4377 if (mode == BLKmode)
4378 return true;
4380 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4381 return false;
4383 if (VECTOR_MODE_P (mode) || mode == TImode)
4385 /* User-created vectors small enough to fit in EAX. */
4386 if (size < 8)
4387 return false;
4389 /* Unless the ABI prescribes otherwise,
4390 MMX/3dNow values are returned in MM0 if available. */
4392 if (size == 8)
4393 return TARGET_VECT8_RETURNS || !TARGET_MMX;
4395 /* SSE values are returned in XMM0 if available. */
4396 if (size == 16)
4397 return !TARGET_SSE;
4399 /* AVX values are returned in YMM0 if available. */
4400 if (size == 32)
4401 return !TARGET_AVX;
4403 /* AVX512F values are returned in ZMM0 if available. */
4404 if (size == 64)
4405 return !TARGET_AVX512F || !TARGET_EVEX512;
4408 if (mode == XFmode)
4409 return false;
4411 if (size > 12)
4412 return true;
4414 /* OImode shouldn't be used directly. */
4415 gcc_assert (mode != OImode);
4417 return false;
4421 /* Implement TARGET_PUSH_ARGUMENT. */
4423 static bool
4424 ix86_push_argument (unsigned int npush)
4426 /* If SSE2 is available, use vector move to put large argument onto
4427 stack. NB: In 32-bit mode, use 8-byte vector move. */
4428 return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4429 && TARGET_PUSH_ARGS
4430 && !ACCUMULATE_OUTGOING_ARGS);
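/* For example, with SSE2 in 64-bit mode a 16-byte argument is not pushed:
   the condition above fails for npush == 16, so the argument is stored to
   the outgoing stack slot with a vector move instead.  */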
4434 /* Create the va_list data type. */
4436 static tree
4437 ix86_build_builtin_va_list_64 (void)
4439 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4441 record = lang_hooks.types.make_type (RECORD_TYPE);
4442 type_decl = build_decl (BUILTINS_LOCATION,
4443 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4445 f_gpr = build_decl (BUILTINS_LOCATION,
4446 FIELD_DECL, get_identifier ("gp_offset"),
4447 unsigned_type_node);
4448 f_fpr = build_decl (BUILTINS_LOCATION,
4449 FIELD_DECL, get_identifier ("fp_offset"),
4450 unsigned_type_node);
4451 f_ovf = build_decl (BUILTINS_LOCATION,
4452 FIELD_DECL, get_identifier ("overflow_arg_area"),
4453 ptr_type_node);
4454 f_sav = build_decl (BUILTINS_LOCATION,
4455 FIELD_DECL, get_identifier ("reg_save_area"),
4456 ptr_type_node);
4458 va_list_gpr_counter_field = f_gpr;
4459 va_list_fpr_counter_field = f_fpr;
4461 DECL_FIELD_CONTEXT (f_gpr) = record;
4462 DECL_FIELD_CONTEXT (f_fpr) = record;
4463 DECL_FIELD_CONTEXT (f_ovf) = record;
4464 DECL_FIELD_CONTEXT (f_sav) = record;
4466 TYPE_STUB_DECL (record) = type_decl;
4467 TYPE_NAME (record) = type_decl;
4468 TYPE_FIELDS (record) = f_gpr;
4469 DECL_CHAIN (f_gpr) = f_fpr;
4470 DECL_CHAIN (f_fpr) = f_ovf;
4471 DECL_CHAIN (f_ovf) = f_sav;
4473 layout_type (record);
4475 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4476 NULL_TREE, TYPE_ATTRIBUTES (record));
4478 /* The correct type is an array type of one element. */
4479 return build_array_type (record, build_index_type (size_zero_node));
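/* The record built above corresponds roughly to the C-level SysV va_list
   (shown for illustration only; the compiler builds the type directly):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];  */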
4482 /* Setup the builtin va_list data type and for 64-bit the additional
4483 calling convention specific va_list data types. */
4485 static tree
4486 ix86_build_builtin_va_list (void)
4488 if (TARGET_64BIT)
4490 /* Initialize ABI specific va_list builtin types.
4492 In lto1, we can encounter two va_list types:
4493 - one as a result of the type-merge across TUs, and
4494 - the one constructed here.
4495 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4496 a type identity check in canonical_va_list_type based on
4497 TYPE_MAIN_VARIANT (which we used to have) will not work.
4498 Instead, we tag each va_list_type_node with its unique attribute, and
4499 look for the attribute in the type identity check in
4500 canonical_va_list_type.
4502 Tagging sysv_va_list_type_node directly with the attribute is
4503 problematic since it's an array of one record, which will degrade into a
4504 pointer to record when used as parameter (see build_va_arg comments for
4505 an example), dropping the attribute in the process. So we tag the
4506 record instead. */
4508 /* For SYSV_ABI we use an array of one record. */
4509 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4511 /* For MS_ABI we use plain pointer to argument area. */
4512 tree char_ptr_type = build_pointer_type (char_type_node);
4513 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4514 TYPE_ATTRIBUTES (char_ptr_type));
4515 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4517 return ((ix86_abi == MS_ABI)
4518 ? ms_va_list_type_node
4519 : sysv_va_list_type_node);
4521 else
4523 /* For i386 we use plain pointer to argument area. */
4524 return build_pointer_type (char_type_node);
4528 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4530 static void
4531 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4533 rtx save_area, mem;
4534 alias_set_type set;
4535 int i, max;
4537 /* GPR size of varargs save area. */
4538 if (cfun->va_list_gpr_size)
4539 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4540 else
4541 ix86_varargs_gpr_size = 0;
4543 /* FPR size of varargs save area. We don't need it if we don't pass
4544 anything in SSE registers. */
4545 if (TARGET_SSE && cfun->va_list_fpr_size)
4546 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4547 else
4548 ix86_varargs_fpr_size = 0;
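  /* With the usual SysV limits (X86_64_REGPARM_MAX == 6 and
     X86_64_SSE_REGPARM_MAX == 8), a full save area is therefore
     6*8 = 48 bytes of GPRs followed by 8*16 = 128 bytes of SSE
     registers, 176 bytes in total.  */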
4550 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4551 return;
4553 save_area = frame_pointer_rtx;
4554 set = get_varargs_alias_set ();
4556 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4557 if (max > X86_64_REGPARM_MAX)
4558 max = X86_64_REGPARM_MAX;
4560 for (i = cum->regno; i < max; i++)
4562 mem = gen_rtx_MEM (word_mode,
4563 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4564 MEM_NOTRAP_P (mem) = 1;
4565 set_mem_alias_set (mem, set);
4566 emit_move_insn (mem,
4567 gen_rtx_REG (word_mode,
4568 x86_64_int_parameter_registers[i]));
4571 if (ix86_varargs_fpr_size)
4573 machine_mode smode;
4574 rtx_code_label *label;
4575 rtx test;
4577 /* Now emit code to save SSE registers. The AX parameter contains number
4578 of SSE parameter registers used to call this function, though all we
4579 actually check here is the zero/non-zero status. */
4581 label = gen_label_rtx ();
4582 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4583 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4584 label));
4586 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4587 we used movdqa (i.e. TImode) instead? Perhaps even better would
4588 be if we could determine the real mode of the data, via a hook
4589 into pass_stdarg. Ignore all that for now. */
4590 smode = V4SFmode;
4591 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4592 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4594 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4595 if (max > X86_64_SSE_REGPARM_MAX)
4596 max = X86_64_SSE_REGPARM_MAX;
4598 for (i = cum->sse_regno; i < max; ++i)
4600 mem = plus_constant (Pmode, save_area,
4601 i * 16 + ix86_varargs_gpr_size);
4602 mem = gen_rtx_MEM (smode, mem);
4603 MEM_NOTRAP_P (mem) = 1;
4604 set_mem_alias_set (mem, set);
4605 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4607 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4610 emit_label (label);
4614 static void
4615 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4617 alias_set_type set = get_varargs_alias_set ();
4618 int i;
4620 /* Reset to zero, as a sysv va_arg might have been used
4621 before. */
4622 ix86_varargs_gpr_size = 0;
4623 ix86_varargs_fpr_size = 0;
4625 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4627 rtx reg, mem;
4629 mem = gen_rtx_MEM (Pmode,
4630 plus_constant (Pmode, virtual_incoming_args_rtx,
4631 i * UNITS_PER_WORD));
4632 MEM_NOTRAP_P (mem) = 1;
4633 set_mem_alias_set (mem, set);
4635 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4636 emit_move_insn (mem, reg);
4640 static void
4641 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4642 const function_arg_info &arg,
4643 int *, int no_rtl)
4645 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4646 CUMULATIVE_ARGS next_cum;
4647 tree fntype;
4649 /* This argument doesn't appear to be used anymore, which is good,
4650 because the old code here didn't suppress rtl generation. */
4651 gcc_assert (!no_rtl);
4653 if (!TARGET_64BIT)
4654 return;
4656 fntype = TREE_TYPE (current_function_decl);
4658 /* For varargs, we do not want to skip the dummy va_dcl argument.
4659 For stdargs, we do want to skip the last named argument. */
4660 next_cum = *cum;
4661 if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4662 || arg.type != NULL_TREE)
4663 && stdarg_p (fntype))
4664 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4666 if (cum->call_abi == MS_ABI)
4667 setup_incoming_varargs_ms_64 (&next_cum);
4668 else
4669 setup_incoming_varargs_64 (&next_cum);
4672 /* Checks if TYPE is of kind va_list char *. */
4674 static bool
4675 is_va_list_char_pointer (tree type)
4677 tree canonic;
4679 /* For 32-bit it is always true. */
4680 if (!TARGET_64BIT)
4681 return true;
4682 canonic = ix86_canonical_va_list_type (type);
4683 return (canonic == ms_va_list_type_node
4684 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4687 /* Implement va_start. */
4689 static void
4690 ix86_va_start (tree valist, rtx nextarg)
4692 HOST_WIDE_INT words, n_gpr, n_fpr;
4693 tree f_gpr, f_fpr, f_ovf, f_sav;
4694 tree gpr, fpr, ovf, sav, t;
4695 tree type;
4696 rtx ovf_rtx;
4698 if (flag_split_stack
4699 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4701 unsigned int scratch_regno;
4703 /* When we are splitting the stack, we can't refer to the stack
4704 arguments using internal_arg_pointer, because they may be on
4705 the old stack. The split stack prologue will arrange to
4706 leave a pointer to the old stack arguments in a scratch
4707 register, which we here copy to a pseudo-register. The split
4708 stack prologue can't set the pseudo-register directly because
4709 it (the prologue) runs before any registers have been saved. */
4711 scratch_regno = split_stack_prologue_scratch_regno ();
4712 if (scratch_regno != INVALID_REGNUM)
4714 rtx reg;
4715 rtx_insn *seq;
4717 reg = gen_reg_rtx (Pmode);
4718 cfun->machine->split_stack_varargs_pointer = reg;
4720 start_sequence ();
4721 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4722 seq = get_insns ();
4723 end_sequence ();
4725 push_topmost_sequence ();
4726 emit_insn_after (seq, entry_of_function ());
4727 pop_topmost_sequence ();
4731 /* Only 64bit target needs something special. */
4732 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4734 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4735 std_expand_builtin_va_start (valist, nextarg);
4736 else
4738 rtx va_r, next;
4740 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4741 next = expand_binop (ptr_mode, add_optab,
4742 cfun->machine->split_stack_varargs_pointer,
4743 crtl->args.arg_offset_rtx,
4744 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4745 convert_move (va_r, next, 0);
4747 return;
4750 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4751 f_fpr = DECL_CHAIN (f_gpr);
4752 f_ovf = DECL_CHAIN (f_fpr);
4753 f_sav = DECL_CHAIN (f_ovf);
4755 valist = build_simple_mem_ref (valist);
4756 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4757 /* The following should be folded into the MEM_REF offset. */
4758 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4759 f_gpr, NULL_TREE);
4760 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4761 f_fpr, NULL_TREE);
4762 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4763 f_ovf, NULL_TREE);
4764 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4765 f_sav, NULL_TREE);
4767 /* Count number of gp and fp argument registers used. */
4768 words = crtl->args.info.words;
4769 n_gpr = crtl->args.info.regno;
4770 n_fpr = crtl->args.info.sse_regno;
4772 if (cfun->va_list_gpr_size)
4774 type = TREE_TYPE (gpr);
4775 t = build2 (MODIFY_EXPR, type,
4776 gpr, build_int_cst (type, n_gpr * 8));
4777 TREE_SIDE_EFFECTS (t) = 1;
4778 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4781 if (TARGET_SSE && cfun->va_list_fpr_size)
4783 type = TREE_TYPE (fpr);
4784 t = build2 (MODIFY_EXPR, type, fpr,
4785 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4786 TREE_SIDE_EFFECTS (t) = 1;
4787 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
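  /* For example, in "int f (int a, ...)" one GPR is consumed by the named
     argument and no SSE register is used, so va_start sets gp_offset to
     1*8 = 8 and fp_offset to 0*16 + 8*6 = 48 (assuming the usual
     X86_64_REGPARM_MAX of 6).  */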
4790 /* Find the overflow area. */
4791 type = TREE_TYPE (ovf);
4792 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4793 ovf_rtx = crtl->args.internal_arg_pointer;
4794 else
4795 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4796 t = make_tree (type, ovf_rtx);
4797 if (words != 0)
4798 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4800 t = build2 (MODIFY_EXPR, type, ovf, t);
4801 TREE_SIDE_EFFECTS (t) = 1;
4802 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4804 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4806 /* Find the register save area.
4807 The function prologue saves it right above the stack frame. */
4808 type = TREE_TYPE (sav);
4809 t = make_tree (type, frame_pointer_rtx);
4810 if (!ix86_varargs_gpr_size)
4811 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4813 t = build2 (MODIFY_EXPR, type, sav, t);
4814 TREE_SIDE_EFFECTS (t) = 1;
4815 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4819 /* Implement va_arg. */
4821 static tree
4822 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4823 gimple_seq *post_p)
4825 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4826 tree f_gpr, f_fpr, f_ovf, f_sav;
4827 tree gpr, fpr, ovf, sav, t;
4828 int size, rsize;
4829 tree lab_false, lab_over = NULL_TREE;
4830 tree addr, t2;
4831 rtx container;
4832 int indirect_p = 0;
4833 tree ptrtype;
4834 machine_mode nat_mode;
4835 unsigned int arg_boundary;
4836 unsigned int type_align;
4838 /* Only 64bit target needs something special. */
4839 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4840 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4842 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4843 f_fpr = DECL_CHAIN (f_gpr);
4844 f_ovf = DECL_CHAIN (f_fpr);
4845 f_sav = DECL_CHAIN (f_ovf);
4847 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4848 valist, f_gpr, NULL_TREE);
4850 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4851 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4852 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4854 indirect_p = pass_va_arg_by_reference (type);
4855 if (indirect_p)
4856 type = build_pointer_type (type);
4857 size = arg_int_size_in_bytes (type);
4858 rsize = CEIL (size, UNITS_PER_WORD);
4860 nat_mode = type_natural_mode (type, NULL, false);
4861 switch (nat_mode)
4863 case E_V16HFmode:
4864 case E_V16BFmode:
4865 case E_V8SFmode:
4866 case E_V8SImode:
4867 case E_V32QImode:
4868 case E_V16HImode:
4869 case E_V4DFmode:
4870 case E_V4DImode:
4871 case E_V32HFmode:
4872 case E_V32BFmode:
4873 case E_V16SFmode:
4874 case E_V16SImode:
4875 case E_V64QImode:
4876 case E_V32HImode:
4877 case E_V8DFmode:
4878 case E_V8DImode:
4879 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
4880 if (!TARGET_64BIT_MS_ABI)
4882 container = NULL;
4883 break;
4885 /* FALLTHRU */
4887 default:
4888 container = construct_container (nat_mode, TYPE_MODE (type),
4889 type, 0, X86_64_REGPARM_MAX,
4890 X86_64_SSE_REGPARM_MAX, intreg,
4892 break;
4895 /* Pull the value out of the saved registers. */
4897 addr = create_tmp_var (ptr_type_node, "addr");
4898 type_align = TYPE_ALIGN (type);
4900 if (container)
4902 int needed_intregs, needed_sseregs;
4903 bool need_temp;
4904 tree int_addr, sse_addr;
4906 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4907 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4909 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4911 bool container_in_reg = false;
4912 if (REG_P (container))
4913 container_in_reg = true;
4914 else if (GET_CODE (container) == PARALLEL
4915 && GET_MODE (container) == BLKmode
4916 && XVECLEN (container, 0) == 1)
4918 /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
4919 expression in a TImode register. In this case, temp isn't
4920 needed. Otherwise, the TImode variable will be put in the
4921 GPR save area which guarantees only 8-byte alignment. */
4922 rtx x = XVECEXP (container, 0, 0);
4923 if (GET_CODE (x) == EXPR_LIST
4924 && REG_P (XEXP (x, 0))
4925 && XEXP (x, 1) == const0_rtx)
4926 container_in_reg = true;
4929 need_temp = (!container_in_reg
4930 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4931 || TYPE_ALIGN (type) > 128));
4933 /* In case we are passing a structure, verify that it is a consecutive block
4934 in the register save area. If not, we need to do moves. */
4935 if (!need_temp && !container_in_reg)
4937 /* Verify that all registers are strictly consecutive */
4938 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4940 int i;
4942 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4944 rtx slot = XVECEXP (container, 0, i);
4945 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4946 || INTVAL (XEXP (slot, 1)) != i * 16)
4947 need_temp = true;
4950 else
4952 int i;
4954 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4956 rtx slot = XVECEXP (container, 0, i);
4957 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4958 || INTVAL (XEXP (slot, 1)) != i * 8)
4959 need_temp = true;
4963 if (!need_temp)
4965 int_addr = addr;
4966 sse_addr = addr;
4968 else
4970 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4971 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4974 /* First ensure that we fit completely in registers. */
4975 if (needed_intregs)
4977 t = build_int_cst (TREE_TYPE (gpr),
4978 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4979 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4980 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4981 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4982 gimplify_and_add (t, pre_p);
4984 if (needed_sseregs)
4986 t = build_int_cst (TREE_TYPE (fpr),
4987 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4988 + X86_64_REGPARM_MAX * 8);
4989 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4990 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4991 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4992 gimplify_and_add (t, pre_p);
4995 /* Compute index to start of area used for integer regs. */
4996 if (needed_intregs)
4998 /* int_addr = gpr + sav; */
4999 t = fold_build_pointer_plus (sav, gpr);
5000 gimplify_assign (int_addr, t, pre_p);
5002 if (needed_sseregs)
5004 /* sse_addr = fpr + sav; */
5005 t = fold_build_pointer_plus (sav, fpr);
5006 gimplify_assign (sse_addr, t, pre_p);
5008 if (need_temp)
5010 int i, prev_size = 0;
5011 tree temp = create_tmp_var (type, "va_arg_tmp");
5012 TREE_ADDRESSABLE (temp) = 1;
5014 /* addr = &temp; */
5015 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5016 gimplify_assign (addr, t, pre_p);
5018 for (i = 0; i < XVECLEN (container, 0); i++)
5020 rtx slot = XVECEXP (container, 0, i);
5021 rtx reg = XEXP (slot, 0);
5022 machine_mode mode = GET_MODE (reg);
5023 tree piece_type;
5024 tree addr_type;
5025 tree daddr_type;
5026 tree src_addr, src;
5027 int src_offset;
5028 tree dest_addr, dest;
5029 int cur_size = GET_MODE_SIZE (mode);
5031 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
5032 prev_size = INTVAL (XEXP (slot, 1));
5033 if (prev_size + cur_size > size)
5035 cur_size = size - prev_size;
5036 unsigned int nbits = cur_size * BITS_PER_UNIT;
5037 if (!int_mode_for_size (nbits, 1).exists (&mode))
5038 mode = QImode;
5040 piece_type = lang_hooks.types.type_for_mode (mode, 1);
5041 if (mode == GET_MODE (reg))
5042 addr_type = build_pointer_type (piece_type);
5043 else
5044 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
5045 true);
5046 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
5047 true);
5049 if (SSE_REGNO_P (REGNO (reg)))
5051 src_addr = sse_addr;
5052 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5054 else
5056 src_addr = int_addr;
5057 src_offset = REGNO (reg) * 8;
5059 src_addr = fold_convert (addr_type, src_addr);
5060 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
5062 dest_addr = fold_convert (daddr_type, addr);
5063 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
5064 if (cur_size == GET_MODE_SIZE (mode))
5066 src = build_va_arg_indirect_ref (src_addr);
5067 dest = build_va_arg_indirect_ref (dest_addr);
5069 gimplify_assign (dest, src, pre_p);
5071 else
5073 tree copy
5074 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
5075 3, dest_addr, src_addr,
5076 size_int (cur_size));
5077 gimplify_and_add (copy, pre_p);
5079 prev_size += cur_size;
5083 if (needed_intregs)
5085 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5086 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5087 gimplify_assign (gpr, t, pre_p);
5088 /* The GPR save area guarantees only 8-byte alignment. */
5089 if (!need_temp)
5090 type_align = MIN (type_align, 64);
5093 if (needed_sseregs)
5095 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5096 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5097 gimplify_assign (unshare_expr (fpr), t, pre_p);
5100 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
5102 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
5105 /* ... otherwise out of the overflow area. */
5107 /* When we align a parameter on the stack for the caller, if the parameter
5108 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
5109 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
5110 here with the caller. */
5111 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
5112 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
5113 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
5115 /* Care for on-stack alignment if needed. */
5116 if (arg_boundary <= 64 || size == 0)
5117 t = ovf;
5118 else
5120 HOST_WIDE_INT align = arg_boundary / 8;
5121 t = fold_build_pointer_plus_hwi (ovf, align - 1);
5122 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5123 build_int_cst (TREE_TYPE (t), -align));
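  /* For instance, a 256-bit-aligned argument gives align == 32 and the
     overflow pointer is rounded up as t = (ovf + 31) & -32 before the
     argument is fetched from the overflow area.  */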
5126 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5127 gimplify_assign (addr, t, pre_p);
5129 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
5130 gimplify_assign (unshare_expr (ovf), t, pre_p);
5132 if (container)
5133 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
5135 type = build_aligned_type (type, type_align);
5136 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
5137 addr = fold_convert (ptrtype, addr);
5139 if (indirect_p)
5140 addr = build_va_arg_indirect_ref (addr);
5141 return build_va_arg_indirect_ref (addr);
5144 /* Return true if OPNUM's MEM should be matched
5145 in movabs* patterns. */
5147 bool
5148 ix86_check_movabs (rtx insn, int opnum)
5150 rtx set, mem;
5152 set = PATTERN (insn);
5153 if (GET_CODE (set) == PARALLEL)
5154 set = XVECEXP (set, 0, 0);
5155 gcc_assert (GET_CODE (set) == SET);
5156 mem = XEXP (set, opnum);
5157 while (SUBREG_P (mem))
5158 mem = SUBREG_REG (mem);
5159 gcc_assert (MEM_P (mem));
5160 return volatile_ok || !MEM_VOLATILE_P (mem);
5163 /* Return false if INSN contains a MEM with a non-default address space. */
5164 bool
5165 ix86_check_no_addr_space (rtx insn)
5167 subrtx_var_iterator::array_type array;
5168 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5170 rtx x = *iter;
5171 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5172 return false;
5174 return true;
5177 /* Initialize the table of extra 80387 mathematical constants. */
5179 static void
5180 init_ext_80387_constants (void)
5182 static const char * cst[5] =
5184 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5185 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5186 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5187 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5188 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5190 int i;
5192 for (i = 0; i < 5; i++)
5194 real_from_string (&ext_80387_constants_table[i], cst[i]);
5195 /* Ensure each constant is rounded to XFmode precision. */
5196 real_convert (&ext_80387_constants_table[i],
5197 XFmode, &ext_80387_constants_table[i]);
5200 ext_80387_constants_init = 1;
5203 /* Return non-zero if the constant is something that
5204 can be loaded with a special instruction. */
5207 standard_80387_constant_p (rtx x)
5209 machine_mode mode = GET_MODE (x);
5211 const REAL_VALUE_TYPE *r;
5213 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5214 return -1;
5216 if (x == CONST0_RTX (mode))
5217 return 1;
5218 if (x == CONST1_RTX (mode))
5219 return 2;
5221 r = CONST_DOUBLE_REAL_VALUE (x);
5223 /* For XFmode constants, try to find a special 80387 instruction when
5224 optimizing for size or on those CPUs that benefit from them. */
5225 if (mode == XFmode
5226 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5227 && !flag_rounding_math)
5229 int i;
5231 if (! ext_80387_constants_init)
5232 init_ext_80387_constants ();
5234 for (i = 0; i < 5; i++)
5235 if (real_identical (r, &ext_80387_constants_table[i]))
5236 return i + 3;
5239 /* Load of the constant -0.0 or -1.0 will be split as
5240 fldz;fchs or fld1;fchs sequence. */
5241 if (real_isnegzero (r))
5242 return 8;
5243 if (real_identical (r, &dconstm1))
5244 return 9;
5246 return 0;
5249 /* Return the opcode of the special instruction to be used to load
5250 the constant X. */
5252 const char *
5253 standard_80387_constant_opcode (rtx x)
5255 switch (standard_80387_constant_p (x))
5257 case 1:
5258 return "fldz";
5259 case 2:
5260 return "fld1";
5261 case 3:
5262 return "fldlg2";
5263 case 4:
5264 return "fldln2";
5265 case 5:
5266 return "fldl2e";
5267 case 6:
5268 return "fldl2t";
5269 case 7:
5270 return "fldpi";
5271 case 8:
5272 case 9:
5273 return "#";
5274 default:
5275 gcc_unreachable ();
5279 /* Return the CONST_DOUBLE representing the 80387 constant that is
5280 loaded by the specified special instruction. The argument IDX
5281 matches the return value from standard_80387_constant_p. */
5284 standard_80387_constant_rtx (int idx)
5286 int i;
5288 if (! ext_80387_constants_init)
5289 init_ext_80387_constants ();
5291 switch (idx)
5293 case 3:
5294 case 4:
5295 case 5:
5296 case 6:
5297 case 7:
5298 i = idx - 3;
5299 break;
5301 default:
5302 gcc_unreachable ();
5305 return const_double_from_real_value (ext_80387_constants_table[i],
5306 XFmode);
5309 /* Return 1 if X is all bits 0, 2 if X is all bits 1,
5310 and 3 if X is all bits 1 with zero extension,
5311 in a supported SSE/AVX vector mode. */
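/* For example, a V4SI vector of zeros yields 1, a V8SI vector of all-ones
   yields 2 (when AVX2 is available), and a 256-bit constant whose low
   128 bits are all-ones with the rest zero yields 3.  */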
5314 standard_sse_constant_p (rtx x, machine_mode pred_mode)
5316 machine_mode mode;
5318 if (!TARGET_SSE)
5319 return 0;
5321 mode = GET_MODE (x);
5323 if (x == const0_rtx || const0_operand (x, mode))
5324 return 1;
5326 if (x == constm1_rtx
5327 || vector_all_ones_operand (x, mode)
5328 || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5329 || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
5330 && float_vector_all_ones_operand (x, mode)))
5332 /* VOIDmode integer constant, get mode from the predicate. */
5333 if (mode == VOIDmode)
5334 mode = pred_mode;
5336 switch (GET_MODE_SIZE (mode))
5338 case 64:
5339 if (TARGET_AVX512F && TARGET_EVEX512)
5340 return 2;
5341 break;
5342 case 32:
5343 if (TARGET_AVX2)
5344 return 2;
5345 break;
5346 case 16:
5347 if (TARGET_SSE2)
5348 return 2;
5349 break;
5350 case 0:
5351 /* VOIDmode */
5352 gcc_unreachable ();
5353 default:
5354 break;
5358 if (vector_all_ones_zero_extend_half_operand (x, mode)
5359 || vector_all_ones_zero_extend_quarter_operand (x, mode))
5360 return 3;
5362 return 0;
5365 /* Return the opcode of the special instruction to be used to load
5366 the constant operands[1] into operands[0]. */
5368 const char *
5369 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5371 machine_mode mode;
5372 rtx x = operands[1];
5374 gcc_assert (TARGET_SSE);
5376 mode = GET_MODE (x);
5378 if (x == const0_rtx || const0_operand (x, mode))
5380 switch (get_attr_mode (insn))
5382 case MODE_TI:
5383 if (!EXT_REX_SSE_REG_P (operands[0]))
5384 return "%vpxor\t%0, %d0";
5385 /* FALLTHRU */
5386 case MODE_XI:
5387 case MODE_OI:
5388 if (EXT_REX_SSE_REG_P (operands[0]))
5390 if (TARGET_AVX512VL)
5391 return "vpxord\t%x0, %x0, %x0";
5392 else if (TARGET_EVEX512)
5393 return "vpxord\t%g0, %g0, %g0";
5394 else
5395 gcc_unreachable ();
5397 return "vpxor\t%x0, %x0, %x0";
5399 case MODE_V2DF:
5400 if (!EXT_REX_SSE_REG_P (operands[0]))
5401 return "%vxorpd\t%0, %d0";
5402 /* FALLTHRU */
5403 case MODE_V8DF:
5404 case MODE_V4DF:
5405 if (EXT_REX_SSE_REG_P (operands[0]))
5407 if (TARGET_AVX512DQ)
5409 if (TARGET_AVX512VL)
5410 return "vxorpd\t%x0, %x0, %x0";
5411 else if (TARGET_EVEX512)
5412 return "vxorpd\t%g0, %g0, %g0";
5413 else
5414 gcc_unreachable ();
5416 else
5418 if (TARGET_AVX512VL)
5419 return "vpxorq\t%x0, %x0, %x0";
5420 else if (TARGET_EVEX512)
5421 return "vpxorq\t%g0, %g0, %g0";
5422 else
5423 gcc_unreachable ();
5426 return "vxorpd\t%x0, %x0, %x0";
5428 case MODE_V4SF:
5429 if (!EXT_REX_SSE_REG_P (operands[0]))
5430 return "%vxorps\t%0, %d0";
5431 /* FALLTHRU */
5432 case MODE_V16SF:
5433 case MODE_V8SF:
5434 if (EXT_REX_SSE_REG_P (operands[0]))
5436 if (TARGET_AVX512DQ)
5438 if (TARGET_AVX512VL)
5439 return "vxorps\t%x0, %x0, %x0";
5440 else if (TARGET_EVEX512)
5441 return "vxorps\t%g0, %g0, %g0";
5442 else
5443 gcc_unreachable ();
5445 else
5447 if (TARGET_AVX512VL)
5448 return "vpxord\t%x0, %x0, %x0";
5449 else if (TARGET_EVEX512)
5450 return "vpxord\t%g0, %g0, %g0";
5451 else
5452 gcc_unreachable ();
5455 return "vxorps\t%x0, %x0, %x0";
5457 default:
5458 gcc_unreachable ();
5461 else if (x == constm1_rtx
5462 || vector_all_ones_operand (x, mode)
5463 || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5464 && float_vector_all_ones_operand (x, mode)))
5466 enum attr_mode insn_mode = get_attr_mode (insn);
5468 switch (insn_mode)
5470 case MODE_XI:
5471 case MODE_V8DF:
5472 case MODE_V16SF:
5473 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5474 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5476 case MODE_OI:
5477 case MODE_V4DF:
5478 case MODE_V8SF:
5479 gcc_assert (TARGET_AVX2);
5480 /* FALLTHRU */
5481 case MODE_TI:
5482 case MODE_V2DF:
5483 case MODE_V4SF:
5484 gcc_assert (TARGET_SSE2);
5485 if (EXT_REX_SSE_REG_P (operands[0]))
5487 if (TARGET_AVX512VL)
5488 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5489 else if (TARGET_EVEX512)
5490 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5491 else
5492 gcc_unreachable ();
5494 return (TARGET_AVX
5495 ? "vpcmpeqd\t%0, %0, %0"
5496 : "pcmpeqd\t%0, %0");
5498 default:
5499 gcc_unreachable ();
5502 else if (vector_all_ones_zero_extend_half_operand (x, mode))
5504 if (GET_MODE_SIZE (mode) == 64)
5506 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5507 return "vpcmpeqd\t%t0, %t0, %t0";
5509 else if (GET_MODE_SIZE (mode) == 32)
5511 gcc_assert (TARGET_AVX);
5512 return "vpcmpeqd\t%x0, %x0, %x0";
5514 gcc_unreachable ();
5516 else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
5518 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5519 return "vpcmpeqd\t%x0, %x0, %x0";
5522 gcc_unreachable ();
5525 /* Returns true if INSN can be transformed from a memory load
5526 to a supported FP constant load. */
5528 bool
5529 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5531 rtx src = find_constant_src (insn);
5533 gcc_assert (REG_P (dst));
5535 if (src == NULL
5536 || (SSE_REGNO_P (REGNO (dst))
5537 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5538 || (!TARGET_AVX512VL
5539 && EXT_REX_SSE_REGNO_P (REGNO (dst))
5540 && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
5541 || (STACK_REGNO_P (REGNO (dst))
5542 && standard_80387_constant_p (src) < 1))
5543 return false;
5545 return true;
5548 /* Predicate for pre-reload splitters with associated instructions,
5549 which can match any time before the split1 pass (usually combine),
5550 then are unconditionally split in that pass and should not be
5551 matched again afterwards. */
5553 bool
5554 ix86_pre_reload_split (void)
5556 return (can_create_pseudo_p ()
5557 && !(cfun->curr_properties & PROP_rtl_split_insns));
5560 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5561 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5562 TARGET_AVX512VL or it is a register to register move which can
5563 be done with zmm register move. */
5565 static const char *
5566 ix86_get_ssemov (rtx *operands, unsigned size,
5567 enum attr_mode insn_mode, machine_mode mode)
5569 char buf[128];
5570 bool misaligned_p = (misaligned_operand (operands[0], mode)
5571 || misaligned_operand (operands[1], mode));
5572 bool evex_reg_p = (size == 64
5573 || EXT_REX_SSE_REG_P (operands[0])
5574 || EXT_REX_SSE_REG_P (operands[1]));
5576 bool egpr_p = (TARGET_APX_EGPR
5577 && (x86_extended_rex2reg_mentioned_p (operands[0])
5578 || x86_extended_rex2reg_mentioned_p (operands[1])));
5579 bool egpr_vl = egpr_p && TARGET_AVX512VL;
5581 machine_mode scalar_mode;
5583 const char *opcode = NULL;
5584 enum
5586 opcode_int,
5587 opcode_float,
5588 opcode_double
5589 } type = opcode_int;
5591 switch (insn_mode)
5593 case MODE_V16SF:
5594 case MODE_V8SF:
5595 case MODE_V4SF:
5596 scalar_mode = E_SFmode;
5597 type = opcode_float;
5598 break;
5599 case MODE_V8DF:
5600 case MODE_V4DF:
5601 case MODE_V2DF:
5602 scalar_mode = E_DFmode;
5603 type = opcode_double;
5604 break;
5605 case MODE_XI:
5606 case MODE_OI:
5607 case MODE_TI:
5608 scalar_mode = GET_MODE_INNER (mode);
5609 break;
5610 default:
5611 gcc_unreachable ();
5614 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5615 we can only use zmm register move without memory operand. */
5616 if (evex_reg_p
5617 && !TARGET_AVX512VL
5618 && GET_MODE_SIZE (mode) < 64)
5620 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5621 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5622 AVX512VL is disabled, LRA can still generate reg to
5623 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5624 modes. */
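      /* For example, a V4SFmode reg-to-reg copy from %xmm17 to %xmm16
	 without AVX512VL is emitted below as a full-width move such as
	 "vmovaps %zmm17, %zmm16" (size is forced to 64 so the %g operand
	 modifiers select the zmm names).  */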
5625 if (memory_operand (operands[0], mode)
5626 || memory_operand (operands[1], mode))
5627 gcc_unreachable ();
5628 size = 64;
5629 /* We need TARGET_EVEX512 to move into zmm register. */
5630 gcc_assert (TARGET_EVEX512);
5631 switch (type)
5633 case opcode_int:
5634 if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
5635 opcode = (misaligned_p
5636 ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5637 : "vmovdqa64");
5638 else
5639 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5640 break;
5641 case opcode_float:
5642 opcode = misaligned_p ? "vmovups" : "vmovaps";
5643 break;
5644 case opcode_double:
5645 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5646 break;
5649 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5651 switch (scalar_mode)
5653 case E_HFmode:
5654 case E_BFmode:
5655 if (evex_reg_p || egpr_vl)
5656 opcode = (misaligned_p
5657 ? (TARGET_AVX512BW
5658 ? "vmovdqu16"
5659 : "vmovdqu64")
5660 : "vmovdqa64");
5661 else if (egpr_p)
5662 opcode = (misaligned_p
5663 ? (TARGET_AVX512BW
5664 ? "vmovdqu16"
5665 : "%vmovups")
5666 : "%vmovaps");
5667 else
5668 opcode = (misaligned_p
5669 ? (TARGET_AVX512BW
5670 ? "vmovdqu16"
5671 : "%vmovdqu")
5672 : "%vmovdqa");
5673 break;
5674 case E_SFmode:
5675 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5676 break;
5677 case E_DFmode:
5678 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5679 break;
5680 case E_TFmode:
5681 if (evex_reg_p || egpr_vl)
5682 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5683 else if (egpr_p)
5684 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5685 else
5686 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5687 break;
5688 default:
5689 gcc_unreachable ();
5692 else if (SCALAR_INT_MODE_P (scalar_mode))
5694 switch (scalar_mode)
5696 case E_QImode:
5697 if (evex_reg_p || egpr_vl)
5698 opcode = (misaligned_p
5699 ? (TARGET_AVX512BW
5700 ? "vmovdqu8"
5701 : "vmovdqu64")
5702 : "vmovdqa64");
5703 else if (egpr_p)
5704 opcode = (misaligned_p
5705 ? (TARGET_AVX512BW
5706 ? "vmovdqu8"
5707 : "%vmovups")
5708 : "%vmovaps");
5709 else
5710 opcode = (misaligned_p
5711 ? (TARGET_AVX512BW
5712 ? "vmovdqu8"
5713 : "%vmovdqu")
5714 : "%vmovdqa");
5715 break;
5716 case E_HImode:
5717 if (evex_reg_p || egpr_vl)
5718 opcode = (misaligned_p
5719 ? (TARGET_AVX512BW
5720 ? "vmovdqu16"
5721 : "vmovdqu64")
5722 : "vmovdqa64");
5723 else if (egpr_p)
5724 opcode = (misaligned_p
5725 ? (TARGET_AVX512BW
5726 ? "vmovdqu16"
5727 : "%vmovups")
5728 : "%vmovaps");
5729 else
5730 opcode = (misaligned_p
5731 ? (TARGET_AVX512BW
5732 ? "vmovdqu16"
5733 : "%vmovdqu")
5734 : "%vmovdqa");
5735 break;
5736 case E_SImode:
5737 if (evex_reg_p || egpr_vl)
5738 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5739 else if (egpr_p)
5740 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5741 else
5742 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5743 break;
5744 case E_DImode:
5745 case E_TImode:
5746 case E_OImode:
5747 if (evex_reg_p || egpr_vl)
5748 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5749 else if (egpr_p)
5750 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5751 else
5752 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5753 break;
5754 case E_XImode:
5755 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5756 break;
5757 default:
5758 gcc_unreachable ();
5761 else
5762 gcc_unreachable ();
5764 switch (size)
5766 case 64:
5767 snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5768 opcode);
5769 break;
5770 case 32:
5771 snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5772 opcode);
5773 break;
5774 case 16:
5775 snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5776 opcode);
5777 break;
5778 default:
5779 gcc_unreachable ();
5781 output_asm_insn (buf, operands);
5782 return "";
5785 /* Return the template of the TYPE_SSEMOV instruction to move
5786 operands[1] into operands[0]. */
5788 const char *
5789 ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5791 machine_mode mode = GET_MODE (operands[0]);
5792 if (get_attr_type (insn) != TYPE_SSEMOV
5793 || mode != GET_MODE (operands[1]))
5794 gcc_unreachable ();
5796 enum attr_mode insn_mode = get_attr_mode (insn);
5798 switch (insn_mode)
5800 case MODE_XI:
5801 case MODE_V8DF:
5802 case MODE_V16SF:
5803 return ix86_get_ssemov (operands, 64, insn_mode, mode);
5805 case MODE_OI:
5806 case MODE_V4DF:
5807 case MODE_V8SF:
5808 return ix86_get_ssemov (operands, 32, insn_mode, mode);
5810 case MODE_TI:
5811 case MODE_V2DF:
5812 case MODE_V4SF:
5813 return ix86_get_ssemov (operands, 16, insn_mode, mode);
5815 case MODE_DI:
5816 /* Handle broken assemblers that require movd instead of movq. */
5817 if (GENERAL_REG_P (operands[0]))
5819 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5820 return "%vmovq\t{%1, %q0|%q0, %1}";
5821 else
5822 return "%vmovd\t{%1, %q0|%q0, %1}";
5824 else if (GENERAL_REG_P (operands[1]))
5826 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5827 return "%vmovq\t{%q1, %0|%0, %q1}";
5828 else
5829 return "%vmovd\t{%q1, %0|%0, %q1}";
5831 else
5832 return "%vmovq\t{%1, %0|%0, %1}";
5834 case MODE_SI:
5835 if (GENERAL_REG_P (operands[0]))
5836 return "%vmovd\t{%1, %k0|%k0, %1}";
5837 else if (GENERAL_REG_P (operands[1]))
5838 return "%vmovd\t{%k1, %0|%0, %k1}";
5839 else
5840 return "%vmovd\t{%1, %0|%0, %1}";
5842 case MODE_HI:
5843 if (GENERAL_REG_P (operands[0]))
5844 return "vmovw\t{%1, %k0|%k0, %1}";
5845 else if (GENERAL_REG_P (operands[1]))
5846 return "vmovw\t{%k1, %0|%0, %k1}";
5847 else
5848 return "vmovw\t{%1, %0|%0, %1}";
5850 case MODE_DF:
5851 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5852 return "vmovsd\t{%d1, %0|%0, %d1}";
5853 else
5854 return "%vmovsd\t{%1, %0|%0, %1}";
5856 case MODE_SF:
5857 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5858 return "vmovss\t{%d1, %0|%0, %d1}";
5859 else
5860 return "%vmovss\t{%1, %0|%0, %1}";
5862 case MODE_HF:
5863 case MODE_BF:
5864 if (REG_P (operands[0]) && REG_P (operands[1]))
5865 return "vmovsh\t{%d1, %0|%0, %d1}";
5866 else
5867 return "vmovsh\t{%1, %0|%0, %1}";
5869 case MODE_V1DF:
5870 gcc_assert (!TARGET_AVX);
5871 return "movlpd\t{%1, %0|%0, %1}";
5873 case MODE_V2SF:
5874 if (TARGET_AVX && REG_P (operands[0]))
5875 return "vmovlps\t{%1, %d0|%d0, %1}";
5876 else
5877 return "%vmovlps\t{%1, %0|%0, %1}";
5879 default:
5880 gcc_unreachable ();
5884 /* Returns true if OP contains a symbol reference */
5886 bool
5887 symbolic_reference_mentioned_p (rtx op)
5889 const char *fmt;
5890 int i;
5892 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5893 return true;
5895 fmt = GET_RTX_FORMAT (GET_CODE (op));
5896 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5898 if (fmt[i] == 'E')
5900 int j;
5902 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5903 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5904 return true;
5907 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5908 return true;
5911 return false;
5914 /* Return true if it is appropriate to emit `ret' instructions in the
5915 body of a function. Do this only if the epilogue is simple, needing a
5916 couple of insns. Prior to reloading, we can't tell how many registers
5917 must be saved, so return false then. Return false if there is no frame
5918 marker to de-allocate. */
5920 bool
5921 ix86_can_use_return_insn_p (void)
5923 if (ix86_function_ms_hook_prologue (current_function_decl))
5924 return false;
5926 if (ix86_function_naked (current_function_decl))
5927 return false;
5929 /* Don't use `ret' instruction in interrupt handler. */
5930 if (! reload_completed
5931 || frame_pointer_needed
5932 || cfun->machine->func_type != TYPE_NORMAL)
5933 return 0;
5935 /* Don't allow more than 32k pop, since that's all we can do
5936 with one instruction. */
5937 if (crtl->args.pops_args && crtl->args.size >= 32768)
5938 return 0;
5940 struct ix86_frame &frame = cfun->machine->frame;
5941 return (frame.stack_pointer_offset == UNITS_PER_WORD
5942 && (frame.nregs + frame.nsseregs) == 0);
5945 /* Return stack frame size. get_frame_size () returns used stack slots
5946 during compilation, which may be optimized out later. If stack frame
5947 is needed, stack_frame_required should be true. */
5949 static HOST_WIDE_INT
5950 ix86_get_frame_size (void)
5952 if (cfun->machine->stack_frame_required)
5953 return get_frame_size ();
5954 else
5955 return 0;
5958 /* Value should be nonzero if functions must have frame pointers.
5959 Zero means the frame pointer need not be set up (and parms may
5960 be accessed via the stack pointer) in functions that seem suitable. */
5962 static bool
5963 ix86_frame_pointer_required (void)
5965 /* If we accessed previous frames, then the generated code expects
5966 to be able to access the saved ebp value in our frame. */
5967 if (cfun->machine->accesses_prev_frame)
5968 return true;
5970 /* Several x86 OSes need a frame pointer for other reasons,
5971 usually pertaining to setjmp. */
5972 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5973 return true;
5975 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
5976 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5977 return true;
5979 /* Win64 SEH: very large frames need a frame pointer, as the maximum stack
5980 allocation is 4GB. */
5981 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5982 return true;
5984 /* SSE saves require frame-pointer when stack is misaligned. */
5985 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5986 return true;
5988 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5989 turns off the frame pointer by default. Turn it back on now if
5990 we've not got a leaf function. */
5991 if (TARGET_OMIT_LEAF_FRAME_POINTER
5992 && (!crtl->is_leaf
5993 || ix86_current_function_calls_tls_descriptor))
5994 return true;
5996 /* Several versions of mcount for the x86 assume that there is a
5997 frame, so we cannot allow profiling without a frame pointer. */
5998 if (crtl->profile && !flag_fentry)
5999 return true;
6001 return false;
6004 /* Record that the current function accesses previous call frames. */
6006 void
6007 ix86_setup_frame_addresses (void)
6009 cfun->machine->accesses_prev_frame = 1;
6012 #if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
6013 # define USE_HIDDEN_LINKONCE 1
6014 #else
6015 # define USE_HIDDEN_LINKONCE 0
6016 #endif
6018 /* Label count for call and return thunks. It is used to make unique
6019 labels in call and return thunks. */
6020 static int indirectlabelno;
6022 /* True if call thunk function is needed. */
6023 static bool indirect_thunk_needed = false;
6025 /* Bit masks of integer registers, which contain branch target, used
6026 by call thunk functions. */
6027 static HARD_REG_SET indirect_thunks_used;
6029 /* True if return thunk function is needed. */
6030 static bool indirect_return_needed = false;
6032 /* True if return thunk function via CX is needed. */
6033 static bool indirect_return_via_cx;
6035 #ifndef INDIRECT_LABEL
6036 # define INDIRECT_LABEL "LIND"
6037 #endif
6039 /* Indicate what prefix is needed for an indirect branch. */
6040 enum indirect_thunk_prefix
6042 indirect_thunk_prefix_none,
6043 indirect_thunk_prefix_nt
6046 /* Return the prefix needed for an indirect branch INSN. */
6048 enum indirect_thunk_prefix
6049 indirect_thunk_need_prefix (rtx_insn *insn)
6051 enum indirect_thunk_prefix need_prefix;
6052 if ((cfun->machine->indirect_branch_type
6053 == indirect_branch_thunk_extern)
6054 && ix86_notrack_prefixed_insn_p (insn))
6056 /* NOTRACK prefix is only used with external thunk so that it
6057 can be properly updated to support CET at run-time. */
6058 need_prefix = indirect_thunk_prefix_nt;
6060 else
6061 need_prefix = indirect_thunk_prefix_none;
6062 return need_prefix;
6065 /* Fills in the label name that should be used for the indirect thunk. */
6067 static void
6068 indirect_thunk_name (char name[32], unsigned int regno,
6069 enum indirect_thunk_prefix need_prefix,
6070 bool ret_p)
6072 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
6073 gcc_unreachable ();
6075 if (USE_HIDDEN_LINKONCE)
6077 const char *prefix;
6079 if (need_prefix == indirect_thunk_prefix_nt
6080 && regno != INVALID_REGNUM)
6082 /* NOTRACK prefix is only used with external thunk via
6083 register so that NOTRACK prefix can be added to indirect
6084 branch via register to support CET at run-time. */
6085 prefix = "_nt";
6087 else
6088 prefix = "";
6090 const char *ret = ret_p ? "return" : "indirect";
6092 if (regno != INVALID_REGNUM)
6094 const char *reg_prefix;
6095 if (LEGACY_INT_REGNO_P (regno))
6096 reg_prefix = TARGET_64BIT ? "r" : "e";
6097 else
6098 reg_prefix = "";
6099 sprintf (name, "__x86_%s_thunk%s_%s%s",
6100 ret, prefix, reg_prefix, reg_names[regno]);
6102 else
6103 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
6105 else
6107 if (regno != INVALID_REGNUM)
6108 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
6109 else
6111 if (ret_p)
6112 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
6113 else
6114 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
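/* A short usage sketch, assuming USE_HIDDEN_LINKONCE and TARGET_64BIT:

     char name[32];
     indirect_thunk_name (name, AX_REG, indirect_thunk_prefix_none, false);
       -> name is "__x86_indirect_thunk_rax"
     indirect_thunk_name (name, INVALID_REGNUM, indirect_thunk_prefix_none, true);
       -> name is "__x86_return_thunk"  */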
6119 /* Output a call and return thunk for indirect branch. If REGNO != -1,
6120 the function address is in REGNO and the call and return thunk looks like:
6122 call L2
6123 L1:
6124 pause
6125 lfence
6126 jmp L1
6127 L2:
6128 mov %REG, (%sp)
6130 ret
6131 Otherwise, the function address is on the top of stack and the
6132 call and return thunk looks like:
6134 call L2
6135 L1:
6136 pause
6137 lfence
6138 jmp L1
6139 L2:
6140 lea WORD_SIZE(%sp), %sp
6142 ret
6144 static void
6145 output_indirect_thunk (unsigned int regno)
6147 char indirectlabel1[32];
6148 char indirectlabel2[32];
6150 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
6151 indirectlabelno++);
6152 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
6153 indirectlabelno++);
6155 /* Call */
6156 fputs ("\tcall\t", asm_out_file);
6157 assemble_name_raw (asm_out_file, indirectlabel2);
6158 fputc ('\n', asm_out_file);
6160 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
6162 /* AMD and Intel CPUs each prefer a different instruction as the loop filler.
6163 Using both pause + lfence is a compromise solution. */
6164 fprintf (asm_out_file, "\tpause\n\tlfence\n");
6166 /* Jump. */
6167 fputs ("\tjmp\t", asm_out_file);
6168 assemble_name_raw (asm_out_file, indirectlabel1);
6169 fputc ('\n', asm_out_file);
6171 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
6173 /* The above call insn pushed a word to stack. Adjust CFI info. */
6174 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
6176 if (! dwarf2out_do_cfi_asm ())
6178 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6179 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
6180 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
6181 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6183 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6184 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
6185 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
6186 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6187 dwarf2out_emit_cfi (xcfi);
6190 if (regno != INVALID_REGNUM)
6192 /* MOV. */
6193 rtx xops[2];
6194 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
6195 xops[1] = gen_rtx_REG (word_mode, regno);
6196 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
6198 else
6200 /* LEA. */
6201 rtx xops[2];
6202 xops[0] = stack_pointer_rtx;
6203 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6204 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
6207 fputs ("\tret\n", asm_out_file);
6208 if ((ix86_harden_sls & harden_sls_return))
6209 fputs ("\tint3\n", asm_out_file);
6212 /* Output a function with a call and return thunk for indirect branch.
6213 If REGNO != INVALID_REGNUM, the function address is in REGNO.
6214 Otherwise, the function address is on the top of stack. Thunk is
6215 used for function return if RET_P is true. */
6217 static void
6218 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
6219 unsigned int regno, bool ret_p)
6221 char name[32];
6222 tree decl;
6224 /* Create __x86_indirect_thunk. */
6225 indirect_thunk_name (name, regno, need_prefix, ret_p);
6226 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6227 get_identifier (name),
6228 build_function_type_list (void_type_node, NULL_TREE));
6229 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6230 NULL_TREE, void_type_node);
6231 TREE_PUBLIC (decl) = 1;
6232 TREE_STATIC (decl) = 1;
6233 DECL_IGNORED_P (decl) = 1;
6235 #if TARGET_MACHO
6236 if (TARGET_MACHO)
6238 switch_to_section (darwin_sections[picbase_thunk_section]);
6239 fputs ("\t.weak_definition\t", asm_out_file);
6240 assemble_name (asm_out_file, name);
6241 fputs ("\n\t.private_extern\t", asm_out_file);
6242 assemble_name (asm_out_file, name);
6243 putc ('\n', asm_out_file);
6244 ASM_OUTPUT_LABEL (asm_out_file, name);
6245 DECL_WEAK (decl) = 1;
6247 else
6248 #endif
6249 if (USE_HIDDEN_LINKONCE)
6251 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6253 targetm.asm_out.unique_section (decl, 0);
6254 switch_to_section (get_named_section (decl, NULL, 0));
6256 targetm.asm_out.globalize_label (asm_out_file, name);
6257 fputs ("\t.hidden\t", asm_out_file);
6258 assemble_name (asm_out_file, name);
6259 putc ('\n', asm_out_file);
6260 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6262 else
6264 switch_to_section (text_section);
6265 ASM_OUTPUT_LABEL (asm_out_file, name);
6268 DECL_INITIAL (decl) = make_node (BLOCK);
6269 current_function_decl = decl;
6270 allocate_struct_function (decl, false);
6271 init_function_start (decl);
6272 /* We're about to hide the function body from callees of final_* by
6273 emitting it directly; tell them we're a thunk, if they care. */
6274 cfun->is_thunk = true;
6275 first_function_block_is_cold = false;
6276 /* Make sure unwind info is emitted for the thunk if needed. */
6277 final_start_function (emit_barrier (), asm_out_file, 1);
6279 output_indirect_thunk (regno);
6281 final_end_function ();
6282 init_insn_lengths ();
6283 free_after_compilation (cfun);
6284 set_cfun (NULL);
6285 current_function_decl = NULL;
6288 static int pic_labels_used;
6290 /* Fills in the label name that should be used for a pc thunk for
6291 the given register. */
6293 static void
6294 get_pc_thunk_name (char name[32], unsigned int regno)
6296 gcc_assert (!TARGET_64BIT);
6298 if (USE_HIDDEN_LINKONCE)
6299 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6300 else
6301 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
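/* For example, with USE_HIDDEN_LINKONCE this produces the familiar
   "__x86.get_pc_thunk.bx" for BX_REG, the suffix coming straight from
   reg_names[].  */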
6305 /* This function generates code for -fpic that loads %ebx with
6306 the return address of the caller and then returns. */
6308 static void
6309 ix86_code_end (void)
6311 rtx xops[2];
6312 unsigned int regno;
6314 if (indirect_return_needed)
6315 output_indirect_thunk_function (indirect_thunk_prefix_none,
6316 INVALID_REGNUM, true);
6317 if (indirect_return_via_cx)
6318 output_indirect_thunk_function (indirect_thunk_prefix_none,
6319 CX_REG, true);
6320 if (indirect_thunk_needed)
6321 output_indirect_thunk_function (indirect_thunk_prefix_none,
6322 INVALID_REGNUM, false);
6324 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
6326 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6327 output_indirect_thunk_function (indirect_thunk_prefix_none,
6328 regno, false);
6331 for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
6333 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6334 output_indirect_thunk_function (indirect_thunk_prefix_none,
6335 regno, false);
6338 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
6340 char name[32];
6341 tree decl;
6343 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6344 output_indirect_thunk_function (indirect_thunk_prefix_none,
6345 regno, false);
6347 if (!(pic_labels_used & (1 << regno)))
6348 continue;
6350 get_pc_thunk_name (name, regno);
6352 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6353 get_identifier (name),
6354 build_function_type_list (void_type_node, NULL_TREE));
6355 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6356 NULL_TREE, void_type_node);
6357 TREE_PUBLIC (decl) = 1;
6358 TREE_STATIC (decl) = 1;
6359 DECL_IGNORED_P (decl) = 1;
6361 #if TARGET_MACHO
6362 if (TARGET_MACHO)
6364 switch_to_section (darwin_sections[picbase_thunk_section]);
6365 fputs ("\t.weak_definition\t", asm_out_file);
6366 assemble_name (asm_out_file, name);
6367 fputs ("\n\t.private_extern\t", asm_out_file);
6368 assemble_name (asm_out_file, name);
6369 putc ('\n', asm_out_file);
6370 ASM_OUTPUT_LABEL (asm_out_file, name);
6371 DECL_WEAK (decl) = 1;
6373 else
6374 #endif
6375 if (USE_HIDDEN_LINKONCE)
6377 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6379 targetm.asm_out.unique_section (decl, 0);
6380 switch_to_section (get_named_section (decl, NULL, 0));
6382 targetm.asm_out.globalize_label (asm_out_file, name);
6383 fputs ("\t.hidden\t", asm_out_file);
6384 assemble_name (asm_out_file, name);
6385 putc ('\n', asm_out_file);
6386 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6388 else
6390 switch_to_section (text_section);
6391 ASM_OUTPUT_LABEL (asm_out_file, name);
6394 DECL_INITIAL (decl) = make_node (BLOCK);
6395 current_function_decl = decl;
6396 allocate_struct_function (decl, false);
6397 init_function_start (decl);
6398 /* We're about to hide the function body from callees of final_* by
6399 emitting it directly; tell them we're a thunk, if they care. */
6400 cfun->is_thunk = true;
6401 first_function_block_is_cold = false;
6402 /* Make sure unwind info is emitted for the thunk if needed. */
6403 final_start_function (emit_barrier (), asm_out_file, 1);
6405 /* Pad stack IP move with 4 instructions (two NOPs count
6406 as one instruction). */
6407 if (TARGET_PAD_SHORT_FUNCTION)
6409 int i = 8;
6411 while (i--)
6412 fputs ("\tnop\n", asm_out_file);
6415 xops[0] = gen_rtx_REG (Pmode, regno);
6416 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6417 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6418 fputs ("\tret\n", asm_out_file);
6419 final_end_function ();
6420 init_insn_lengths ();
6421 free_after_compilation (cfun);
6422 set_cfun (NULL);
6423 current_function_decl = NULL;
6426 if (flag_split_stack)
6427 file_end_indicate_split_stack ();
6430 /* Emit code for the SET_GOT patterns. */
6432 const char *
6433 output_set_got (rtx dest, rtx label)
6435 rtx xops[3];
6437 xops[0] = dest;
6439 if (TARGET_VXWORKS_RTP && flag_pic)
6441 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6442 xops[2] = gen_rtx_MEM (Pmode,
6443 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6444 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6446 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6447 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6448 an unadorned address. */
6449 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6450 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6451 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6452 return "";
6455 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6457 if (flag_pic)
6459 char name[32];
6460 get_pc_thunk_name (name, REGNO (dest));
6461 pic_labels_used |= 1 << REGNO (dest);
6463 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6464 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6465 output_asm_insn ("%!call\t%X2", xops);
6467 #if TARGET_MACHO
6468 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6469 This is what will be referenced by the Mach-O PIC subsystem. */
6470 if (machopic_should_output_picbase_label () || !label)
6471 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6473 /* When we are restoring the pic base at the site of a nonlocal label,
6474 and we decided to emit the pic base above, we will still output a
6475 local label used for calculating the correction offset (even though
6476 the offset will be 0 in that case). */
6477 if (label)
6478 targetm.asm_out.internal_label (asm_out_file, "L",
6479 CODE_LABEL_NUMBER (label));
6480 #endif
6482 else
6484 if (TARGET_MACHO)
6485 /* We don't need a pic base, we're not producing pic. */
6486 gcc_unreachable ();
6488 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6489 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6490 targetm.asm_out.internal_label (asm_out_file, "L",
6491 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6494 if (!TARGET_MACHO)
6495 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6497 return "";
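/* A sketch of the AT&T output this produces for the common 32-bit PIC case
   (non-Darwin, non-VxWorks), with DEST being %ebx:

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   i.e. the pc-thunk call loads the return address into %ebx and the add of
   GOT_SYMBOL_NAME turns it into the GOT pointer.  */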
6500 /* Generate an "push" pattern for input ARG. */
6503 gen_push (rtx arg, bool ppx_p)
6505 struct machine_function *m = cfun->machine;
6507 if (m->fs.cfa_reg == stack_pointer_rtx)
6508 m->fs.cfa_offset += UNITS_PER_WORD;
6509 m->fs.sp_offset += UNITS_PER_WORD;
6511 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6512 arg = gen_rtx_REG (word_mode, REGNO (arg));
6514 rtx stack = gen_rtx_MEM (word_mode,
6515 gen_rtx_PRE_DEC (Pmode,
6516 stack_pointer_rtx));
6517 return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
6521 gen_pushfl (void)
6523 struct machine_function *m = cfun->machine;
6524 rtx flags, mem;
6526 if (m->fs.cfa_reg == stack_pointer_rtx)
6527 m->fs.cfa_offset += UNITS_PER_WORD;
6528 m->fs.sp_offset += UNITS_PER_WORD;
6530 flags = gen_rtx_REG (CCmode, FLAGS_REG);
6532 mem = gen_rtx_MEM (word_mode,
6533 gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
6535 return gen_pushfl2 (word_mode, mem, flags);
6538 /* Generate an "pop" pattern for input ARG. */
6541 gen_pop (rtx arg, bool ppx_p)
6543 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6544 arg = gen_rtx_REG (word_mode, REGNO (arg));
6546 rtx stack = gen_rtx_MEM (word_mode,
6547 gen_rtx_POST_INC (Pmode,
6548 stack_pointer_rtx));
6550 return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
6554 gen_popfl (void)
6556 rtx flags, mem;
6558 flags = gen_rtx_REG (CCmode, FLAGS_REG);
6560 mem = gen_rtx_MEM (word_mode,
6561 gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
6563 return gen_popfl1 (word_mode, flags, mem);
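/* A sketch of the RTL these helpers build on x86-64 (word_mode == DImode)
   when ppx_p is false:

     gen_push (reg)  ->  (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ...))
     gen_pop (reg)   ->  (set (reg:DI ...) (mem:DI (post_inc:DI (reg:DI sp))))

   gen_push (and gen_pushfl) also pre-account for the word-sized stack
   adjustment in cfun->machine->fs, bumping sp_offset and, while the CFA is
   still the stack pointer, cfa_offset.  */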
6566 /* Generate a "push2" pattern for input ARG. */
6568 gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
6570 struct machine_function *m = cfun->machine;
6571 const int offset = UNITS_PER_WORD * 2;
6573 if (m->fs.cfa_reg == stack_pointer_rtx)
6574 m->fs.cfa_offset += offset;
6575 m->fs.sp_offset += offset;
6577 if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
6578 reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
6580 if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
6581 reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
6583 return ppx_p ? gen_push2p_di (mem, reg1, reg2):
6584 gen_push2_di (mem, reg1, reg2);
6587 /* Return >= 0 if there is an unused call-clobbered register available
6588 for the entire function. */
6590 static unsigned int
6591 ix86_select_alt_pic_regnum (void)
6593 if (ix86_use_pseudo_pic_reg ())
6594 return INVALID_REGNUM;
6596 if (crtl->is_leaf
6597 && !crtl->profile
6598 && !ix86_current_function_calls_tls_descriptor)
6600 int i, drap;
6601 /* Can't use the same register for both PIC and DRAP. */
6602 if (crtl->drap_reg)
6603 drap = REGNO (crtl->drap_reg);
6604 else
6605 drap = -1;
6606 for (i = 2; i >= 0; --i)
6607 if (i != drap && !df_regs_ever_live_p (i))
6608 return i;
6611 return INVALID_REGNUM;
6614 /* Return true if REGNO is used by the epilogue. */
6616 bool
6617 ix86_epilogue_uses (int regno)
6619 /* If there are no caller-saved registers, we preserve all registers,
6620 except for MMX and x87 registers which aren't supported when saving
6621 and restoring registers. Don't explicitly save SP register since
6622 it is always preserved. */
6623 return (epilogue_completed
6624 && (cfun->machine->call_saved_registers
6625 == TYPE_NO_CALLER_SAVED_REGISTERS)
6626 && !fixed_regs[regno]
6627 && !STACK_REGNO_P (regno)
6628 && !MMX_REGNO_P (regno));
6631 /* Return nonzero if register REGNO can be used as a scratch register
6632 in peephole2. */
6634 static bool
6635 ix86_hard_regno_scratch_ok (unsigned int regno)
6637 /* If there are no caller-saved registers, we can't use any register
6638 as a scratch register after epilogue and use REGNO as scratch
6639 register only if it has been used before to avoid saving and
6640 restoring it. */
6641 return ((cfun->machine->call_saved_registers
6642 != TYPE_NO_CALLER_SAVED_REGISTERS)
6643 || (!epilogue_completed
6644 && df_regs_ever_live_p (regno)));
6647 /* Return TRUE if we need to save REGNO. */
6649 bool
6650 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6652 rtx reg;
6654 switch (cfun->machine->call_saved_registers)
6656 case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
6657 break;
6659 case TYPE_NO_CALLER_SAVED_REGISTERS:
6660 /* If there are no caller-saved registers, we preserve all
6661 registers, except for MMX and x87 registers which aren't
6662 supported when saving and restoring registers. Don't
6663 explicitly save SP register since it is always preserved.
6665 Don't preserve registers used for function return value. */
6666 reg = crtl->return_rtx;
6667 if (reg)
6669 unsigned int i = REGNO (reg);
6670 unsigned int nregs = REG_NREGS (reg);
6671 while (nregs-- > 0)
6672 if ((i + nregs) == regno)
6673 return false;
6676 return (df_regs_ever_live_p (regno)
6677 && !fixed_regs[regno]
6678 && !STACK_REGNO_P (regno)
6679 && !MMX_REGNO_P (regno)
6680 && (regno != HARD_FRAME_POINTER_REGNUM
6681 || !frame_pointer_needed));
6683 case TYPE_NO_CALLEE_SAVED_REGISTERS:
6684 return false;
6686 case TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP:
6687 if (regno != HARD_FRAME_POINTER_REGNUM)
6688 return false;
6689 break;
6692 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6693 && pic_offset_table_rtx)
6695 if (ix86_use_pseudo_pic_reg ())
6697 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6698 _mcount in prologue. */
6699 if (!TARGET_64BIT && flag_pic && crtl->profile)
6700 return true;
6702 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6703 || crtl->profile
6704 || crtl->calls_eh_return
6705 || crtl->uses_const_pool
6706 || cfun->has_nonlocal_label)
6707 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6710 if (crtl->calls_eh_return && maybe_eh_return)
6712 unsigned i;
6713 for (i = 0; ; i++)
6715 unsigned test = EH_RETURN_DATA_REGNO (i);
6716 if (test == INVALID_REGNUM)
6717 break;
6718 if (test == regno)
6719 return true;
6723 if (ignore_outlined && cfun->machine->call_ms2sysv)
6725 unsigned count = cfun->machine->call_ms2sysv_extra_regs
6726 + xlogue_layout::MIN_REGS;
6727 if (xlogue_layout::is_stub_managed_reg (regno, count))
6728 return false;
6731 if (crtl->drap_reg
6732 && regno == REGNO (crtl->drap_reg)
6733 && !cfun->machine->no_drap_save_restore)
6734 return true;
6736 return (df_regs_ever_live_p (regno)
6737 && !call_used_or_fixed_reg_p (regno)
6738 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6741 /* Return the number of saved general purpose registers. */
6743 static int
6744 ix86_nsaved_regs (void)
6746 int nregs = 0;
6747 int regno;
6749 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6750 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6751 nregs ++;
6752 return nregs;
6755 /* Return number of saved SSE registers. */
6757 static int
6758 ix86_nsaved_sseregs (void)
6760 int nregs = 0;
6761 int regno;
6763 if (!TARGET_64BIT_MS_ABI)
6764 return 0;
6765 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6766 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6767 nregs ++;
6768 return nregs;
6771 /* Given FROM and TO register numbers, say whether this elimination is
6772 allowed. If stack alignment is needed, we can only replace argument
6773 pointer with hard frame pointer, or replace frame pointer with stack
6774 pointer. Otherwise, frame pointer elimination is automatically
6775 handled and all other eliminations are valid. */
6777 static bool
6778 ix86_can_eliminate (const int from, const int to)
6780 if (stack_realign_fp)
6781 return ((from == ARG_POINTER_REGNUM
6782 && to == HARD_FRAME_POINTER_REGNUM)
6783 || (from == FRAME_POINTER_REGNUM
6784 && to == STACK_POINTER_REGNUM));
6785 else
6786 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6789 /* Return the offset between two registers, one to be eliminated, and the other
6790 its replacement, at the start of a routine. */
6792 HOST_WIDE_INT
6793 ix86_initial_elimination_offset (int from, int to)
6795 struct ix86_frame &frame = cfun->machine->frame;
6797 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6798 return frame.hard_frame_pointer_offset;
6799 else if (from == FRAME_POINTER_REGNUM
6800 && to == HARD_FRAME_POINTER_REGNUM)
6801 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6802 else
6804 gcc_assert (to == STACK_POINTER_REGNUM);
6806 if (from == ARG_POINTER_REGNUM)
6807 return frame.stack_pointer_offset;
6809 gcc_assert (from == FRAME_POINTER_REGNUM);
6810 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6814 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6815 void
6816 warn_once_call_ms2sysv_xlogues (const char *feature)
6818 static bool warned_once = false;
6819 if (!warned_once)
6821 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6822 feature);
6823 warned_once = true;
6827 /* Return the probing interval for -fstack-clash-protection. */
6829 static HOST_WIDE_INT
6830 get_probe_interval (void)
6832 if (flag_stack_clash_protection)
6833 return (HOST_WIDE_INT_1U
6834 << param_stack_clash_protection_probe_interval);
6835 else
6836 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
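/* Worked example, assuming the default probe interval exponent of 12: the
   interval is HOST_WIDE_INT_1U << 12 == 4096 bytes, one 4 KiB page, whether
   it comes from -fstack-clash-protection or from
   STACK_CHECK_PROBE_INTERVAL_EXP.  */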
6839 /* When using -fsplit-stack, the allocation routines set a field in
6840 the TCB to the bottom of the stack plus this much space, measured
6841 in bytes. */
6843 #define SPLIT_STACK_AVAILABLE 256
6845 /* Return true if push2/pop2 can be generated. */
6847 static bool
6848 ix86_can_use_push2pop2 (void)
6850 /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */
6851 unsigned int incoming_stack_boundary
6852 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
6853 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
6854 return incoming_stack_boundary % 128 == 0;
6857 /* Helper function to determine whether push2/pop2 can be used in prologue or
6858 epilogue for register save/restore. */
6859 static bool
6860 ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
6862 if (!ix86_can_use_push2pop2 ())
6863 return false;
6864 int aligned = cfun->machine->fs.sp_offset % 16 == 0;
6865 return TARGET_APX_PUSH2POP2
6866 && !cfun->machine->frame.save_regs_using_mov
6867 && cfun->machine->func_type == TYPE_NORMAL
6868 && (nregs + aligned) >= 3;
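/* Worked example of the (nregs + aligned) >= 3 heuristic: when fs.sp_offset
   is already a multiple of 16 (aligned == 1), push2/pop2 is used once at
   least two registers need saving; with a misaligned offset at least three
   are needed, since the first save is then a plain push that restores the
   16-byte alignment (see ix86_emit_save_regs below).  */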
6871 /* Fill structure ix86_frame about frame of currently computed function. */
6873 static void
6874 ix86_compute_frame_layout (void)
6876 struct ix86_frame *frame = &cfun->machine->frame;
6877 struct machine_function *m = cfun->machine;
6878 unsigned HOST_WIDE_INT stack_alignment_needed;
6879 HOST_WIDE_INT offset;
6880 unsigned HOST_WIDE_INT preferred_alignment;
6881 HOST_WIDE_INT size = ix86_get_frame_size ();
6882 HOST_WIDE_INT to_allocate;
6884 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6885 * ms_abi functions that call a sysv function. We now need to prune away
6886 * cases where it should be disabled. */
6887 if (TARGET_64BIT && m->call_ms2sysv)
6889 gcc_assert (TARGET_64BIT_MS_ABI);
6890 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6891 gcc_assert (!TARGET_SEH);
6892 gcc_assert (TARGET_SSE);
6893 gcc_assert (!ix86_using_red_zone ());
6895 if (crtl->calls_eh_return)
6897 gcc_assert (!reload_completed);
6898 m->call_ms2sysv = false;
6899 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6902 else if (ix86_static_chain_on_stack)
6904 gcc_assert (!reload_completed);
6905 m->call_ms2sysv = false;
6906 warn_once_call_ms2sysv_xlogues ("static call chains");
6909 /* Finally, compute which registers the stub will manage. */
6910 else
6912 unsigned count = xlogue_layout::count_stub_managed_regs ();
6913 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6914 m->call_ms2sysv_pad_in = 0;
6918 frame->nregs = ix86_nsaved_regs ();
6919 frame->nsseregs = ix86_nsaved_sseregs ();
6921 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
6922 except for function prologues, leaf functions and when the default
6923 incoming stack boundary is overridden at the command line or via the
6924 force_align_arg_pointer attribute.
6926 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
6927 at call sites, including profile function calls.
6929 For APX push2/pop2, the stack also requires 128b alignment. */
6930 if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
6931 && crtl->preferred_stack_boundary < 128)
6932 || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6933 && crtl->preferred_stack_boundary < 128)
6934 && (!crtl->is_leaf || cfun->calls_alloca != 0
6935 || ix86_current_function_calls_tls_descriptor
6936 || (TARGET_MACHO && crtl->profile)
6937 || ix86_incoming_stack_boundary < 128)))
6939 crtl->preferred_stack_boundary = 128;
6940 if (crtl->stack_alignment_needed < 128)
6941 crtl->stack_alignment_needed = 128;
6944 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6945 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6947 gcc_assert (!size || stack_alignment_needed);
6948 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6949 gcc_assert (preferred_alignment <= stack_alignment_needed);
6951 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6952 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6953 if (TARGET_64BIT && m->call_ms2sysv)
6955 gcc_assert (stack_alignment_needed >= 16);
6956 gcc_assert (!frame->nsseregs);
6959 /* For SEH we have to limit the amount of code movement into the prologue.
6960 At present we do this via a BLOCKAGE, at which point there's very little
6961 scheduling that can be done, which means that there's very little point
6962 in doing anything except PUSHs. */
6963 if (TARGET_SEH)
6964 m->use_fast_prologue_epilogue = false;
6965 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6967 int count = frame->nregs;
6968 struct cgraph_node *node = cgraph_node::get (current_function_decl);
6970 /* The fast prologue uses move instead of push to save registers. This
6971 is significantly longer, but also executes faster as modern hardware
6972 can execute the moves in parallel, but can't do that for push/pop.
6974 Be careful about choosing which prologue to emit: when the function takes
6975 many instructions to execute, we may as well use the slow version, likewise
6976 when the function is known to be outside a hot spot (this is known with
6977 feedback only). Weight the size of the function by the number of registers
6978 to save, as it is cheap to use one or two push instructions but very
6979 slow to use many of them.
6981 Calling this hook multiple times with the same frame requirements
6982 must produce the same layout, since the RA might otherwise be
6983 unable to reach a fixed point or might fail its final sanity checks.
6984 This means that once we've assumed that a function does or doesn't
6985 have a particular size, we have to stick to that assumption
6986 regardless of how the function has changed since. */
6987 if (count)
6988 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6989 if (node->frequency < NODE_FREQUENCY_NORMAL
6990 || (flag_branch_probabilities
6991 && node->frequency < NODE_FREQUENCY_HOT))
6992 m->use_fast_prologue_epilogue = false;
6993 else
6995 if (count != frame->expensive_count)
6997 frame->expensive_count = count;
6998 frame->expensive_p = expensive_function_p (count);
7000 m->use_fast_prologue_epilogue = !frame->expensive_p;
7004 frame->save_regs_using_mov
7005 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
7007 /* Skip return address and error code in exception handler. */
7008 offset = INCOMING_FRAME_SP_OFFSET;
7010 /* Skip pushed static chain. */
7011 if (ix86_static_chain_on_stack)
7012 offset += UNITS_PER_WORD;
7014 /* Skip saved base pointer. */
7015 if (frame_pointer_needed)
7016 offset += UNITS_PER_WORD;
7017 frame->hfp_save_offset = offset;
7019 /* The traditional frame pointer location is at the top of the frame. */
7020 frame->hard_frame_pointer_offset = offset;
7022 /* Register save area */
7023 offset += frame->nregs * UNITS_PER_WORD;
7024 frame->reg_save_offset = offset;
7026 /* Calculate the size of the va-arg area (not including padding, if any). */
7027 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7029 /* Also adjust stack_realign_offset for the largest alignment of
7030 stack slot actually used. */
7031 if (stack_realign_fp
7032 || (cfun->machine->max_used_stack_alignment != 0
7033 && (offset % cfun->machine->max_used_stack_alignment) != 0))
7035 /* We may need a 16-byte aligned stack for the remainder of the
7036 register save area, but the stack frame for the local function
7037 may require a greater alignment if using AVX/2/512. In order
7038 to avoid wasting space, we first calculate the space needed for
7039 the rest of the register saves, add that to the stack pointer,
7040 and then realign the stack to the boundary of the start of the
7041 frame for the local function. */
7042 HOST_WIDE_INT space_needed = 0;
7043 HOST_WIDE_INT sse_reg_space_needed = 0;
7045 if (TARGET_64BIT)
7047 if (m->call_ms2sysv)
7049 m->call_ms2sysv_pad_in = 0;
7050 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
7053 else if (frame->nsseregs)
7054 /* The only ABI that has saved SSE registers (Win64) also has a
7055 16-byte aligned default stack. However, many programs violate
7056 the ABI, and Wine64 forces stack realignment to compensate. */
7057 space_needed = frame->nsseregs * 16;
7059 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
7061 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
7062 we round anyway to be pedantic. */
7063 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
7065 else
7066 space_needed = frame->va_arg_size;
7068 /* Record the allocation size required prior to the realignment AND. */
7069 frame->stack_realign_allocate = space_needed;
7071 /* The re-aligned stack starts at frame->stack_realign_offset. Values
7072 before this point are not directly comparable with values below
7073 this point. Use sp_valid_at to determine if the stack pointer is
7074 valid for a given offset, fp_valid_at for the frame pointer, or
7075 choose_baseaddr to have a base register chosen for you.
7077 Note that the result of (frame->stack_realign_offset
7078 & (stack_alignment_needed - 1)) may not equal zero. */
7079 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
7080 frame->stack_realign_offset = offset - space_needed;
7081 frame->sse_reg_save_offset = frame->stack_realign_offset
7082 + sse_reg_space_needed;
7084 else
7086 frame->stack_realign_offset = offset;
7088 if (TARGET_64BIT && m->call_ms2sysv)
7090 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
7091 offset += xlogue_layout::get_instance ().get_stack_space_used ();
7094 /* Align and set SSE register save area. */
7095 else if (frame->nsseregs)
7097 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
7098 required and the DRAP re-alignment boundary is at least 16 bytes,
7099 then we want the SSE register save area properly aligned. */
7100 if (ix86_incoming_stack_boundary >= 128
7101 || (stack_realign_drap && stack_alignment_needed >= 16))
7102 offset = ROUND_UP (offset, 16);
7103 offset += frame->nsseregs * 16;
7105 frame->sse_reg_save_offset = offset;
7106 offset += frame->va_arg_size;
7109 /* Align start of frame for local function. When a function call
7110 is removed, it may become a leaf function. But if arguments may
7111 be passed on the stack, we need to align the stack when there is no
7112 tail call. */
7113 if (m->call_ms2sysv
7114 || frame->va_arg_size != 0
7115 || size != 0
7116 || !crtl->is_leaf
7117 || (!crtl->tail_call_emit
7118 && cfun->machine->outgoing_args_on_stack)
7119 || cfun->calls_alloca
7120 || ix86_current_function_calls_tls_descriptor)
7121 offset = ROUND_UP (offset, stack_alignment_needed);
7123 /* Frame pointer points here. */
7124 frame->frame_pointer_offset = offset;
7126 offset += size;
7128 /* Add outgoing arguments area. Can be skipped if we eliminated
7129 all the function calls as dead code.
7130 Skipping is however impossible when function calls alloca. Alloca
7131 expander assumes that last crtl->outgoing_args_size
7132 of stack frame are unused. */
7133 if (ACCUMULATE_OUTGOING_ARGS
7134 && (!crtl->is_leaf || cfun->calls_alloca
7135 || ix86_current_function_calls_tls_descriptor))
7137 offset += crtl->outgoing_args_size;
7138 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7140 else
7141 frame->outgoing_arguments_size = 0;
7143 /* Align stack boundary. Only needed if we're calling another function
7144 or using alloca. */
7145 if (!crtl->is_leaf || cfun->calls_alloca
7146 || ix86_current_function_calls_tls_descriptor)
7147 offset = ROUND_UP (offset, preferred_alignment);
7149 /* We've reached end of stack frame. */
7150 frame->stack_pointer_offset = offset;
7152 /* Size prologue needs to allocate. */
7153 to_allocate = offset - frame->sse_reg_save_offset;
7155 if ((!to_allocate && frame->nregs <= 1)
7156 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
7157 /* If static stack checking is enabled and done with probes,
7158 the registers need to be saved before allocating the frame. */
7159 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
7160 /* If stack clash probing needs a loop, then it needs a
7161 scratch register. But the returned register is only guaranteed
7162 to be safe to use after register saves are complete. So if
7163 stack clash protections are enabled and the allocated frame is
7164 larger than the probe interval, then use pushes to save
7165 callee saved registers. */
7166 || (flag_stack_clash_protection
7167 && !ix86_target_stack_probe ()
7168 && to_allocate > get_probe_interval ()))
7169 frame->save_regs_using_mov = false;
7171 if (ix86_using_red_zone ()
7172 && crtl->sp_is_unchanging
7173 && crtl->is_leaf
7174 && !ix86_pc_thunk_call_expanded
7175 && !ix86_current_function_calls_tls_descriptor)
7177 frame->red_zone_size = to_allocate;
7178 if (frame->save_regs_using_mov)
7179 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7180 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7181 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7183 else
7184 frame->red_zone_size = 0;
7185 frame->stack_pointer_offset -= frame->red_zone_size;
7187 /* The SEH frame pointer location is near the bottom of the frame.
7188 This is enforced by the fact that the difference between the
7189 stack pointer and the frame pointer is limited to 240 bytes in
7190 the unwind data structure. */
7191 if (TARGET_SEH)
7193 /* Force the frame pointer to point at or below the lowest register save
7194 area, see the SEH code in config/i386/winnt.cc for the rationale. */
7195 frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
7197 /* If we can leave the frame pointer where it is, do so; however return
7198 the establisher frame for __builtin_frame_address (0) or else if the
7199 frame overflows the SEH maximum frame size.
7201 Note that the value returned by __builtin_frame_address (0) is quite
7202 constrained, because setjmp is piggybacked on the SEH machinery with
7203 recent versions of MinGW:
7205 # elif defined(__SEH__)
7206 # if defined(__aarch64__) || defined(_ARM64_)
7207 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
7208 # elif (__MINGW_GCC_VERSION < 40702)
7209 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
7210 # else
7211 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
7212 # endif
7214 and the second argument passed to _setjmp, if not null, is forwarded
7215 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
7216 built an ExceptionRecord on the fly describing the setjmp buffer). */
7217 const HOST_WIDE_INT diff
7218 = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
7219 if (diff <= 255 && !crtl->accesses_prior_frames)
7221 /* The resulting diff will be a multiple of 16 lower than 255,
7222 i.e. at most 240 as required by the unwind data structure. */
7223 frame->hard_frame_pointer_offset += (diff & 15);
7225 else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
7227 /* Ideally we'd determine what portion of the local stack frame
7228 (within the constraint of the lowest 240) is most heavily used.
7229 But without that complication, simply bias the frame pointer
7230 by 128 bytes so as to maximize the amount of the local stack
7231 frame that is addressable with 8-bit offsets. */
7232 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
7234 else
7235 frame->hard_frame_pointer_offset = frame->hfp_save_offset;
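/* A worked example of the SEH adjustment just above: with a raw distance of
   255 bytes between stack_pointer_offset and hard_frame_pointer_offset,
   adding (diff & 15) == 15 to the frame pointer offset leaves a distance of
   240, the largest multiple of 16 the unwind data can describe; a distance
   of 200 becomes 192 the same way.  */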
7239 /* This is semi-inlined memory_address_length, but simplified
7240 since we know that we're always dealing with reg+offset, and
7241 to avoid having to create and discard all that rtl. */
7243 static inline int
7244 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
7246 int len = 4;
7248 if (offset == 0)
7250 /* EBP and R13 cannot be encoded without an offset. */
7251 len = (regno == BP_REG || regno == R13_REG);
7253 else if (IN_RANGE (offset, -128, 127))
7254 len = 1;
7256 /* ESP and R12 must be encoded with a SIB byte. */
7257 if (regno == SP_REG || regno == R12_REG)
7258 len++;
7260 return len;
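/* Worked examples of the length estimate above (AT&T notation):
     (%rax)            -> 0  no displacement byte at all
     (%rbp), (%r13)    -> 1  always need a disp8, even for offset 0
     8(%rax)           -> 1  disp8
     8(%rsp), 8(%r12)  -> 2  disp8 plus the mandatory SIB byte
     512(%rax)         -> 4  disp32  */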
7263 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7264 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7266 static bool
7267 sp_valid_at (HOST_WIDE_INT cfa_offset)
7269 const struct machine_frame_state &fs = cfun->machine->fs;
7270 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7272 /* Validate that the cfa_offset isn't in a "no-man's land". */
7273 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7274 return false;
7276 return fs.sp_valid;
7279 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7280 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7282 static inline bool
7283 fp_valid_at (HOST_WIDE_INT cfa_offset)
7285 const struct machine_frame_state &fs = cfun->machine->fs;
7286 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7288 /* Validate that the cfa_offset isn't in a "no-man's land". */
7289 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7290 return false;
7292 return fs.fp_valid;
7295 /* Choose a base register based upon alignment requested, speed and/or
7296 size. */
7298 static void
7299 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
7300 HOST_WIDE_INT &base_offset,
7301 unsigned int align_requested, unsigned int *align)
7303 const struct machine_function *m = cfun->machine;
7304 unsigned int hfp_align;
7305 unsigned int drap_align;
7306 unsigned int sp_align;
7307 bool hfp_ok = fp_valid_at (cfa_offset);
7308 bool drap_ok = m->fs.drap_valid;
7309 bool sp_ok = sp_valid_at (cfa_offset);
7311 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
7313 /* Filter out any registers that don't meet the requested alignment
7314 criteria. */
7315 if (align_requested)
7317 if (m->fs.realigned)
7318 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
7319 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
7320 notes (which we would need to use a realigned stack pointer),
7321 so disable on SEH targets. */
7322 else if (m->fs.sp_realigned)
7323 sp_align = crtl->stack_alignment_needed;
7325 hfp_ok = hfp_ok && hfp_align >= align_requested;
7326 drap_ok = drap_ok && drap_align >= align_requested;
7327 sp_ok = sp_ok && sp_align >= align_requested;
7330 if (m->use_fast_prologue_epilogue)
7332 /* Choose the base register most likely to allow the most scheduling
7333 opportunities. Generally FP is valid throughout the function,
7334 while DRAP must be reloaded within the epilogue. But choose either
7335 over the SP due to increased encoding size. */
7337 if (hfp_ok)
7339 base_reg = hard_frame_pointer_rtx;
7340 base_offset = m->fs.fp_offset - cfa_offset;
7342 else if (drap_ok)
7344 base_reg = crtl->drap_reg;
7345 base_offset = 0 - cfa_offset;
7347 else if (sp_ok)
7349 base_reg = stack_pointer_rtx;
7350 base_offset = m->fs.sp_offset - cfa_offset;
7353 else
7355 HOST_WIDE_INT toffset;
7356 int len = 16, tlen;
7358 /* Choose the base register with the smallest address encoding.
7359 With a tie, choose FP > DRAP > SP. */
7360 if (sp_ok)
7362 base_reg = stack_pointer_rtx;
7363 base_offset = m->fs.sp_offset - cfa_offset;
7364 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7366 if (drap_ok)
7368 toffset = 0 - cfa_offset;
7369 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
7370 if (tlen <= len)
7372 base_reg = crtl->drap_reg;
7373 base_offset = toffset;
7374 len = tlen;
7377 if (hfp_ok)
7379 toffset = m->fs.fp_offset - cfa_offset;
7380 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
7381 if (tlen <= len)
7383 base_reg = hard_frame_pointer_rtx;
7384 base_offset = toffset;
7389 /* Set the align return value. */
7390 if (align)
7392 if (base_reg == stack_pointer_rtx)
7393 *align = sp_align;
7394 else if (base_reg == crtl->drap_reg)
7395 *align = drap_align;
7396 else if (base_reg == hard_frame_pointer_rtx)
7397 *align = hfp_align;
7401 /* Return an RTX that points to CFA_OFFSET within the stack frame and
7402 the alignment of address. If ALIGN is non-null, it should point to
7403 an alignment value (in bits) that is preferred or zero and will
7404 receive the alignment of the base register that was selected,
7405 irrespective of whether or not CFA_OFFSET is a multiple of that
7406 alignment value. If it is possible for the base register offset to be
7407 non-immediate then SCRATCH_REGNO should specify a scratch register to
7408 use.
7410 The valid base registers are taken from CFUN->MACHINE->FS. */
7412 static rtx
7413 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7414 unsigned int scratch_regno = INVALID_REGNUM)
7416 rtx base_reg = NULL;
7417 HOST_WIDE_INT base_offset = 0;
7419 /* If a specific alignment is requested, try to get a base register
7420 with that alignment first. */
7421 if (align && *align)
7422 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7424 if (!base_reg)
7425 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7427 gcc_assert (base_reg != NULL);
7429 rtx base_offset_rtx = GEN_INT (base_offset);
7431 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7433 gcc_assert (scratch_regno != INVALID_REGNUM);
7435 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7436 emit_move_insn (scratch_reg, base_offset_rtx);
7438 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7441 return plus_constant (Pmode, base_reg, base_offset);
7444 /* Emit code to save registers in the prologue. */
7446 static void
7447 ix86_emit_save_regs (void)
7449 int regno;
7450 rtx_insn *insn;
7451 bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
7453 if (!TARGET_APX_PUSH2POP2
7454 || !ix86_can_use_push2pop2 ()
7455 || cfun->machine->func_type != TYPE_NORMAL)
7457 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7458 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7460 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
7461 use_ppx));
7462 RTX_FRAME_RELATED_P (insn) = 1;
7465 else
7467 int regno_list[2];
7468 regno_list[0] = regno_list[1] = -1;
7469 int loaded_regnum = 0;
7470 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
7472 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7473 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7475 if (aligned)
7477 regno_list[loaded_regnum++] = regno;
7478 if (loaded_regnum == 2)
7480 gcc_assert (regno_list[0] != -1
7481 && regno_list[1] != -1
7482 && regno_list[0] != regno_list[1]);
7483 const int offset = UNITS_PER_WORD * 2;
7484 rtx mem = gen_rtx_MEM (TImode,
7485 gen_rtx_PRE_DEC (Pmode,
7486 stack_pointer_rtx));
7487 insn = emit_insn (gen_push2 (mem,
7488 gen_rtx_REG (word_mode,
7489 regno_list[0]),
7490 gen_rtx_REG (word_mode,
7491 regno_list[1]),
7492 use_ppx));
7493 RTX_FRAME_RELATED_P (insn) = 1;
7494 rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
7496 for (int i = 0; i < 2; i++)
7498 rtx dwarf_reg = gen_rtx_REG (word_mode,
7499 regno_list[i]);
7500 rtx sp_offset = plus_constant (Pmode,
7501 stack_pointer_rtx,
7502 + UNITS_PER_WORD
7503 * (1 - i));
7504 rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
7505 sp_offset),
7506 dwarf_reg);
7507 RTX_FRAME_RELATED_P (tmp) = 1;
7508 XVECEXP (dwarf, 0, i + 1) = tmp;
7510 rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
7511 plus_constant (Pmode,
7512 stack_pointer_rtx,
7513 -offset));
7514 RTX_FRAME_RELATED_P (sp_tmp) = 1;
7515 XVECEXP (dwarf, 0, 0) = sp_tmp;
7516 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
7518 loaded_regnum = 0;
7519 regno_list[0] = regno_list[1] = -1;
7522 else
7524 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
7525 use_ppx));
7526 RTX_FRAME_RELATED_P (insn) = 1;
7527 aligned = true;
7530 if (loaded_regnum == 1)
7532 insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
7533 regno_list[0]),
7534 use_ppx));
7535 RTX_FRAME_RELATED_P (insn) = 1;
7540 /* Emit a single register save at CFA - CFA_OFFSET. */
7542 static void
7543 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
7544 HOST_WIDE_INT cfa_offset)
7546 struct machine_function *m = cfun->machine;
7547 rtx reg = gen_rtx_REG (mode, regno);
7548 rtx mem, addr, base, insn;
7549 unsigned int align = GET_MODE_ALIGNMENT (mode);
7551 addr = choose_baseaddr (cfa_offset, &align);
7552 mem = gen_frame_mem (mode, addr);
7554 /* The location alignment depends upon the base register. */
7555 align = MIN (GET_MODE_ALIGNMENT (mode), align);
7556 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
7557 set_mem_align (mem, align);
7559 insn = emit_insn (gen_rtx_SET (mem, reg));
7560 RTX_FRAME_RELATED_P (insn) = 1;
7562 base = addr;
7563 if (GET_CODE (base) == PLUS)
7564 base = XEXP (base, 0);
7565 gcc_checking_assert (REG_P (base));
7567 /* When saving registers into a re-aligned local stack frame, avoid
7568 any tricky guessing by dwarf2out. */
7569 if (m->fs.realigned)
7571 gcc_checking_assert (stack_realign_drap);
7573 if (regno == REGNO (crtl->drap_reg))
7575 /* A bit of a hack. We force the DRAP register to be saved in
7576 the re-aligned stack frame, which provides us with a copy
7577 of the CFA that will last past the prologue. Install it. */
7578 gcc_checking_assert (cfun->machine->fs.fp_valid);
7579 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7580 cfun->machine->fs.fp_offset - cfa_offset);
7581 mem = gen_rtx_MEM (mode, addr);
7582 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
7584 else
7586 /* The frame pointer is a stable reference within the
7587 aligned frame. Use it. */
7588 gcc_checking_assert (cfun->machine->fs.fp_valid);
7589 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7590 cfun->machine->fs.fp_offset - cfa_offset);
7591 mem = gen_rtx_MEM (mode, addr);
7592 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7596 else if (base == stack_pointer_rtx && m->fs.sp_realigned
7597 && cfa_offset >= m->fs.sp_realigned_offset)
7599 gcc_checking_assert (stack_realign_fp);
7600 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7603 /* The memory may not be relative to the current CFA register,
7604 which means that we may need to generate a new pattern for
7605 use by the unwind info. */
7606 else if (base != m->fs.cfa_reg)
7608 addr = plus_constant (Pmode, m->fs.cfa_reg,
7609 m->fs.cfa_offset - cfa_offset);
7610 mem = gen_rtx_MEM (mode, addr);
7611 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7615 /* Emit code to save registers using MOV insns.
7616 First register is stored at CFA - CFA_OFFSET. */
7617 static void
7618 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7620 unsigned int regno;
7622 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7623 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7625 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
7626 cfa_offset -= UNITS_PER_WORD;
7630 /* Emit code to save SSE registers using MOV insns.
7631 First register is stored at CFA - CFA_OFFSET. */
7632 static void
7633 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7635 unsigned int regno;
7637 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7638 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7640 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7641 cfa_offset -= GET_MODE_SIZE (V4SFmode);
7645 static GTY(()) rtx queued_cfa_restores;
7647 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7648 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7649 Don't add the note if the previously saved value will be left untouched
7650 within stack red-zone till return, as unwinders can find the same value
7651 in the register and on the stack. */
7653 static void
7654 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7656 if (!crtl->shrink_wrapped
7657 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7658 return;
7660 if (insn)
7662 add_reg_note (insn, REG_CFA_RESTORE, reg);
7663 RTX_FRAME_RELATED_P (insn) = 1;
7665 else
7666 queued_cfa_restores
7667 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7670 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7672 static void
7673 ix86_add_queued_cfa_restore_notes (rtx insn)
7675 rtx last;
7676 if (!queued_cfa_restores)
7677 return;
7678 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7680 XEXP (last, 1) = REG_NOTES (insn);
7681 REG_NOTES (insn) = queued_cfa_restores;
7682 queued_cfa_restores = NULL_RTX;
7683 RTX_FRAME_RELATED_P (insn) = 1;
7686 /* Expand prologue or epilogue stack adjustment.
7687 The pattern exists to put a dependency on all ebp-based memory accesses.
7688 STYLE should be negative if instructions should be marked as frame related,
7689 zero if %r11 register is live and cannot be freely used and positive
7690 otherwise. */
7692 static rtx
7693 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7694 int style, bool set_cfa)
7696 struct machine_function *m = cfun->machine;
7697 rtx addend = offset;
7698 rtx insn;
7699 bool add_frame_related_expr = false;
7701 if (!x86_64_immediate_operand (offset, Pmode))
7703 /* r11 is used by indirect sibcall return as well, set before the
7704 epilogue and used after the epilogue. */
7705 if (style)
7706 addend = gen_rtx_REG (Pmode, R11_REG);
7707 else
7709 gcc_assert (src != hard_frame_pointer_rtx
7710 && dest != hard_frame_pointer_rtx);
7711 addend = hard_frame_pointer_rtx;
7713 emit_insn (gen_rtx_SET (addend, offset));
7714 if (style < 0)
7715 add_frame_related_expr = true;
7718 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7719 (Pmode, dest, src, addend));
7720 if (style >= 0)
7721 ix86_add_queued_cfa_restore_notes (insn);
7723 if (set_cfa)
7725 rtx r;
7727 gcc_assert (m->fs.cfa_reg == src);
7728 m->fs.cfa_offset += INTVAL (offset);
7729 m->fs.cfa_reg = dest;
7731 r = gen_rtx_PLUS (Pmode, src, offset);
7732 r = gen_rtx_SET (dest, r);
7733 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7734 RTX_FRAME_RELATED_P (insn) = 1;
7736 else if (style < 0)
7738 RTX_FRAME_RELATED_P (insn) = 1;
7739 if (add_frame_related_expr)
7741 rtx r = gen_rtx_PLUS (Pmode, src, offset);
7742 r = gen_rtx_SET (dest, r);
7743 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7747 if (dest == stack_pointer_rtx)
7749 HOST_WIDE_INT ooffset = m->fs.sp_offset;
7750 bool valid = m->fs.sp_valid;
7751 bool realigned = m->fs.sp_realigned;
7753 if (src == hard_frame_pointer_rtx)
7755 valid = m->fs.fp_valid;
7756 realigned = false;
7757 ooffset = m->fs.fp_offset;
7759 else if (src == crtl->drap_reg)
7761 valid = m->fs.drap_valid;
7762 realigned = false;
7763 ooffset = 0;
7765 else
7767 /* Else there are two possibilities: SP itself, which we set
7768 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
7769 taken care of by hand along the eh_return path. */
7770 gcc_checking_assert (src == stack_pointer_rtx
7771 || offset == const0_rtx);
7774 m->fs.sp_offset = ooffset - INTVAL (offset);
7775 m->fs.sp_valid = valid;
7776 m->fs.sp_realigned = realigned;
7778 return insn;
7781 /* Find an available register to be used as dynamic realign argument
7782 pointer register. Such a register will be written in the prologue and
7783 used at the beginning of the body, so it must not be
7784 1. a parameter passing register.
7785 2. the GOT pointer.
7786 We reuse static-chain register if it is available. Otherwise, we
7787 use DI for i386 and R13 for x86-64. We chose R13 since it has
7788 shorter encoding.
7790 Return: the regno of chosen register. */
7792 static unsigned int
7793 find_drap_reg (void)
7795 tree decl = cfun->decl;
7797 /* Always use callee-saved register if there are no caller-saved
7798 registers. */
7799 if (TARGET_64BIT)
7801 /* Use R13 for a nested function or a function that needs a static chain.
7802 Since a function with a tail call may use any caller-saved
7803 register in the epilogue, DRAP must not use a caller-saved
7804 register in such a case. */
7805 if (DECL_STATIC_CHAIN (decl)
7806 || (cfun->machine->call_saved_registers
7807 == TYPE_NO_CALLER_SAVED_REGISTERS)
7808 || crtl->tail_call_emit)
7809 return R13_REG;
7811 return R10_REG;
7813 else
7815 /* Use DI for a nested function or a function that needs a static chain.
7816 Since a function with a tail call may use any caller-saved
7817 register in the epilogue, DRAP must not use a caller-saved
7818 register in such a case. */
7819 if (DECL_STATIC_CHAIN (decl)
7820 || (cfun->machine->call_saved_registers
7821 == TYPE_NO_CALLER_SAVED_REGISTERS)
7822 || crtl->tail_call_emit
7823 || crtl->calls_eh_return)
7824 return DI_REG;
7826 /* Reuse static chain register if it isn't used for parameter
7827 passing. */
7828 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7830 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7831 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7832 return CX_REG;
7834 return DI_REG;
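/* Summarizing the selection above: on x86-64 the DRAP register is %r13 when
   a static chain, no_caller_saved_registers or an emitted tail call demands
   a callee-saved register, otherwise %r10; on 32-bit it is %edi in those
   cases (and for __builtin_eh_return), otherwise %ecx when regparm <= 2 and
   the calling convention is neither fastcall nor thiscall, falling back to
   %edi.  */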
7838 /* Return minimum incoming stack alignment. */
7840 static unsigned int
7841 ix86_minimum_incoming_stack_boundary (bool sibcall)
7843 unsigned int incoming_stack_boundary;
7845 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
7846 if (cfun->machine->func_type != TYPE_NORMAL)
7847 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7848 /* Prefer the one specified at command line. */
7849 else if (ix86_user_incoming_stack_boundary)
7850 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
7851 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
7852 if -mstackrealign is used, this isn't a sibcall check and the
7853 estimated stack alignment is 128 bits. */
7854 else if (!sibcall
7855 && ix86_force_align_arg_pointer
7856 && crtl->stack_alignment_estimated == 128)
7857 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7858 else
7859 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7861 /* Incoming stack alignment can be changed on individual functions
7862 via force_align_arg_pointer attribute. We use the smallest
7863 incoming stack boundary. */
7864 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7865 && lookup_attribute ("force_align_arg_pointer",
7866 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7867 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7869 /* The incoming stack frame has to be aligned at least at
7870 parm_stack_boundary. */
7871 if (incoming_stack_boundary < crtl->parm_stack_boundary)
7872 incoming_stack_boundary = crtl->parm_stack_boundary;
7874 /* The stack at the entry of main is aligned by the runtime. We use the
7875 smallest incoming stack boundary. */
7876 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
7877 && DECL_NAME (current_function_decl)
7878 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7879 && DECL_FILE_SCOPE_P (current_function_decl))
7880 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7882 return incoming_stack_boundary;
7885 /* Update incoming stack boundary and estimated stack alignment. */
7887 static void
7888 ix86_update_stack_boundary (void)
7890 ix86_incoming_stack_boundary
7891 = ix86_minimum_incoming_stack_boundary (false);
7893 /* An x86_64 vararg function needs 16-byte stack alignment for the register save area. */
7894 if (TARGET_64BIT
7895 && cfun->stdarg
7896 && crtl->stack_alignment_estimated < 128)
7897 crtl->stack_alignment_estimated = 128;
7899 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7900 if (ix86_tls_descriptor_calls_expanded_in_cfun
7901 && crtl->preferred_stack_boundary < 128)
7902 crtl->preferred_stack_boundary = 128;
7905 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7906 needed or an rtx for DRAP otherwise. */
7908 static rtx
7909 ix86_get_drap_rtx (void)
7911 /* We must use DRAP if there are outgoing arguments on stack or
7912 the stack pointer register is clobbered by an asm statement and
7913 ACCUMULATE_OUTGOING_ARGS is false. */
7914 if (ix86_force_drap
7915 || ((cfun->machine->outgoing_args_on_stack
7916 || crtl->sp_is_clobbered_by_asm)
7917 && !ACCUMULATE_OUTGOING_ARGS))
7918 crtl->need_drap = true;
7920 if (stack_realign_drap)
7922 /* Assign DRAP to vDRAP and return vDRAP. */
7923 unsigned int regno = find_drap_reg ();
7924 rtx drap_vreg;
7925 rtx arg_ptr;
7926 rtx_insn *seq, *insn;
7928 arg_ptr = gen_rtx_REG (Pmode, regno);
7929 crtl->drap_reg = arg_ptr;
7931 start_sequence ();
7932 drap_vreg = copy_to_reg (arg_ptr);
7933 seq = get_insns ();
7934 end_sequence ();
7936 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7937 if (!optimize)
7939 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7940 RTX_FRAME_RELATED_P (insn) = 1;
7942 return drap_vreg;
7944 else
7945 return NULL;
7948 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7950 static rtx
7951 ix86_internal_arg_pointer (void)
7953 return virtual_incoming_args_rtx;
7956 struct scratch_reg {
7957 rtx reg;
7958 bool saved;
7961 /* Return a short-lived scratch register for use on function entry.
7962 In 32-bit mode, it is valid only after the registers are saved
7963 in the prologue. This register must be released by means of
7964 release_scratch_register_on_entry once it is dead. */
7966 static void
7967 get_scratch_register_on_entry (struct scratch_reg *sr)
7969 int regno;
7971 sr->saved = false;
7973 if (TARGET_64BIT)
7975 /* We always use R11 in 64-bit mode. */
7976 regno = R11_REG;
7978 else
7980 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7981 bool fastcall_p
7982 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7983 bool thiscall_p
7984 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7985 bool static_chain_p = DECL_STATIC_CHAIN (decl);
7986 int regparm = ix86_function_regparm (fntype, decl);
7987 int drap_regno
7988 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7990 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7991 for the static chain register. */
7992 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7993 && drap_regno != AX_REG)
7994 regno = AX_REG;
7995 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7996 for the static chain register. */
7997 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7998 regno = AX_REG;
7999 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
8000 regno = DX_REG;
8001 /* ecx is the static chain register. */
8002 else if (regparm < 3 && !fastcall_p && !thiscall_p
8003 && !static_chain_p
8004 && drap_regno != CX_REG)
8005 regno = CX_REG;
8006 else if (ix86_save_reg (BX_REG, true, false))
8007 regno = BX_REG;
8008 /* esi is the static chain register. */
8009 else if (!(regparm == 3 && static_chain_p)
8010 && ix86_save_reg (SI_REG, true, false))
8011 regno = SI_REG;
8012 else if (ix86_save_reg (DI_REG, true, false))
8013 regno = DI_REG;
8014 else
8016 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
8017 sr->saved = true;
8021 sr->reg = gen_rtx_REG (Pmode, regno);
8022 if (sr->saved)
8024 rtx_insn *insn = emit_insn (gen_push (sr->reg));
8025 RTX_FRAME_RELATED_P (insn) = 1;
8029 /* Release a scratch register obtained from the preceding function.
8031 If RELEASE_VIA_POP is true, we just pop the register off the stack
8032 to release it. This is what non-Linux systems use with -fstack-check.
8034 Otherwise we use OFFSET to locate the saved register and the
8035 allocated stack space becomes part of the local frame and is
8036 deallocated by the epilogue. */
8038 static void
8039 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
8040 bool release_via_pop)
8042 if (sr->saved)
8044 if (release_via_pop)
8046 struct machine_function *m = cfun->machine;
8047 rtx x, insn = emit_insn (gen_pop (sr->reg));
8049 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
8050 RTX_FRAME_RELATED_P (insn) = 1;
8051 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8052 x = gen_rtx_SET (stack_pointer_rtx, x);
8053 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
8054 m->fs.sp_offset -= UNITS_PER_WORD;
8056 else
8058 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
8059 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
8060 emit_insn (x);
8065 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
8067 If INT_REGISTERS_SAVED is true, then integer registers have already been
8068 pushed on the stack.
8070 If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
8071 beyond SIZE bytes.
8073 This assumes no knowledge of the current probing state, i.e. it is never
8074 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
8075 a suitable probe. */
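/* Illustrative sketch (not part of the original sources): assuming the
   default 4 KiB probe interval and a SIZE already larger than the guard,
   a 10 KiB allocation takes the inline path below as two 4 KiB
   allocate-and-probe steps plus a 2 KiB residual allocation, while
   anything larger than 4 * PROBE_INTERVAL instead uses the probing loop
   driven by a scratch register.  */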
8077 static void
8078 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
8079 const bool int_registers_saved,
8080 const bool protection_area)
8082 struct machine_function *m = cfun->machine;
8084 /* If this function does not statically allocate stack space, then
8085 no probes are needed. */
8086 if (!size)
8088 /* However, the allocation of space via pushes for register
8089 saves could be viewed as allocating space, but without the
8090 need to probe. */
8091 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
8092 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8093 else
8094 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
8095 return;
8098 /* If we are a noreturn function, then we have to consider the
8099 possibility that we're called via a jump rather than a call.
8101 Thus we don't have the implicit probe generated by saving the
8102 return address into the stack at the call. Thus, the stack
8103 pointer could be anywhere in the guard page. The safe thing
8104 to do is emit a probe now.
8106 The probe can be avoided if we have already emitted any callee
8107 register saves into the stack or have a frame pointer (which will
8108 have been saved as well). Those saves will function as implicit
8109 probes.
8111 ?!? This should be revamped to work like aarch64 and s390 where
8112 we track the offset from the most recent probe. Normally that
8113 offset would be zero. For a noreturn function we would reset
8114 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
8115 we just probe when we cross PROBE_INTERVAL. */
8116 if (TREE_THIS_VOLATILE (cfun->decl)
8117 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
8119 /* We can safely use any register here since we're just going to push
8120 its value and immediately pop it back. But we do try and avoid
8121 argument passing registers so as not to introduce dependencies in
8122 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
8123 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
8124 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
8125 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
8126 m->fs.sp_offset -= UNITS_PER_WORD;
8127 if (m->fs.cfa_reg == stack_pointer_rtx)
8129 m->fs.cfa_offset -= UNITS_PER_WORD;
8130 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8131 x = gen_rtx_SET (stack_pointer_rtx, x);
8132 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
8133 RTX_FRAME_RELATED_P (insn_push) = 1;
8134 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8135 x = gen_rtx_SET (stack_pointer_rtx, x);
8136 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
8137 RTX_FRAME_RELATED_P (insn_pop) = 1;
8139 emit_insn (gen_blockage ());
8142 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8143 const int dope = 4 * UNITS_PER_WORD;
8145 /* If there is protection area, take it into account in the size. */
8146 if (protection_area)
8147 size += probe_interval + dope;
8149 /* If we allocate less than the size of the guard statically,
8150 then no probing is necessary, but we do need to allocate
8151 the stack. */
8152 else if (size < (1 << param_stack_clash_protection_guard_size))
8154 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8155 GEN_INT (-size), -1,
8156 m->fs.cfa_reg == stack_pointer_rtx);
8157 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8158 return;
8161 /* We're allocating a large enough stack frame that we need to
8162 emit probes. Either emit them inline or in a loop depending
8163 on the size. */
8164 if (size <= 4 * probe_interval)
8166 HOST_WIDE_INT i;
8167 for (i = probe_interval; i <= size; i += probe_interval)
8169 /* Allocate PROBE_INTERVAL bytes. */
8170 rtx insn
8171 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8172 GEN_INT (-probe_interval), -1,
8173 m->fs.cfa_reg == stack_pointer_rtx);
8174 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
8176 /* And probe at *sp. */
8177 emit_stack_probe (stack_pointer_rtx);
8178 emit_insn (gen_blockage ());
8181 /* We need to allocate space for the residual, but we do not need
8182 to probe the residual... */
8183 HOST_WIDE_INT residual = (i - probe_interval - size);
8184 if (residual)
8186 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8187 GEN_INT (residual), -1,
8188 m->fs.cfa_reg == stack_pointer_rtx);
8190 /* ...except if there is a protection area to maintain. */
8191 if (protection_area)
8192 emit_stack_probe (stack_pointer_rtx);
8195 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
8197 else
8199 /* We expect the GP registers to be saved when probes are used
8200 as the probing sequences might need a scratch register and
8201 the routine to allocate one assumes the integer registers
8202 have already been saved. */
8203 gcc_assert (int_registers_saved);
8205 struct scratch_reg sr;
8206 get_scratch_register_on_entry (&sr);
8208 /* If we needed to save a register, then account for any space
8209 that was pushed (we are not going to pop the register when
8210 we do the restore). */
8211 if (sr.saved)
8212 size -= UNITS_PER_WORD;
8214 /* Step 1: round SIZE down to a multiple of the interval. */
8215 HOST_WIDE_INT rounded_size = size & -probe_interval;
8217 /* Step 2: compute final value of the loop counter. Use lea if
8218 possible. */
8219 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
8220 rtx insn;
8221 if (address_no_seg_operand (addr, Pmode))
8222 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
8223 else
8225 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8226 insn = emit_insn (gen_rtx_SET (sr.reg,
8227 gen_rtx_PLUS (Pmode, sr.reg,
8228 stack_pointer_rtx)));
8230 if (m->fs.cfa_reg == stack_pointer_rtx)
8232 add_reg_note (insn, REG_CFA_DEF_CFA,
8233 plus_constant (Pmode, sr.reg,
8234 m->fs.cfa_offset + rounded_size));
8235 RTX_FRAME_RELATED_P (insn) = 1;
8238 /* Step 3: the loop. */
8239 rtx size_rtx = GEN_INT (rounded_size);
8240 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
8241 size_rtx));
8242 if (m->fs.cfa_reg == stack_pointer_rtx)
8244 m->fs.cfa_offset += rounded_size;
8245 add_reg_note (insn, REG_CFA_DEF_CFA,
8246 plus_constant (Pmode, stack_pointer_rtx,
8247 m->fs.cfa_offset));
8248 RTX_FRAME_RELATED_P (insn) = 1;
8250 m->fs.sp_offset += rounded_size;
8251 emit_insn (gen_blockage ());
8253 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
8254 is equal to ROUNDED_SIZE. */
8256 if (size != rounded_size)
8258 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8259 GEN_INT (rounded_size - size), -1,
8260 m->fs.cfa_reg == stack_pointer_rtx);
8262 if (protection_area)
8263 emit_stack_probe (stack_pointer_rtx);
8266 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
8268 /* This does not deallocate the space reserved for the scratch
8269 register. That will be deallocated in the epilogue. */
8270 release_scratch_register_on_entry (&sr, size, false);
8273 /* Adjust back to account for the protection area. */
8274 if (protection_area)
8275 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8276 GEN_INT (probe_interval + dope), -1,
8277 m->fs.cfa_reg == stack_pointer_rtx);
8279 /* Make sure nothing is scheduled before we are done. */
8280 emit_insn (gen_blockage ());
8283 /* Adjust the stack pointer up to REG while probing it. */
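/* A rough sketch of the loop emitted below (illustrative only, assuming
   64-bit code, the default 4 KiB probe interval and REG in %rax):

	.LPSRL0:
		subq	$4096, %rsp	# SP -= PROBE_INTERVAL
		orq	$0, (%rsp)	# probe at SP
		cmpq	%rax, %rsp	# reached LAST_ADDR?
		jne	.LPSRL0
*/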
8285 const char *
8286 output_adjust_stack_and_probe (rtx reg)
8288 static int labelno = 0;
8289 char loop_lab[32];
8290 rtx xops[2];
8292 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8294 /* Loop. */
8295 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8297 /* SP = SP - PROBE_INTERVAL. */
8298 xops[0] = stack_pointer_rtx;
8299 xops[1] = GEN_INT (get_probe_interval ());
8300 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8302 /* Probe at SP. */
8303 xops[1] = const0_rtx;
8304 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
8306 /* Test if SP == LAST_ADDR. */
8307 xops[0] = stack_pointer_rtx;
8308 xops[1] = reg;
8309 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8311 /* Branch. */
8312 fputs ("\tjne\t", asm_out_file);
8313 assemble_name_raw (asm_out_file, loop_lab);
8314 fputc ('\n', asm_out_file);
8316 return "";
8319 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
8320 inclusive. These are offsets from the current stack pointer.
8322 INT_REGISTERS_SAVED is true if integer registers have already been
8323 pushed on the stack. */
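/* Worked example (illustrative only): with FIRST = 0, SIZE = 12 KiB and
   the default 4 KiB probe interval, the inline case below emits probes
   at sp - 4096, sp - 8192 and finally at sp - 12288.  */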
8325 static void
8326 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
8327 const bool int_registers_saved)
8329 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8331 /* See if we have a constant small number of probes to generate. If so,
8332 that's the easy case. The run-time loop is made up of 6 insns in the
8333 generic case while the compile-time loop is made up of n insns for n #
8334 of intervals. */
8335 if (size <= 6 * probe_interval)
8337 HOST_WIDE_INT i;
8339 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
8340 it exceeds SIZE. If only one probe is needed, this will not
8341 generate any code. Then probe at FIRST + SIZE. */
8342 for (i = probe_interval; i < size; i += probe_interval)
8343 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8344 -(first + i)));
8346 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8347 -(first + size)));
8350 /* Otherwise, do the same as above, but in a loop. Note that we must be
8351 extra careful with variables wrapping around because we might be at
8352 the very top (or the very bottom) of the address space and we have
8353 to be able to handle this case properly; in particular, we use an
8354 equality test for the loop condition. */
8355 else
8357 /* We expect the GP registers to be saved when probes are used
8358 as the probing sequences might need a scratch register and
8359 the routine to allocate one assumes the integer registers
8360 have already been saved. */
8361 gcc_assert (int_registers_saved);
8363 HOST_WIDE_INT rounded_size, last;
8364 struct scratch_reg sr;
8366 get_scratch_register_on_entry (&sr);
8369 /* Step 1: round SIZE to the previous multiple of the interval. */
8371 rounded_size = ROUND_DOWN (size, probe_interval);
8374 /* Step 2: compute initial and final value of the loop counter. */
8376 /* TEST_OFFSET = FIRST. */
8377 emit_move_insn (sr.reg, GEN_INT (-first));
8379 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
8380 last = first + rounded_size;
8383 /* Step 3: the loop
8387 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
8388 probe at TEST_ADDR
8390 while (TEST_ADDR != LAST_ADDR)
8392 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
8393 until it is equal to ROUNDED_SIZE. */
8395 emit_insn
8396 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
8399 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8400 that SIZE is equal to ROUNDED_SIZE. */
8402 if (size != rounded_size)
8403 emit_stack_probe (plus_constant (Pmode,
8404 gen_rtx_PLUS (Pmode,
8405 stack_pointer_rtx,
8406 sr.reg),
8407 rounded_size - size));
8409 release_scratch_register_on_entry (&sr, size, true);
8412 /* Make sure nothing is scheduled before we are done. */
8413 emit_insn (gen_blockage ());
8416 /* Probe a range of stack addresses from REG to END, inclusive. These are
8417 offsets from the current stack pointer. */
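/* A rough sketch of the emitted loop (illustrative only, assuming 64-bit
   code, the default 4 KiB probe interval, REG in %r11 and END the
   immediate -(FIRST + ROUNDED_SIZE), here -12288):

	.LPSRL1:
		subq	$4096, %r11		# TEST_ADDR -= PROBE_INTERVAL
		orq	$0, (%rsp,%r11)		# probe at SP + TEST_ADDR
		cmpq	$-12288, %r11		# reached LAST_ADDR?
		jne	.LPSRL1
*/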
8419 const char *
8420 output_probe_stack_range (rtx reg, rtx end)
8422 static int labelno = 0;
8423 char loop_lab[32];
8424 rtx xops[3];
8426 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8428 /* Loop. */
8429 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8431 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
8432 xops[0] = reg;
8433 xops[1] = GEN_INT (get_probe_interval ());
8434 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8436 /* Probe at TEST_ADDR. */
8437 xops[0] = stack_pointer_rtx;
8438 xops[1] = reg;
8439 xops[2] = const0_rtx;
8440 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
8442 /* Test if TEST_ADDR == LAST_ADDR. */
8443 xops[0] = reg;
8444 xops[1] = end;
8445 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8447 /* Branch. */
8448 fputs ("\tjne\t", asm_out_file);
8449 assemble_name_raw (asm_out_file, loop_lab);
8450 fputc ('\n', asm_out_file);
8452 return "";
8455 /* Set stack_frame_required to false if stack frame isn't required.
8456 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
8457 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
8459 static void
8460 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
8461 bool check_stack_slot)
8463 HARD_REG_SET set_up_by_prologue, prologue_used;
8464 basic_block bb;
8466 CLEAR_HARD_REG_SET (prologue_used);
8467 CLEAR_HARD_REG_SET (set_up_by_prologue);
8468 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
8469 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
8470 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
8471 HARD_FRAME_POINTER_REGNUM);
8473 /* The preferred stack alignment is the minimum stack alignment. */
8474 if (stack_alignment > crtl->preferred_stack_boundary)
8475 stack_alignment = crtl->preferred_stack_boundary;
8477 bool require_stack_frame = false;
8479 FOR_EACH_BB_FN (bb, cfun)
8481 rtx_insn *insn;
8482 FOR_BB_INSNS (bb, insn)
8483 if (NONDEBUG_INSN_P (insn)
8484 && requires_stack_frame_p (insn, prologue_used,
8485 set_up_by_prologue))
8487 require_stack_frame = true;
8489 if (check_stack_slot)
8491 /* Find the maximum stack alignment. */
8492 subrtx_iterator::array_type array;
8493 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
8494 if (MEM_P (*iter)
8495 && (reg_mentioned_p (stack_pointer_rtx,
8496 *iter)
8497 || reg_mentioned_p (frame_pointer_rtx,
8498 *iter)))
8500 unsigned int alignment = MEM_ALIGN (*iter);
8501 if (alignment > stack_alignment)
8502 stack_alignment = alignment;
8508 cfun->machine->stack_frame_required = require_stack_frame;
8511 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
8512 will guide prologue/epilogue to be generated in correct form. */
8514 static void
8515 ix86_finalize_stack_frame_flags (void)
8517 /* Check if stack realignment is really needed after reload, and
8518 store the result in cfun. */
8519 unsigned int incoming_stack_boundary
8520 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8521 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8522 unsigned int stack_alignment
8523 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
8524 ? crtl->max_used_stack_slot_alignment
8525 : crtl->stack_alignment_needed);
8526 unsigned int stack_realign
8527 = (incoming_stack_boundary < stack_alignment);
8528 bool recompute_frame_layout_p = false;
8530 if (crtl->stack_realign_finalized)
8532 /* After stack_realign_needed is finalized, we can no longer
8533 change it. */
8534 gcc_assert (crtl->stack_realign_needed == stack_realign);
8535 return;
8538 /* It is always safe to compute max_used_stack_alignment. We
8539 compute it only if 128-bit aligned load/store may be generated
8540 on a misaligned stack slot, which would lead to a segfault. */
8541 bool check_stack_slot
8542 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
8543 ix86_find_max_used_stack_alignment (stack_alignment,
8544 check_stack_slot);
8546 /* If the only reason for frame_pointer_needed is that we conservatively
8547 assumed stack realignment might be needed or -fno-omit-frame-pointer
8548 is used, but in the end nothing that needed the stack alignment had
8549 been spilled nor any stack access made, clear frame_pointer_needed and say we
8550 don't need stack realignment.
8552 When vector register is used for piecewise move and store, we don't
8553 increase stack_alignment_needed as there is no register spill for
8554 piecewise move and store. Since stack_realign_needed is set to true
8555 by checking stack_alignment_estimated which is updated by pseudo
8556 vector register usage, we also need to check stack_realign_needed to
8557 eliminate frame pointer. */
8558 if ((stack_realign
8559 || (!flag_omit_frame_pointer && optimize)
8560 || crtl->stack_realign_needed)
8561 && frame_pointer_needed
8562 && crtl->is_leaf
8563 && crtl->sp_is_unchanging
8564 && !ix86_current_function_calls_tls_descriptor
8565 && !crtl->accesses_prior_frames
8566 && !cfun->calls_alloca
8567 && !crtl->calls_eh_return
8568 /* See ira_setup_eliminable_regset for the rationale. */
8569 && !(STACK_CHECK_MOVING_SP
8570 && flag_stack_check
8571 && flag_exceptions
8572 && cfun->can_throw_non_call_exceptions)
8573 && !ix86_frame_pointer_required ()
8574 && ix86_get_frame_size () == 0
8575 && ix86_nsaved_sseregs () == 0
8576 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
8578 if (cfun->machine->stack_frame_required)
8580 /* Stack frame is required. If stack alignment needed is less
8581 than incoming stack boundary, don't realign stack. */
8582 stack_realign = incoming_stack_boundary < stack_alignment;
8583 if (!stack_realign)
8585 crtl->max_used_stack_slot_alignment
8586 = incoming_stack_boundary;
8587 crtl->stack_alignment_needed
8588 = incoming_stack_boundary;
8589 /* Also update preferred_stack_boundary for leaf
8590 functions. */
8591 crtl->preferred_stack_boundary
8592 = incoming_stack_boundary;
8595 else
8597 /* If drap has been set, but it actually isn't live at the
8598 start of the function, there is no reason to set it up. */
8599 if (crtl->drap_reg)
8601 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8602 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
8603 REGNO (crtl->drap_reg)))
8605 crtl->drap_reg = NULL_RTX;
8606 crtl->need_drap = false;
8609 else
8610 cfun->machine->no_drap_save_restore = true;
8612 frame_pointer_needed = false;
8613 stack_realign = false;
8614 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
8615 crtl->stack_alignment_needed = incoming_stack_boundary;
8616 crtl->stack_alignment_estimated = incoming_stack_boundary;
8617 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
8618 crtl->preferred_stack_boundary = incoming_stack_boundary;
8619 df_finish_pass (true);
8620 df_scan_alloc (NULL);
8621 df_scan_blocks ();
8622 df_compute_regs_ever_live (true);
8623 df_analyze ();
8625 if (flag_var_tracking)
8627 /* Since frame pointer is no longer available, replace it with
8628 stack pointer - UNITS_PER_WORD in debug insns. */
8629 df_ref ref, next;
8630 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
8631 ref; ref = next)
8633 next = DF_REF_NEXT_REG (ref);
8634 if (!DF_REF_INSN_INFO (ref))
8635 continue;
8637 /* Make sure the next ref is for a different instruction,
8638 so that we're not affected by the rescan. */
8639 rtx_insn *insn = DF_REF_INSN (ref);
8640 while (next && DF_REF_INSN (next) == insn)
8641 next = DF_REF_NEXT_REG (next);
8643 if (DEBUG_INSN_P (insn))
8645 bool changed = false;
8646 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
8648 rtx *loc = DF_REF_LOC (ref);
8649 if (*loc == hard_frame_pointer_rtx)
8651 *loc = plus_constant (Pmode,
8652 stack_pointer_rtx,
8653 -UNITS_PER_WORD);
8654 changed = true;
8657 if (changed)
8658 df_insn_rescan (insn);
8663 recompute_frame_layout_p = true;
8666 else if (crtl->max_used_stack_slot_alignment >= 128
8667 && cfun->machine->stack_frame_required)
8669 /* We don't need to realign the stack. max_used_stack_alignment is
8670 used to decide how the stack frame should be aligned. This is
8671 independent of the psABI and of 32-bit vs 64-bit. */
8672 cfun->machine->max_used_stack_alignment
8673 = stack_alignment / BITS_PER_UNIT;
8676 if (crtl->stack_realign_needed != stack_realign)
8677 recompute_frame_layout_p = true;
8678 crtl->stack_realign_needed = stack_realign;
8679 crtl->stack_realign_finalized = true;
8680 if (recompute_frame_layout_p)
8681 ix86_compute_frame_layout ();
8684 /* Delete SET_GOT right after entry block if it is allocated to reg. */
8686 static void
8687 ix86_elim_entry_set_got (rtx reg)
8689 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8690 rtx_insn *c_insn = BB_HEAD (bb);
8691 if (!NONDEBUG_INSN_P (c_insn))
8692 c_insn = next_nonnote_nondebug_insn (c_insn);
8693 if (c_insn && NONJUMP_INSN_P (c_insn))
8695 rtx pat = PATTERN (c_insn);
8696 if (GET_CODE (pat) == PARALLEL)
8698 rtx set = XVECEXP (pat, 0, 0);
8699 if (GET_CODE (set) == SET
8700 && GET_CODE (SET_SRC (set)) == UNSPEC
8701 && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
8702 && REGNO (SET_DEST (set)) == REGNO (reg))
8703 delete_insn (c_insn);
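/* Return a SET rtx that stores REG into, or loads REG from, the frame
   slot at OFFSET from FRAME_REG, depending on STORE.  */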
8708 static rtx
8709 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
8711 rtx addr, mem;
8713 if (offset)
8714 addr = plus_constant (Pmode, frame_reg, offset);
8715 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
8716 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
8719 static inline rtx
8720 gen_frame_load (rtx reg, rtx frame_reg, int offset)
8722 return gen_frame_set (reg, frame_reg, offset, false);
8725 static inline rtx
8726 gen_frame_store (rtx reg, rtx frame_reg, int offset)
8728 return gen_frame_set (reg, frame_reg, offset, true);
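/* Emit the out-of-line save of the MS-to-SysV clobbered registers: point
   RAX at the save area and emit a single frame-related PARALLEL made of
   a USE of the xlogue save stub symbol and a frame store of each
   clobbered register at its offset from RAX.  */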
8731 static void
8732 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
8734 struct machine_function *m = cfun->machine;
8735 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8736 + m->call_ms2sysv_extra_regs;
8737 rtvec v = rtvec_alloc (ncregs + 1);
8738 unsigned int align, i, vi = 0;
8739 rtx_insn *insn;
8740 rtx sym, addr;
8741 rtx rax = gen_rtx_REG (word_mode, AX_REG);
8742 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8744 /* AL should only be live with sysv_abi. */
8745 gcc_assert (!ix86_eax_live_at_start_p ());
8746 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
8748 /* Set up RAX as the stub's base pointer. We use stack_realign_offset
8749 regardless of whether we've actually realigned the stack or not. */
8750 align = GET_MODE_ALIGNMENT (V4SFmode);
8751 addr = choose_baseaddr (frame.stack_realign_offset
8752 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
8753 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8755 emit_insn (gen_rtx_SET (rax, addr));
8757 /* Get the stub symbol. */
8758 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
8759 : XLOGUE_STUB_SAVE);
8760 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8762 for (i = 0; i < ncregs; ++i)
8764 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8765 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
8766 r.regno);
8767 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
8770 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
8772 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
8773 RTX_FRAME_RELATED_P (insn) = true;
8776 /* Generate and return an insn body to AND X with Y. */
8778 static rtx_insn *
8779 gen_and2_insn (rtx x, rtx y)
8781 enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
8783 gcc_assert (insn_operand_matches (icode, 0, x));
8784 gcc_assert (insn_operand_matches (icode, 1, x));
8785 gcc_assert (insn_operand_matches (icode, 2, y));
8787 return GEN_FCN (icode) (x, x, y);
8790 /* Expand the prologue into a bunch of separate insns. */
8792 void
8793 ix86_expand_prologue (void)
8795 struct machine_function *m = cfun->machine;
8796 rtx insn, t;
8797 HOST_WIDE_INT allocate;
8798 bool int_registers_saved;
8799 bool sse_registers_saved;
8800 bool save_stub_call_needed;
8801 rtx static_chain = NULL_RTX;
8803 ix86_last_zero_store_uid = 0;
8804 if (ix86_function_naked (current_function_decl))
8806 if (flag_stack_usage_info)
8807 current_function_static_stack_size = 0;
8808 return;
8811 ix86_finalize_stack_frame_flags ();
8813 /* DRAP should not coexist with stack_realign_fp */
8814 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8816 memset (&m->fs, 0, sizeof (m->fs));
8818 /* Initialize CFA state for before the prologue. */
8819 m->fs.cfa_reg = stack_pointer_rtx;
8820 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8822 /* Track SP offset to the CFA. We continue tracking this after we've
8823 swapped the CFA register away from SP. In the case of re-alignment
8824 this is fudged; we're interested in offsets within the local frame. */
8825 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8826 m->fs.sp_valid = true;
8827 m->fs.sp_realigned = false;
8829 const struct ix86_frame &frame = cfun->machine->frame;
8831 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8833 /* We should have already generated an error for any use of
8834 ms_hook on a nested function. */
8835 gcc_checking_assert (!ix86_static_chain_on_stack);
8837 /* Check if profiling is active and we shall use the
8838 profile-before-prologue variant. If so, sorry. */
8839 if (crtl->profile && flag_fentry != 0)
8840 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8841 "with %<-mfentry%> for 32-bit");
8843 /* In ix86_asm_output_function_label we emitted:
8844 8b ff movl.s %edi,%edi
8845 55 push %ebp
8846 8b ec movl.s %esp,%ebp
8848 This matches the hookable function prologue in Win32 API
8849 functions in Microsoft Windows XP Service Pack 2 and newer.
8850 Wine uses this to enable Windows apps to hook the Win32 API
8851 functions provided by Wine.
8853 What that means is that we've already set up the frame pointer. */
8855 if (frame_pointer_needed
8856 && !(crtl->drap_reg && crtl->stack_realign_needed))
8858 rtx push, mov;
8860 /* We've decided to use the frame pointer already set up.
8861 Describe this to the unwinder by pretending that both
8862 push and mov insns happen right here.
8864 Putting the unwind info here at the end of the ms_hook
8865 is done so that we can make absolutely certain we get
8866 the required byte sequence at the start of the function,
8867 rather than relying on an assembler that can produce
8868 the exact encoding required.
8870 However it does mean (in the unpatched case) that we have
8871 a 1 insn window where the asynchronous unwind info is
8872 incorrect. However, if we placed the unwind info at
8873 its correct location we would have incorrect unwind info
8874 in the patched case. Which is probably all moot since
8875 I don't expect Wine generates dwarf2 unwind info for the
8876 system libraries that use this feature. */
8878 insn = emit_insn (gen_blockage ());
8880 push = gen_push (hard_frame_pointer_rtx);
8881 mov = gen_rtx_SET (hard_frame_pointer_rtx,
8882 stack_pointer_rtx);
8883 RTX_FRAME_RELATED_P (push) = 1;
8884 RTX_FRAME_RELATED_P (mov) = 1;
8886 RTX_FRAME_RELATED_P (insn) = 1;
8887 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8888 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8890 /* Note that gen_push incremented m->fs.cfa_offset, even
8891 though we didn't emit the push insn here. */
8892 m->fs.cfa_reg = hard_frame_pointer_rtx;
8893 m->fs.fp_offset = m->fs.cfa_offset;
8894 m->fs.fp_valid = true;
8896 else
8898 /* The frame pointer is not needed so pop %ebp again.
8899 This leaves us with a pristine state. */
8900 emit_insn (gen_pop (hard_frame_pointer_rtx));
8904 /* The first insn of a function that accepts its static chain on the
8905 stack is to push the register that would be filled in by a direct
8906 call. This insn will be skipped by the trampoline. */
8907 else if (ix86_static_chain_on_stack)
8909 static_chain = ix86_static_chain (cfun->decl, false);
8910 insn = emit_insn (gen_push (static_chain));
8911 emit_insn (gen_blockage ());
8913 /* We don't want to interpret this push insn as a register save,
8914 only as a stack adjustment. The real copy of the register as
8915 a save will be done later, if needed. */
8916 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8917 t = gen_rtx_SET (stack_pointer_rtx, t);
8918 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8919 RTX_FRAME_RELATED_P (insn) = 1;
8922 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8923 DRAP is needed and stack realignment is really needed after reload. */
8924 if (stack_realign_drap)
8926 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8928 /* Can't use DRAP in interrupt function. */
8929 if (cfun->machine->func_type != TYPE_NORMAL)
8930 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8931 "in interrupt service routine. This may be worked "
8932 "around by avoiding functions with aggregate return.");
8934 /* Only need to push parameter pointer reg if it is caller saved. */
8935 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8937 /* Push arg pointer reg */
8938 insn = emit_insn (gen_push (crtl->drap_reg));
8939 RTX_FRAME_RELATED_P (insn) = 1;
8942 /* Grab the argument pointer. */
8943 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8944 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8945 RTX_FRAME_RELATED_P (insn) = 1;
8946 m->fs.cfa_reg = crtl->drap_reg;
8947 m->fs.cfa_offset = 0;
8949 /* Align the stack. */
8950 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8951 GEN_INT (-align_bytes)));
8952 RTX_FRAME_RELATED_P (insn) = 1;
8954 /* Replicate the return address on the stack so that return
8955 address can be reached via (argp - 1) slot. This is needed
8956 to implement macro RETURN_ADDR_RTX and intrinsic function
8957 expand_builtin_return_addr etc. */
8958 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8959 t = gen_frame_mem (word_mode, t);
8960 insn = emit_insn (gen_push (t));
8961 RTX_FRAME_RELATED_P (insn) = 1;
8963 /* For the purposes of frame and register save area addressing,
8964 we've started over with a new frame. */
8965 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8966 m->fs.realigned = true;
8968 if (static_chain)
8970 /* Replicate static chain on the stack so that static chain
8971 can be reached via (argp - 2) slot. This is needed for
8972 nested function with stack realignment. */
8973 insn = emit_insn (gen_push (static_chain));
8974 RTX_FRAME_RELATED_P (insn) = 1;
8978 int_registers_saved = (frame.nregs == 0);
8979 sse_registers_saved = (frame.nsseregs == 0);
8980 save_stub_call_needed = (m->call_ms2sysv);
8981 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8983 if (frame_pointer_needed && !m->fs.fp_valid)
8985 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8986 slower on all targets. Also sdb didn't like it. */
8987 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8988 RTX_FRAME_RELATED_P (insn) = 1;
8990 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8992 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8993 RTX_FRAME_RELATED_P (insn) = 1;
8995 if (m->fs.cfa_reg == stack_pointer_rtx)
8996 m->fs.cfa_reg = hard_frame_pointer_rtx;
8997 m->fs.fp_offset = m->fs.sp_offset;
8998 m->fs.fp_valid = true;
9002 if (!int_registers_saved)
9004 /* If saving registers via PUSH, do so now. */
9005 if (!frame.save_regs_using_mov)
9007 ix86_emit_save_regs ();
9008 m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
9009 int_registers_saved = true;
9010 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9013 /* When using red zone we may start register saving before allocating
9014 the stack frame, saving one cycle of the prologue. However, avoid
9015 doing this if we have to probe the stack; at least on x86_64 the
9016 stack probe can turn into a call that clobbers a red zone location. */
9017 else if (ix86_using_red_zone ()
9018 && (! TARGET_STACK_PROBE
9019 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9021 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9022 cfun->machine->red_zone_used = true;
9023 int_registers_saved = true;
9027 if (frame.red_zone_size != 0)
9028 cfun->machine->red_zone_used = true;
9030 if (stack_realign_fp)
9032 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9033 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9035 /* Record last valid frame pointer offset. */
9036 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
9038 /* The computation of the size of the re-aligned stack frame means
9039 that we must allocate the size of the register save area before
9040 performing the actual alignment. Otherwise we cannot guarantee
9041 that there's enough storage above the realignment point. */
9042 allocate = frame.reg_save_offset - m->fs.sp_offset
9043 + frame.stack_realign_allocate;
9044 if (allocate)
9045 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9046 GEN_INT (-allocate), -1, false);
9048 /* Align the stack. */
9049 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
9050 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
9051 m->fs.sp_realigned_offset = m->fs.sp_offset
9052 - frame.stack_realign_allocate;
9053 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
9054 Beyond this point, stack access should be done via choose_baseaddr or
9055 by using sp_valid_at and fp_valid_at to determine the correct base
9056 register. Henceforth, any CFA offset should be thought of as logical
9057 and not physical. */
9058 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
9059 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
9060 m->fs.sp_realigned = true;
9062 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
9063 is needed to describe where a register is saved using a realigned
9064 stack pointer, so we need to invalidate the stack pointer for that
9065 target. */
9066 if (TARGET_SEH)
9067 m->fs.sp_valid = false;
9069 /* If SP offset is non-immediate after allocation of the stack frame,
9070 then emit SSE saves or stub call prior to allocating the rest of the
9071 stack frame. This is less efficient for the out-of-line stub because
9072 we can't combine allocations across the call barrier, but it's better
9073 than using a scratch register. */
9074 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
9075 - m->fs.sp_realigned_offset),
9076 Pmode))
9078 if (!sse_registers_saved)
9080 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9081 sse_registers_saved = true;
9083 else if (save_stub_call_needed)
9085 ix86_emit_outlined_ms2sysv_save (frame);
9086 save_stub_call_needed = false;
9091 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9093 if (flag_stack_usage_info)
9095 /* We start to count from ARG_POINTER. */
9096 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9098 /* If it was realigned, take into account the fake frame. */
9099 if (stack_realign_drap)
9101 if (ix86_static_chain_on_stack)
9102 stack_size += UNITS_PER_WORD;
9104 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9105 stack_size += UNITS_PER_WORD;
9107 /* This over-estimates by 1 minimal-stack-alignment-unit but
9108 mitigates that by counting in the new return address slot. */
9109 current_function_dynamic_stack_size
9110 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9113 current_function_static_stack_size = stack_size;
9116 /* On SEH target with very large frame size, allocate an area to save
9117 SSE registers (as the very large allocation won't be described). */
9118 if (TARGET_SEH
9119 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
9120 && !sse_registers_saved)
9122 HOST_WIDE_INT sse_size
9123 = frame.sse_reg_save_offset - frame.reg_save_offset;
9125 gcc_assert (int_registers_saved);
9127 /* No need to do stack checking as the area will be immediately
9128 written. */
9129 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9130 GEN_INT (-sse_size), -1,
9131 m->fs.cfa_reg == stack_pointer_rtx);
9132 allocate -= sse_size;
9133 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9134 sse_registers_saved = true;
9137 /* If stack clash protection is requested, then probe the stack, unless it
9138 is already probed on the target. */
9139 if (allocate >= 0
9140 && flag_stack_clash_protection
9141 && !ix86_target_stack_probe ())
9143 ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
9144 allocate = 0;
9147 /* The stack has already been decremented by the instruction calling us
9148 so probe if the size is non-negative to preserve the protection area. */
9149 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9151 const HOST_WIDE_INT probe_interval = get_probe_interval ();
9153 if (STACK_CHECK_MOVING_SP)
9155 if (crtl->is_leaf
9156 && !cfun->calls_alloca
9157 && allocate <= probe_interval)
9160 else
9162 ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
9163 allocate = 0;
9167 else
9169 HOST_WIDE_INT size = allocate;
9171 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
9172 size = 0x80000000 - get_stack_check_protect () - 1;
9174 if (TARGET_STACK_PROBE)
9176 if (crtl->is_leaf && !cfun->calls_alloca)
9178 if (size > probe_interval)
9179 ix86_emit_probe_stack_range (0, size, int_registers_saved);
9181 else
9182 ix86_emit_probe_stack_range (0,
9183 size + get_stack_check_protect (),
9184 int_registers_saved);
9186 else
9188 if (crtl->is_leaf && !cfun->calls_alloca)
9190 if (size > probe_interval
9191 && size > get_stack_check_protect ())
9192 ix86_emit_probe_stack_range (get_stack_check_protect (),
9193 (size
9194 - get_stack_check_protect ()),
9195 int_registers_saved);
9197 else
9198 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
9199 int_registers_saved);
9204 if (allocate == 0)
9206 else if (!ix86_target_stack_probe ()
9207 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9209 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9210 GEN_INT (-allocate), -1,
9211 m->fs.cfa_reg == stack_pointer_rtx);
9213 else
9215 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9216 rtx r10 = NULL;
9217 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
9218 bool eax_live = ix86_eax_live_at_start_p ();
9219 bool r10_live = false;
9221 if (TARGET_64BIT)
9222 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9224 if (eax_live)
9226 insn = emit_insn (gen_push (eax));
9227 allocate -= UNITS_PER_WORD;
9228 /* Note that SEH directives need to continue tracking the stack
9229 pointer even after the frame pointer has been set up. */
9230 if (sp_is_cfa_reg || TARGET_SEH)
9232 if (sp_is_cfa_reg)
9233 m->fs.cfa_offset += UNITS_PER_WORD;
9234 RTX_FRAME_RELATED_P (insn) = 1;
9235 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9236 gen_rtx_SET (stack_pointer_rtx,
9237 plus_constant (Pmode,
9238 stack_pointer_rtx,
9239 -UNITS_PER_WORD)));
9243 if (r10_live)
9245 r10 = gen_rtx_REG (Pmode, R10_REG);
9246 insn = emit_insn (gen_push (r10));
9247 allocate -= UNITS_PER_WORD;
9248 if (sp_is_cfa_reg || TARGET_SEH)
9250 if (sp_is_cfa_reg)
9251 m->fs.cfa_offset += UNITS_PER_WORD;
9252 RTX_FRAME_RELATED_P (insn) = 1;
9253 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9254 gen_rtx_SET (stack_pointer_rtx,
9255 plus_constant (Pmode,
9256 stack_pointer_rtx,
9257 -UNITS_PER_WORD)));
9261 emit_move_insn (eax, GEN_INT (allocate));
9262 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
9264 /* Use the fact that AX still contains ALLOCATE. */
9265 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
9266 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
9268 if (sp_is_cfa_reg || TARGET_SEH)
9270 if (sp_is_cfa_reg)
9271 m->fs.cfa_offset += allocate;
9272 RTX_FRAME_RELATED_P (insn) = 1;
9273 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9274 gen_rtx_SET (stack_pointer_rtx,
9275 plus_constant (Pmode, stack_pointer_rtx,
9276 -allocate)));
9278 m->fs.sp_offset += allocate;
9280 /* Use stack_pointer_rtx for relative addressing so that code works for
9281 realigned stack. But this means that we need a blockage to prevent
9282 stores based on the frame pointer from being scheduled before. */
9283 if (r10_live && eax_live)
9285 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9286 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9287 gen_frame_mem (word_mode, t));
9288 t = plus_constant (Pmode, t, UNITS_PER_WORD);
9289 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
9290 gen_frame_mem (word_mode, t));
9291 emit_insn (gen_memory_blockage ());
9293 else if (eax_live || r10_live)
9295 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9296 emit_move_insn (gen_rtx_REG (word_mode,
9297 (eax_live ? AX_REG : R10_REG)),
9298 gen_frame_mem (word_mode, t));
9299 emit_insn (gen_memory_blockage ());
9302 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9304 /* If we haven't already set up the frame pointer, do so now. */
9305 if (frame_pointer_needed && !m->fs.fp_valid)
9307 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
9308 GEN_INT (frame.stack_pointer_offset
9309 - frame.hard_frame_pointer_offset));
9310 insn = emit_insn (insn);
9311 RTX_FRAME_RELATED_P (insn) = 1;
9312 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
9314 if (m->fs.cfa_reg == stack_pointer_rtx)
9315 m->fs.cfa_reg = hard_frame_pointer_rtx;
9316 m->fs.fp_offset = frame.hard_frame_pointer_offset;
9317 m->fs.fp_valid = true;
9320 if (!int_registers_saved)
9321 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9322 if (!sse_registers_saved)
9323 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9324 else if (save_stub_call_needed)
9325 ix86_emit_outlined_ms2sysv_save (frame);
9327 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
9328 in PROLOGUE. */
9329 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
9331 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
9332 insn = emit_insn (gen_set_got (pic));
9333 RTX_FRAME_RELATED_P (insn) = 1;
9334 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
9335 emit_insn (gen_prologue_use (pic));
9336 /* Delete an already emitted SET_GOT if it exists and is allocated to
9337 REAL_PIC_OFFSET_TABLE_REGNUM. */
9338 ix86_elim_entry_set_got (pic);
9341 if (crtl->drap_reg && !crtl->stack_realign_needed)
9343 /* vDRAP is set up, but after reload it turns out stack realignment
9344 isn't necessary; here we emit prologue code to set up DRAP
9345 without the stack realignment adjustment. */
9346 t = choose_baseaddr (0, NULL);
9347 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9350 /* Prevent instructions from being scheduled into register save push
9351 sequence when access to the redzone area is done through frame pointer.
9352 The offset between the frame pointer and the stack pointer is calculated
9353 relative to the value of the stack pointer at the end of the function
9354 prologue, and moving instructions that access redzone area via frame
9355 pointer inside push sequence violates this assumption. */
9356 if (frame_pointer_needed && frame.red_zone_size)
9357 emit_insn (gen_memory_blockage ());
9359 /* SEH requires that the prologue end within 256 bytes of the start of
9360 the function. Prevent instruction schedules that would extend that.
9361 Further, prevent alloca modifications to the stack pointer from being
9362 combined with prologue modifications. */
9363 if (TARGET_SEH)
9364 emit_insn (gen_prologue_use (stack_pointer_rtx));
9367 /* Emit code to restore REG using a POP or POPP insn. */
9369 static void
9370 ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
9372 struct machine_function *m = cfun->machine;
9373 rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
9375 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9376 m->fs.sp_offset -= UNITS_PER_WORD;
9378 if (m->fs.cfa_reg == crtl->drap_reg
9379 && REGNO (reg) == REGNO (crtl->drap_reg))
9381 /* Previously we'd represented the CFA as an expression
9382 like *(%ebp - 8). We've just popped that value from
9383 the stack, which means we need to reset the CFA to
9384 the drap register. This will remain until we restore
9385 the stack pointer. */
9386 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9387 RTX_FRAME_RELATED_P (insn) = 1;
9389 /* This means that the DRAP register is valid for addressing too. */
9390 m->fs.drap_valid = true;
9391 return;
9394 if (m->fs.cfa_reg == stack_pointer_rtx)
9396 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9397 x = gen_rtx_SET (stack_pointer_rtx, x);
9398 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9399 RTX_FRAME_RELATED_P (insn) = 1;
9401 m->fs.cfa_offset -= UNITS_PER_WORD;
9404 /* When the frame pointer is the CFA, and we pop it, we are
9405 swapping back to the stack pointer as the CFA. This happens
9406 for stack frames that don't allocate other data, so we assume
9407 the stack pointer is now pointing at the return address, i.e.
9408 the function entry state, which makes the offset be 1 word. */
9409 if (reg == hard_frame_pointer_rtx)
9411 m->fs.fp_valid = false;
9412 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9414 m->fs.cfa_reg = stack_pointer_rtx;
9415 m->fs.cfa_offset -= UNITS_PER_WORD;
9417 add_reg_note (insn, REG_CFA_DEF_CFA,
9418 plus_constant (Pmode, stack_pointer_rtx,
9419 m->fs.cfa_offset));
9420 RTX_FRAME_RELATED_P (insn) = 1;
9425 /* Emit code to restore REG1 and REG2 using a POP2 insn. */
9426 static void
9427 ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
9429 struct machine_function *m = cfun->machine;
9430 const int offset = UNITS_PER_WORD * 2;
9431 rtx_insn *insn;
9433 rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
9434 stack_pointer_rtx));
9436 if (ppx_p)
9437 insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
9438 else
9439 insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
9441 RTX_FRAME_RELATED_P (insn) = 1;
9443 rtx dwarf = NULL_RTX;
9444 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
9445 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
9446 REG_NOTES (insn) = dwarf;
9447 m->fs.sp_offset -= offset;
9449 if (m->fs.cfa_reg == crtl->drap_reg
9450 && (REGNO (reg1) == REGNO (crtl->drap_reg)
9451 || REGNO (reg2) == REGNO (crtl->drap_reg)))
9453 /* Previously we'd represented the CFA as an expression
9454 like *(%ebp - 8). We've just popped that value from
9455 the stack, which means we need to reset the CFA to
9456 the drap register. This will remain until we restore
9457 the stack pointer. */
9458 add_reg_note (insn, REG_CFA_DEF_CFA,
9459 REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
9460 RTX_FRAME_RELATED_P (insn) = 1;
9462 /* This means that the DRAP register is valid for addressing too. */
9463 m->fs.drap_valid = true;
9464 return;
9467 if (m->fs.cfa_reg == stack_pointer_rtx)
9469 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
9470 x = gen_rtx_SET (stack_pointer_rtx, x);
9471 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9472 RTX_FRAME_RELATED_P (insn) = 1;
9474 m->fs.cfa_offset -= offset;
9477 /* When the frame pointer is the CFA, and we pop it, we are
9478 swapping back to the stack pointer as the CFA. This happens
9479 for stack frames that don't allocate other data, so we assume
9480 the stack pointer is now pointing at the return address, i.e.
9481 the function entry state, which makes the offset be 1 word. */
9482 if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
9484 m->fs.fp_valid = false;
9485 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9487 m->fs.cfa_reg = stack_pointer_rtx;
9488 m->fs.cfa_offset -= offset;
9490 add_reg_note (insn, REG_CFA_DEF_CFA,
9491 plus_constant (Pmode, stack_pointer_rtx,
9492 m->fs.cfa_offset));
9493 RTX_FRAME_RELATED_P (insn) = 1;
9498 /* Emit code to restore saved registers using POP insns. */
9500 static void
9501 ix86_emit_restore_regs_using_pop (bool ppx_p)
9503 unsigned int regno;
9505 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9506 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9507 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
9510 /* Emit code to restore saved registers using POP2 insns. */
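/* If the current stack offset is not 16-byte aligned, the first register
   is popped singly to reach alignment; the remaining registers are then
   restored in pairs with POP2, and a final unpaired register falls back
   to a single POP.  */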
9512 static void
9513 ix86_emit_restore_regs_using_pop2 (void)
9515 int regno;
9516 int regno_list[2];
9517 regno_list[0] = regno_list[1] = -1;
9518 int loaded_regnum = 0;
9519 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
9521 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9522 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9524 if (aligned)
9526 regno_list[loaded_regnum++] = regno;
9527 if (loaded_regnum == 2)
9529 gcc_assert (regno_list[0] != -1
9530 && regno_list[1] != -1
9531 && regno_list[0] != regno_list[1]);
9533 ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
9534 regno_list[0]),
9535 gen_rtx_REG (word_mode,
9536 regno_list[1]),
9537 TARGET_APX_PPX);
9538 loaded_regnum = 0;
9539 regno_list[0] = regno_list[1] = -1;
9542 else
9544 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
9545 TARGET_APX_PPX);
9546 aligned = true;
9550 if (loaded_regnum == 1)
9551 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
9552 TARGET_APX_PPX);
9555 /* Emit code and notes for the LEAVE instruction. If INSN is non-null,
9556 do not emit the insn; only attach the notes. */
9558 static void
9559 ix86_emit_leave (rtx_insn *insn)
9561 struct machine_function *m = cfun->machine;
9563 if (!insn)
9564 insn = emit_insn (gen_leave (word_mode));
9566 ix86_add_queued_cfa_restore_notes (insn);
9568 gcc_assert (m->fs.fp_valid);
9569 m->fs.sp_valid = true;
9570 m->fs.sp_realigned = false;
9571 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9572 m->fs.fp_valid = false;
9574 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9576 m->fs.cfa_reg = stack_pointer_rtx;
9577 m->fs.cfa_offset = m->fs.sp_offset;
9579 add_reg_note (insn, REG_CFA_DEF_CFA,
9580 plus_constant (Pmode, stack_pointer_rtx,
9581 m->fs.sp_offset));
9582 RTX_FRAME_RELATED_P (insn) = 1;
9584 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9585 m->fs.fp_offset);
9588 /* Emit code to restore saved registers using MOV insns.
9589 First register is restored from CFA - CFA_OFFSET. */
9590 static void
9591 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9592 bool maybe_eh_return)
9594 struct machine_function *m = cfun->machine;
9595 unsigned int regno;
9597 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9598 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9600 rtx reg = gen_rtx_REG (word_mode, regno);
9601 rtx mem;
9602 rtx_insn *insn;
9604 mem = choose_baseaddr (cfa_offset, NULL);
9605 mem = gen_frame_mem (word_mode, mem);
9606 insn = emit_move_insn (reg, mem);
9608 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9610 /* Previously we'd represented the CFA as an expression
9611 like *(%ebp - 8). We've just popped that value from
9612 the stack, which means we need to reset the CFA to
9613 the drap register. This will remain until we restore
9614 the stack pointer. */
9615 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9616 RTX_FRAME_RELATED_P (insn) = 1;
9618 /* This means that the DRAP register is valid for addressing. */
9619 m->fs.drap_valid = true;
9621 else
9622 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9624 cfa_offset -= UNITS_PER_WORD;
9628 /* Emit code to restore saved SSE registers using MOV insns.
9629 First register is restored from CFA - CFA_OFFSET. */
9630 static void
9631 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9632 bool maybe_eh_return)
9634 unsigned int regno;
9636 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9637 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9639 rtx reg = gen_rtx_REG (V4SFmode, regno);
9640 rtx mem;
9641 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
9643 mem = choose_baseaddr (cfa_offset, &align);
9644 mem = gen_rtx_MEM (V4SFmode, mem);
9646 /* The location alignment depends upon the base register. */
9647 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
9648 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
9649 set_mem_align (mem, align);
9650 emit_insn (gen_rtx_SET (reg, mem));
9652 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9654 cfa_offset -= GET_MODE_SIZE (V4SFmode);
9658 static void
9659 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
9660 bool use_call, int style)
9662 struct machine_function *m = cfun->machine;
9663 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
9664 + m->call_ms2sysv_extra_regs;
9665 rtvec v;
9666 unsigned int elems_needed, align, i, vi = 0;
9667 rtx_insn *insn;
9668 rtx sym, tmp;
9669 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
9670 rtx r10 = NULL_RTX;
9671 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
9672 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
9673 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
9674 rtx rsi_frame_load = NULL_RTX;
9675 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
9676 enum xlogue_stub stub;
9678 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
9680 /* If using a realigned stack, we should never start with padding. */
9681 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
9683 /* Setup RSI as the stub's base pointer. */
9684 align = GET_MODE_ALIGNMENT (V4SFmode);
9685 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
9686 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
9688 emit_insn (gen_rtx_SET (rsi, tmp));
9690 /* Get a symbol for the stub. */
9691 if (frame_pointer_needed)
9692 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
9693 : XLOGUE_STUB_RESTORE_HFP_TAIL;
9694 else
9695 stub = use_call ? XLOGUE_STUB_RESTORE
9696 : XLOGUE_STUB_RESTORE_TAIL;
9697 sym = xlogue.get_stub_rtx (stub);
9699 elems_needed = ncregs;
9700 if (use_call)
9701 elems_needed += 1;
9702 else
9703 elems_needed += frame_pointer_needed ? 5 : 3;
9704 v = rtvec_alloc (elems_needed);
9706 /* We call the epilogue stub when we need to pop incoming args or we are
9707 doing a sibling call as the tail. Otherwise, we emit a jmp to the
9708 epilogue stub, and the stub itself is the tail-call. */
9709 if (use_call)
9710 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9711 else
9713 RTVEC_ELT (v, vi++) = ret_rtx;
9714 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9715 if (frame_pointer_needed)
9717 rtx rbp = gen_rtx_REG (DImode, BP_REG);
9718 gcc_assert (m->fs.fp_valid);
9719 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
9721 tmp = plus_constant (DImode, rbp, 8);
9722 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
9723 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
9724 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
9725 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
9727 else
9729 /* If no hard frame pointer, we set R10 to the SP restore value. */
9730 gcc_assert (!m->fs.fp_valid);
9731 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9732 gcc_assert (m->fs.sp_valid);
9734 r10 = gen_rtx_REG (DImode, R10_REG);
9735 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
9736 emit_insn (gen_rtx_SET (r10, tmp));
9738 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
9742 /* Generate frame load insns and restore notes. */
9743 for (i = 0; i < ncregs; ++i)
9745 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
9746 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
9747 rtx reg, frame_load;
9749 reg = gen_rtx_REG (mode, r.regno);
9750 frame_load = gen_frame_load (reg, rsi, r.offset);
9752 /* Save RSI frame load insn & note to add last. */
9753 if (r.regno == SI_REG)
9755 gcc_assert (!rsi_frame_load);
9756 rsi_frame_load = frame_load;
9757 rsi_restore_offset = r.offset;
9759 else
9761 RTVEC_ELT (v, vi++) = frame_load;
9762 ix86_add_cfa_restore_note (NULL, reg, r.offset);
9766 /* Add RSI frame load & restore note at the end. */
9767 gcc_assert (rsi_frame_load);
9768 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
9769 RTVEC_ELT (v, vi++) = rsi_frame_load;
9770 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
9771 rsi_restore_offset);
9773 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9774 if (!use_call && !frame_pointer_needed)
9776 gcc_assert (m->fs.sp_valid);
9777 gcc_assert (!m->fs.sp_realigned);
9779 /* At this point, R10 should point to frame.stack_realign_offset. */
9780 if (m->fs.cfa_reg == stack_pointer_rtx)
9781 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
9782 m->fs.sp_offset = frame.stack_realign_offset;
9785 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
9786 tmp = gen_rtx_PARALLEL (VOIDmode, v);
9787 if (use_call)
9788 insn = emit_insn (tmp);
9789 else
9791 insn = emit_jump_insn (tmp);
9792 JUMP_LABEL (insn) = ret_rtx;
9794 if (frame_pointer_needed)
9795 ix86_emit_leave (insn);
9796 else
9798 /* Need CFA adjust note. */
9799 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
9800 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
9804 RTX_FRAME_RELATED_P (insn) = true;
9805 ix86_add_queued_cfa_restore_notes (insn);
9807 /* If we're not doing a tail-call, we need to adjust the stack. */
9808 if (use_call && m->fs.sp_valid)
9810 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
9811 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9812 GEN_INT (dealloc), style,
9813 m->fs.cfa_reg == stack_pointer_rtx);
9817 /* Restore function stack, frame, and registers. */
9819 void
9820 ix86_expand_epilogue (int style)
9822 struct machine_function *m = cfun->machine;
9823 struct machine_frame_state frame_state_save = m->fs;
9824 bool restore_regs_via_mov;
9825 bool using_drap;
9826 bool restore_stub_is_tail = false;
9828 if (ix86_function_naked (current_function_decl))
9830 /* The program should not reach this point. */
9831 emit_insn (gen_ud2 ());
9832 return;
9835 ix86_finalize_stack_frame_flags ();
9836 const struct ix86_frame &frame = cfun->machine->frame;
9838 m->fs.sp_realigned = stack_realign_fp;
9839 m->fs.sp_valid = stack_realign_fp
9840 || !frame_pointer_needed
9841 || crtl->sp_is_unchanging;
9842 gcc_assert (!m->fs.sp_valid
9843 || m->fs.sp_offset == frame.stack_pointer_offset);
9845 /* The FP must be valid if the frame pointer is present. */
9846 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9847 gcc_assert (!m->fs.fp_valid
9848 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9850 /* We must have *some* valid pointer to the stack frame. */
9851 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9853 /* The DRAP is never valid at this point. */
9854 gcc_assert (!m->fs.drap_valid);
9856 /* See the comment about red zone and frame
9857 pointer usage in ix86_expand_prologue. */
9858 if (frame_pointer_needed && frame.red_zone_size)
9859 emit_insn (gen_memory_blockage ());
9861 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9862 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9864 /* Determine the CFA offset of the end of the red-zone. */
9865 m->fs.red_zone_offset = 0;
9866 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9868 /* The red-zone begins below the return address and the error code in
9869 an exception handler. */
9870 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
9872 /* When the register save area is in the aligned portion of
9873 the stack, determine the maximum runtime displacement that
9874 matches up with the aligned frame. */
9875 if (stack_realign_drap)
9876 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9877 + UNITS_PER_WORD);
9880 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
9882 /* Special care must be taken for the normal return case of a function
9883 using eh_return: the eax and edx registers are marked as saved, but
9884 not restored along this path. Adjust the save location to match. */
9885 if (crtl->calls_eh_return && style != 2)
9886 reg_save_offset -= 2 * UNITS_PER_WORD;
9888 /* EH_RETURN requires the use of moves to function properly. */
9889 if (crtl->calls_eh_return)
9890 restore_regs_via_mov = true;
9891 /* SEH requires the use of pops to identify the epilogue. */
9892 else if (TARGET_SEH)
9893 restore_regs_via_mov = false;
9894 /* If registers were already saved with PUSHP (APX PPX), don't use moves in the epilogue. */
9895 else if (m->fs.apx_ppx_used)
9896 restore_regs_via_mov = false;
9897 /* If we're only restoring one register and sp cannot be used, then
9898 use a move instruction to restore the register, since it's
9899 less work than reloading sp and popping the register. */
9900 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
9901 restore_regs_via_mov = true;
9902 else if (TARGET_EPILOGUE_USING_MOVE
9903 && cfun->machine->use_fast_prologue_epilogue
9904 && (frame.nregs > 1
9905 || m->fs.sp_offset != reg_save_offset))
9906 restore_regs_via_mov = true;
9907 else if (frame_pointer_needed
9908 && !frame.nregs
9909 && m->fs.sp_offset != reg_save_offset)
9910 restore_regs_via_mov = true;
9911 else if (frame_pointer_needed
9912 && TARGET_USE_LEAVE
9913 && cfun->machine->use_fast_prologue_epilogue
9914 && frame.nregs == 1)
9915 restore_regs_via_mov = true;
9916 else
9917 restore_regs_via_mov = false;
9919 if (restore_regs_via_mov || frame.nsseregs)
9921 /* Ensure that the entire register save area is addressable via
9922 the stack pointer, if we will restore SSE regs via sp. */
9923 if (TARGET_64BIT
9924 && m->fs.sp_offset > 0x7fffffff
9925 && sp_valid_at (frame.stack_realign_offset + 1)
9926 && (frame.nsseregs + frame.nregs) != 0)
9928 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9929 GEN_INT (m->fs.sp_offset
9930 - frame.sse_reg_save_offset),
9931 style,
9932 m->fs.cfa_reg == stack_pointer_rtx);
9936 /* If there are any SSE registers to restore, then we have to do it
9937 via moves, since there's obviously no pop for SSE regs. */
9938 if (frame.nsseregs)
9939 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9940 style == 2);
9942 if (m->call_ms2sysv)
9944 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9946 /* We cannot use a tail-call for the stub if:
9947 1. We have to pop incoming args,
9948 2. We have additional int regs to restore, or
9949 3. A sibling call will be the tail-call, or
9950 4. We are emitting an eh_return_internal epilogue.
9952 TODO: Item 4 has not yet been tested!
9954 If any of the above are true, we will call the stub rather than
9955 jump to it. */
9956 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9957 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9960 /* If using an out-of-line stub that is a tail-call, then... */
9961 if (m->call_ms2sysv && restore_stub_is_tail)
9963 /* TODO: paranoid tests. (remove eventually) */
9964 gcc_assert (m->fs.sp_valid);
9965 gcc_assert (!m->fs.sp_realigned);
9966 gcc_assert (!m->fs.fp_valid);
9967 gcc_assert (!m->fs.realigned);
9968 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9969 gcc_assert (!crtl->drap_reg);
9970 gcc_assert (!frame.nregs);
9972 else if (restore_regs_via_mov)
9974 rtx t;
9976 if (frame.nregs)
9977 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9979 /* eh_return epilogues need %ecx added to the stack pointer. */
9980 if (style == 2)
9982 rtx sa = EH_RETURN_STACKADJ_RTX;
9983 rtx_insn *insn;
9985 /* Stack realignment doesn't work with eh_return. */
9986 if (crtl->stack_realign_needed)
9987 sorry ("Stack realignment not supported with "
9988 "%<__builtin_eh_return%>");
9990 /* regparm nested functions don't work with eh_return. */
9991 if (ix86_static_chain_on_stack)
9992 sorry ("regparm nested function not supported with "
9993 "%<__builtin_eh_return%>");
9995 if (frame_pointer_needed)
9997 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9998 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9999 emit_insn (gen_rtx_SET (sa, t));
10001 /* NB: eh_return epilogues must restore the frame pointer
10002 in word_mode since the upper 32 bits of RBP register
10003 can have any values. */
10004 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
10005 rtx frame_reg = gen_rtx_REG (word_mode,
10006 HARD_FRAME_POINTER_REGNUM);
10007 insn = emit_move_insn (frame_reg, t);
10009 /* Note that we use SA as a temporary CFA, as the return
10010 address is at the proper place relative to it. We
10011 pretend this happens at the FP restore insn because
10012 prior to this insn the FP would be stored at the wrong
10013 offset relative to SA, and after this insn we have no
10014 other reasonable register to use for the CFA. We don't
10015 bother resetting the CFA to the SP for the duration of
10016 the return insn, unless the control flow instrumentation
10017 is done. In this case the SP is used later and we have
10018 to reset CFA to SP. */
10019 add_reg_note (insn, REG_CFA_DEF_CFA,
10020 plus_constant (Pmode, sa, UNITS_PER_WORD));
10021 ix86_add_queued_cfa_restore_notes (insn);
10022 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
10023 RTX_FRAME_RELATED_P (insn) = 1;
10025 m->fs.cfa_reg = sa;
10026 m->fs.cfa_offset = UNITS_PER_WORD;
10027 m->fs.fp_valid = false;
10029 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10030 const0_rtx, style,
10031 flag_cf_protection);
10033 else
10035 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10036 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
10037 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
10038 ix86_add_queued_cfa_restore_notes (insn);
10040 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10041 if (m->fs.cfa_offset != UNITS_PER_WORD)
10043 m->fs.cfa_offset = UNITS_PER_WORD;
10044 add_reg_note (insn, REG_CFA_DEF_CFA,
10045 plus_constant (Pmode, stack_pointer_rtx,
10046 UNITS_PER_WORD));
10047 RTX_FRAME_RELATED_P (insn) = 1;
10050 m->fs.sp_offset = UNITS_PER_WORD;
10051 m->fs.sp_valid = true;
10052 m->fs.sp_realigned = false;
10055 else
10057 /* SEH requires that the function end with (1) a stack adjustment
10058 if necessary, (2) a sequence of pops, and (3) a return or
10059 jump instruction. Prevent insns from the function body from
10060 being scheduled into this sequence. */
10061 if (TARGET_SEH)
10063 /* Prevent a catch region from being adjacent to the standard
10064 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
10065 nor several other flags that would be interesting to test are
10066 set up yet. */
10067 if (flag_non_call_exceptions)
10068 emit_insn (gen_nops (const1_rtx));
10069 else
10070 emit_insn (gen_blockage ());
10073 /* The first step is to deallocate the stack frame so that we can
10074 pop the registers. If the stack pointer was realigned, it needs
10075 to be restored now. Also do it on SEH targets for very large
10076 frames, as the emitted instructions aren't allowed by the ABI
10077 in epilogues. */
10078 if (!m->fs.sp_valid || m->fs.sp_realigned
10079 || (TARGET_SEH
10080 && (m->fs.sp_offset - reg_save_offset
10081 >= SEH_MAX_FRAME_SIZE)))
10083 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10084 GEN_INT (m->fs.fp_offset
10085 - reg_save_offset),
10086 style, false);
10088 else if (m->fs.sp_offset != reg_save_offset)
10090 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10091 GEN_INT (m->fs.sp_offset
10092 - reg_save_offset),
10093 style,
10094 m->fs.cfa_reg == stack_pointer_rtx);
10097 if (TARGET_APX_PUSH2POP2
10098 && ix86_can_use_push2pop2 ()
10099 && m->func_type == TYPE_NORMAL)
10100 ix86_emit_restore_regs_using_pop2 ();
10101 else
10102 ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
10105 /* If we used a frame pointer and haven't already got rid of it,
10106 then do so now. */
10107 if (m->fs.fp_valid)
10109 /* If the stack pointer is valid and pointing at the frame
10110 pointer store address, then we only need a pop. */
10111 if (sp_valid_at (frame.hfp_save_offset)
10112 && m->fs.sp_offset == frame.hfp_save_offset)
10113 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10114 /* Leave results in shorter dependency chains on CPUs that are
10115 able to grok it fast. */
10116 else if (TARGET_USE_LEAVE
10117 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
10118 || !cfun->machine->use_fast_prologue_epilogue)
10119 ix86_emit_leave (NULL);
10120 else
10122 pro_epilogue_adjust_stack (stack_pointer_rtx,
10123 hard_frame_pointer_rtx,
10124 const0_rtx, style, !using_drap);
10125 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10129 if (using_drap)
10131 int param_ptr_offset = UNITS_PER_WORD;
10132 rtx_insn *insn;
10134 gcc_assert (stack_realign_drap);
10136 if (ix86_static_chain_on_stack)
10137 param_ptr_offset += UNITS_PER_WORD;
10138 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10139 param_ptr_offset += UNITS_PER_WORD;
10141 insn = emit_insn (gen_rtx_SET
10142 (stack_pointer_rtx,
10143 plus_constant (Pmode, crtl->drap_reg,
10144 -param_ptr_offset)));
10145 m->fs.cfa_reg = stack_pointer_rtx;
10146 m->fs.cfa_offset = param_ptr_offset;
10147 m->fs.sp_offset = param_ptr_offset;
10148 m->fs.realigned = false;
10150 add_reg_note (insn, REG_CFA_DEF_CFA,
10151 plus_constant (Pmode, stack_pointer_rtx,
10152 param_ptr_offset));
10153 RTX_FRAME_RELATED_P (insn) = 1;
10155 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10156 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10159 /* At this point the stack pointer must be valid, and we must have
10160 restored all of the registers. We may not have deallocated the
10161 entire stack frame. We've delayed this until now because it may
10162 be possible to merge the local stack deallocation with the
10163 deallocation forced by ix86_static_chain_on_stack. */
10164 gcc_assert (m->fs.sp_valid);
10165 gcc_assert (!m->fs.sp_realigned);
10166 gcc_assert (!m->fs.fp_valid);
10167 gcc_assert (!m->fs.realigned);
10168 if (m->fs.sp_offset != UNITS_PER_WORD)
10170 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10171 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10172 style, true);
10174 else
10175 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10177 /* Sibcall epilogues don't want a return instruction. */
10178 if (style == 0)
10180 m->fs = frame_state_save;
10181 return;
10184 if (cfun->machine->func_type != TYPE_NORMAL)
10185 emit_jump_insn (gen_interrupt_return ());
10186 else if (crtl->args.pops_args && crtl->args.size)
10188 rtx popc = GEN_INT (crtl->args.pops_args);
10190 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10191 address, do explicit add, and jump indirectly to the caller. */
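/* Sketch of the >= 64K case handled below (illustrative, AT&T syntax):
       popl  %ecx          # pop the return address
       addl  $N, %esp      # N = crtl->args.pops_args
       jmp   *%ecx         # return to the caller
   whereas the common case is a single "ret $N", whose immediate is
   limited to 16 bits.  */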
10193 if (crtl->args.pops_args >= 65536)
10195 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10196 rtx_insn *insn;
10198 /* There is no "pascal" calling convention in any 64bit ABI. */
10199 gcc_assert (!TARGET_64BIT);
10201 insn = emit_insn (gen_pop (ecx));
10202 m->fs.cfa_offset -= UNITS_PER_WORD;
10203 m->fs.sp_offset -= UNITS_PER_WORD;
10205 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10206 x = gen_rtx_SET (stack_pointer_rtx, x);
10207 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10208 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10209 RTX_FRAME_RELATED_P (insn) = 1;
10211 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10212 popc, -1, true);
10213 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10215 else
10216 emit_jump_insn (gen_simple_return_pop_internal (popc));
10218 else if (!m->call_ms2sysv || !restore_stub_is_tail)
10220 /* In the case of a return from EH, a simple return cannot be used,
10221 as the return address will be compared with a shadow stack
10222 return address. Use an indirect jump instead. */
10223 if (style == 2 && flag_cf_protection)
10225 /* Register used in indirect jump must be in word_mode. But
10226 Pmode may not be the same as word_mode for x32. */
10227 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
10228 rtx_insn *insn;
10230 insn = emit_insn (gen_pop (ecx));
10231 m->fs.cfa_offset -= UNITS_PER_WORD;
10232 m->fs.sp_offset -= UNITS_PER_WORD;
10234 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10235 x = gen_rtx_SET (stack_pointer_rtx, x);
10236 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10237 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10238 RTX_FRAME_RELATED_P (insn) = 1;
10240 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10242 else
10243 emit_jump_insn (gen_simple_return_internal ());
10246 /* Restore the state back to the state from the prologue,
10247 so that it's correct for the next epilogue. */
10248 m->fs = frame_state_save;
10251 /* Reset from the function's potential modifications. */
10253 static void
10254 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
10256 if (pic_offset_table_rtx
10257 && !ix86_use_pseudo_pic_reg ())
10258 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10260 if (TARGET_MACHO)
10262 rtx_insn *insn = get_last_insn ();
10263 rtx_insn *deleted_debug_label = NULL;
10265 /* Mach-O doesn't support labels at the end of objects, so if
10266 it looks like we might want one, take special action.
10267 First, collect any sequence of deleted debug labels. */
10268 while (insn
10269 && NOTE_P (insn)
10270 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10272 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
10273 notes only, instead set their CODE_LABEL_NUMBER to -1,
10274 otherwise there would be code generation differences
10275 between -g and -g0. */
10276 if (NOTE_P (insn) && NOTE_KIND (insn)
10277 == NOTE_INSN_DELETED_DEBUG_LABEL)
10278 deleted_debug_label = insn;
10279 insn = PREV_INSN (insn);
10282 /* If we have:
10283 label:
10284 barrier
10285 then this needs to be detected, so skip past the barrier. */
10287 if (insn && BARRIER_P (insn))
10288 insn = PREV_INSN (insn);
10290 /* Up to now we've only seen notes or barriers. */
10291 if (insn)
10293 if (LABEL_P (insn)
10294 || (NOTE_P (insn)
10295 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
10296 /* Trailing label. */
10297 fputs ("\tnop\n", file);
10298 else if (cfun && ! cfun->is_thunk)
10300 /* See if we have a completely empty function body, skipping
10301 the special case of the picbase thunk emitted as asm. */
10302 while (insn && ! INSN_P (insn))
10303 insn = PREV_INSN (insn);
10304 /* If we don't find any insns, we've got an empty function body;
10305 i.e. completely empty - without a return or branch. This is
10306 taken as the case where a function body has been removed
10307 because it contains an inline __builtin_unreachable(). GCC
10308 declares that reaching __builtin_unreachable() means UB so
10309 we're not obliged to do anything special; however, we want
10310 non-zero-sized function bodies. To meet this, and help the
10311 user out, let's trap the case. */
10312 if (insn == NULL)
10313 fputs ("\tud2\n", file);
10316 else if (deleted_debug_label)
10317 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
10318 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
10319 CODE_LABEL_NUMBER (insn) = -1;
10323 /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
10325 void
10326 ix86_print_patchable_function_entry (FILE *file,
10327 unsigned HOST_WIDE_INT patch_area_size,
10328 bool record_p)
10330 if (cfun->machine->function_label_emitted)
10332 /* NB: When ix86_print_patchable_function_entry is called after
10333 the function label has been emitted, we have inserted or queued
10334 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
10335 place. There is nothing to do here. */
10336 return;
10339 default_print_patchable_function_entry (file, patch_area_size,
10340 record_p);
10343 /* Output patchable area. NB: default_print_patchable_function_entry
10344 isn't available in i386.md. */
10346 void
10347 ix86_output_patchable_area (unsigned int patch_area_size,
10348 bool record_p)
10350 default_print_patchable_function_entry (asm_out_file,
10351 patch_area_size,
10352 record_p);
10355 /* Return a scratch register to use in the split stack prologue. The
10356 split stack prologue is used for -fsplit-stack. It is the first
10357 instructions in the function, even before the regular prologue.
10358 The scratch register can be any caller-saved register which is not
10359 used for parameters or for the static chain. */
10361 static unsigned int
10362 split_stack_prologue_scratch_regno (void)
10364 if (TARGET_64BIT)
10365 return R11_REG;
10366 else
10368 bool is_fastcall, is_thiscall;
10369 int regparm;
10371 is_fastcall = (lookup_attribute ("fastcall",
10372 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10373 != NULL);
10374 is_thiscall = (lookup_attribute ("thiscall",
10375 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10376 != NULL);
10377 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10379 if (is_fastcall)
10381 if (DECL_STATIC_CHAIN (cfun->decl))
10383 sorry ("%<-fsplit-stack%> does not support fastcall with "
10384 "nested function");
10385 return INVALID_REGNUM;
10387 return AX_REG;
10389 else if (is_thiscall)
10391 if (!DECL_STATIC_CHAIN (cfun->decl))
10392 return DX_REG;
10393 return AX_REG;
10395 else if (regparm < 3)
10397 if (!DECL_STATIC_CHAIN (cfun->decl))
10398 return CX_REG;
10399 else
10401 if (regparm >= 2)
10403 sorry ("%<-fsplit-stack%> does not support 2 register "
10404 "parameters for a nested function");
10405 return INVALID_REGNUM;
10407 return DX_REG;
10410 else
10412 /* FIXME: We could make this work by pushing a register
10413 around the addition and comparison. */
10414 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
10415 return INVALID_REGNUM;
10420 /* A SYMBOL_REF for the function which allocates new stack space for
10421 -fsplit-stack. */
10423 static GTY(()) rtx split_stack_fn;
10425 /* A SYMBOL_REF for the variant of __morestack used with the large
10426 code model. */
10428 static GTY(()) rtx split_stack_fn_large;
10430 /* Return the location of the stack guard value in the TLS block. */
10432 static rtx
10433 ix86_split_stack_guard (void)
10435 int offset;
10436 addr_space_t as = DEFAULT_TLS_SEG_REG;
10437 rtx r;
10439 gcc_assert (flag_split_stack);
10441 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
10442 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
10443 #else
10444 gcc_unreachable ();
10445 #endif
10447 r = GEN_INT (offset);
10448 r = gen_const_mem (Pmode, r);
10449 set_mem_addr_space (r, as);
10451 return r;
10454 /* Handle -fsplit-stack. These are the first instructions in the
10455 function, even before the regular prologue. */
10457 void
10458 ix86_expand_split_stack_prologue (void)
10460 HOST_WIDE_INT allocate;
10461 unsigned HOST_WIDE_INT args_size;
10462 rtx_code_label *label;
10463 rtx limit, current, allocate_rtx, call_fusage;
10464 rtx_insn *call_insn;
10465 unsigned int scratch_regno = INVALID_REGNUM;
10466 rtx scratch_reg = NULL_RTX;
10467 rtx_code_label *varargs_label = NULL;
10468 rtx fn;
10470 gcc_assert (flag_split_stack && reload_completed);
10472 ix86_finalize_stack_frame_flags ();
10473 struct ix86_frame &frame = cfun->machine->frame;
10474 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10476 /* This is the label we will branch to if we have enough stack
10477 space. We expect the basic block reordering pass to reverse this
10478 branch if optimizing, so that we branch in the unlikely case. */
10479 label = gen_label_rtx ();
10481 /* We need to compare the stack pointer minus the frame size with
10482 the stack boundary in the TCB. The stack boundary always gives
10483 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10484 can compare directly. Otherwise we need to do an addition. */
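/* Rough sketch of the sequence emitted here for a small 64-bit frame
   (illustrative only; the TCB offset comes from
   TARGET_THREAD_SPLIT_STACK_OFFSET and the exact code varies):
       cmpq  %fs:OFFSET, %rsp   # enough stack already?
       jae   .Lok               # very likely taken
       movq  $ALLOCATE, %r10    # frame size to allocate
       movq  $ARGS_SIZE, %r11   # bytes of incoming arguments to copy
       callq __morestack
       ret                      # see libgcc/config/i386/morestack.S
   .Lok:
       ...normal prologue...  */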
10486 limit = ix86_split_stack_guard ();
10488 if (allocate >= SPLIT_STACK_AVAILABLE
10489 || flag_force_indirect_call)
10491 scratch_regno = split_stack_prologue_scratch_regno ();
10492 if (scratch_regno == INVALID_REGNUM)
10493 return;
10496 if (allocate >= SPLIT_STACK_AVAILABLE)
10498 rtx offset;
10500 /* We need a scratch register to hold the stack pointer minus
10501 the required frame size. Since this is the very start of the
10502 function, the scratch register can be any caller-saved
10503 register which is not used for parameters. */
10504 offset = GEN_INT (- allocate);
10506 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10507 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10509 /* We don't use gen_add in this case because it will
10510 want to split to lea, but when not optimizing the insn
10511 will not be split after this point. */
10512 emit_insn (gen_rtx_SET (scratch_reg,
10513 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10514 offset)));
10516 else
10518 emit_move_insn (scratch_reg, offset);
10519 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
10521 current = scratch_reg;
10523 else
10524 current = stack_pointer_rtx;
10526 ix86_expand_branch (GEU, current, limit, label);
10527 rtx_insn *jump_insn = get_last_insn ();
10528 JUMP_LABEL (jump_insn) = label;
10530 /* Mark the jump as very likely to be taken. */
10531 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
10533 if (split_stack_fn == NULL_RTX)
10535 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10536 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
10538 fn = split_stack_fn;
10540 /* Get more stack space. We pass in the desired stack space and the
10541 size of the arguments to copy to the new stack. In 32-bit mode
10542 we push the parameters; __morestack will return on a new stack
10543 anyhow. In 64-bit mode we pass the parameters in r10 and
10544 r11. */
10545 allocate_rtx = GEN_INT (allocate);
10546 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
10547 call_fusage = NULL_RTX;
10548 rtx pop = NULL_RTX;
10549 if (TARGET_64BIT)
10551 rtx reg10, reg11;
10553 reg10 = gen_rtx_REG (DImode, R10_REG);
10554 reg11 = gen_rtx_REG (DImode, R11_REG);
10556 /* If this function uses a static chain, it will be in %r10.
10557 Preserve it across the call to __morestack. */
10558 if (DECL_STATIC_CHAIN (cfun->decl))
10560 rtx rax;
10562 rax = gen_rtx_REG (word_mode, AX_REG);
10563 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
10564 use_reg (&call_fusage, rax);
10567 if (flag_force_indirect_call
10568 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10570 HOST_WIDE_INT argval;
10572 if (split_stack_fn_large == NULL_RTX)
10574 split_stack_fn_large
10575 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10576 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
10579 fn = split_stack_fn_large;
10581 if (ix86_cmodel == CM_LARGE_PIC)
10583 rtx_code_label *label;
10584 rtx x;
10586 gcc_assert (Pmode == DImode);
10588 label = gen_label_rtx ();
10589 emit_label (label);
10590 LABEL_PRESERVE_P (label) = 1;
10591 emit_insn (gen_set_rip_rex64 (reg10, label));
10592 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10593 emit_insn (gen_add2_insn (reg10, reg11));
10594 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
10595 x = gen_rtx_CONST (Pmode, x);
10596 emit_move_insn (reg11, x);
10597 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10598 x = gen_const_mem (Pmode, x);
10599 fn = copy_to_suggested_reg (x, reg11, Pmode);
10601 else if (ix86_cmodel == CM_LARGE)
10602 fn = copy_to_suggested_reg (fn, reg11, Pmode);
10604 /* When using the large model we need to load the address
10605 into a register, and we've run out of registers. So we
10606 switch to a different calling convention, and we call a
10607 different function: __morestack_large_model. We pass the
10608 argument size in the upper 32 bits of r10 and pass the
10609 frame size in the lower 32 bits. */
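/* Worked example (illustrative): with args_size == 32 and allocate ==
   0x1000, argval becomes (32 << 32) + 0x1000 == 0x2000001000, so %r10
   carries both values in one 64-bit register.  The shift is written as
   ((args_size << 16) << 16), presumably to stay well-defined if
   HOST_WIDE_INT were ever narrower than 64 bits.  */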
10610 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
10611 gcc_assert ((args_size & 0xffffffff) == args_size);
10613 argval = ((args_size << 16) << 16) + allocate;
10614 emit_move_insn (reg10, GEN_INT (argval));
10616 else
10618 emit_move_insn (reg10, allocate_rtx);
10619 emit_move_insn (reg11, GEN_INT (args_size));
10620 use_reg (&call_fusage, reg11);
10623 use_reg (&call_fusage, reg10);
10625 else
10627 if (flag_force_indirect_call && flag_pic)
10629 rtx x;
10631 gcc_assert (Pmode == SImode);
10633 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10635 emit_insn (gen_set_got (scratch_reg));
10636 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
10637 UNSPEC_GOT);
10638 x = gen_rtx_CONST (Pmode, x);
10639 x = gen_rtx_PLUS (Pmode, scratch_reg, x);
10640 x = gen_const_mem (Pmode, x);
10641 fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
10644 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
10645 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
10646 insn = emit_insn (gen_push (allocate_rtx));
10647 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
10648 pop = GEN_INT (2 * UNITS_PER_WORD);
10651 if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
10653 scratch_reg = gen_rtx_REG (word_mode, scratch_regno);
10655 if (GET_MODE (fn) != word_mode)
10656 fn = gen_rtx_ZERO_EXTEND (word_mode, fn);
10658 fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
10661 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10662 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10663 pop, false);
10664 add_function_usage_to (call_insn, call_fusage);
10665 if (!TARGET_64BIT)
10666 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
10667 /* Indicate that this function can't jump to non-local gotos. */
10668 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
10670 /* In order to make call/return prediction work right, we now need
10671 to execute a return instruction. See
10672 libgcc/config/i386/morestack.S for the details on how this works.
10674 For flow purposes gcc must not see this as a return
10675 instruction--we need control flow to continue at the subsequent
10676 label. Therefore, we use an unspec. */
10677 gcc_assert (crtl->args.pops_args < 65536);
10678 rtx_insn *ret_insn
10679 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10681 if ((flag_cf_protection & CF_BRANCH))
10683 /* Insert ENDBR since __morestack will jump back here via indirect
10684 call. */
10685 rtx cet_eb = gen_nop_endbr ();
10686 emit_insn_after (cet_eb, ret_insn);
10689 /* If we are in 64-bit mode and this function uses a static chain,
10690 we saved %r10 in %rax before calling __morestack. */
10691 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10692 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10693 gen_rtx_REG (word_mode, AX_REG));
10695 /* If this function calls va_start, we need to store a pointer to
10696 the arguments on the old stack, because they may not have been
10697 all copied to the new stack. At this point the old stack can be
10698 found at the frame pointer value used by __morestack, because
10699 __morestack has set that up before calling back to us. Here we
10700 store that pointer in a scratch register, and in
10701 ix86_expand_prologue we store the scratch register in a stack
10702 slot. */
10703 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10705 rtx frame_reg;
10706 int words;
10708 scratch_regno = split_stack_prologue_scratch_regno ();
10709 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10710 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10712 /* 64-bit:
10713 fp -> old fp value
10714 return address within this function
10715 return address of caller of this function
10716 stack arguments
10717 So we add three words to get to the stack arguments.
10719 32-bit:
10720 fp -> old fp value
10721 return address within this function
10722 first argument to __morestack
10723 second argument to __morestack
10724 return address of caller of this function
10725 stack arguments
10726 So we add five words to get to the stack arguments. */
10728 words = TARGET_64BIT ? 3 : 5;
10729 emit_insn (gen_rtx_SET (scratch_reg,
10730 plus_constant (Pmode, frame_reg,
10731 words * UNITS_PER_WORD)));
10733 varargs_label = gen_label_rtx ();
10734 emit_jump_insn (gen_jump (varargs_label));
10735 JUMP_LABEL (get_last_insn ()) = varargs_label;
10737 emit_barrier ();
10740 emit_label (label);
10741 LABEL_NUSES (label) = 1;
10743 /* If this function calls va_start, we now have to set the scratch
10744 register for the case where we do not call __morestack. In this
10745 case we need to set it based on the stack pointer. */
10746 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10748 emit_insn (gen_rtx_SET (scratch_reg,
10749 plus_constant (Pmode, stack_pointer_rtx,
10750 UNITS_PER_WORD)));
10752 emit_label (varargs_label);
10753 LABEL_NUSES (varargs_label) = 1;
10757 /* We may have to tell the dataflow pass that the split stack prologue
10758 is initializing a scratch register. */
10760 static void
10761 ix86_live_on_entry (bitmap regs)
10763 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10765 gcc_assert (flag_split_stack);
10766 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10770 /* Extract the parts of an RTL expression that is a valid memory address
10771 for an instruction. Return false if the structure of the address is
10772 grossly off. */
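/* For illustration (a sketch of a common case): the address
       (plus:SI (plus:SI (mult:SI (reg:SI %esi) (const_int 4))
                         (reg:SI %ebx))
                (const_int 12))
   decomposes into out->base = %ebx, out->index = %esi, out->scale = 4,
   out->disp = (const_int 12), out->seg = ADDR_SPACE_GENERIC, i.e. the
   operand that prints as 12(%ebx,%esi,4).  */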
10774 bool
10775 ix86_decompose_address (rtx addr, struct ix86_address *out)
10777 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10778 rtx base_reg, index_reg;
10779 HOST_WIDE_INT scale = 1;
10780 rtx scale_rtx = NULL_RTX;
10781 rtx tmp;
10782 addr_space_t seg = ADDR_SPACE_GENERIC;
10784 /* Allow zero-extended SImode addresses,
10785 they will be emitted with addr32 prefix. */
10786 if (TARGET_64BIT && GET_MODE (addr) == DImode)
10788 if (GET_CODE (addr) == ZERO_EXTEND
10789 && GET_MODE (XEXP (addr, 0)) == SImode)
10791 addr = XEXP (addr, 0);
10792 if (CONST_INT_P (addr))
10793 return false;
10795 else if (GET_CODE (addr) == AND
10796 && const_32bit_mask (XEXP (addr, 1), DImode))
10798 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
10799 if (addr == NULL_RTX)
10800 return false;
10802 if (CONST_INT_P (addr))
10803 return false;
10805 else if (GET_CODE (addr) == AND)
10807 /* For an ASHIFT inside an AND, combine will not generate a
10808 canonical zero-extend. Merge the AND mask and the shift count
10809 to check whether it is a canonical zero-extend. */
10810 tmp = XEXP (addr, 0);
10811 rtx mask = XEXP (addr, 1);
10812 if (tmp && GET_CODE(tmp) == ASHIFT)
10814 rtx shift_val = XEXP (tmp, 1);
10815 if (CONST_INT_P (mask) && CONST_INT_P (shift_val)
10816 && (((unsigned HOST_WIDE_INT) INTVAL(mask)
10817 | ((HOST_WIDE_INT_1U << INTVAL(shift_val)) - 1))
10818 == 0xffffffff))
10820 addr = lowpart_subreg (SImode, XEXP (addr, 0),
10821 DImode);
10828 /* Allow SImode subregs of DImode addresses,
10829 they will be emitted with addr32 prefix. */
10830 if (TARGET_64BIT && GET_MODE (addr) == SImode)
10832 if (SUBREG_P (addr)
10833 && GET_MODE (SUBREG_REG (addr)) == DImode)
10835 addr = SUBREG_REG (addr);
10836 if (CONST_INT_P (addr))
10837 return false;
10841 if (REG_P (addr))
10842 base = addr;
10843 else if (SUBREG_P (addr))
10845 if (REG_P (SUBREG_REG (addr)))
10846 base = addr;
10847 else
10848 return false;
10850 else if (GET_CODE (addr) == PLUS)
10852 rtx addends[4], op;
10853 int n = 0, i;
10855 op = addr;
10858 if (n >= 4)
10859 return false;
10860 addends[n++] = XEXP (op, 1);
10861 op = XEXP (op, 0);
10863 while (GET_CODE (op) == PLUS);
10864 if (n >= 4)
10865 return false;
10866 addends[n] = op;
10868 for (i = n; i >= 0; --i)
10870 op = addends[i];
10871 switch (GET_CODE (op))
10873 case MULT:
10874 if (index)
10875 return false;
10876 index = XEXP (op, 0);
10877 scale_rtx = XEXP (op, 1);
10878 break;
10880 case ASHIFT:
10881 if (index)
10882 return false;
10883 index = XEXP (op, 0);
10884 tmp = XEXP (op, 1);
10885 if (!CONST_INT_P (tmp))
10886 return false;
10887 scale = INTVAL (tmp);
10888 if ((unsigned HOST_WIDE_INT) scale > 3)
10889 return false;
10890 scale = 1 << scale;
10891 break;
10893 case ZERO_EXTEND:
10894 op = XEXP (op, 0);
10895 if (GET_CODE (op) != UNSPEC)
10896 return false;
10897 /* FALLTHRU */
10899 case UNSPEC:
10900 if (XINT (op, 1) == UNSPEC_TP
10901 && TARGET_TLS_DIRECT_SEG_REFS
10902 && seg == ADDR_SPACE_GENERIC)
10903 seg = DEFAULT_TLS_SEG_REG;
10904 else
10905 return false;
10906 break;
10908 case SUBREG:
10909 if (!REG_P (SUBREG_REG (op)))
10910 return false;
10911 /* FALLTHRU */
10913 case REG:
10914 if (!base)
10915 base = op;
10916 else if (!index)
10917 index = op;
10918 else
10919 return false;
10920 break;
10922 case CONST:
10923 case CONST_INT:
10924 case SYMBOL_REF:
10925 case LABEL_REF:
10926 if (disp)
10927 return false;
10928 disp = op;
10929 break;
10931 default:
10932 return false;
10936 else if (GET_CODE (addr) == MULT)
10938 index = XEXP (addr, 0); /* index*scale */
10939 scale_rtx = XEXP (addr, 1);
10941 else if (GET_CODE (addr) == ASHIFT)
10943 /* We're called for lea too, which implements ashift on occasion. */
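/* E.g. an lea operand such as (ashift (reg) (const_int 3)) denotes the
   register scaled by 8; the shift count is converted to a scale of
   1 << count just below.  */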
10944 index = XEXP (addr, 0);
10945 tmp = XEXP (addr, 1);
10946 if (!CONST_INT_P (tmp))
10947 return false;
10948 scale = INTVAL (tmp);
10949 if ((unsigned HOST_WIDE_INT) scale > 3)
10950 return false;
10951 scale = 1 << scale;
10953 else
10954 disp = addr; /* displacement */
10956 if (index)
10958 if (REG_P (index))
10960 else if (SUBREG_P (index)
10961 && REG_P (SUBREG_REG (index)))
10963 else
10964 return false;
10967 /* Extract the integral value of scale. */
10968 if (scale_rtx)
10970 if (!CONST_INT_P (scale_rtx))
10971 return false;
10972 scale = INTVAL (scale_rtx);
10975 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
10976 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
10978 /* Avoid useless 0 displacement. */
10979 if (disp == const0_rtx && (base || index))
10980 disp = NULL_RTX;
10982 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
10983 if (base_reg && index_reg && scale == 1
10984 && (REGNO (index_reg) == ARG_POINTER_REGNUM
10985 || REGNO (index_reg) == FRAME_POINTER_REGNUM
10986 || REGNO (index_reg) == SP_REG))
10988 std::swap (base, index);
10989 std::swap (base_reg, index_reg);
10992 /* Special case: %ebp cannot be encoded as a base without a displacement.
10993 Similarly %r13. */
10994 if (!disp && base_reg
10995 && (REGNO (base_reg) == ARG_POINTER_REGNUM
10996 || REGNO (base_reg) == FRAME_POINTER_REGNUM
10997 || REGNO (base_reg) == BP_REG
10998 || REGNO (base_reg) == R13_REG))
10999 disp = const0_rtx;
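/* Encoding background (a sketch): the ModR/M combination that would mean
   "(%ebp) with no displacement" is taken by disp32 (or RIP-relative)
   addressing, and %r13 shares the same encoding under REX, so these bases
   are emitted as 0(%ebp) / 0(%r13) with an explicit zero displacement.  */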
11001 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11002 Avoid this by transforming to [%esi+0].
11003 Reload calls address legitimization without cfun defined, so we need
11004 to test cfun for being non-NULL. */
11005 if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
11006 && base_reg && !index_reg && !disp
11007 && REGNO (base_reg) == SI_REG)
11008 disp = const0_rtx;
11010 /* Special case: encode reg+reg instead of reg*2. */
11011 if (!base && index && scale == 2)
11012 base = index, base_reg = index_reg, scale = 1;
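/* For example, a lone (mult (reg) (const_int 2)) would otherwise need the
   long 0(,%reg,2) form with a 32-bit zero displacement (see the next
   special case); (%reg,%reg) encodes the same address more compactly.  */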
11014 /* Special case: scaling cannot be encoded without base or displacement. */
11015 if (!base && !disp && index && scale != 1)
11016 disp = const0_rtx;
11018 out->base = base;
11019 out->index = index;
11020 out->disp = disp;
11021 out->scale = scale;
11022 out->seg = seg;
11024 return true;
11027 /* Return the cost of the memory address X.
11028 For i386, it is better to use a complex address than let gcc copy
11029 the address into a reg and make a new pseudo. But not if the address
11030 requires two regs - that would mean more pseudos with longer
11031 lifetimes. */
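/* Illustrative examples of the resulting cost (a sketch, ignoring the
   pic_offset_table_rtx exception described below): an address built only
   from hard registers and constants costs 1, one whose base is still a
   pseudo costs 2, and one using pseudos for both base and index costs 3;
   on K6 the affected forms handled below add a further 10.  */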
11032 static int
11033 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
11035 struct ix86_address parts;
11036 int cost = 1;
11037 int ok = ix86_decompose_address (x, &parts);
11039 gcc_assert (ok);
11041 if (parts.base && SUBREG_P (parts.base))
11042 parts.base = SUBREG_REG (parts.base);
11043 if (parts.index && SUBREG_P (parts.index))
11044 parts.index = SUBREG_REG (parts.index);
11046 /* Attempt to minimize the number of registers in the address by increasing
11047 the address cost for each used register. We don't increase the address cost
11048 for "pic_offset_table_rtx". When a memop using "pic_offset_table_rtx"
11049 is not invariant itself, it most likely means that the base or index is not
11050 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
11051 which is not profitable for x86. */
11052 if (parts.base
11053 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11054 && (current_pass->type == GIMPLE_PASS
11055 || !pic_offset_table_rtx
11056 || !REG_P (parts.base)
11057 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
11058 cost++;
11060 if (parts.index
11061 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11062 && (current_pass->type == GIMPLE_PASS
11063 || !pic_offset_table_rtx
11064 || !REG_P (parts.index)
11065 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
11066 cost++;
11068 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11069 since its predecode logic can't detect the length of instructions
11070 and it degenerates to vector decoding. Increase the cost of such
11071 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11072 to split such addresses or even refuse such addresses at all.
11074 Following addressing modes are affected:
11075 [base+scale*index]
11076 [scale*index+disp]
11077 [base+index]
11079 The first and last cases may be avoidable by explicitly coding the zero in
11080 the memory address, but I don't have an AMD-K6 machine handy to check this
11081 theory. */
11083 if (TARGET_CPU_P (K6)
11084 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11085 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11086 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11087 cost += 10;
11089 return cost;
11092 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11093 this is used to form addresses to local data when -fPIC is in
11094 use. */
11096 static bool
11097 darwin_local_data_pic (rtx disp)
11099 return (GET_CODE (disp) == UNSPEC
11100 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11103 /* True if the function symbol operand X should be loaded from GOT.
11104 If CALL_P is true, X is a call operand.
11106 NB: -mno-direct-extern-access doesn't force a load from the GOT for
11107 calls.
11109 NB: In 32-bit mode, only non-PIC is allowed in inline assembly
11110 statements, since a PIC register could not be available at the
11111 call site. */
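/* For example (a sketch): with -fno-plt, or with the "noplt" attribute on
   the callee, a call to an external function foo is emitted as
   "call *foo@GOTPCREL(%rip)" instead of "call foo@PLT", which is why such
   SYMBOL_REFs must be forced through the GOT here.  */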
11113 bool
11114 ix86_force_load_from_GOT_p (rtx x, bool call_p)
11116 return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
11117 && !TARGET_PECOFF && !TARGET_MACHO
11118 && (!flag_pic || this_is_asm_operands)
11119 && ix86_cmodel != CM_LARGE
11120 && ix86_cmodel != CM_LARGE_PIC
11121 && GET_CODE (x) == SYMBOL_REF
11122 && ((!call_p
11123 && (!ix86_direct_extern_access
11124 || (SYMBOL_REF_DECL (x)
11125 && lookup_attribute ("nodirect_extern_access",
11126 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
11127 || (SYMBOL_REF_FUNCTION_P (x)
11128 && (!flag_plt
11129 || (SYMBOL_REF_DECL (x)
11130 && lookup_attribute ("noplt",
11131 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
11132 && !SYMBOL_REF_LOCAL_P (x));
11135 /* Determine if a given RTX is a valid constant. We already know this
11136 satisfies CONSTANT_P. */
11138 static bool
11139 ix86_legitimate_constant_p (machine_mode mode, rtx x)
11141 switch (GET_CODE (x))
11143 case CONST:
11144 x = XEXP (x, 0);
11146 if (GET_CODE (x) == PLUS)
11148 if (!CONST_INT_P (XEXP (x, 1)))
11149 return false;
11150 x = XEXP (x, 0);
11153 if (TARGET_MACHO && darwin_local_data_pic (x))
11154 return true;
11156 /* Only some unspecs are valid as "constants". */
11157 if (GET_CODE (x) == UNSPEC)
11158 switch (XINT (x, 1))
11160 case UNSPEC_GOT:
11161 case UNSPEC_GOTOFF:
11162 case UNSPEC_PLTOFF:
11163 return TARGET_64BIT;
11164 case UNSPEC_TPOFF:
11165 case UNSPEC_NTPOFF:
11166 x = XVECEXP (x, 0, 0);
11167 return (GET_CODE (x) == SYMBOL_REF
11168 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11169 case UNSPEC_DTPOFF:
11170 x = XVECEXP (x, 0, 0);
11171 return (GET_CODE (x) == SYMBOL_REF
11172 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11173 default:
11174 return false;
11177 /* We must have drilled down to a symbol. */
11178 if (GET_CODE (x) == LABEL_REF)
11179 return true;
11180 if (GET_CODE (x) != SYMBOL_REF)
11181 return false;
11182 /* FALLTHRU */
11184 case SYMBOL_REF:
11185 /* TLS symbols are never valid. */
11186 if (SYMBOL_REF_TLS_MODEL (x))
11187 return false;
11189 /* DLLIMPORT symbols are never valid. */
11190 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11191 && SYMBOL_REF_DLLIMPORT_P (x))
11192 return false;
11194 #if TARGET_MACHO
11195 /* mdynamic-no-pic */
11196 if (MACHO_DYNAMIC_NO_PIC_P)
11197 return machopic_symbol_defined_p (x);
11198 #endif
11200 /* External function address should be loaded
11201 via the GOT slot to avoid PLT. */
11202 if (ix86_force_load_from_GOT_p (x))
11203 return false;
11205 break;
11207 CASE_CONST_SCALAR_INT:
11208 if (ix86_endbr_immediate_operand (x, VOIDmode))
11209 return false;
11211 switch (mode)
11213 case E_TImode:
11214 if (TARGET_64BIT)
11215 return true;
11216 /* FALLTHRU */
11217 case E_OImode:
11218 case E_XImode:
11219 if (!standard_sse_constant_p (x, mode)
11220 && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512
11221 ? XImode
11222 : (TARGET_AVX
11223 ? OImode
11224 : (TARGET_SSE2
11225 ? TImode : DImode))) < GET_MODE_SIZE (mode))
11226 return false;
11227 default:
11228 break;
11230 break;
11232 case CONST_VECTOR:
11233 if (!standard_sse_constant_p (x, mode))
11234 return false;
11235 break;
11237 case CONST_DOUBLE:
11238 if (mode == E_BFmode)
11239 return false;
11241 default:
11242 break;
11245 /* Otherwise we handle everything else in the move patterns. */
11246 return true;
11249 /* Determine if it's legal to put X into the constant pool. This
11250 is not possible for the address of thread-local symbols, which
11251 is checked above. */
11253 static bool
11254 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
11256 /* We can put any immediate constant in memory. */
11257 switch (GET_CODE (x))
11259 CASE_CONST_ANY:
11260 return false;
11262 default:
11263 break;
11266 return !ix86_legitimate_constant_p (mode, x);
11269 /* Return a unique alias set for the GOT. */
11271 alias_set_type
11272 ix86_GOT_alias_set (void)
11274 static alias_set_type set = -1;
11275 if (set == -1)
11276 set = new_alias_set ();
11277 return set;
11280 /* Nonzero if the constant value X is a legitimate general operand
11281 when generating PIC code. It is given that flag_pic is on and
11282 that X satisfies CONSTANT_P. */
11284 bool
11285 legitimate_pic_operand_p (rtx x)
11287 rtx inner;
11289 switch (GET_CODE (x))
11291 case CONST:
11292 inner = XEXP (x, 0);
11293 if (GET_CODE (inner) == PLUS
11294 && CONST_INT_P (XEXP (inner, 1)))
11295 inner = XEXP (inner, 0);
11297 /* Only some unspecs are valid as "constants". */
11298 if (GET_CODE (inner) == UNSPEC)
11299 switch (XINT (inner, 1))
11301 case UNSPEC_GOT:
11302 case UNSPEC_GOTOFF:
11303 case UNSPEC_PLTOFF:
11304 return TARGET_64BIT;
11305 case UNSPEC_TPOFF:
11306 x = XVECEXP (inner, 0, 0);
11307 return (GET_CODE (x) == SYMBOL_REF
11308 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11309 case UNSPEC_MACHOPIC_OFFSET:
11310 return legitimate_pic_address_disp_p (x);
11311 default:
11312 return false;
11314 /* FALLTHRU */
11316 case SYMBOL_REF:
11317 case LABEL_REF:
11318 return legitimate_pic_address_disp_p (x);
11320 default:
11321 return true;
11325 /* Determine if a given CONST RTX is a valid memory displacement
11326 in PIC mode. */
11328 bool
11329 legitimate_pic_address_disp_p (rtx disp)
11331 bool saw_plus;
11333 /* In 64bit mode we can allow direct addresses of symbols and labels
11334 when they are not dynamic symbols. */
11335 if (TARGET_64BIT)
11337 rtx op0 = disp, op1;
11339 switch (GET_CODE (disp))
11341 case LABEL_REF:
11342 return true;
11344 case CONST:
11345 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11346 break;
11347 op0 = XEXP (XEXP (disp, 0), 0);
11348 op1 = XEXP (XEXP (disp, 0), 1);
11349 if (!CONST_INT_P (op1))
11350 break;
11351 if (GET_CODE (op0) == UNSPEC
11352 && (XINT (op0, 1) == UNSPEC_DTPOFF
11353 || XINT (op0, 1) == UNSPEC_NTPOFF)
11354 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
11355 return true;
11356 if (INTVAL (op1) >= 16*1024*1024
11357 || INTVAL (op1) < -16*1024*1024)
11358 break;
11359 if (GET_CODE (op0) == LABEL_REF)
11360 return true;
11361 if (GET_CODE (op0) == CONST
11362 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11363 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11364 return true;
11365 if (GET_CODE (op0) == UNSPEC
11366 && XINT (op0, 1) == UNSPEC_PCREL)
11367 return true;
11368 if (GET_CODE (op0) != SYMBOL_REF)
11369 break;
11370 /* FALLTHRU */
11372 case SYMBOL_REF:
11373 /* TLS references should always be enclosed in UNSPEC.
11374 A dllimported symbol always needs to be resolved. */
11375 if (SYMBOL_REF_TLS_MODEL (op0)
11376 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
11377 return false;
11379 if (TARGET_PECOFF)
11381 #if TARGET_PECOFF
11382 if (is_imported_p (op0))
11383 return true;
11384 #endif
11386 if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
11387 break;
11389 /* Non-external-weak function symbols need to be resolved only
11390 for the large model. Non-external symbols don't need to be
11391 resolved for large and medium models. For the small model,
11392 we don't need to resolve anything here. */
11393 if ((ix86_cmodel != CM_LARGE_PIC
11394 && SYMBOL_REF_FUNCTION_P (op0)
11395 && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
11396 || !SYMBOL_REF_EXTERNAL_P (op0)
11397 || ix86_cmodel == CM_SMALL_PIC)
11398 return true;
11400 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
11401 && (SYMBOL_REF_LOCAL_P (op0)
11402 || ((ix86_direct_extern_access
11403 && !(SYMBOL_REF_DECL (op0)
11404 && lookup_attribute ("nodirect_extern_access",
11405 DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
11406 && HAVE_LD_PIE_COPYRELOC
11407 && flag_pie
11408 && !SYMBOL_REF_WEAK (op0)
11409 && !SYMBOL_REF_FUNCTION_P (op0)))
11410 && ix86_cmodel != CM_LARGE_PIC)
11411 return true;
11412 break;
11414 default:
11415 break;
11418 if (GET_CODE (disp) != CONST)
11419 return false;
11420 disp = XEXP (disp, 0);
11422 if (TARGET_64BIT)
11424 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
11425 of GOT tables. We should not need these anyway. */
11426 if (GET_CODE (disp) != UNSPEC
11427 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11428 && XINT (disp, 1) != UNSPEC_GOTOFF
11429 && XINT (disp, 1) != UNSPEC_PCREL
11430 && XINT (disp, 1) != UNSPEC_PLTOFF))
11431 return false;
11433 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11434 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11435 return false;
11436 return true;
11439 saw_plus = false;
11440 if (GET_CODE (disp) == PLUS)
11442 if (!CONST_INT_P (XEXP (disp, 1)))
11443 return false;
11444 disp = XEXP (disp, 0);
11445 saw_plus = true;
11448 if (TARGET_MACHO && darwin_local_data_pic (disp))
11449 return true;
11451 if (GET_CODE (disp) != UNSPEC)
11452 return false;
11454 switch (XINT (disp, 1))
11456 case UNSPEC_GOT:
11457 if (saw_plus)
11458 return false;
11459 /* We need to check for both symbols and labels because VxWorks loads
11460 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11461 details. */
11462 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11463 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11464 case UNSPEC_GOTOFF:
11465 /* Refuse GOTOFF in 64-bit mode since it is always 64 bits wide when used.
11466 The ABI also specifies a 32-bit relocation, but we don't produce it in
11467 the small PIC model at all. */
11468 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11469 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11470 && !TARGET_64BIT)
11471 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11472 return false;
11473 case UNSPEC_GOTTPOFF:
11474 case UNSPEC_GOTNTPOFF:
11475 case UNSPEC_INDNTPOFF:
11476 if (saw_plus)
11477 return false;
11478 disp = XVECEXP (disp, 0, 0);
11479 return (GET_CODE (disp) == SYMBOL_REF
11480 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11481 case UNSPEC_NTPOFF:
11482 disp = XVECEXP (disp, 0, 0);
11483 return (GET_CODE (disp) == SYMBOL_REF
11484 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11485 case UNSPEC_DTPOFF:
11486 disp = XVECEXP (disp, 0, 0);
11487 return (GET_CODE (disp) == SYMBOL_REF
11488 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11491 return false;
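/* Illustrative shapes of displacements the 32-bit path above accepts
   (a rough sketch, not an exhaustive list):

     (const (unspec [foo] UNSPEC_GOTOFF))                      foo@GOTOFF
     (const (plus (unspec [foo] UNSPEC_GOTOFF) (const_int 4))) foo@GOTOFF+4
     (const (unspec [foo] UNSPEC_GOT))                         foo@GOT
     (const (unspec [foo] UNSPEC_NTPOFF))                      local-exec TLS

   whereas foo@GOT with an added constant is rejected because SAW_PLUS
   is set for UNSPEC_GOT.  */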
11494 /* Determine if OP is a suitable RTX for an address register.
11495 Return the naked register if a register or a register subreg is
11496 found, otherwise return NULL_RTX. */
11498 static rtx
11499 ix86_validate_address_register (rtx op)
11501 machine_mode mode = GET_MODE (op);
11503 /* Only SImode or DImode registers can form the address. */
11504 if (mode != SImode && mode != DImode)
11505 return NULL_RTX;
11507 if (REG_P (op))
11508 return op;
11509 else if (SUBREG_P (op))
11511 rtx reg = SUBREG_REG (op);
11513 if (!REG_P (reg))
11514 return NULL_RTX;
11516 mode = GET_MODE (reg);
11518 /* Don't allow SUBREGs that span more than a word. It can
11519 lead to spill failures when the register is one word out
11520 of a two word structure. */
11521 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11522 return NULL_RTX;
11524 /* Allow only SUBREGs of non-eliminable hard registers. */
11525 if (register_no_elim_operand (reg, mode))
11526 return reg;
11529 /* Op is not a register. */
11530 return NULL_RTX;
11533 /* Determine which memory address register set insn can use. */
11535 static enum attr_addr
11536 ix86_memory_address_reg_class (rtx_insn* insn)
11538 /* LRA can do some initialization with a NULL insn;
11539 return the maximum register class in that case. */
11540 enum attr_addr addr_rclass = ADDR_GPR32;
11542 if (!insn)
11543 return addr_rclass;
11545 if (asm_noperands (PATTERN (insn)) >= 0
11546 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
11547 return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
11549 /* Return maximum register class for unrecognized instructions. */
11550 if (INSN_CODE (insn) < 0)
11551 return addr_rclass;
11553 /* Try to recognize the insn before calling get_attr_addr.
11554 Save current recog_data and current alternative. */
11555 struct recog_data_d saved_recog_data = recog_data;
11556 int saved_alternative = which_alternative;
11558 /* Update recog_data for processing of alternatives. */
11559 extract_insn_cached (insn);
11561 /* If the current alternative is not set, loop through the enabled
11562 alternatives and get the most limited register class. */
11563 if (saved_alternative == -1)
11565 alternative_mask enabled = get_enabled_alternatives (insn);
11567 for (int i = 0; i < recog_data.n_alternatives; i++)
11569 if (!TEST_BIT (enabled, i))
11570 continue;
11572 which_alternative = i;
11573 addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
11576 else
11578 which_alternative = saved_alternative;
11579 addr_rclass = get_attr_addr (insn);
11582 recog_data = saved_recog_data;
11583 which_alternative = saved_alternative;
11585 return addr_rclass;
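/* A rough illustration of the selection above: for a recognized insn
   whose enabled alternatives carry the "addr" attribute values GPR32
   and GPR16, the MIN computation picks GPR16, i.e. the most limited
   class among the alternatives wins (assuming the attr_addr enum is
   ordered GPR8 < GPR16 < GPR32).  Inline asm is instead governed by
   ix86_apx_inline_asm_use_gpr32, handled at the top.  */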
11588 /* Return memory address register class insn can use. */
11590 enum reg_class
11591 ix86_insn_base_reg_class (rtx_insn* insn)
11593 switch (ix86_memory_address_reg_class (insn))
11595 case ADDR_GPR8:
11596 return LEGACY_GENERAL_REGS;
11597 case ADDR_GPR16:
11598 return GENERAL_GPR16;
11599 case ADDR_GPR32:
11600 break;
11601 default:
11602 gcc_unreachable ();
11605 return BASE_REG_CLASS;
11608 bool
11609 ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
11611 switch (ix86_memory_address_reg_class (insn))
11613 case ADDR_GPR8:
11614 return LEGACY_INT_REGNO_P (regno);
11615 case ADDR_GPR16:
11616 return GENERAL_GPR16_REGNO_P (regno);
11617 case ADDR_GPR32:
11618 break;
11619 default:
11620 gcc_unreachable ();
11623 return GENERAL_REGNO_P (regno);
11626 enum reg_class
11627 ix86_insn_index_reg_class (rtx_insn* insn)
11629 switch (ix86_memory_address_reg_class (insn))
11631 case ADDR_GPR8:
11632 return LEGACY_INDEX_REGS;
11633 case ADDR_GPR16:
11634 return INDEX_GPR16;
11635 case ADDR_GPR32:
11636 break;
11637 default:
11638 gcc_unreachable ();
11641 return INDEX_REG_CLASS;
11644 /* Recognizes RTL expressions that are valid memory addresses for an
11645 instruction. The MODE argument is the machine mode for the MEM
11646 expression that wants to use this address.
11648 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11649 convert common non-canonical forms to canonical form so that they will
11650 be recognized. */
11652 static bool
11653 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
11654 code_helper = ERROR_MARK)
11656 struct ix86_address parts;
11657 rtx base, index, disp;
11658 HOST_WIDE_INT scale;
11659 addr_space_t seg;
11661 if (ix86_decompose_address (addr, &parts) == 0)
11662 /* Decomposition failed. */
11663 return false;
11665 base = parts.base;
11666 index = parts.index;
11667 disp = parts.disp;
11668 scale = parts.scale;
11669 seg = parts.seg;
11671 /* Validate base register. */
11672 if (base)
11674 rtx reg = ix86_validate_address_register (base);
11676 if (reg == NULL_RTX)
11677 return false;
11679 unsigned int regno = REGNO (reg);
11680 if ((strict && !REGNO_OK_FOR_BASE_P (regno))
11681 || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
11682 /* Base is not valid. */
11683 return false;
11686 /* Validate index register. */
11687 if (index)
11689 rtx reg = ix86_validate_address_register (index);
11691 if (reg == NULL_RTX)
11692 return false;
11694 unsigned int regno = REGNO (reg);
11695 if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
11696 || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
11697 /* Index is not valid. */
11698 return false;
11701 /* Index and base should have the same mode. */
11702 if (base && index
11703 && GET_MODE (base) != GET_MODE (index))
11704 return false;
11706 /* Address override works only on the (%reg) part of %fs:(%reg). */
11707 if (seg != ADDR_SPACE_GENERIC
11708 && ((base && GET_MODE (base) != word_mode)
11709 || (index && GET_MODE (index) != word_mode)))
11710 return false;
11712 /* Validate scale factor. */
11713 if (scale != 1)
11715 if (!index)
11716 /* Scale without index. */
11717 return false;
11719 if (scale != 2 && scale != 4 && scale != 8)
11720 /* Scale is not a valid multiplier. */
11721 return false;
11724 /* Validate displacement. */
11725 if (disp)
11727 if (ix86_endbr_immediate_operand (disp, VOIDmode))
11728 return false;
11730 if (GET_CODE (disp) == CONST
11731 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11732 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11733 switch (XINT (XEXP (disp, 0), 1))
11735 /* Refuse GOTOFF and GOT in 64-bit mode since they are always 64 bits
11736 wide when used. The ABI also specifies 32-bit relocations, but we
11737 don't produce them at all and use IP-relative addressing instead.
11738 Allow GOT in 32-bit mode for both PIC and non-PIC if the symbol
11739 should be loaded via the GOT. */
11740 case UNSPEC_GOT:
11741 if (!TARGET_64BIT
11742 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11743 goto is_legitimate_pic;
11744 /* FALLTHRU */
11745 case UNSPEC_GOTOFF:
11746 gcc_assert (flag_pic);
11747 if (!TARGET_64BIT)
11748 goto is_legitimate_pic;
11750 /* 64bit address unspec. */
11751 return false;
11753 case UNSPEC_GOTPCREL:
11754 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11755 goto is_legitimate_pic;
11756 /* FALLTHRU */
11757 case UNSPEC_PCREL:
11758 gcc_assert (flag_pic);
11759 goto is_legitimate_pic;
11761 case UNSPEC_GOTTPOFF:
11762 case UNSPEC_GOTNTPOFF:
11763 case UNSPEC_INDNTPOFF:
11764 case UNSPEC_NTPOFF:
11765 case UNSPEC_DTPOFF:
11766 break;
11768 default:
11769 /* Invalid address unspec. */
11770 return false;
11773 else if (SYMBOLIC_CONST (disp)
11774 && (flag_pic
11775 #if TARGET_MACHO
11776 || (MACHOPIC_INDIRECT
11777 && !machopic_operand_p (disp))
11778 #endif
11782 is_legitimate_pic:
11783 if (TARGET_64BIT && (index || base))
11785 /* foo@dtpoff(%rX) is ok. */
11786 if (GET_CODE (disp) != CONST
11787 || GET_CODE (XEXP (disp, 0)) != PLUS
11788 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11789 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11790 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11791 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11792 /* Non-constant pic memory reference. */
11793 return false;
11795 else if ((!TARGET_MACHO || flag_pic)
11796 && ! legitimate_pic_address_disp_p (disp))
11797 /* Displacement is an invalid pic construct. */
11798 return false;
11799 #if TARGET_MACHO
11800 else if (MACHO_DYNAMIC_NO_PIC_P
11801 && !ix86_legitimate_constant_p (Pmode, disp))
11802 /* Displacement must be referenced via a non_lazy_pointer. */
11803 return false;
11804 #endif
11806 /* This code used to verify that a symbolic pic displacement
11807 includes the pic_offset_table_rtx register.
11809 While this is a good idea, unfortunately these constructs may
11810 be created by the "adds using lea" optimization for incorrect
11811 code like:
11813 int a;
11814 int foo(int i)
11816 return *(&a+i);
11819 This code is nonsensical, but it results in addressing the
11820 GOT table with a pic_offset_table_rtx base. We can't
11821 just refuse it easily, since it gets matched by the
11822 "addsi3" pattern, which later gets split to an lea when the
11823 output register differs from the input. While this
11824 could be handled by a separate addsi pattern for this case
11825 that never results in an lea, disabling this test seems to be
11826 the easier and correct fix for the crash. */
11828 else if (GET_CODE (disp) != LABEL_REF
11829 && !CONST_INT_P (disp)
11830 && (GET_CODE (disp) != CONST
11831 || !ix86_legitimate_constant_p (Pmode, disp))
11832 && (GET_CODE (disp) != SYMBOL_REF
11833 || !ix86_legitimate_constant_p (Pmode, disp)))
11834 /* Displacement is not constant. */
11835 return false;
11836 else if (TARGET_64BIT
11837 && !x86_64_immediate_operand (disp, VOIDmode))
11838 /* Displacement is out of range. */
11839 return false;
11840 /* In x32 mode, constant addresses are sign extended to 64 bits, so
11841 we have to reject addresses in the range 0x80000000 to 0xffffffff. */
11842 else if (TARGET_X32 && !(index || base)
11843 && CONST_INT_P (disp)
11844 && val_signbit_known_set_p (SImode, INTVAL (disp)))
11845 return false;
11848 /* Everything looks valid. */
11849 return true;
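/* A few illustrative cases for the checks above (a rough sketch):

     (plus (reg) (mult (reg) (const_int 4)))        base + index*4, valid
     (plus (reg) (mult (reg) (const_int 3)))        rejected, bad scale
     (plus (reg:SI) (mult (reg:DI) (const_int 2)))  rejected, mode mismatch
     (mult (reg) (const_int 8))                     index*8, no base, valid

   subject to the register, segment and displacement checks performed
   above.  */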
11852 /* Determine if a given RTX is a valid constant address. */
11854 bool
11855 constant_address_p (rtx x)
11857 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11861 /* Return a legitimate reference for ORIG (an address) using the
11862 register REG. If REG is 0, a new pseudo is generated.
11864 There are two types of references that must be handled:
11866 1. Global data references must load the address from the GOT, via
11867 the PIC reg. An insn is emitted to do this load, and the reg is
11868 returned.
11870 2. Static data references, constant pool addresses, and code labels
11871 compute the address as an offset from the GOT, whose base is in
11872 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11873 differentiate them from global data objects. The returned
11874 address is the PIC reg + an unspec constant.
11876 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11877 reg also appears in the address. */
11880 legitimize_pic_address (rtx orig, rtx reg)
11882 rtx addr = orig;
11883 rtx new_rtx = orig;
11885 #if TARGET_MACHO
11886 if (TARGET_MACHO && !TARGET_64BIT)
11888 if (reg == 0)
11889 reg = gen_reg_rtx (Pmode);
11890 /* Use the generic Mach-O PIC machinery. */
11891 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11893 #endif
11895 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11897 #if TARGET_PECOFF
11898 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11899 if (tmp)
11900 return tmp;
11901 #endif
11904 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11905 new_rtx = addr;
11906 else if ((!TARGET_64BIT
11907 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
11908 && !TARGET_PECOFF
11909 && gotoff_operand (addr, Pmode))
11911 /* This symbol may be referenced via a displacement
11912 from the PIC base address (@GOTOFF). */
11913 if (GET_CODE (addr) == CONST)
11914 addr = XEXP (addr, 0);
11916 if (GET_CODE (addr) == PLUS)
11918 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11919 UNSPEC_GOTOFF);
11920 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11922 else
11923 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11925 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11927 if (TARGET_64BIT)
11928 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11930 if (reg != 0)
11932 gcc_assert (REG_P (reg));
11933 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11934 new_rtx, reg, 1, OPTAB_DIRECT);
11936 else
11937 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11939 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11940 /* We can't always use @GOTOFF for text labels
11941 on VxWorks, see gotoff_operand. */
11942 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11944 #if TARGET_PECOFF
11945 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11946 if (tmp)
11947 return tmp;
11948 #endif
11950 /* For x64 PE-COFF there is no GOT table,
11951 so we use the address directly. */
11952 if (TARGET_64BIT && TARGET_PECOFF)
11954 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11955 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11957 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11959 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
11960 UNSPEC_GOTPCREL);
11961 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11962 new_rtx = gen_const_mem (Pmode, new_rtx);
11963 set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
11965 else
11967 /* This symbol must be referenced via a load
11968 from the Global Offset Table (@GOT). */
11969 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11970 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11972 if (TARGET_64BIT)
11973 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11975 if (reg != 0)
11977 gcc_assert (REG_P (reg));
11978 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11979 new_rtx, reg, 1, OPTAB_DIRECT);
11981 else
11982 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11984 new_rtx = gen_const_mem (Pmode, new_rtx);
11985 set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
11988 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11990 else
11992 if (CONST_INT_P (addr)
11993 && !x86_64_immediate_operand (addr, VOIDmode))
11994 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
11995 else if (GET_CODE (addr) == CONST)
11997 addr = XEXP (addr, 0);
11999 /* We must match stuff we generate before. Assume the only
12000 unspecs that can get here are ours. Not that we could do
12001 anything with them anyway.... */
12002 if (GET_CODE (addr) == UNSPEC
12003 || (GET_CODE (addr) == PLUS
12004 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12005 return orig;
12006 gcc_assert (GET_CODE (addr) == PLUS);
12009 if (GET_CODE (addr) == PLUS)
12011 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12013 /* Check first to see if this is a constant
12014 offset from a @GOTOFF symbol reference. */
12015 if (!TARGET_PECOFF
12016 && gotoff_operand (op0, Pmode)
12017 && CONST_INT_P (op1))
12019 if (!TARGET_64BIT)
12021 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12022 UNSPEC_GOTOFF);
12023 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12024 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12026 if (reg != 0)
12028 gcc_assert (REG_P (reg));
12029 new_rtx = expand_simple_binop (Pmode, PLUS,
12030 pic_offset_table_rtx,
12031 new_rtx, reg, 1,
12032 OPTAB_DIRECT);
12034 else
12035 new_rtx
12036 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12038 else
12040 if (INTVAL (op1) < -16*1024*1024
12041 || INTVAL (op1) >= 16*1024*1024)
12043 if (!x86_64_immediate_operand (op1, Pmode))
12044 op1 = force_reg (Pmode, op1);
12046 new_rtx
12047 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12051 else
12053 rtx base = legitimize_pic_address (op0, reg);
12054 machine_mode mode = GET_MODE (base);
12055 new_rtx
12056 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
12058 if (CONST_INT_P (new_rtx))
12060 if (INTVAL (new_rtx) < -16*1024*1024
12061 || INTVAL (new_rtx) >= 16*1024*1024)
12063 if (!x86_64_immediate_operand (new_rtx, mode))
12064 new_rtx = force_reg (mode, new_rtx);
12066 new_rtx
12067 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
12069 else
12070 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
12072 else
12074 /* For %rip addressing, we have to use
12075 just disp32, with neither base nor index. */
12076 if (TARGET_64BIT
12077 && (GET_CODE (base) == SYMBOL_REF
12078 || GET_CODE (base) == LABEL_REF))
12079 base = force_reg (mode, base);
12080 if (GET_CODE (new_rtx) == PLUS
12081 && CONSTANT_P (XEXP (new_rtx, 1)))
12083 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
12084 new_rtx = XEXP (new_rtx, 1);
12086 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
12091 return new_rtx;
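/* Illustrative results of legitimize_pic_address (a rough sketch):

   32-bit SVR4 PIC:
     local symbol  foo -> (plus pic_offset_table_rtx
                                (const (unspec [foo] UNSPEC_GOTOFF)))
     global symbol bar -> (mem (plus pic_offset_table_rtx
                                     (const (unspec [bar] UNSPEC_GOT))))

   x86-64 small-model PIC:
     a global symbol becomes a load from
     (const (unspec [bar] UNSPEC_GOTPCREL)), i.e. bar@GOTPCREL(%rip).  */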
12094 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12096 static rtx
12097 get_thread_pointer (machine_mode tp_mode, bool to_reg)
12099 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12101 if (GET_MODE (tp) != tp_mode)
12103 gcc_assert (GET_MODE (tp) == SImode);
12104 gcc_assert (tp_mode == DImode);
12106 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12109 if (to_reg)
12110 tp = copy_to_mode_reg (tp_mode, tp);
12112 return tp;
12115 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12117 static GTY(()) rtx ix86_tls_symbol;
12119 static rtx
12120 ix86_tls_get_addr (void)
12122 if (!ix86_tls_symbol)
12124 const char *sym
12125 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12126 ? "___tls_get_addr" : "__tls_get_addr");
12128 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12131 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
12133 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
12134 UNSPEC_PLTOFF);
12135 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
12136 gen_rtx_CONST (Pmode, unspec));
12139 return ix86_tls_symbol;
12142 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12144 static GTY(()) rtx ix86_tls_module_base_symbol;
12147 ix86_tls_module_base (void)
12149 if (!ix86_tls_module_base_symbol)
12151 ix86_tls_module_base_symbol
12152 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
12154 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12155 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12158 return ix86_tls_module_base_symbol;
12161 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12162 false if we expect this to be used for a memory address and true if
12163 we expect to load the address into a register. */
12166 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12168 rtx dest, base, off;
12169 rtx pic = NULL_RTX, tp = NULL_RTX;
12170 machine_mode tp_mode = Pmode;
12171 int type;
12173 /* Fall back to the global dynamic model if the toolchain cannot support
12174 local dynamic. */
12175 if (TARGET_SUN_TLS && !TARGET_64BIT
12176 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
12177 && model == TLS_MODEL_LOCAL_DYNAMIC)
12178 model = TLS_MODEL_GLOBAL_DYNAMIC;
12180 switch (model)
12182 case TLS_MODEL_GLOBAL_DYNAMIC:
12183 if (!TARGET_64BIT)
12185 if (flag_pic && !TARGET_PECOFF)
12186 pic = pic_offset_table_rtx;
12187 else
12189 pic = gen_reg_rtx (Pmode);
12190 emit_insn (gen_set_got (pic));
12194 if (TARGET_GNU2_TLS)
12196 dest = gen_reg_rtx (ptr_mode);
12197 if (TARGET_64BIT)
12198 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
12199 else
12200 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12202 tp = get_thread_pointer (ptr_mode, true);
12203 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12204 if (GET_MODE (dest) != Pmode)
12205 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12206 dest = force_reg (Pmode, dest);
12208 if (GET_MODE (x) != Pmode)
12209 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12211 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12213 else
12215 rtx caddr = ix86_tls_get_addr ();
12217 dest = gen_reg_rtx (Pmode);
12218 if (TARGET_64BIT)
12220 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12221 rtx_insn *insns;
12223 start_sequence ();
12224 emit_call_insn
12225 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
12226 insns = get_insns ();
12227 end_sequence ();
12229 if (GET_MODE (x) != Pmode)
12230 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12232 RTL_CONST_CALL_P (insns) = 1;
12233 emit_libcall_block (insns, dest, rax, x);
12235 else
12236 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12238 break;
12240 case TLS_MODEL_LOCAL_DYNAMIC:
12241 if (!TARGET_64BIT)
12243 if (flag_pic)
12244 pic = pic_offset_table_rtx;
12245 else
12247 pic = gen_reg_rtx (Pmode);
12248 emit_insn (gen_set_got (pic));
12252 if (TARGET_GNU2_TLS)
12254 rtx tmp = ix86_tls_module_base ();
12256 base = gen_reg_rtx (ptr_mode);
12257 if (TARGET_64BIT)
12258 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
12259 else
12260 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12262 tp = get_thread_pointer (ptr_mode, true);
12263 if (GET_MODE (base) != Pmode)
12264 base = gen_rtx_ZERO_EXTEND (Pmode, base);
12265 base = force_reg (Pmode, base);
12267 else
12269 rtx caddr = ix86_tls_get_addr ();
12271 base = gen_reg_rtx (Pmode);
12272 if (TARGET_64BIT)
12274 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12275 rtx_insn *insns;
12276 rtx eqv;
12278 start_sequence ();
12279 emit_call_insn
12280 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
12281 insns = get_insns ();
12282 end_sequence ();
12284 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12285 share the LD_BASE result with other LD model accesses. */
12286 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12287 UNSPEC_TLS_LD_BASE);
12289 RTL_CONST_CALL_P (insns) = 1;
12290 emit_libcall_block (insns, base, rax, eqv);
12292 else
12293 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12296 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12297 off = gen_rtx_CONST (Pmode, off);
12299 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12301 if (TARGET_GNU2_TLS)
12303 if (GET_MODE (tp) != Pmode)
12305 dest = lowpart_subreg (ptr_mode, dest, Pmode);
12306 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12307 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12309 else
12310 dest = gen_rtx_PLUS (Pmode, tp, dest);
12311 dest = force_reg (Pmode, dest);
12313 if (GET_MODE (x) != Pmode)
12314 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12316 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12318 break;
12320 case TLS_MODEL_INITIAL_EXEC:
12321 if (TARGET_64BIT)
12323 if (TARGET_SUN_TLS && !TARGET_X32)
12325 /* The Sun linker took the AMD64 TLS spec literally
12326 and can only handle %rax as the destination of the
12327 initial executable code sequence. */
12329 dest = gen_reg_rtx (DImode);
12330 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12331 return dest;
12334 /* Generate DImode references to avoid %fs:(%reg32)
12335 problems and the linker IE->LE relaxation bug. */
12336 tp_mode = DImode;
12337 pic = NULL;
12338 type = UNSPEC_GOTNTPOFF;
12340 else if (flag_pic)
12342 pic = pic_offset_table_rtx;
12343 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12345 else if (!TARGET_ANY_GNU_TLS)
12347 pic = gen_reg_rtx (Pmode);
12348 emit_insn (gen_set_got (pic));
12349 type = UNSPEC_GOTTPOFF;
12351 else
12353 pic = NULL;
12354 type = UNSPEC_INDNTPOFF;
12357 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
12358 off = gen_rtx_CONST (tp_mode, off);
12359 if (pic)
12360 off = gen_rtx_PLUS (tp_mode, pic, off);
12361 off = gen_const_mem (tp_mode, off);
12362 set_mem_alias_set (off, GOT_ALIAS_SET);
12364 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12366 base = get_thread_pointer (tp_mode,
12367 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12368 off = force_reg (tp_mode, off);
12369 dest = gen_rtx_PLUS (tp_mode, base, off);
12370 if (tp_mode != Pmode)
12371 dest = convert_to_mode (Pmode, dest, 1);
12373 else
12375 base = get_thread_pointer (Pmode, true);
12376 dest = gen_reg_rtx (Pmode);
12377 emit_insn (gen_sub3_insn (dest, base, off));
12379 break;
12381 case TLS_MODEL_LOCAL_EXEC:
12382 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12383 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12384 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12385 off = gen_rtx_CONST (Pmode, off);
12387 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12389 base = get_thread_pointer (Pmode,
12390 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12391 return gen_rtx_PLUS (Pmode, base, off);
12393 else
12395 base = get_thread_pointer (Pmode, true);
12396 dest = gen_reg_rtx (Pmode);
12397 emit_insn (gen_sub3_insn (dest, base, off));
12399 break;
12401 default:
12402 gcc_unreachable ();
12405 return dest;
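/* For reference, the x86-64 assembly these exec models roughly expand
   to (a sketch; with TARGET_TLS_DIRECT_SEG_REFS the final access may
   instead fold into a %fs: segment override):

     initial-exec:  movq  %fs:0, %reg
                    addq  x@gottpoff(%rip), %reg
     local-exec:    movq  %fs:0, %reg
                    leaq  x@tpoff(%reg), %reg

   The global- and local-dynamic models call __tls_get_addr (or use the
   GNU2 descriptor patterns) as emitted above.  */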
12408 /* Return true if the TLS address requires an insn using integer registers.
12409 It's used to prevent KMOV/VMOV in TLS code sequences, which require integer
12410 MOV instructions; refer to PR103275. */
12411 bool
12412 ix86_gpr_tls_address_pattern_p (rtx mem)
12414 gcc_assert (MEM_P (mem));
12416 rtx addr = XEXP (mem, 0);
12417 subrtx_var_iterator::array_type array;
12418 FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
12420 rtx op = *iter;
12421 if (GET_CODE (op) == UNSPEC)
12422 switch (XINT (op, 1))
12424 case UNSPEC_GOTNTPOFF:
12425 return true;
12426 case UNSPEC_TPOFF:
12427 if (!TARGET_64BIT)
12428 return true;
12429 break;
12430 default:
12431 break;
12435 return false;
12438 /* Return true if OP refers to a TLS address. */
12439 bool
12440 ix86_tls_address_pattern_p (rtx op)
12442 subrtx_var_iterator::array_type array;
12443 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
12445 rtx op = *iter;
12446 if (MEM_P (op))
12448 rtx *x = &XEXP (op, 0);
12449 while (GET_CODE (*x) == PLUS)
12451 int i;
12452 for (i = 0; i < 2; i++)
12454 rtx u = XEXP (*x, i);
12455 if (GET_CODE (u) == ZERO_EXTEND)
12456 u = XEXP (u, 0);
12457 if (GET_CODE (u) == UNSPEC
12458 && XINT (u, 1) == UNSPEC_TP)
12459 return true;
12461 x = &XEXP (*x, 0);
12464 iter.skip_subrtxes ();
12468 return false;
12471 /* Rewrite *LOC so that it refers to a default TLS address space. */
12472 static void
12473 ix86_rewrite_tls_address_1 (rtx *loc)
12475 subrtx_ptr_iterator::array_type array;
12476 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
12478 rtx *loc = *iter;
12479 if (MEM_P (*loc))
12481 rtx addr = XEXP (*loc, 0);
12482 rtx *x = &addr;
12483 while (GET_CODE (*x) == PLUS)
12485 int i;
12486 for (i = 0; i < 2; i++)
12488 rtx u = XEXP (*x, i);
12489 if (GET_CODE (u) == ZERO_EXTEND)
12490 u = XEXP (u, 0);
12491 if (GET_CODE (u) == UNSPEC
12492 && XINT (u, 1) == UNSPEC_TP)
12494 /* NB: Since address override only applies to the
12495 (reg32) part in fs:(reg32), return if address
12496 override is used. */
12497 if (Pmode != word_mode
12498 && REG_P (XEXP (*x, 1 - i)))
12499 return;
12501 addr_space_t as = DEFAULT_TLS_SEG_REG;
12503 *x = XEXP (*x, 1 - i);
12505 *loc = replace_equiv_address_nv (*loc, addr, true);
12506 set_mem_addr_space (*loc, as);
12507 return;
12510 x = &XEXP (*x, 0);
12513 iter.skip_subrtxes ();
12518 /* Rewrite an instruction pattern involving a TLS address
12519 so that it refers to a default TLS address space. */
12521 ix86_rewrite_tls_address (rtx pattern)
12523 pattern = copy_insn (pattern);
12524 ix86_rewrite_tls_address_1 (&pattern);
12525 return pattern;
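/* A rough example of the rewrite above: an address of the form

     (plus (unspec [(const_int 0)] UNSPEC_TP) (reg R))

   inside a MEM becomes plain (reg R) with the MEM's address space set
   to DEFAULT_TLS_SEG_REG, so the access is emitted with a %fs: (or
   %gs:) segment override instead of materializing the thread pointer
   in a register.  */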
12528 /* Try machine-dependent ways of modifying an illegitimate address
12529 to be legitimate. If we find one, return the new, valid address.
12530 This macro is used in only one place: `memory_address' in explow.cc.
12532 OLDX is the address as it was before break_out_memory_refs was called.
12533 In some cases it is useful to look at this to decide what needs to be done.
12535 It is always safe for this macro to do nothing. It exists to recognize
12536 opportunities to optimize the output.
12538 For the 80386, we handle X+REG by loading X into a register R and
12539 using R+REG. R will go in a general reg and indexing will be used.
12540 However, if REG is a broken-out memory address or multiplication,
12541 nothing needs to be done because REG can certainly go in a general reg.
12543 When -fpic is used, special handling is needed for symbolic references.
12544 See comments by legitimize_pic_address in i386.cc for details. */
12546 static rtx
12547 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
12549 bool changed = false;
12550 unsigned log;
12552 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12553 if (log)
12554 return legitimize_tls_address (x, (enum tls_model) log, false);
12555 if (GET_CODE (x) == CONST
12556 && GET_CODE (XEXP (x, 0)) == PLUS
12557 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12558 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12560 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12561 (enum tls_model) log, false);
12562 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12565 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12567 #if TARGET_PECOFF
12568 rtx tmp = legitimize_pe_coff_symbol (x, true);
12569 if (tmp)
12570 return tmp;
12571 #endif
12574 if (flag_pic && SYMBOLIC_CONST (x))
12575 return legitimize_pic_address (x, 0);
12577 #if TARGET_MACHO
12578 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12579 return machopic_indirect_data_reference (x, 0);
12580 #endif
12582 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12583 if (GET_CODE (x) == ASHIFT
12584 && CONST_INT_P (XEXP (x, 1))
12585 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12587 changed = true;
12588 log = INTVAL (XEXP (x, 1));
12589 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12590 GEN_INT (1 << log));
12593 if (GET_CODE (x) == PLUS)
12595 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12597 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12598 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12599 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12601 changed = true;
12602 log = INTVAL (XEXP (XEXP (x, 0), 1));
12603 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12604 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12605 GEN_INT (1 << log));
12608 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12609 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12610 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12612 changed = true;
12613 log = INTVAL (XEXP (XEXP (x, 1), 1));
12614 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12615 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12616 GEN_INT (1 << log));
12619 /* Put multiply first if it isn't already. */
12620 if (GET_CODE (XEXP (x, 1)) == MULT)
12622 std::swap (XEXP (x, 0), XEXP (x, 1));
12623 changed = true;
12626 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12627 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12628 created by virtual register instantiation, register elimination, and
12629 similar optimizations. */
12630 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12632 changed = true;
12633 x = gen_rtx_PLUS (Pmode,
12634 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12635 XEXP (XEXP (x, 1), 0)),
12636 XEXP (XEXP (x, 1), 1));
12639 /* Canonicalize
12640 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12641 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12642 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12643 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12644 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12645 && CONSTANT_P (XEXP (x, 1)))
12647 rtx constant;
12648 rtx other = NULL_RTX;
12650 if (CONST_INT_P (XEXP (x, 1)))
12652 constant = XEXP (x, 1);
12653 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12655 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12657 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12658 other = XEXP (x, 1);
12660 else
12661 constant = 0;
12663 if (constant)
12665 changed = true;
12666 x = gen_rtx_PLUS (Pmode,
12667 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12668 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12669 plus_constant (Pmode, other,
12670 INTVAL (constant)));
12674 if (changed && ix86_legitimate_address_p (mode, x, false))
12675 return x;
12677 if (GET_CODE (XEXP (x, 0)) == MULT)
12679 changed = true;
12680 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
12683 if (GET_CODE (XEXP (x, 1)) == MULT)
12685 changed = true;
12686 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
12689 if (changed
12690 && REG_P (XEXP (x, 1))
12691 && REG_P (XEXP (x, 0)))
12692 return x;
12694 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12696 changed = true;
12697 x = legitimize_pic_address (x, 0);
12700 if (changed && ix86_legitimate_address_p (mode, x, false))
12701 return x;
12703 if (REG_P (XEXP (x, 0)))
12705 rtx temp = gen_reg_rtx (Pmode);
12706 rtx val = force_operand (XEXP (x, 1), temp);
12707 if (val != temp)
12709 val = convert_to_mode (Pmode, val, 1);
12710 emit_move_insn (temp, val);
12713 XEXP (x, 1) = temp;
12714 return x;
12717 else if (REG_P (XEXP (x, 1)))
12719 rtx temp = gen_reg_rtx (Pmode);
12720 rtx val = force_operand (XEXP (x, 0), temp);
12721 if (val != temp)
12723 val = convert_to_mode (Pmode, val, 1);
12724 emit_move_insn (temp, val);
12727 XEXP (x, 0) = temp;
12728 return x;
12732 return x;
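/* Two illustrative canonicalizations performed above (a rough sketch):

     (plus (ashift (reg A) (const_int 2)) (reg B))
       -> (plus (mult (reg A) (const_int 4)) (reg B))

     (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
       -> (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))

   both of which then match the canonical forms ix86_decompose_address
   expects.  */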
12735 /* Print an integer constant expression in assembler syntax. Addition
12736 and subtraction are the only arithmetic that may appear in these
12737 expressions. FILE is the stdio stream to write to, X is the rtx, and
12738 CODE is the operand print code from the output string. */
12740 static void
12741 output_pic_addr_const (FILE *file, rtx x, int code)
12743 char buf[256];
12745 switch (GET_CODE (x))
12747 case PC:
12748 gcc_assert (flag_pic);
12749 putc ('.', file);
12750 break;
12752 case SYMBOL_REF:
12753 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
12754 output_addr_const (file, x);
12755 else
12757 const char *name = XSTR (x, 0);
12759 /* Mark the decl as referenced so that cgraph will
12760 output the function. */
12761 if (SYMBOL_REF_DECL (x))
12762 mark_decl_referenced (SYMBOL_REF_DECL (x));
12764 #if TARGET_MACHO
12765 if (MACHOPIC_INDIRECT
12766 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12767 name = machopic_indirection_name (x, /*stub_p=*/true);
12768 #endif
12769 assemble_name (file, name);
12771 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
12772 && code == 'P' && ix86_call_use_plt_p (x))
12773 fputs ("@PLT", file);
12774 break;
12776 case LABEL_REF:
12777 x = XEXP (x, 0);
12778 /* FALLTHRU */
12779 case CODE_LABEL:
12780 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12781 assemble_name (asm_out_file, buf);
12782 break;
12784 CASE_CONST_SCALAR_INT:
12785 output_addr_const (file, x);
12786 break;
12788 case CONST:
12789 /* This used to output parentheses around the expression,
12790 but that does not work on the 386 (either ATT or BSD assembler). */
12791 output_pic_addr_const (file, XEXP (x, 0), code);
12792 break;
12794 case CONST_DOUBLE:
12795 /* We can't handle floating point constants;
12796 TARGET_PRINT_OPERAND must handle them. */
12797 output_operand_lossage ("floating constant misused");
12798 break;
12800 case PLUS:
12801 /* Some assemblers need integer constants to appear first. */
12802 if (CONST_INT_P (XEXP (x, 0)))
12804 output_pic_addr_const (file, XEXP (x, 0), code);
12805 putc ('+', file);
12806 output_pic_addr_const (file, XEXP (x, 1), code);
12808 else
12810 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12811 output_pic_addr_const (file, XEXP (x, 1), code);
12812 putc ('+', file);
12813 output_pic_addr_const (file, XEXP (x, 0), code);
12815 break;
12817 case MINUS:
12818 if (!TARGET_MACHO)
12819 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12820 output_pic_addr_const (file, XEXP (x, 0), code);
12821 putc ('-', file);
12822 output_pic_addr_const (file, XEXP (x, 1), code);
12823 if (!TARGET_MACHO)
12824 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12825 break;
12827 case UNSPEC:
12828 gcc_assert (XVECLEN (x, 0) == 1);
12829 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12830 switch (XINT (x, 1))
12832 case UNSPEC_GOT:
12833 fputs ("@GOT", file);
12834 break;
12835 case UNSPEC_GOTOFF:
12836 fputs ("@GOTOFF", file);
12837 break;
12838 case UNSPEC_PLTOFF:
12839 fputs ("@PLTOFF", file);
12840 break;
12841 case UNSPEC_PCREL:
12842 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12843 "(%rip)" : "[rip]", file);
12844 break;
12845 case UNSPEC_GOTPCREL:
12846 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12847 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12848 break;
12849 case UNSPEC_GOTTPOFF:
12850 /* FIXME: This might be @TPOFF in Sun ld too. */
12851 fputs ("@gottpoff", file);
12852 break;
12853 case UNSPEC_TPOFF:
12854 fputs ("@tpoff", file);
12855 break;
12856 case UNSPEC_NTPOFF:
12857 if (TARGET_64BIT)
12858 fputs ("@tpoff", file);
12859 else
12860 fputs ("@ntpoff", file);
12861 break;
12862 case UNSPEC_DTPOFF:
12863 fputs ("@dtpoff", file);
12864 break;
12865 case UNSPEC_GOTNTPOFF:
12866 if (TARGET_64BIT)
12867 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12868 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12869 else
12870 fputs ("@gotntpoff", file);
12871 break;
12872 case UNSPEC_INDNTPOFF:
12873 fputs ("@indntpoff", file);
12874 break;
12875 #if TARGET_MACHO
12876 case UNSPEC_MACHOPIC_OFFSET:
12877 putc ('-', file);
12878 machopic_output_function_base_name (file);
12879 break;
12880 #endif
12881 default:
12882 output_operand_lossage ("invalid UNSPEC as operand");
12883 break;
12885 break;
12887 default:
12888 output_operand_lossage ("invalid expression as operand");
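/* For example (a rough sketch), (const (unspec [foo] UNSPEC_GOTOFF))
   is printed as "foo@GOTOFF", while on x86-64
   (const (unspec [foo] UNSPEC_GOTPCREL)) prints as
   "foo@GOTPCREL(%rip)" in AT&T syntax or "foo@GOTPCREL[rip]" in Intel
   syntax.  */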
12892 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12893 We need to emit DTP-relative relocations. */
12895 static void ATTRIBUTE_UNUSED
12896 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12898 fputs (ASM_LONG, file);
12899 output_addr_const (file, x);
12900 fputs ("@dtpoff", file);
12901 switch (size)
12903 case 4:
12904 break;
12905 case 8:
12906 fputs (", 0", file);
12907 break;
12908 default:
12909 gcc_unreachable ();
12913 /* Return true if X is a representation of the PIC register. This copes
12914 with calls from ix86_find_base_term, where the register might have
12915 been replaced by a cselib value. */
12917 static bool
12918 ix86_pic_register_p (rtx x)
12920 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12921 return (pic_offset_table_rtx
12922 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12923 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
12924 return true;
12925 else if (!REG_P (x))
12926 return false;
12927 else if (pic_offset_table_rtx)
12929 if (REGNO (x) == REGNO (pic_offset_table_rtx))
12930 return true;
12931 if (HARD_REGISTER_P (x)
12932 && !HARD_REGISTER_P (pic_offset_table_rtx)
12933 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
12934 return true;
12935 return false;
12937 else
12938 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12941 /* Helper function for ix86_delegitimize_address.
12942 Attempt to delegitimize TLS local-exec accesses. */
12944 static rtx
12945 ix86_delegitimize_tls_address (rtx orig_x)
12947 rtx x = orig_x, unspec;
12948 struct ix86_address addr;
12950 if (!TARGET_TLS_DIRECT_SEG_REFS)
12951 return orig_x;
12952 if (MEM_P (x))
12953 x = XEXP (x, 0);
12954 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12955 return orig_x;
12956 if (ix86_decompose_address (x, &addr) == 0
12957 || addr.seg != DEFAULT_TLS_SEG_REG
12958 || addr.disp == NULL_RTX
12959 || GET_CODE (addr.disp) != CONST)
12960 return orig_x;
12961 unspec = XEXP (addr.disp, 0);
12962 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12963 unspec = XEXP (unspec, 0);
12964 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12965 return orig_x;
12966 x = XVECEXP (unspec, 0, 0);
12967 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12968 if (unspec != XEXP (addr.disp, 0))
12969 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12970 if (addr.index)
12972 rtx idx = addr.index;
12973 if (addr.scale != 1)
12974 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12975 x = gen_rtx_PLUS (Pmode, idx, x);
12977 if (addr.base)
12978 x = gen_rtx_PLUS (Pmode, addr.base, x);
12979 if (MEM_P (orig_x))
12980 x = replace_equiv_address_nv (orig_x, x);
12981 return x;
12984 /* In the name of slightly smaller debug output, and to cater to
12985 general assembler lossage, recognize PIC+GOTOFF and turn it back
12986 into a direct symbol reference.
12988 On Darwin, this is necessary to avoid a crash, because Darwin
12989 has a different PIC label for each routine but the DWARF debugging
12990 information is not associated with any particular routine, so it's
12991 necessary to remove references to the PIC label from RTL stored by
12992 the DWARF output code.
12994 This helper is used in the normal ix86_delegitimize_address
12995 entrypoint (e.g. used in the target delegitimization hook) and
12996 in ix86_find_base_term. As a compile-time memory optimization, we
12997 avoid allocating rtxes that will not change anything in the outcome
12998 of the callers (find_base_value and find_base_term). */
13000 static inline rtx
13001 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
13003 rtx orig_x = delegitimize_mem_from_attrs (x);
13004 /* addend is NULL or some rtx if x is something+GOTOFF where
13005 something doesn't include the PIC register. */
13006 rtx addend = NULL_RTX;
13007 /* reg_addend is NULL or a multiple of some register. */
13008 rtx reg_addend = NULL_RTX;
13009 /* const_addend is NULL or a const_int. */
13010 rtx const_addend = NULL_RTX;
13011 /* This is the result, or NULL. */
13012 rtx result = NULL_RTX;
13014 x = orig_x;
13016 if (MEM_P (x))
13017 x = XEXP (x, 0);
13019 if (TARGET_64BIT)
13021 if (GET_CODE (x) == CONST
13022 && GET_CODE (XEXP (x, 0)) == PLUS
13023 && GET_MODE (XEXP (x, 0)) == Pmode
13024 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13025 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13026 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13028 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
13029 base. A CONST can't be arg_pointer_rtx based. */
13030 if (base_term_p && MEM_P (orig_x))
13031 return orig_x;
13032 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13033 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13034 if (MEM_P (orig_x))
13035 x = replace_equiv_address_nv (orig_x, x);
13036 return x;
13039 if (GET_CODE (x) == CONST
13040 && GET_CODE (XEXP (x, 0)) == UNSPEC
13041 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
13042 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
13043 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
13045 x = XVECEXP (XEXP (x, 0), 0, 0);
13046 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13048 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
13049 if (x == NULL_RTX)
13050 return orig_x;
13052 return x;
13055 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
13056 return ix86_delegitimize_tls_address (orig_x);
13058 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
13059 and -mcmodel=medium -fpic. */
13062 if (GET_CODE (x) != PLUS
13063 || GET_CODE (XEXP (x, 1)) != CONST)
13064 return ix86_delegitimize_tls_address (orig_x);
13066 if (ix86_pic_register_p (XEXP (x, 0)))
13067 /* %ebx + GOT/GOTOFF */
13069 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13071 /* %ebx + %reg * scale + GOT/GOTOFF */
13072 reg_addend = XEXP (x, 0);
13073 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13074 reg_addend = XEXP (reg_addend, 1);
13075 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13076 reg_addend = XEXP (reg_addend, 0);
13077 else
13079 reg_addend = NULL_RTX;
13080 addend = XEXP (x, 0);
13083 else
13084 addend = XEXP (x, 0);
13086 x = XEXP (XEXP (x, 1), 0);
13087 if (GET_CODE (x) == PLUS
13088 && CONST_INT_P (XEXP (x, 1)))
13090 const_addend = XEXP (x, 1);
13091 x = XEXP (x, 0);
13094 if (GET_CODE (x) == UNSPEC
13095 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13096 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
13097 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
13098 && !MEM_P (orig_x) && !addend)))
13099 result = XVECEXP (x, 0, 0);
13101 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
13102 && !MEM_P (orig_x))
13103 result = XVECEXP (x, 0, 0);
13105 if (! result)
13106 return ix86_delegitimize_tls_address (orig_x);
13108 /* For (PLUS something CONST_INT) both find_base_{value,term} just
13109 recurse on the first operand. */
13110 if (const_addend && !base_term_p)
13111 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13112 if (reg_addend)
13113 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13114 if (addend)
13116 /* If the rest of original X doesn't involve the PIC register, add
13117 addend and subtract pic_offset_table_rtx. This can happen e.g.
13118 for code like:
13119 leal (%ebx, %ecx, 4), %ecx
13121 movl foo@GOTOFF(%ecx), %edx
13122 in which case we return (%ecx - %ebx) + foo
13123 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
13124 and reload has completed. Don't do the latter for debug,
13125 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
13126 if (pic_offset_table_rtx
13127 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
13128 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13129 pic_offset_table_rtx),
13130 result);
13131 else if (base_term_p
13132 && pic_offset_table_rtx
13133 && !TARGET_MACHO
13134 && !TARGET_VXWORKS_RTP)
13136 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13137 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
13138 result = gen_rtx_PLUS (Pmode, tmp, result);
13140 else
13141 return orig_x;
13143 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13145 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
13146 if (result == NULL_RTX)
13147 return orig_x;
13149 return result;
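/* A rough example of what the delegitimization above recovers
   (32-bit PIC): the address

     (plus (reg %ebx) (const (unspec [foo] UNSPEC_GOTOFF)))

   is turned back into plain foo, and

     (plus (plus (reg %ebx) (mult (reg %ecx) (const_int 4)))
           (const (unspec [foo] UNSPEC_GOTOFF)))

   into (plus (mult (reg %ecx) (const_int 4)) foo).  */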
13152 /* The normal instantiation of the above template. */
13154 static rtx
13155 ix86_delegitimize_address (rtx x)
13157 return ix86_delegitimize_address_1 (x, false);
13160 /* If X is a machine specific address (i.e. a symbol or label being
13161 referenced as a displacement from the GOT implemented using an
13162 UNSPEC), then return the base term. Otherwise return X. */
13165 ix86_find_base_term (rtx x)
13167 rtx term;
13169 if (TARGET_64BIT)
13171 if (GET_CODE (x) != CONST)
13172 return x;
13173 term = XEXP (x, 0);
13174 if (GET_CODE (term) == PLUS
13175 && CONST_INT_P (XEXP (term, 1)))
13176 term = XEXP (term, 0);
13177 if (GET_CODE (term) != UNSPEC
13178 || (XINT (term, 1) != UNSPEC_GOTPCREL
13179 && XINT (term, 1) != UNSPEC_PCREL))
13180 return x;
13182 return XVECEXP (term, 0, 0);
13185 return ix86_delegitimize_address_1 (x, true);
13188 /* Return true if X shouldn't be emitted into the debug info.
13189 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
13190 symbol easily into the .debug_info section, so we do not
13191 delegitimize it, but instead assemble it as @gotoff.
13192 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
13193 assembles that as a _GLOBAL_OFFSET_TABLE_-. expression. */
13195 static bool
13196 ix86_const_not_ok_for_debug_p (rtx x)
13198 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
13199 return true;
13201 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
13202 return true;
13204 return false;
13207 static void
13208 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
13209 bool fp, FILE *file)
13211 const char *suffix;
13213 if (mode == CCFPmode)
13215 code = ix86_fp_compare_code_to_integer (code);
13216 mode = CCmode;
13218 if (reverse)
13219 code = reverse_condition (code);
13221 switch (code)
13223 case EQ:
13224 gcc_assert (mode != CCGZmode);
13225 switch (mode)
13227 case E_CCAmode:
13228 suffix = "a";
13229 break;
13230 case E_CCCmode:
13231 suffix = "c";
13232 break;
13233 case E_CCOmode:
13234 suffix = "o";
13235 break;
13236 case E_CCPmode:
13237 suffix = "p";
13238 break;
13239 case E_CCSmode:
13240 suffix = "s";
13241 break;
13242 default:
13243 suffix = "e";
13244 break;
13246 break;
13247 case NE:
13248 gcc_assert (mode != CCGZmode);
13249 switch (mode)
13251 case E_CCAmode:
13252 suffix = "na";
13253 break;
13254 case E_CCCmode:
13255 suffix = "nc";
13256 break;
13257 case E_CCOmode:
13258 suffix = "no";
13259 break;
13260 case E_CCPmode:
13261 suffix = "np";
13262 break;
13263 case E_CCSmode:
13264 suffix = "ns";
13265 break;
13266 default:
13267 suffix = "ne";
13268 break;
13270 break;
13271 case GT:
13272 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13273 suffix = "g";
13274 break;
13275 case GTU:
13276 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13277 Those same assemblers have the same but opposite lossage on cmov. */
13278 if (mode == CCmode)
13279 suffix = fp ? "nbe" : "a";
13280 else
13281 gcc_unreachable ();
13282 break;
13283 case LT:
13284 switch (mode)
13286 case E_CCNOmode:
13287 case E_CCGOCmode:
13288 suffix = "s";
13289 break;
13291 case E_CCmode:
13292 case E_CCGCmode:
13293 case E_CCGZmode:
13294 suffix = "l";
13295 break;
13297 default:
13298 gcc_unreachable ();
13300 break;
13301 case LTU:
13302 if (mode == CCmode || mode == CCGZmode)
13303 suffix = "b";
13304 else if (mode == CCCmode)
13305 suffix = fp ? "b" : "c";
13306 else
13307 gcc_unreachable ();
13308 break;
13309 case GE:
13310 switch (mode)
13312 case E_CCNOmode:
13313 case E_CCGOCmode:
13314 suffix = "ns";
13315 break;
13317 case E_CCmode:
13318 case E_CCGCmode:
13319 case E_CCGZmode:
13320 suffix = "ge";
13321 break;
13323 default:
13324 gcc_unreachable ();
13326 break;
13327 case GEU:
13328 if (mode == CCmode || mode == CCGZmode)
13329 suffix = "nb";
13330 else if (mode == CCCmode)
13331 suffix = fp ? "nb" : "nc";
13332 else
13333 gcc_unreachable ();
13334 break;
13335 case LE:
13336 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13337 suffix = "le";
13338 break;
13339 case LEU:
13340 if (mode == CCmode)
13341 suffix = "be";
13342 else
13343 gcc_unreachable ();
13344 break;
13345 case UNORDERED:
13346 suffix = fp ? "u" : "p";
13347 break;
13348 case ORDERED:
13349 suffix = fp ? "nu" : "np";
13350 break;
13351 default:
13352 gcc_unreachable ();
13354 fputs (suffix, file);
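/* A few illustrative mappings from the switch above (a rough sketch):

     EQ  with a generic CC mode   -> "e"
     GTU with CCmode, fp          -> "nbe"
     LTU with CCCmode, !fp        -> "c"
     GE  with CCGOCmode           -> "ns"

   so e.g. a setcc testing the carry flag ends up as "setc".  */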
13357 /* Print the name of register X to FILE based on its machine mode and number.
13358 If CODE is 'w', pretend the mode is HImode.
13359 If CODE is 'b', pretend the mode is QImode.
13360 If CODE is 'k', pretend the mode is SImode.
13361 If CODE is 'q', pretend the mode is DImode.
13362 If CODE is 'x', pretend the mode is V4SFmode.
13363 If CODE is 't', pretend the mode is V8SFmode.
13364 If CODE is 'g', pretend the mode is V16SFmode.
13365 If CODE is 'h', pretend the reg is the 'high' byte register.
13366 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13367 If CODE is 'd', duplicate the operand for AVX instruction.
13368 If CODE is 'V', print naked full integer register name without %.
13371 void
13372 print_reg (rtx x, int code, FILE *file)
13374 const char *reg;
13375 int msize;
13376 unsigned int regno;
13377 bool duplicated;
13379 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
13380 putc ('%', file);
13382 if (x == pc_rtx)
13384 gcc_assert (TARGET_64BIT);
13385 fputs ("rip", file);
13386 return;
13389 if (code == 'y' && STACK_TOP_P (x))
13391 fputs ("st(0)", file);
13392 return;
13395 if (code == 'w')
13396 msize = 2;
13397 else if (code == 'b')
13398 msize = 1;
13399 else if (code == 'k')
13400 msize = 4;
13401 else if (code == 'q')
13402 msize = 8;
13403 else if (code == 'h')
13404 msize = 0;
13405 else if (code == 'x')
13406 msize = 16;
13407 else if (code == 't')
13408 msize = 32;
13409 else if (code == 'g')
13410 msize = 64;
13411 else
13412 msize = GET_MODE_SIZE (GET_MODE (x));
13414 regno = REGNO (x);
13416 if (regno == ARG_POINTER_REGNUM
13417 || regno == FRAME_POINTER_REGNUM
13418 || regno == FPSR_REG)
13420 output_operand_lossage
13421 ("invalid use of register '%s'", reg_names[regno]);
13422 return;
13424 else if (regno == FLAGS_REG)
13426 output_operand_lossage ("invalid use of asm flag output");
13427 return;
13430 if (code == 'V')
13432 if (GENERAL_REGNO_P (regno))
13433 msize = GET_MODE_SIZE (word_mode);
13434 else
13435 error ("%<V%> modifier on non-integer register");
13438 duplicated = code == 'd' && TARGET_AVX;
13440 switch (msize)
13442 case 16:
13443 case 12:
13444 case 8:
13445 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
13446 warning (0, "unsupported size for integer register");
13447 /* FALLTHRU */
13448 case 4:
13449 if (LEGACY_INT_REGNO_P (regno))
13450 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
13451 /* FALLTHRU */
13452 case 2:
13453 normal:
13454 reg = hi_reg_name[regno];
13455 break;
13456 case 1:
13457 if (regno >= ARRAY_SIZE (qi_reg_name))
13458 goto normal;
13459 if (!ANY_QI_REGNO_P (regno))
13460 error ("unsupported size for integer register");
13461 reg = qi_reg_name[regno];
13462 break;
13463 case 0:
13464 if (regno >= ARRAY_SIZE (qi_high_reg_name))
13465 goto normal;
13466 reg = qi_high_reg_name[regno];
13467 break;
13468 case 32:
13469 case 64:
13470 if (SSE_REGNO_P (regno))
13472 gcc_assert (!duplicated);
13473 putc (msize == 32 ? 'y' : 'z', file);
13474 reg = hi_reg_name[regno] + 1;
13475 break;
13477 goto normal;
13478 default:
13479 gcc_unreachable ();
13482 fputs (reg, file);
13484 /* Irritatingly, AMD extended registers use a
13485 different naming convention: "r%d[bwd]". */
13486 if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
13488 gcc_assert (TARGET_64BIT);
13489 switch (msize)
13491 case 0:
13492 error ("extended registers have no high halves");
13493 break;
13494 case 1:
13495 putc ('b', file);
13496 break;
13497 case 2:
13498 putc ('w', file);
13499 break;
13500 case 4:
13501 putc ('d', file);
13502 break;
13503 case 8:
13504 /* no suffix */
13505 break;
13506 default:
13507 error ("unsupported operand size for extended register");
13508 break;
13510 return;
13513 if (duplicated)
13515 if (ASSEMBLER_DIALECT == ASM_ATT)
13516 fprintf (file, ", %%%s", reg);
13517 else
13518 fprintf (file, ", %s", reg);
13522 /* Meaning of CODE:
13523 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13524 C -- print opcode suffix for set/cmov insn.
13525 c -- like C, but print reversed condition
13526 F,f -- likewise, but for floating-point.
13527 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13528 otherwise nothing
13529 R -- print embedded rounding and sae.
13530 r -- print only sae.
13531 z -- print the opcode suffix for the size of the current operand.
13532 Z -- likewise, with special suffixes for x87 instructions.
13533 * -- print a star (in certain assembler syntax)
13534 A -- print an absolute memory reference.
13535 E -- print address with DImode register names if TARGET_64BIT.
13536 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13537 s -- print a shift double count, followed by the assembler's argument
13538 delimiter.
13539 b -- print the QImode name of the register for the indicated operand.
13540 %b0 would print %al if operands[0] is reg 0.
13541 w -- likewise, print the HImode name of the register.
13542 k -- likewise, print the SImode name of the register.
13543 q -- likewise, print the DImode name of the register.
13544 x -- likewise, print the V4SFmode name of the register.
13545 t -- likewise, print the V8SFmode name of the register.
13546 g -- likewise, print the V16SFmode name of the register.
13547 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13548 y -- print "st(0)" instead of "st" as a register.
13549 d -- print duplicated register operand for AVX instruction.
13550 D -- print condition for SSE cmp instruction.
13551 P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13552 address from GOT.
13553 p -- print raw symbol name.
13554 X -- don't print any sort of PIC '@' suffix for a symbol.
13555 & -- print some in-use local-dynamic symbol name.
13556 H -- print a memory address offset by 8; used for sse high-parts
13557 Y -- print condition for XOP pcom* instruction.
13558 V -- print naked full integer register name without %.
13559 + -- print a branch hint as 'cs' or 'ds' prefix
13560 ; -- print a semicolon (after prefixes due to a bug in older gas).
13561 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13562 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
13563 M -- print addr32 prefix for TARGET_X32 with VSIB address.
13564 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
13565 N -- print maskz if it's constant 0 operand.
13566 G -- print embedded flag for ccmp/ctest.  */
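/* For example, with operands[0] living in %eax, "%b0" prints "%al",
"%w0" prints "%ax", "%k0" prints "%eax" and "%q0" prints "%rax"
(AT&T syntax). */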
13569 void
13570 ix86_print_operand (FILE *file, rtx x, int code)
13572 if (code)
13574 switch (code)
13576 case 'A':
13577 switch (ASSEMBLER_DIALECT)
13579 case ASM_ATT:
13580 putc ('*', file);
13581 break;
13583 case ASM_INTEL:
13584 /* Intel syntax. For absolute addresses, registers should not
13585 be surrounded by braces. */
13586 if (!REG_P (x))
13588 putc ('[', file);
13589 ix86_print_operand (file, x, 0);
13590 putc (']', file);
13591 return;
13593 break;
13595 default:
13596 gcc_unreachable ();
13599 ix86_print_operand (file, x, 0);
13600 return;
13602 case 'E':
13603 /* Wrap address in an UNSPEC to declare special handling. */
13604 if (TARGET_64BIT)
13605 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13607 output_address (VOIDmode, x);
13608 return;
13610 case 'L':
13611 if (ASSEMBLER_DIALECT == ASM_ATT)
13612 putc ('l', file);
13613 return;
13615 case 'W':
13616 if (ASSEMBLER_DIALECT == ASM_ATT)
13617 putc ('w', file);
13618 return;
13620 case 'B':
13621 if (ASSEMBLER_DIALECT == ASM_ATT)
13622 putc ('b', file);
13623 return;
13625 case 'Q':
13626 if (ASSEMBLER_DIALECT == ASM_ATT)
13627 putc ('l', file);
13628 return;
13630 case 'S':
13631 if (ASSEMBLER_DIALECT == ASM_ATT)
13632 putc ('s', file);
13633 return;
13635 case 'T':
13636 if (ASSEMBLER_DIALECT == ASM_ATT)
13637 putc ('t', file);
13638 return;
13640 case 'O':
13641 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13642 if (ASSEMBLER_DIALECT != ASM_ATT)
13643 return;
13645 switch (GET_MODE_SIZE (GET_MODE (x)))
13647 case 2:
13648 putc ('w', file);
13649 break;
13651 case 4:
13652 putc ('l', file);
13653 break;
13655 case 8:
13656 putc ('q', file);
13657 break;
13659 default:
13660 output_operand_lossage ("invalid operand size for operand "
13661 "code 'O'");
13662 return;
13665 putc ('.', file);
13666 #endif
13667 return;
13669 case 'z':
13670 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13672 /* Opcodes don't get size suffixes if using Intel opcodes. */
13673 if (ASSEMBLER_DIALECT == ASM_INTEL)
13674 return;
13676 switch (GET_MODE_SIZE (GET_MODE (x)))
13678 case 1:
13679 putc ('b', file);
13680 return;
13682 case 2:
13683 putc ('w', file);
13684 return;
13686 case 4:
13687 putc ('l', file);
13688 return;
13690 case 8:
13691 putc ('q', file);
13692 return;
13694 default:
13695 output_operand_lossage ("invalid operand size for operand "
13696 "code 'z'");
13697 return;
13701 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13703 if (this_is_asm_operands)
13704 warning_for_asm (this_is_asm_operands,
13705 "non-integer operand used with operand code %<z%>");
13706 else
13707 warning (0, "non-integer operand used with operand code %<z%>");
13709 /* FALLTHRU */
13711 case 'Z':
13712 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13713 if (ASSEMBLER_DIALECT == ASM_INTEL)
13714 return;
13716 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13718 switch (GET_MODE_SIZE (GET_MODE (x)))
13720 case 2:
13721 #ifdef HAVE_AS_IX86_FILDS
13722 putc ('s', file);
13723 #endif
13724 return;
13726 case 4:
13727 putc ('l', file);
13728 return;
13730 case 8:
13731 #ifdef HAVE_AS_IX86_FILDQ
13732 putc ('q', file);
13733 #else
13734 fputs ("ll", file);
13735 #endif
13736 return;
13738 default:
13739 break;
13742 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13744 /* 387 opcodes don't get size suffixes
13745 if the operands are registers. */
13746 if (STACK_REG_P (x))
13747 return;
13749 switch (GET_MODE_SIZE (GET_MODE (x)))
13751 case 4:
13752 putc ('s', file);
13753 return;
13755 case 8:
13756 putc ('l', file);
13757 return;
13759 case 12:
13760 case 16:
13761 putc ('t', file);
13762 return;
13764 default:
13765 break;
13768 else
13770 output_operand_lossage ("invalid operand type used with "
13771 "operand code '%c'", code);
13772 return;
13775 output_operand_lossage ("invalid operand size for operand code '%c'",
13776 code);
13777 return;
13779 case 'd':
13780 case 'b':
13781 case 'w':
13782 case 'k':
13783 case 'q':
13784 case 'h':
13785 case 't':
13786 case 'g':
13787 case 'y':
13788 case 'x':
13789 case 'X':
13790 case 'P':
13791 case 'p':
13792 case 'V':
13793 break;
13795 case 's':
13796 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13798 ix86_print_operand (file, x, 0);
13799 fputs (", ", file);
13801 return;
13803 case 'Y':
13804 switch (GET_CODE (x))
13806 case NE:
13807 fputs ("neq", file);
13808 break;
13809 case EQ:
13810 fputs ("eq", file);
13811 break;
13812 case GE:
13813 case GEU:
13814 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13815 break;
13816 case GT:
13817 case GTU:
13818 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13819 break;
13820 case LE:
13821 case LEU:
13822 fputs ("le", file);
13823 break;
13824 case LT:
13825 case LTU:
13826 fputs ("lt", file);
13827 break;
13828 case UNORDERED:
13829 fputs ("unord", file);
13830 break;
13831 case ORDERED:
13832 fputs ("ord", file);
13833 break;
13834 case UNEQ:
13835 fputs ("ueq", file);
13836 break;
13837 case UNGE:
13838 fputs ("nlt", file);
13839 break;
13840 case UNGT:
13841 fputs ("nle", file);
13842 break;
13843 case UNLE:
13844 fputs ("ule", file);
13845 break;
13846 case UNLT:
13847 fputs ("ult", file);
13848 break;
13849 case LTGT:
13850 fputs ("une", file);
13851 break;
13852 default:
13853 output_operand_lossage ("operand is not a condition code, "
13854 "invalid operand code 'Y'");
13855 return;
13857 return;
13859 case 'D':
13860 /* Little bit of braindamage here. The SSE compare instructions
13861 use completely different names for the comparisons than the
13862 fp conditional moves do. */
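/* For example, an LTGT comparison prints as "neq_oq" when TARGET_AVX
(vcmpneq_oqss) but falls back to plain "neq" for legacy SSE cmpss. */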
13863 switch (GET_CODE (x))
13865 case UNEQ:
13866 if (TARGET_AVX)
13868 fputs ("eq_us", file);
13869 break;
13871 /* FALLTHRU */
13872 case EQ:
13873 fputs ("eq", file);
13874 break;
13875 case UNLT:
13876 if (TARGET_AVX)
13878 fputs ("nge", file);
13879 break;
13881 /* FALLTHRU */
13882 case LT:
13883 fputs ("lt", file);
13884 break;
13885 case UNLE:
13886 if (TARGET_AVX)
13888 fputs ("ngt", file);
13889 break;
13891 /* FALLTHRU */
13892 case LE:
13893 fputs ("le", file);
13894 break;
13895 case UNORDERED:
13896 fputs ("unord", file);
13897 break;
13898 case LTGT:
13899 if (TARGET_AVX)
13901 fputs ("neq_oq", file);
13902 break;
13904 /* FALLTHRU */
13905 case NE:
13906 fputs ("neq", file);
13907 break;
13908 case GE:
13909 if (TARGET_AVX)
13911 fputs ("ge", file);
13912 break;
13914 /* FALLTHRU */
13915 case UNGE:
13916 fputs ("nlt", file);
13917 break;
13918 case GT:
13919 if (TARGET_AVX)
13921 fputs ("gt", file);
13922 break;
13924 /* FALLTHRU */
13925 case UNGT:
13926 fputs ("nle", file);
13927 break;
13928 case ORDERED:
13929 fputs ("ord", file);
13930 break;
13931 default:
13932 output_operand_lossage ("operand is not a condition code, "
13933 "invalid operand code 'D'");
13934 return;
13936 return;
13938 case 'F':
13939 case 'f':
13940 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13941 if (ASSEMBLER_DIALECT == ASM_ATT)
13942 putc ('.', file);
13943 gcc_fallthrough ();
13944 #endif
13946 case 'C':
13947 case 'c':
13948 if (!COMPARISON_P (x))
13950 output_operand_lossage ("operand is not a condition code, "
13951 "invalid operand code '%c'", code);
13952 return;
13954 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
13955 code == 'c' || code == 'f',
13956 code == 'F' || code == 'f',
13957 file);
13958 return;
13960 case 'G':
13962 int dfv = INTVAL (x);
13963 const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
13964 fputs (dfv_suffix, file);
13966 return;
13968 case 'H':
13969 if (!offsettable_memref_p (x))
13971 output_operand_lossage ("operand is not an offsettable memory "
13972 "reference, invalid operand code 'H'");
13973 return;
13975 /* It doesn't actually matter what mode we use here, as we're
13976 only going to use this for printing. */
13977 x = adjust_address_nv (x, DImode, 8);
13978 /* Output 'qword ptr' for intel assembler dialect. */
13979 if (ASSEMBLER_DIALECT == ASM_INTEL)
13980 code = 'q';
13981 break;
13983 case 'K':
13984 if (!CONST_INT_P (x))
13986 output_operand_lossage ("operand is not an integer, invalid "
13987 "operand code 'K'");
13988 return;
13991 if (INTVAL (x) & IX86_HLE_ACQUIRE)
13992 #ifdef HAVE_AS_IX86_HLE
13993 fputs ("xacquire ", file);
13994 #else
13995 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
13996 #endif
13997 else if (INTVAL (x) & IX86_HLE_RELEASE)
13998 #ifdef HAVE_AS_IX86_HLE
13999 fputs ("xrelease ", file);
14000 #else
14001 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14002 #endif
14003 /* We do not want to print value of the operand. */
14004 return;
14006 case 'N':
14007 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
14008 fputs ("{z}", file);
14009 return;
14011 case 'r':
14012 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
14014 output_operand_lossage ("operand is not a specific integer, "
14015 "invalid operand code 'r'");
14016 return;
14019 if (ASSEMBLER_DIALECT == ASM_INTEL)
14020 fputs (", ", file);
14022 fputs ("{sae}", file);
14024 if (ASSEMBLER_DIALECT == ASM_ATT)
14025 fputs (", ", file);
14027 return;
14029 case 'R':
14030 if (!CONST_INT_P (x))
14032 output_operand_lossage ("operand is not an integer, invalid "
14033 "operand code 'R'");
14034 return;
14037 if (ASSEMBLER_DIALECT == ASM_INTEL)
14038 fputs (", ", file);
14040 switch (INTVAL (x))
14042 case ROUND_NEAREST_INT | ROUND_SAE:
14043 fputs ("{rn-sae}", file);
14044 break;
14045 case ROUND_NEG_INF | ROUND_SAE:
14046 fputs ("{rd-sae}", file);
14047 break;
14048 case ROUND_POS_INF | ROUND_SAE:
14049 fputs ("{ru-sae}", file);
14050 break;
14051 case ROUND_ZERO | ROUND_SAE:
14052 fputs ("{rz-sae}", file);
14053 break;
14054 default:
14055 output_operand_lossage ("operand is not a specific integer, "
14056 "invalid operand code 'R'");
14059 if (ASSEMBLER_DIALECT == ASM_ATT)
14060 fputs (", ", file);
14062 return;
14064 case '*':
14065 if (ASSEMBLER_DIALECT == ASM_ATT)
14066 putc ('*', file);
14067 return;
14069 case '&':
14071 const char *name = get_some_local_dynamic_name ();
14072 if (name == NULL)
14073 output_operand_lossage ("'%%&' used without any "
14074 "local dynamic TLS references");
14075 else
14076 assemble_name (file, name);
14077 return;
14080 case '+':
14082 rtx x;
14084 if (!optimize
14085 || optimize_function_for_size_p (cfun)
14086 || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
14087 && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
14088 return;
14090 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14091 if (x)
14093 int pred_val = profile_probability::from_reg_br_prob_note
14094 (XINT (x, 0)).to_reg_br_prob_base ();
14096 bool taken = pred_val > REG_BR_PROB_BASE / 2;
14097 /* We use 3e (DS) prefix for taken branches and
14098 2e (CS) prefix for not taken branches. */
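/* e.g. "ds ; jne .L3" marks the jump as predicted taken; the prefix
bytes are 0x3e (DS) and 0x2e (CS), segment overrides reused as
static branch hints. */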
14099 if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
14100 fputs ("ds ; ", file);
14101 else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
14102 fputs ("cs ; ", file);
14104 return;
14107 case ';':
14108 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14109 putc (';', file);
14110 #endif
14111 return;
14113 case '~':
14114 putc (TARGET_AVX2 ? 'i' : 'f', file);
14115 return;
14117 case 'M':
14118 if (TARGET_X32)
14120 /* NB: 32-bit indices in a VSIB address are sign-extended
14121 to 64 bits. In x32, the 32-bit address 0xf7fa3010 would be
14122 sign-extended to 0xfffffffff7fa3010, which is an invalid
14123 address. Add the addr32 prefix if there is no base
14124 register and no symbol. */
14125 bool ok;
14126 struct ix86_address parts;
14127 ok = ix86_decompose_address (x, &parts);
14128 gcc_assert (ok && parts.index == NULL_RTX);
14129 if (parts.base == NULL_RTX
14130 && (parts.disp == NULL_RTX
14131 || !symbolic_operand (parts.disp,
14132 GET_MODE (parts.disp))))
14133 fputs ("addr32 ", file);
14135 return;
14137 case '^':
14138 if (TARGET_64BIT && Pmode != word_mode)
14139 fputs ("addr32 ", file);
14140 return;
14142 case '!':
14143 if (ix86_notrack_prefixed_insn_p (current_output_insn))
14144 fputs ("notrack ", file);
14145 return;
14147 default:
14148 output_operand_lossage ("invalid operand code '%c'", code);
14152 if (REG_P (x))
14153 print_reg (x, code, file);
14155 else if (MEM_P (x))
14157 rtx addr = XEXP (x, 0);
14159 /* No `byte ptr' prefix for call instructions ... */
14160 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
14162 machine_mode mode = GET_MODE (x);
14163 const char *size;
14165 /* Check for explicit size override codes. */
14166 if (code == 'b')
14167 size = "BYTE";
14168 else if (code == 'w')
14169 size = "WORD";
14170 else if (code == 'k')
14171 size = "DWORD";
14172 else if (code == 'q')
14173 size = "QWORD";
14174 else if (code == 'x')
14175 size = "XMMWORD";
14176 else if (code == 't')
14177 size = "YMMWORD";
14178 else if (code == 'g')
14179 size = "ZMMWORD";
14180 else if (mode == BLKmode)
14181 /* ... or BLKmode operands, when not overridden. */
14182 size = NULL;
14183 else
14184 switch (GET_MODE_SIZE (mode))
14186 case 1: size = "BYTE"; break;
14187 case 2: size = "WORD"; break;
14188 case 4: size = "DWORD"; break;
14189 case 8: size = "QWORD"; break;
14190 case 12: size = "TBYTE"; break;
14191 case 16:
14192 if (mode == XFmode)
14193 size = "TBYTE";
14194 else
14195 size = "XMMWORD";
14196 break;
14197 case 32: size = "YMMWORD"; break;
14198 case 64: size = "ZMMWORD"; break;
14199 default:
14200 gcc_unreachable ();
14202 if (size)
14204 fputs (size, file);
14205 fputs (" PTR ", file);
14209 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14210 output_operand_lossage ("invalid constraints for operand");
14211 else
14212 ix86_print_operand_address_as
14213 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
14216 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
14218 long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
14219 REAL_MODE_FORMAT (HFmode));
14220 if (ASSEMBLER_DIALECT == ASM_ATT)
14221 putc ('$', file);
14222 fprintf (file, "0x%04x", (unsigned int) l);
14225 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
14227 long l;
14229 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14231 if (ASSEMBLER_DIALECT == ASM_ATT)
14232 putc ('$', file);
14233 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14234 if (code == 'q')
14235 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
14236 (unsigned long long) (int) l);
14237 else
14238 fprintf (file, "0x%08x", (unsigned int) l);
14241 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
14243 long l[2];
14245 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14247 if (ASSEMBLER_DIALECT == ASM_ATT)
14248 putc ('$', file);
14249 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14252 /* These float cases don't actually occur as immediate operands. */
14253 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
14255 char dstr[30];
14257 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14258 fputs (dstr, file);
14261 /* Print bcst_mem_operand. */
14262 else if (GET_CODE (x) == VEC_DUPLICATE)
14264 machine_mode vmode = GET_MODE (x);
14265 /* Must be bcst_memory_operand. */
14266 gcc_assert (bcst_mem_operand (x, vmode));
14268 rtx mem = XEXP (x,0);
14269 ix86_print_operand (file, mem, 0);
14271 switch (vmode)
14273 case E_V2DImode:
14274 case E_V2DFmode:
14275 fputs ("{1to2}", file);
14276 break;
14277 case E_V4SImode:
14278 case E_V4SFmode:
14279 case E_V4DImode:
14280 case E_V4DFmode:
14281 fputs ("{1to4}", file);
14282 break;
14283 case E_V8SImode:
14284 case E_V8SFmode:
14285 case E_V8DFmode:
14286 case E_V8DImode:
14287 case E_V8HFmode:
14288 fputs ("{1to8}", file);
14289 break;
14290 case E_V16SFmode:
14291 case E_V16SImode:
14292 case E_V16HFmode:
14293 fputs ("{1to16}", file);
14294 break;
14295 case E_V32HFmode:
14296 fputs ("{1to32}", file);
14297 break;
14298 default:
14299 gcc_unreachable ();
14303 else
14305 /* We have patterns that allow zero sets of memory, for instance.
14306 In 64-bit mode, we should probably support all 8-byte vectors,
14307 since we can in fact encode that into an immediate. */
14308 if (GET_CODE (x) == CONST_VECTOR)
14310 if (x != CONST0_RTX (GET_MODE (x)))
14311 output_operand_lossage ("invalid vector immediate");
14312 x = const0_rtx;
14315 if (code == 'P')
14317 if (ix86_force_load_from_GOT_p (x, true))
14319 /* For inline assembly statement, load function address
14320 from GOT with 'P' operand modifier to avoid PLT. */
14321 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14322 (TARGET_64BIT
14323 ? UNSPEC_GOTPCREL
14324 : UNSPEC_GOT));
14325 x = gen_rtx_CONST (Pmode, x);
14326 x = gen_const_mem (Pmode, x);
14327 ix86_print_operand (file, x, 'A');
14328 return;
14331 else if (code != 'p')
14333 if (CONST_INT_P (x))
14335 if (ASSEMBLER_DIALECT == ASM_ATT)
14336 putc ('$', file);
14338 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14339 || GET_CODE (x) == LABEL_REF)
14341 if (ASSEMBLER_DIALECT == ASM_ATT)
14342 putc ('$', file);
14343 else
14344 fputs ("OFFSET FLAT:", file);
14347 if (CONST_INT_P (x))
14348 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14349 else if (flag_pic || MACHOPIC_INDIRECT)
14350 output_pic_addr_const (file, x, code);
14351 else
14352 output_addr_const (file, x);
14356 static bool
14357 ix86_print_operand_punct_valid_p (unsigned char code)
14359 return (code == '*' || code == '+' || code == '&' || code == ';'
14360 || code == '~' || code == '^' || code == '!');
14363 /* Print a memory operand whose address is ADDR. */
14365 static void
14366 ix86_print_operand_address_as (FILE *file, rtx addr,
14367 addr_space_t as, bool raw)
14369 struct ix86_address parts;
14370 rtx base, index, disp;
14371 int scale;
14372 int ok;
14373 bool vsib = false;
14374 int code = 0;
14376 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14378 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14379 gcc_assert (parts.index == NULL_RTX);
14380 parts.index = XVECEXP (addr, 0, 1);
14381 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14382 addr = XVECEXP (addr, 0, 0);
14383 vsib = true;
14385 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14387 gcc_assert (TARGET_64BIT);
14388 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14389 code = 'q';
14391 else
14392 ok = ix86_decompose_address (addr, &parts);
14394 gcc_assert (ok);
14396 base = parts.base;
14397 index = parts.index;
14398 disp = parts.disp;
14399 scale = parts.scale;
14401 if (ADDR_SPACE_GENERIC_P (as))
14402 as = parts.seg;
14403 else
14404 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
14406 if (!ADDR_SPACE_GENERIC_P (as) && !raw)
14408 if (ASSEMBLER_DIALECT == ASM_ATT)
14409 putc ('%', file);
14411 switch (as)
14413 case ADDR_SPACE_SEG_FS:
14414 fputs ("fs:", file);
14415 break;
14416 case ADDR_SPACE_SEG_GS:
14417 fputs ("gs:", file);
14418 break;
14419 default:
14420 gcc_unreachable ();
14424 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
14425 if (TARGET_64BIT && !base && !index && !raw)
14427 rtx symbol = disp;
14429 if (GET_CODE (disp) == CONST
14430 && GET_CODE (XEXP (disp, 0)) == PLUS
14431 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14432 symbol = XEXP (XEXP (disp, 0), 0);
14434 if (GET_CODE (symbol) == LABEL_REF
14435 || (GET_CODE (symbol) == SYMBOL_REF
14436 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14437 base = pc_rtx;
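/* With BASE set to pc_rtx the operand is later printed as "sym(%rip)"
in AT&T syntax (or "[rip+sym]" for Intel), which avoids the SIB byte
a 32-bit absolute address would need in 64-bit mode. */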
14440 if (!base && !index)
14442 /* A displacement-only address requires special attention. */
14443 if (CONST_INT_P (disp))
14445 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
14446 fputs ("ds:", file);
14447 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14449 /* Load the external function address via the GOT slot to avoid PLT. */
14450 else if (GET_CODE (disp) == CONST
14451 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14452 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
14453 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
14454 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
14455 output_pic_addr_const (file, disp, 0);
14456 else if (flag_pic)
14457 output_pic_addr_const (file, disp, 0);
14458 else
14459 output_addr_const (file, disp);
14461 else
14463 /* Print SImode register names to force addr32 prefix. */
14464 if (SImode_address_operand (addr, VOIDmode))
14466 if (flag_checking)
14468 gcc_assert (TARGET_64BIT);
14469 switch (GET_CODE (addr))
14471 case SUBREG:
14472 gcc_assert (GET_MODE (addr) == SImode);
14473 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14474 break;
14475 case ZERO_EXTEND:
14476 case AND:
14477 gcc_assert (GET_MODE (addr) == DImode);
14478 break;
14479 default:
14480 gcc_unreachable ();
14483 gcc_assert (!code);
14484 code = 'k';
14486 else if (code == 0
14487 && TARGET_X32
14488 && disp
14489 && CONST_INT_P (disp)
14490 && INTVAL (disp) < -16*1024*1024)
14492 /* X32 runs in 64-bit mode, where a displacement, DISP, in
14493 the address DISP(%r64) is encoded as a 32-bit immediate sign-
14494 extended to 64 bits. For -0x40000300(%r64), the
14495 address is %r64 + 0xffffffffbffffd00. When %r64 <
14496 0x40000300, like 0x37ffe064, the address is 0xfffffffff7ffdd64,
14497 which is invalid for x32. The correct address is %r64
14498 - 0x40000300 == 0xf7ffdd64. To properly encode
14499 -0x40000300(%r64) for x32, we zero-extend the negative
14500 displacement by forcing the addr32 prefix, which truncates
14501 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14502 zero-extend all negative displacements, including -1(%rsp).
14503 However, for small negative displacements, sign-extension
14504 won't cause overflow. We only zero-extend negative
14505 displacements if they are below -16*1024*1024, the same bound
14506 used to check legitimate address displacements for PIC. */
14507 code = 'k';
14510 /* Since the upper 32 bits of RSP are always zero for x32,
14511 we can encode %esp as %rsp to avoid the 0x67 prefix if
14512 there is no index register. */
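/* e.g. "movl (%rsp), %eax" encodes without the 0x67 address-size
prefix that "movl (%esp), %eax" would require in 64-bit mode. */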
14513 if (TARGET_X32 && Pmode == SImode
14514 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
14515 code = 'q';
14517 if (ASSEMBLER_DIALECT == ASM_ATT)
14519 if (disp)
14521 if (flag_pic)
14522 output_pic_addr_const (file, disp, 0);
14523 else if (GET_CODE (disp) == LABEL_REF)
14524 output_asm_label (disp);
14525 else
14526 output_addr_const (file, disp);
14529 putc ('(', file);
14530 if (base)
14531 print_reg (base, code, file);
14532 if (index)
14534 putc (',', file);
14535 print_reg (index, vsib ? 0 : code, file);
14536 if (scale != 1 || vsib)
14537 fprintf (file, ",%d", scale);
14539 putc (')', file);
14541 else
14543 rtx offset = NULL_RTX;
14545 if (disp)
14547 /* Pull out the offset of a symbol; print any symbol itself. */
14548 if (GET_CODE (disp) == CONST
14549 && GET_CODE (XEXP (disp, 0)) == PLUS
14550 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14552 offset = XEXP (XEXP (disp, 0), 1);
14553 disp = gen_rtx_CONST (VOIDmode,
14554 XEXP (XEXP (disp, 0), 0));
14557 if (flag_pic)
14558 output_pic_addr_const (file, disp, 0);
14559 else if (GET_CODE (disp) == LABEL_REF)
14560 output_asm_label (disp);
14561 else if (CONST_INT_P (disp))
14562 offset = disp;
14563 else
14564 output_addr_const (file, disp);
14567 putc ('[', file);
14568 if (base)
14570 print_reg (base, code, file);
14571 if (offset)
14573 if (INTVAL (offset) >= 0)
14574 putc ('+', file);
14575 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14578 else if (offset)
14579 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14580 else
14581 putc ('0', file);
14583 if (index)
14585 putc ('+', file);
14586 print_reg (index, vsib ? 0 : code, file);
14587 if (scale != 1 || vsib)
14588 fprintf (file, "*%d", scale);
14590 putc (']', file);
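/* As an illustration, the address (plus (reg:DI ax) (const_int 8)) is
printed as "8(%rax)" by the AT&T branch above and as "[rax+8]" by
the Intel branch. */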
14595 static void
14596 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
14598 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14599 output_operand_lossage ("invalid constraints for operand");
14600 else
14601 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
14604 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14606 static bool
14607 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14609 rtx op;
14611 if (GET_CODE (x) != UNSPEC)
14612 return false;
14614 op = XVECEXP (x, 0, 0);
14615 switch (XINT (x, 1))
14617 case UNSPEC_GOTOFF:
14618 output_addr_const (file, op);
14619 fputs ("@gotoff", file);
14620 break;
14621 case UNSPEC_GOTTPOFF:
14622 output_addr_const (file, op);
14623 /* FIXME: This might be @TPOFF in Sun ld. */
14624 fputs ("@gottpoff", file);
14625 break;
14626 case UNSPEC_TPOFF:
14627 output_addr_const (file, op);
14628 fputs ("@tpoff", file);
14629 break;
14630 case UNSPEC_NTPOFF:
14631 output_addr_const (file, op);
14632 if (TARGET_64BIT)
14633 fputs ("@tpoff", file);
14634 else
14635 fputs ("@ntpoff", file);
14636 break;
14637 case UNSPEC_DTPOFF:
14638 output_addr_const (file, op);
14639 fputs ("@dtpoff", file);
14640 break;
14641 case UNSPEC_GOTNTPOFF:
14642 output_addr_const (file, op);
14643 if (TARGET_64BIT)
14644 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14645 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14646 else
14647 fputs ("@gotntpoff", file);
14648 break;
14649 case UNSPEC_INDNTPOFF:
14650 output_addr_const (file, op);
14651 fputs ("@indntpoff", file);
14652 break;
14653 #if TARGET_MACHO
14654 case UNSPEC_MACHOPIC_OFFSET:
14655 output_addr_const (file, op);
14656 putc ('-', file);
14657 machopic_output_function_base_name (file);
14658 break;
14659 #endif
14661 default:
14662 return false;
14665 return true;
14669 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14670 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14671 is the expression of the binary operation. The output may either be
14672 emitted here, or returned to the caller, like all output_* functions.
14674 There is no guarantee that the operands are the same mode, as they
14675 might be within FLOAT or FLOAT_EXTEND expressions. */
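/* As a sketch of the output: for an SFmode SSE addition under TARGET_AVX
the buffer below ends up as "%vaddss\t{%2, %1, %0|%0, %1, %2}", while
the x87 paths build "fadd"/"fiadd" style templates instead. */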
14677 #ifndef SYSV386_COMPAT
14678 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14679 wants to fix the assemblers because that causes incompatibility
14680 with gcc. No-one wants to fix gcc because that causes
14681 incompatibility with assemblers... You can use the option of
14682 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14683 #define SYSV386_COMPAT 1
14684 #endif
14686 const char *
14687 output_387_binary_op (rtx_insn *insn, rtx *operands)
14689 static char buf[40];
14690 const char *p;
14691 bool is_sse
14692 = (SSE_REG_P (operands[0])
14693 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
14695 if (is_sse)
14696 p = "%v";
14697 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14698 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14699 p = "fi";
14700 else
14701 p = "f";
14703 strcpy (buf, p);
14705 switch (GET_CODE (operands[3]))
14707 case PLUS:
14708 p = "add"; break;
14709 case MINUS:
14710 p = "sub"; break;
14711 case MULT:
14712 p = "mul"; break;
14713 case DIV:
14714 p = "div"; break;
14715 default:
14716 gcc_unreachable ();
14719 strcat (buf, p);
14721 if (is_sse)
14723 p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
14724 strcat (buf, p);
14726 if (TARGET_AVX)
14727 p = "\t{%2, %1, %0|%0, %1, %2}";
14728 else
14729 p = "\t{%2, %0|%0, %2}";
14731 strcat (buf, p);
14732 return buf;
14735 /* Even if we do not want to check the inputs, this documents the input
14736 constraints, which helps in understanding the following code. */
14737 if (flag_checking)
14739 if (STACK_REG_P (operands[0])
14740 && ((REG_P (operands[1])
14741 && REGNO (operands[0]) == REGNO (operands[1])
14742 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14743 || (REG_P (operands[2])
14744 && REGNO (operands[0]) == REGNO (operands[2])
14745 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14746 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14747 ; /* ok */
14748 else
14749 gcc_unreachable ();
14752 switch (GET_CODE (operands[3]))
14754 case MULT:
14755 case PLUS:
14756 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14757 std::swap (operands[1], operands[2]);
14759 /* We know operands[0] == operands[1]. */
14761 if (MEM_P (operands[2]))
14763 p = "%Z2\t%2";
14764 break;
14767 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14769 if (STACK_TOP_P (operands[0]))
14770 /* How is it that we are storing to a dead operand[2]?
14771 Well, presumably operands[1] is dead too. We can't
14772 store the result to st(0) as st(0) gets popped on this
14773 instruction. Instead store to operands[2] (which I
14774 think has to be st(1)). st(1) will be popped later.
14775 gcc <= 2.8.1 didn't have this check and generated
14776 assembly code that the Unixware assembler rejected. */
14777 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14778 else
14779 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14780 break;
14783 if (STACK_TOP_P (operands[0]))
14784 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14785 else
14786 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14787 break;
14789 case MINUS:
14790 case DIV:
14791 if (MEM_P (operands[1]))
14793 p = "r%Z1\t%1";
14794 break;
14797 if (MEM_P (operands[2]))
14799 p = "%Z2\t%2";
14800 break;
14803 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14805 #if SYSV386_COMPAT
14806 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14807 derived assemblers, confusingly reverse the direction of
14808 the operation for fsub{r} and fdiv{r} when the
14809 destination register is not st(0). The Intel assembler
14810 doesn't have this brain damage. Read !SYSV386_COMPAT to
14811 figure out what the hardware really does. */
14812 if (STACK_TOP_P (operands[0]))
14813 p = "{p\t%0, %2|rp\t%2, %0}";
14814 else
14815 p = "{rp\t%2, %0|p\t%0, %2}";
14816 #else
14817 if (STACK_TOP_P (operands[0]))
14818 /* As above for fmul/fadd, we can't store to st(0). */
14819 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14820 else
14821 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14822 #endif
14823 break;
14826 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14828 #if SYSV386_COMPAT
14829 if (STACK_TOP_P (operands[0]))
14830 p = "{rp\t%0, %1|p\t%1, %0}";
14831 else
14832 p = "{p\t%1, %0|rp\t%0, %1}";
14833 #else
14834 if (STACK_TOP_P (operands[0]))
14835 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14836 else
14837 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14838 #endif
14839 break;
14842 if (STACK_TOP_P (operands[0]))
14844 if (STACK_TOP_P (operands[1]))
14845 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14846 else
14847 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14848 break;
14850 else if (STACK_TOP_P (operands[1]))
14852 #if SYSV386_COMPAT
14853 p = "{\t%1, %0|r\t%0, %1}";
14854 #else
14855 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14856 #endif
14858 else
14860 #if SYSV386_COMPAT
14861 p = "{r\t%2, %0|\t%0, %2}";
14862 #else
14863 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14864 #endif
14866 break;
14868 default:
14869 gcc_unreachable ();
14872 strcat (buf, p);
14873 return buf;
14876 /* Return needed mode for entity in optimize_mode_switching pass. */
14878 static int
14879 ix86_dirflag_mode_needed (rtx_insn *insn)
14881 if (CALL_P (insn))
14883 if (cfun->machine->func_type == TYPE_NORMAL)
14884 return X86_DIRFLAG_ANY;
14885 else
14886 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
14887 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
14890 if (recog_memoized (insn) < 0)
14891 return X86_DIRFLAG_ANY;
14893 if (get_attr_type (insn) == TYPE_STR)
14895 /* Emit cld instruction if stringops are used in the function. */
14896 if (cfun->machine->func_type == TYPE_NORMAL)
14897 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
14898 else
14899 return X86_DIRFLAG_RESET;
14902 return X86_DIRFLAG_ANY;
14905 /* Check if a 256bit or 512bit AVX register is referenced inside EXP. */
14907 static bool
14908 ix86_check_avx_upper_register (const_rtx exp)
14910 /* construct_container may return a parallel with an expr_list
14911 which contains the real reg and mode. */
14912 subrtx_iterator::array_type array;
14913 FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
14915 const_rtx x = *iter;
14916 if (SSE_REG_P (x)
14917 && !EXT_REX_SSE_REG_P (x)
14918 && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
14919 return true;
14922 return false;
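/* e.g. a ymm register used in V8SFmode (256 bits) makes this return
true, while the same register in V4SFmode (128 bits), or any upper
xmm16-xmm31 register (EXT_REX_SSE_REG_P), does not. */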
14925 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
14927 static void
14928 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
14930 if (SSE_REG_P (dest)
14931 && !EXT_REX_SSE_REG_P (dest)
14932 && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
14934 bool *used = (bool *) data;
14935 *used = true;
14939 /* Return needed mode for entity in optimize_mode_switching pass. */
14941 static int
14942 ix86_avx_u128_mode_needed (rtx_insn *insn)
14944 if (DEBUG_INSN_P (insn))
14945 return AVX_U128_ANY;
14947 if (CALL_P (insn))
14949 rtx link;
14951 /* Needed mode is set to AVX_U128_CLEAN if there are
14952 no 256bit or 512bit modes used in function arguments. */
14953 for (link = CALL_INSN_FUNCTION_USAGE (insn);
14954 link;
14955 link = XEXP (link, 1))
14957 if (GET_CODE (XEXP (link, 0)) == USE)
14959 rtx arg = XEXP (XEXP (link, 0), 0);
14961 if (ix86_check_avx_upper_register (arg))
14962 return AVX_U128_DIRTY;
14966 /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
14967 or 512bit registers used in the function return register. */
14968 bool avx_upper_reg_found = false;
14969 note_stores (insn, ix86_check_avx_upper_stores,
14970 &avx_upper_reg_found);
14971 if (avx_upper_reg_found)
14972 return AVX_U128_DIRTY;
14974 /* If the function is known to preserve some SSE registers,
14975 RA and previous passes can legitimately rely on that for
14976 modes wider than 256 bits. It's only safe to issue a
14977 vzeroupper if all SSE registers are clobbered. */
14978 const function_abi &abi = insn_callee_abi (insn);
14979 if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
14980 /* Should be safe to issue a vzeroupper before sibling_call_p.
14981 Also, there is no mode_exit for sibling calls, so a
14982 vzeroupper could be missing for them. */
14983 || !(SIBLING_CALL_P (insn)
14984 || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
14985 abi.mode_clobbers (V4DImode))))
14986 return AVX_U128_ANY;
14988 return AVX_U128_CLEAN;
14991 rtx set = single_set (insn);
14992 if (set)
14994 rtx dest = SET_DEST (set);
14995 rtx src = SET_SRC (set);
14996 if (SSE_REG_P (dest)
14997 && !EXT_REX_SSE_REG_P (dest)
14998 && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
15000 /* This is a YMM/ZMM load. Return AVX_U128_DIRTY if the
15001 source isn't zero. */
15002 if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
15003 return AVX_U128_DIRTY;
15004 else
15005 return AVX_U128_ANY;
15007 else
15009 if (ix86_check_avx_upper_register (src))
15010 return AVX_U128_DIRTY;
15013 /* This isn't YMM/ZMM load/store. */
15014 return AVX_U128_ANY;
15017 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
15018 Hardware changes state only when a 256bit register is written to,
15019 but we need to prevent the compiler from moving the optimal insertion
15020 point above an eventual read from a 256bit or 512bit register. */
15021 if (ix86_check_avx_upper_register (PATTERN (insn)))
15022 return AVX_U128_DIRTY;
15024 return AVX_U128_ANY;
15027 /* Return mode that i387 must be switched into
15028 prior to the execution of insn. */
15030 static int
15031 ix86_i387_mode_needed (int entity, rtx_insn *insn)
15033 enum attr_i387_cw mode;
15035 /* The mode UNINITIALIZED is used to store the control word after a
15036 function call or ASM pattern. The mode ANY specifies that the function
15037 has no requirements on the control word and makes no changes in the
15038 bits we are interested in. */
15040 if (CALL_P (insn)
15041 || (NONJUMP_INSN_P (insn)
15042 && (asm_noperands (PATTERN (insn)) >= 0
15043 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15044 return I387_CW_UNINITIALIZED;
15046 if (recog_memoized (insn) < 0)
15047 return I387_CW_ANY;
15049 mode = get_attr_i387_cw (insn);
15051 switch (entity)
15053 case I387_ROUNDEVEN:
15054 if (mode == I387_CW_ROUNDEVEN)
15055 return mode;
15056 break;
15058 case I387_TRUNC:
15059 if (mode == I387_CW_TRUNC)
15060 return mode;
15061 break;
15063 case I387_FLOOR:
15064 if (mode == I387_CW_FLOOR)
15065 return mode;
15066 break;
15068 case I387_CEIL:
15069 if (mode == I387_CW_CEIL)
15070 return mode;
15071 break;
15073 default:
15074 gcc_unreachable ();
15077 return I387_CW_ANY;
15080 /* Return mode that entity must be switched into
15081 prior to the execution of insn. */
15083 static int
15084 ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
15086 switch (entity)
15088 case X86_DIRFLAG:
15089 return ix86_dirflag_mode_needed (insn);
15090 case AVX_U128:
15091 return ix86_avx_u128_mode_needed (insn);
15092 case I387_ROUNDEVEN:
15093 case I387_TRUNC:
15094 case I387_FLOOR:
15095 case I387_CEIL:
15096 return ix86_i387_mode_needed (entity, insn);
15097 default:
15098 gcc_unreachable ();
15100 return 0;
15103 /* Calculate mode of upper 128bit AVX registers after the insn. */
15105 static int
15106 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
15108 rtx pat = PATTERN (insn);
15110 if (vzeroupper_pattern (pat, VOIDmode)
15111 || vzeroall_pattern (pat, VOIDmode))
15112 return AVX_U128_CLEAN;
15114 /* We know that state is clean after CALL insn if there are no
15115 256bit or 512bit registers used in the function return register. */
15116 if (CALL_P (insn))
15118 bool avx_upper_reg_found = false;
15119 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
15121 if (avx_upper_reg_found)
15122 return AVX_U128_DIRTY;
15124 /* If the function doesn't clobber any sse registers, or clobbers only
15125 their 128-bit parts, then vzeroupper isn't issued before the function
15126 exit; the status is ANY, not CLEAN, after the function. */
15127 const function_abi &abi = insn_callee_abi (insn);
15128 if (!(SIBLING_CALL_P (insn)
15129 || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15130 abi.mode_clobbers (V4DImode))))
15131 return AVX_U128_ANY;
15133 return AVX_U128_CLEAN;
15136 /* Otherwise, return current mode. Remember that if insn
15137 references AVX 256bit or 512bit registers, the mode was already
15138 changed to DIRTY from MODE_NEEDED. */
15139 return mode;
15142 /* Return the mode that an insn results in. */
15144 static int
15145 ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
15147 switch (entity)
15149 case X86_DIRFLAG:
15150 return mode;
15151 case AVX_U128:
15152 return ix86_avx_u128_mode_after (mode, insn);
15153 case I387_ROUNDEVEN:
15154 case I387_TRUNC:
15155 case I387_FLOOR:
15156 case I387_CEIL:
15157 return mode;
15158 default:
15159 gcc_unreachable ();
15163 static int
15164 ix86_dirflag_mode_entry (void)
15166 /* For TARGET_CLD or in the interrupt handler we can't assume
15167 direction flag state at function entry. */
15168 if (TARGET_CLD
15169 || cfun->machine->func_type != TYPE_NORMAL)
15170 return X86_DIRFLAG_ANY;
15172 return X86_DIRFLAG_RESET;
15175 static int
15176 ix86_avx_u128_mode_entry (void)
15178 tree arg;
15180 /* Entry mode is set to AVX_U128_DIRTY if there are
15181 256bit or 512bit modes used in function arguments. */
15182 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15183 arg = TREE_CHAIN (arg))
15185 rtx incoming = DECL_INCOMING_RTL (arg);
15187 if (incoming && ix86_check_avx_upper_register (incoming))
15188 return AVX_U128_DIRTY;
15191 return AVX_U128_CLEAN;
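/* e.g. a function that receives an __m256 argument in a ymm register
enters in AVX_U128_DIRTY mode; one that takes only scalar arguments
enters AVX_U128_CLEAN. */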
15194 /* Return a mode that ENTITY is assumed to be
15195 switched to at function entry. */
15197 static int
15198 ix86_mode_entry (int entity)
15200 switch (entity)
15202 case X86_DIRFLAG:
15203 return ix86_dirflag_mode_entry ();
15204 case AVX_U128:
15205 return ix86_avx_u128_mode_entry ();
15206 case I387_ROUNDEVEN:
15207 case I387_TRUNC:
15208 case I387_FLOOR:
15209 case I387_CEIL:
15210 return I387_CW_ANY;
15211 default:
15212 gcc_unreachable ();
15216 static int
15217 ix86_avx_u128_mode_exit (void)
15219 rtx reg = crtl->return_rtx;
15221 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
15222 or 512 bit modes used in the function return register. */
15223 if (reg && ix86_check_avx_upper_register (reg))
15224 return AVX_U128_DIRTY;
15226 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
15227 modes used in function arguments, otherwise return AVX_U128_CLEAN.  */
15229 return ix86_avx_u128_mode_entry ();
15232 /* Return a mode that ENTITY is assumed to be
15233 switched to at function exit. */
15235 static int
15236 ix86_mode_exit (int entity)
15238 switch (entity)
15240 case X86_DIRFLAG:
15241 return X86_DIRFLAG_ANY;
15242 case AVX_U128:
15243 return ix86_avx_u128_mode_exit ();
15244 case I387_ROUNDEVEN:
15245 case I387_TRUNC:
15246 case I387_FLOOR:
15247 case I387_CEIL:
15248 return I387_CW_ANY;
15249 default:
15250 gcc_unreachable ();
15254 static int
15255 ix86_mode_priority (int, int n)
15257 return n;
15260 /* Output code to initialize control word copies used by trunc?f?i and
15261 rounding patterns. CURRENT_MODE is set to current control word,
15262 while NEW_MODE is set to new control word. */
15264 static void
15265 emit_i387_cw_initialization (int mode)
15267 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15268 rtx new_mode;
15270 enum ix86_stack_slot slot;
15272 rtx reg = gen_reg_rtx (HImode);
15274 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15275 emit_move_insn (reg, copy_rtx (stored_mode));
15277 switch (mode)
15279 case I387_CW_ROUNDEVEN:
15280 /* round to nearest */
15281 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15282 slot = SLOT_CW_ROUNDEVEN;
15283 break;
15285 case I387_CW_TRUNC:
15286 /* round toward zero (truncate) */
15287 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15288 slot = SLOT_CW_TRUNC;
15289 break;
15291 case I387_CW_FLOOR:
15292 /* round down toward -oo */
15293 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15294 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15295 slot = SLOT_CW_FLOOR;
15296 break;
15298 case I387_CW_CEIL:
15299 /* round up toward +oo */
15300 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15301 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15302 slot = SLOT_CW_CEIL;
15303 break;
15305 default:
15306 gcc_unreachable ();
15309 gcc_assert (slot < MAX_386_STACK_LOCALS);
15311 new_mode = assign_386_stack_local (HImode, slot);
15312 emit_move_insn (new_mode, reg);
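/* The masks above target the rounding-control field of the x87 control
word (bits 10-11, mask 0x0c00): 00 = nearest, 01 = down, 10 = up,
11 = toward zero. */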
15315 /* Generate one or more insns to set ENTITY to MODE. */
15317 static void
15318 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
15319 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
15321 switch (entity)
15323 case X86_DIRFLAG:
15324 if (mode == X86_DIRFLAG_RESET)
15325 emit_insn (gen_cld ());
15326 break;
15327 case AVX_U128:
15328 if (mode == AVX_U128_CLEAN)
15329 ix86_expand_avx_vzeroupper ();
15330 break;
15331 case I387_ROUNDEVEN:
15332 case I387_TRUNC:
15333 case I387_FLOOR:
15334 case I387_CEIL:
15335 if (mode != I387_CW_ANY
15336 && mode != I387_CW_UNINITIALIZED)
15337 emit_i387_cw_initialization (mode);
15338 break;
15339 default:
15340 gcc_unreachable ();
15344 /* Output code for INSN to convert a float to a signed int. OPERANDS
15345 are the insn operands. The output may be [HSD]Imode and the input
15346 operand may be [SDX]Fmode. */
15348 const char *
15349 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
15351 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15352 bool dimode_p = GET_MODE (operands[0]) == DImode;
15353 int round_mode = get_attr_i387_cw (insn);
15355 static char buf[40];
15356 const char *p;
15358 /* Jump through a hoop or two for DImode, since the hardware has no
15359 non-popping instruction. We used to do this a different way, but
15360 that was somewhat fragile and broke with post-reload splitters. */
15361 if ((dimode_p || fisttp) && !stack_top_dies)
15362 output_asm_insn ("fld\t%y1", operands);
15364 gcc_assert (STACK_TOP_P (operands[1]));
15365 gcc_assert (MEM_P (operands[0]));
15366 gcc_assert (GET_MODE (operands[1]) != TFmode);
15368 if (fisttp)
15369 return "fisttp%Z0\t%0";
15371 strcpy (buf, "fist");
15373 if (round_mode != I387_CW_ANY)
15374 output_asm_insn ("fldcw\t%3", operands);
15376 p = "p%Z0\t%0";
15377 strcat (buf, p + !(stack_top_dies || dimode_p));
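/* The "+ !(...)" skips the leading 'p' when the value must stay on the
x87 stack, emitting the non-popping "fist%Z0" instead of "fistp%Z0". */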
15379 output_asm_insn (buf, operands);
15381 if (round_mode != I387_CW_ANY)
15382 output_asm_insn ("fldcw\t%2", operands);
15384 return "";
15387 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15388 have the values zero or one, indicates the ffreep insn's operand
15389 from the OPERANDS array. */
15391 static const char *
15392 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15394 if (TARGET_USE_FFREEP)
15395 #ifdef HAVE_AS_IX86_FFREEP
15396 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15397 #else
15399 static char retval[32];
15400 int regno = REGNO (operands[opno]);
15402 gcc_assert (STACK_REGNO_P (regno));
15404 regno -= FIRST_STACK_REG;
15406 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15407 return retval;
15409 #endif
15411 return opno ? "fstp\t%y1" : "fstp\t%y0";
15415 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15416 should be used. UNORDERED_P is true when fucom should be used. */
15418 const char *
15419 output_fp_compare (rtx_insn *insn, rtx *operands,
15420 bool eflags_p, bool unordered_p)
15422 rtx *xops = eflags_p ? &operands[0] : &operands[1];
15423 bool stack_top_dies;
15425 static char buf[40];
15426 const char *p;
15428 gcc_assert (STACK_TOP_P (xops[0]));
15430 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15432 if (eflags_p)
15434 p = unordered_p ? "fucomi" : "fcomi";
15435 strcpy (buf, p);
15437 p = "p\t{%y1, %0|%0, %y1}";
15438 strcat (buf, p + !stack_top_dies);
15440 return buf;
15443 if (STACK_REG_P (xops[1])
15444 && stack_top_dies
15445 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
15447 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
15449 /* If the top of the 387 stack dies, and the other operand
15450 is also a stack register that dies, then this must be a
15451 `fcompp' float compare. */
15452 p = unordered_p ? "fucompp" : "fcompp";
15453 strcpy (buf, p);
15455 else if (const0_operand (xops[1], VOIDmode))
15457 gcc_assert (!unordered_p);
15458 strcpy (buf, "ftst");
15460 else
15462 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
15464 gcc_assert (!unordered_p);
15465 p = "ficom";
15467 else
15468 p = unordered_p ? "fucom" : "fcom";
15470 strcpy (buf, p);
15472 p = "p%Z2\t%y2";
15473 strcat (buf, p + !stack_top_dies);
15476 output_asm_insn (buf, operands);
15477 return "fnstsw\t%0";
15480 void
15481 ix86_output_addr_vec_elt (FILE *file, int value)
15483 const char *directive = ASM_LONG;
15485 #ifdef ASM_QUAD
15486 if (TARGET_LP64)
15487 directive = ASM_QUAD;
15488 #else
15489 gcc_assert (!TARGET_64BIT);
15490 #endif
15492 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15495 void
15496 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15498 const char *directive = ASM_LONG;
15500 #ifdef ASM_QUAD
15501 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15502 directive = ASM_QUAD;
15503 #else
15504 gcc_assert (!TARGET_64BIT);
15505 #endif
15506 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15507 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15508 fprintf (file, "%s%s%d-%s%d\n",
15509 directive, LPREFIX, value, LPREFIX, rel);
15510 #if TARGET_MACHO
15511 else if (TARGET_MACHO)
15513 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15514 machopic_output_function_base_name (file);
15515 putc ('\n', file);
15517 #endif
15518 else if (HAVE_AS_GOTOFF_IN_DATA)
15519 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15520 else
15521 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15522 GOT_SYMBOL_NAME, LPREFIX, value);
15525 #define LEA_MAX_STALL (3)
15526 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
15528 /* Increase the given DISTANCE in half-cycles according to
15529 dependencies between the PREV and NEXT instructions.
15530 Add 1 half-cycle if there is no dependency, and
15531 go to the next cycle if there is a dependency. */
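/* e.g. starting from DISTANCE 3, a register dependency between PREV and
NEXT yields 3 + 1 + 2 = 6 half-cycles (round up to a full cycle, then
add one cycle); with no dependency the result is 4. */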
15533 static unsigned int
15534 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
15536 df_ref def, use;
15538 if (!prev || !next)
15539 return distance + (distance & 1) + 2;
15541 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
15542 return distance + 1;
15544 FOR_EACH_INSN_USE (use, next)
15545 FOR_EACH_INSN_DEF (def, prev)
15546 if (!DF_REF_IS_ARTIFICIAL (def)
15547 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
15548 return distance + (distance & 1) + 2;
15550 return distance + 1;
15553 /* Function checks if instruction INSN defines register number
15554 REGNO1 or REGNO2. */
15556 bool
15557 insn_defines_reg (unsigned int regno1, unsigned int regno2,
15558 rtx_insn *insn)
15560 df_ref def;
15562 FOR_EACH_INSN_DEF (def, insn)
15563 if (DF_REF_REG_DEF_P (def)
15564 && !DF_REF_IS_ARTIFICIAL (def)
15565 && (regno1 == DF_REF_REGNO (def)
15566 || regno2 == DF_REF_REGNO (def)))
15567 return true;
15569 return false;
15572 /* Function checks if instruction INSN uses register number
15573 REGNO as part of an address expression. */
15575 static bool
15576 insn_uses_reg_mem (unsigned int regno, rtx insn)
15578 df_ref use;
15580 FOR_EACH_INSN_USE (use, insn)
15581 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
15582 return true;
15584 return false;
15587 /* Search backward for a non-agu definition of register number REGNO1
15588 or register number REGNO2 in the basic block, starting from instruction
15589 START up to the head of the basic block or instruction INSN.
15591 Sets *FOUND to true if a definition was found
15592 and to false otherwise.
15594 The distance in half-cycles between START and the found instruction or
15595 the head of the BB is added to DISTANCE and returned. */
15597 static int
15598 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
15599 rtx_insn *insn, int distance,
15600 rtx_insn *start, bool *found)
15602 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
15603 rtx_insn *prev = start;
15604 rtx_insn *next = NULL;
15606 *found = false;
15608 while (prev
15609 && prev != insn
15610 && distance < LEA_SEARCH_THRESHOLD)
15612 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
15614 distance = increase_distance (prev, next, distance);
15615 if (insn_defines_reg (regno1, regno2, prev))
15617 if (recog_memoized (prev) < 0
15618 || get_attr_type (prev) != TYPE_LEA)
15620 *found = true;
15621 return distance;
15625 next = prev;
15627 if (prev == BB_HEAD (bb))
15628 break;
15630 prev = PREV_INSN (prev);
15633 return distance;
15636 /* Search backward for non-agu definition of register number REGNO1
15637 or register number REGNO2 in INSN's basic block until
15638 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15639 2. Reach neighbor BBs boundary, or
15640 3. Reach agu definition.
15641 Returns the distance between the non-agu definition point and INSN.
15642 If no definition point, returns -1. */
15644 static int
15645 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15646 rtx_insn *insn)
15648 basic_block bb = BLOCK_FOR_INSN (insn);
15649 int distance = 0;
15650 bool found = false;
15652 if (insn != BB_HEAD (bb))
15653 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
15654 distance, PREV_INSN (insn),
15655 &found);
15657 if (!found && distance < LEA_SEARCH_THRESHOLD)
15659 edge e;
15660 edge_iterator ei;
15661 bool simple_loop = false;
15663 FOR_EACH_EDGE (e, ei, bb->preds)
15664 if (e->src == bb)
15666 simple_loop = true;
15667 break;
15670 if (simple_loop)
15671 distance = distance_non_agu_define_in_bb (regno1, regno2,
15672 insn, distance,
15673 BB_END (bb), &found);
15674 else
15676 int shortest_dist = -1;
15677 bool found_in_bb = false;
15679 FOR_EACH_EDGE (e, ei, bb->preds)
15681 int bb_dist
15682 = distance_non_agu_define_in_bb (regno1, regno2,
15683 insn, distance,
15684 BB_END (e->src),
15685 &found_in_bb);
15686 if (found_in_bb)
15688 if (shortest_dist < 0)
15689 shortest_dist = bb_dist;
15690 else if (bb_dist > 0)
15691 shortest_dist = MIN (bb_dist, shortest_dist);
15693 found = true;
15697 distance = shortest_dist;
15701 if (!found)
15702 return -1;
15704 return distance >> 1;
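/* Distances are accumulated in half-cycles, so the shift above converts
the result to whole cycles. */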
15707 /* Return the distance in half-cycles between INSN and the next
15708 insn that uses register number REGNO in a memory address, added
15709 to DISTANCE. Return -1 if REGNO is set.
15711 Put true into *FOUND if a register usage was found and
15712 false otherwise.
15713 Put true into *REDEFINED if a register redefinition was
15714 found and false otherwise. */
15716 static int
15717 distance_agu_use_in_bb (unsigned int regno,
15718 rtx_insn *insn, int distance, rtx_insn *start,
15719 bool *found, bool *redefined)
15721 basic_block bb = NULL;
15722 rtx_insn *next = start;
15723 rtx_insn *prev = NULL;
15725 *found = false;
15726 *redefined = false;
15728 if (start != NULL_RTX)
15730 bb = BLOCK_FOR_INSN (start);
15731 if (start != BB_HEAD (bb))
15732 /* If insn and start belong to the same bb, set prev to insn,
15733 so the call to increase_distance will increase the distance
15734 between insns by 1. */
15735 prev = insn;
15738 while (next
15739 && next != insn
15740 && distance < LEA_SEARCH_THRESHOLD)
15742 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
15744 distance = increase_distance(prev, next, distance);
15745 if (insn_uses_reg_mem (regno, next))
15747 /* Return DISTANCE if OP0 is used in memory
15748 address in NEXT. */
15749 *found = true;
15750 return distance;
15753 if (insn_defines_reg (regno, INVALID_REGNUM, next))
15755 /* Return -1 if OP0 is set in NEXT. */
15756 *redefined = true;
15757 return -1;
15760 prev = next;
15763 if (next == BB_END (bb))
15764 break;
15766 next = NEXT_INSN (next);
15769 return distance;
15772 /* Return the distance between INSN and the next insn that uses
15773 register number REGNO0 in a memory address. Return -1 if no such
15774 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15776 static int
15777 distance_agu_use (unsigned int regno0, rtx_insn *insn)
15779 basic_block bb = BLOCK_FOR_INSN (insn);
15780 int distance = 0;
15781 bool found = false;
15782 bool redefined = false;
15784 if (insn != BB_END (bb))
15785 distance = distance_agu_use_in_bb (regno0, insn, distance,
15786 NEXT_INSN (insn),
15787 &found, &redefined);
15789 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
15791 edge e;
15792 edge_iterator ei;
15793 bool simple_loop = false;
15795 FOR_EACH_EDGE (e, ei, bb->succs)
15796 if (e->dest == bb)
15798 simple_loop = true;
15799 break;
15802 if (simple_loop)
15803 distance = distance_agu_use_in_bb (regno0, insn,
15804 distance, BB_HEAD (bb),
15805 &found, &redefined);
15806 else
15808 int shortest_dist = -1;
15809 bool found_in_bb = false;
15810 bool redefined_in_bb = false;
15812 FOR_EACH_EDGE (e, ei, bb->succs)
15814 int bb_dist
15815 = distance_agu_use_in_bb (regno0, insn,
15816 distance, BB_HEAD (e->dest),
15817 &found_in_bb, &redefined_in_bb);
15818 if (found_in_bb)
15820 if (shortest_dist < 0)
15821 shortest_dist = bb_dist;
15822 else if (bb_dist > 0)
15823 shortest_dist = MIN (bb_dist, shortest_dist);
15825 found = true;
15829 distance = shortest_dist;
15833 if (!found || redefined)
15834 return -1;
15836 return distance >> 1;
15839 /* Define this macro to tune LEA priority vs ADD; it takes effect when
15840 there is a dilemma of choosing LEA or ADD.
15841 Negative value: ADD is preferred over LEA.
15842 Zero: Neutral.
15843 Positive value: LEA is preferred over ADD. */
15844 #define IX86_LEA_PRIORITY 0
15846 /* Return true if using lea INSN has a performance advantage
15847 over a sequence of instructions. The instruction sequence has
15848 SPLIT_COST cycles higher latency than the lea itself. */
15850 static bool
15851 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
15852 unsigned int regno2, int split_cost, bool has_scale)
15854 int dist_define, dist_use;
15856 /* For Atom processors newer than Bonnell, if using a 2-source or
15857 3-source LEA for non-destructive destination purposes, or due to
15858 wanting ability to use SCALE, the use of LEA is justified. */
15859 if (!TARGET_CPU_P (BONNELL))
15861 if (has_scale)
15862 return true;
15863 if (split_cost < 1)
15864 return false;
15865 if (regno0 == regno1 || regno0 == regno2)
15866 return false;
15867 return true;
15870 /* Remember recog_data content. */
15871 struct recog_data_d recog_data_save = recog_data;
15873 dist_define = distance_non_agu_define (regno1, regno2, insn);
15874 dist_use = distance_agu_use (regno0, insn);
15876 /* distance_non_agu_define can call get_attr_type which can call
15877 recog_memoized, restore recog_data back to previous content. */
15878 recog_data = recog_data_save;
15880 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
15882 /* If there is no non-AGU operand definition, no AGU
15883 operand usage and the split cost is 0, then both the lea
15884 and non-lea variants have the same priority. Currently
15885 we prefer lea for 64-bit code and non-lea for 32-bit
15886 code. */
15887 if (dist_use < 0 && split_cost == 0)
15888 return TARGET_64BIT || IX86_LEA_PRIORITY;
15889 else
15890 return true;
15893 /* With a longer definition distance, lea is preferable.
15894 Here we change it to take into account splitting cost and
15895 lea priority. */
15896 dist_define += split_cost + IX86_LEA_PRIORITY;
15898 /* If there is no use in a memory address then we just check
15899 that split cost exceeds AGU stall. */
15900 if (dist_use < 0)
15901 return dist_define > LEA_MAX_STALL;
15903 /* If this insn has both backward non-agu dependence and forward
15904 agu dependence, the one with the shorter distance takes effect. */
15905 return dist_define >= dist_use;
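/* Illustrative example (with made-up distances, assuming the default
   IX86_LEA_PRIORITY of 0): if distance_non_agu_define returns 2 (below
   LEA_MAX_STALL), distance_agu_use returns 4, and SPLIT_COST is 1, then
   dist_define is adjusted to 2 + 1 + 0 = 3, which is smaller than
   dist_use, so the function returns false and the split MOV/ADD
   sequence is chosen over LEA.  */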
15908 /* Return true if we need to split op0 = op1 + op2 into a sequence of
15909 move and add to avoid AGU stalls. */
15911 bool
15912 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
15914 unsigned int regno0, regno1, regno2;
15916 /* Check if we need to optimize. */
15917 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15918 return false;
15920 regno0 = true_regnum (operands[0]);
15921 regno1 = true_regnum (operands[1]);
15922 regno2 = true_regnum (operands[2]);
15924 /* We need to split only adds with a non-destructive
15925 destination operand. */
15926 if (regno0 == regno1 || regno0 == regno2)
15927 return false;
15928 else
15929 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
15932 /* Return true if we should emit lea instruction instead of mov
15933 instruction. */
15935 bool
15936 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
15938 unsigned int regno0, regno1;
15940 /* Check if we need to optimize. */
15941 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15942 return false;
15944 /* Use lea for reg to reg moves only. */
15945 if (!REG_P (operands[0]) || !REG_P (operands[1]))
15946 return false;
15948 regno0 = true_regnum (operands[0]);
15949 regno1 = true_regnum (operands[1]);
15951 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
15954 /* Return true if we need to split lea into a sequence of
15955 instructions to avoid AGU stalls during peephole2. */
15957 bool
15958 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
15960 unsigned int regno0, regno1, regno2;
15961 int split_cost;
15962 struct ix86_address parts;
15963 int ok;
15965 /* The "at least two components" test below might not catch simple
15966 move or zero extension insns if parts.base is non-NULL and parts.disp
15967 is const0_rtx as the only components in the address, e.g. if the
15968 register is %rbp or %r13. As this test is much cheaper and moves or
15969 zero extensions are the common case, do this check first. */
15970 if (REG_P (operands[1])
15971 || (SImode_address_operand (operands[1], VOIDmode)
15972 && REG_P (XEXP (operands[1], 0))))
15973 return false;
15975 ok = ix86_decompose_address (operands[1], &parts);
15976 gcc_assert (ok);
15978 /* There should be at least two components in the address. */
15979 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
15980 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
15981 return false;
15983 /* We should not split into add if non legitimate pic
15984 operand is used as displacement. */
15985 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
15986 return false;
15988 regno0 = true_regnum (operands[0]) ;
15989 regno1 = INVALID_REGNUM;
15990 regno2 = INVALID_REGNUM;
15992 if (parts.base)
15993 regno1 = true_regnum (parts.base);
15994 if (parts.index)
15995 regno2 = true_regnum (parts.index);
15997 /* Use add for a = a + b and a = b + a since it is faster and shorter
15998 than lea for most processors. For processors like BONNELL, if
15999 the destination register of LEA holds an actual address which will
16000 be used soon, LEA is better; otherwise ADD is better. */
16001 if (!TARGET_CPU_P (BONNELL)
16002 && parts.scale == 1
16003 && (!parts.disp || parts.disp == const0_rtx)
16004 && (regno0 == regno1 || regno0 == regno2))
16005 return true;
16007 /* Split with -Oz if the encoding requires fewer bytes. */
16008 if (optimize_size > 1
16009 && parts.scale > 1
16010 && !parts.base
16011 && (!parts.disp || parts.disp == const0_rtx))
16012 return true;
16014 /* Check if we need to optimize. */
16015 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
16016 return false;
16018 split_cost = 0;
16020 /* Compute how many cycles we will add to execution time
16021 if split lea into a sequence of instructions. */
16022 if (parts.base || parts.index)
16024 /* Have to use a mov instruction if the non-destructive
16025 destination form is used. */
16026 if (regno1 != regno0 && regno2 != regno0)
16027 split_cost += 1;
16029 /* Have to add index to base if both exist. */
16030 if (parts.base && parts.index)
16031 split_cost += 1;
16033 /* Have to use shift and adds if scale is 2 or greater. */
16034 if (parts.scale > 1)
16036 if (regno0 != regno1)
16037 split_cost += 1;
16038 else if (regno2 == regno0)
16039 split_cost += 4;
16040 else
16041 split_cost += parts.scale;
16044 /* Have to use add instruction with immediate if
16045 disp is non zero. */
16046 if (parts.disp && parts.disp != const0_rtx)
16047 split_cost += 1;
16049 /* Subtract the price of lea. */
16050 split_cost -= 1;
16053 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
16054 parts.scale > 1);
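/* Worked example for the split cost above (hypothetical operands):
   for "lea 16(%rax,%rbx,4), %rcx" we have base, index, scale 4 and a
   nonzero displacement, with a destination distinct from both sources,
   so split_cost = 1 (extra mov for the non-destructive destination)
   + 1 (add index to base) + 1 (scale > 1 with regno0 != regno1)
   + 1 (add of the displacement) - 1 (the lea itself) = 3, and
   ix86_lea_outperforms then decides whether that extra latency is
   cheaper than a possible AGU stall.  */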
16057 /* Return true if it is ok to optimize an ADD operation to LEA
16058 operation to avoid flag register consumption. For most processors,
16059 ADD is faster than LEA. For processors like BONNELL, if the
16060 destination register of LEA holds an actual address which will be
16061 used soon, LEA is better; otherwise ADD is better. */
16063 bool
16064 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
16066 unsigned int regno0 = true_regnum (operands[0]);
16067 unsigned int regno1 = true_regnum (operands[1]);
16068 unsigned int regno2 = true_regnum (operands[2]);
16070 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16071 if (regno0 != regno1 && regno0 != regno2)
16072 return true;
16074 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16075 return false;
16077 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
16080 /* Return true if destination reg of SET_BODY is shift count of
16081 USE_BODY. */
16083 static bool
16084 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16086 rtx set_dest;
16087 rtx shift_rtx;
16088 int i;
16090 /* Retrieve destination of SET_BODY. */
16091 switch (GET_CODE (set_body))
16093 case SET:
16094 set_dest = SET_DEST (set_body);
16095 if (!set_dest || !REG_P (set_dest))
16096 return false;
16097 break;
16098 case PARALLEL:
16099 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16100 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16101 use_body))
16102 return true;
16103 /* FALLTHROUGH */
16104 default:
16105 return false;
16108 /* Retrieve shift count of USE_BODY. */
16109 switch (GET_CODE (use_body))
16111 case SET:
16112 shift_rtx = XEXP (use_body, 1);
16113 break;
16114 case PARALLEL:
16115 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16116 if (ix86_dep_by_shift_count_body (set_body,
16117 XVECEXP (use_body, 0, i)))
16118 return true;
16119 /* FALLTHROUGH */
16120 default:
16121 return false;
16124 if (shift_rtx
16125 && (GET_CODE (shift_rtx) == ASHIFT
16126 || GET_CODE (shift_rtx) == LSHIFTRT
16127 || GET_CODE (shift_rtx) == ASHIFTRT
16128 || GET_CODE (shift_rtx) == ROTATE
16129 || GET_CODE (shift_rtx) == ROTATERT))
16131 rtx shift_count = XEXP (shift_rtx, 1);
16133 /* Return true if shift count is dest of SET_BODY. */
16134 if (REG_P (shift_count))
16136 /* Add check since it can be invoked before register
16137 allocation in pre-reload schedule. */
16138 if (reload_completed
16139 && true_regnum (set_dest) == true_regnum (shift_count))
16140 return true;
16141 else if (REGNO (set_dest) == REGNO (shift_count))
16142 return true;
16146 return false;
16149 /* Return true if destination reg of SET_INSN is shift count of
16150 USE_INSN. */
16152 bool
16153 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16155 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16156 PATTERN (use_insn));
16159 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16160 are ok, keeping in mind the possible movddup alternative. */
16162 bool
16163 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16165 if (MEM_P (operands[0]))
16166 return rtx_equal_p (operands[0], operands[1 + high]);
16167 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16168 return false;
16169 return true;
16172 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16173 then replicate the value for all elements of the vector
16174 register. */
16177 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
16179 int i, n_elt;
16180 rtvec v;
16181 machine_mode scalar_mode;
16183 switch (mode)
16185 case E_V64QImode:
16186 case E_V32QImode:
16187 case E_V16QImode:
16188 case E_V32HImode:
16189 case E_V16HImode:
16190 case E_V8HImode:
16191 case E_V16SImode:
16192 case E_V8SImode:
16193 case E_V4SImode:
16194 case E_V2SImode:
16195 case E_V8DImode:
16196 case E_V4DImode:
16197 case E_V2DImode:
16198 gcc_assert (vect);
16199 /* FALLTHRU */
16200 case E_V2HFmode:
16201 case E_V4HFmode:
16202 case E_V8HFmode:
16203 case E_V16HFmode:
16204 case E_V32HFmode:
16205 case E_V16SFmode:
16206 case E_V8SFmode:
16207 case E_V4SFmode:
16208 case E_V2SFmode:
16209 case E_V8DFmode:
16210 case E_V4DFmode:
16211 case E_V2DFmode:
16212 case E_V32BFmode:
16213 case E_V16BFmode:
16214 case E_V8BFmode:
16215 case E_V4BFmode:
16216 case E_V2BFmode:
16217 n_elt = GET_MODE_NUNITS (mode);
16218 v = rtvec_alloc (n_elt);
16219 scalar_mode = GET_MODE_INNER (mode);
16221 RTVEC_ELT (v, 0) = value;
16223 for (i = 1; i < n_elt; ++i)
16224 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
16226 return gen_rtx_CONST_VECTOR (mode, v);
16228 default:
16229 gcc_unreachable ();
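/* For example, ix86_build_const_vector (V4SFmode, true, x) yields the
   CONST_VECTOR {x, x, x, x}, while with VECT equal to false it yields
   {x, 0.0, 0.0, 0.0}: only element 0 carries VALUE and the remaining
   elements are CONST0_RTX of the scalar mode.  */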
16233 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16234 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16235 for an SSE register. If VECT is true, then replicate the mask for
16236 all elements of the vector register. If INVERT is true, then create
16237 a mask excluding the sign bit. */
16240 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
16242 machine_mode vec_mode, imode;
16243 wide_int w;
16244 rtx mask, v;
16246 switch (mode)
16248 case E_V2HFmode:
16249 case E_V4HFmode:
16250 case E_V8HFmode:
16251 case E_V16HFmode:
16252 case E_V32HFmode:
16253 case E_V32BFmode:
16254 case E_V16BFmode:
16255 case E_V8BFmode:
16256 case E_V4BFmode:
16257 case E_V2BFmode:
16258 vec_mode = mode;
16259 imode = HImode;
16260 break;
16262 case E_V16SImode:
16263 case E_V16SFmode:
16264 case E_V8SImode:
16265 case E_V4SImode:
16266 case E_V8SFmode:
16267 case E_V4SFmode:
16268 case E_V2SFmode:
16269 case E_V2SImode:
16270 vec_mode = mode;
16271 imode = SImode;
16272 break;
16274 case E_V8DImode:
16275 case E_V4DImode:
16276 case E_V2DImode:
16277 case E_V8DFmode:
16278 case E_V4DFmode:
16279 case E_V2DFmode:
16280 vec_mode = mode;
16281 imode = DImode;
16282 break;
16284 case E_TImode:
16285 case E_TFmode:
16286 vec_mode = VOIDmode;
16287 imode = TImode;
16288 break;
16290 default:
16291 gcc_unreachable ();
16294 machine_mode inner_mode = GET_MODE_INNER (mode);
16295 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
16296 GET_MODE_BITSIZE (inner_mode));
16297 if (invert)
16298 w = wi::bit_not (w);
16300 /* Force this value into the low part of a fp vector constant. */
16301 mask = immed_wide_int_const (w, imode);
16302 mask = gen_lowpart (inner_mode, mask);
16304 if (vec_mode == VOIDmode)
16305 return force_reg (inner_mode, mask);
16307 v = ix86_build_const_vector (vec_mode, vect, mask);
16308 return force_reg (vec_mode, v);
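/* For example, for V4SFmode each 32-bit element of the mask is
   0x80000000 (only the sign bit set), or 0x7fffffff when INVERT is
   true; with VECT the value is replicated into all four elements,
   giving the usual constant for vectorized neg/abs/copysign.  */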
16311 /* Return HOST_WIDE_INT for const vector OP in MODE. */
16313 HOST_WIDE_INT
16314 ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
16316 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16317 gcc_unreachable ();
16319 int nunits = GET_MODE_NUNITS (mode);
16320 wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
16321 machine_mode innermode = GET_MODE_INNER (mode);
16322 unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
16324 switch (mode)
16326 case E_V2QImode:
16327 case E_V4QImode:
16328 case E_V2HImode:
16329 case E_V8QImode:
16330 case E_V4HImode:
16331 case E_V2SImode:
16332 for (int i = 0; i < nunits; ++i)
16334 int v = INTVAL (XVECEXP (op, 0, i));
16335 wide_int wv = wi::shwi (v, innermode_bits);
16336 val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
16338 break;
16339 case E_V2HFmode:
16340 case E_V2BFmode:
16341 case E_V4HFmode:
16342 case E_V4BFmode:
16343 case E_V2SFmode:
16344 for (int i = 0; i < nunits; ++i)
16346 rtx x = XVECEXP (op, 0, i);
16347 int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
16348 REAL_MODE_FORMAT (innermode));
16349 wide_int wv = wi::shwi (v, innermode_bits);
16350 val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
16352 break;
16353 default:
16354 gcc_unreachable ();
16357 return val.to_shwi ();
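/* Example: for a V4QImode CONST_VECTOR {1, 2, 3, 4} each 8-bit element
   is inserted at bit position 8 * i, so element 0 lands in the least
   significant byte and the returned value is 0x04030201.  */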
16360 int ix86_get_flags_cc (rtx_code code)
16362 switch (code)
16364 case NE: return X86_CCNE;
16365 case EQ: return X86_CCE;
16366 case GE: return X86_CCNL;
16367 case GT: return X86_CCNLE;
16368 case LE: return X86_CCLE;
16369 case LT: return X86_CCL;
16370 case GEU: return X86_CCNB;
16371 case GTU: return X86_CCNBE;
16372 case LEU: return X86_CCBE;
16373 case LTU: return X86_CCB;
16374 default: return -1;
16378 /* Return TRUE or FALSE depending on whether the first SET in INSN
16379 has source and destination with matching CC modes, and that the
16380 CC mode is at least as constrained as REQ_MODE. */
16382 bool
16383 ix86_match_ccmode (rtx insn, machine_mode req_mode)
16385 rtx set;
16386 machine_mode set_mode;
16388 set = PATTERN (insn);
16389 if (GET_CODE (set) == PARALLEL)
16390 set = XVECEXP (set, 0, 0);
16391 gcc_assert (GET_CODE (set) == SET);
16392 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16394 set_mode = GET_MODE (SET_DEST (set));
16395 switch (set_mode)
16397 case E_CCNOmode:
16398 if (req_mode != CCNOmode
16399 && (req_mode != CCmode
16400 || XEXP (SET_SRC (set), 1) != const0_rtx))
16401 return false;
16402 break;
16403 case E_CCmode:
16404 if (req_mode == CCGCmode)
16405 return false;
16406 /* FALLTHRU */
16407 case E_CCGCmode:
16408 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16409 return false;
16410 /* FALLTHRU */
16411 case E_CCGOCmode:
16412 if (req_mode == CCZmode)
16413 return false;
16414 /* FALLTHRU */
16415 case E_CCZmode:
16416 break;
16418 case E_CCGZmode:
16420 case E_CCAmode:
16421 case E_CCCmode:
16422 case E_CCOmode:
16423 case E_CCPmode:
16424 case E_CCSmode:
16425 if (set_mode != req_mode)
16426 return false;
16427 break;
16429 default:
16430 gcc_unreachable ();
16433 return GET_MODE (SET_SRC (set)) == set_mode;
16436 machine_mode
16437 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16439 machine_mode mode = GET_MODE (op0);
16441 if (SCALAR_FLOAT_MODE_P (mode))
16443 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16444 return CCFPmode;
16447 switch (code)
16449 /* Only zero flag is needed. */
16450 case EQ: /* ZF=0 */
16451 case NE: /* ZF!=0 */
16452 return CCZmode;
16453 /* Codes needing carry flag. */
16454 case GEU: /* CF=0 */
16455 case LTU: /* CF=1 */
16456 rtx geu;
16457 /* Detect overflow checks. They need just the carry flag. */
16458 if (GET_CODE (op0) == PLUS
16459 && (rtx_equal_p (op1, XEXP (op0, 0))
16460 || rtx_equal_p (op1, XEXP (op0, 1))))
16461 return CCCmode;
16462 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
16463 Match LTU of op0
16464 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
16465 and op1
16466 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
16467 where CC_CCC is either CC or CCC. */
16468 else if (code == LTU
16469 && GET_CODE (op0) == NEG
16470 && GET_CODE (geu = XEXP (op0, 0)) == GEU
16471 && REG_P (XEXP (geu, 0))
16472 && (GET_MODE (XEXP (geu, 0)) == CCCmode
16473 || GET_MODE (XEXP (geu, 0)) == CCmode)
16474 && REGNO (XEXP (geu, 0)) == FLAGS_REG
16475 && XEXP (geu, 1) == const0_rtx
16476 && GET_CODE (op1) == LTU
16477 && REG_P (XEXP (op1, 0))
16478 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
16479 && REGNO (XEXP (op1, 0)) == FLAGS_REG
16480 && XEXP (op1, 1) == const0_rtx)
16481 return CCCmode;
16482 /* Similarly for *x86_cmc pattern.
16483 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
16484 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
16485 It is sufficient to test that the operand modes are CCCmode. */
16486 else if (code == LTU
16487 && GET_CODE (op0) == NEG
16488 && GET_CODE (XEXP (op0, 0)) == LTU
16489 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
16490 && GET_CODE (op1) == GEU
16491 && GET_MODE (XEXP (op1, 0)) == CCCmode)
16492 return CCCmode;
16493 else
16494 return CCmode;
16495 case GTU: /* CF=0 & ZF=0 */
16496 case LEU: /* CF=1 | ZF=1 */
16497 return CCmode;
16498 /* Codes possibly doable only with sign flag when
16499 comparing against zero. */
16500 case GE: /* SF=OF or SF=0 */
16501 case LT: /* SF<>OF or SF=1 */
16502 if (op1 == const0_rtx)
16503 return CCGOCmode;
16504 else
16505 /* For other cases Carry flag is not required. */
16506 return CCGCmode;
16507 /* Codes doable only with the sign flag when comparing
16508 against zero, but we lack a jump instruction for it,
16509 so we need to use relational tests against overflow,
16510 which thus needs to be zero. */
16511 case GT: /* ZF=0 & SF=OF */
16512 case LE: /* ZF=1 | SF<>OF */
16513 if (op1 == const0_rtx)
16514 return CCNOmode;
16515 else
16516 return CCGCmode;
16517 default:
16518 /* CCmode should be used in all other cases. */
16519 return CCmode;
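/* Some examples of the mapping above: (GEU (plus a b) b), as generated
   for unsigned overflow checks, needs only the carry flag and gets
   CCCmode; EQ and NE need only the zero flag and get CCZmode;
   (GT x const0_rtx) gets CCNOmode, while GT against a nonzero operand
   falls back to CCGCmode.  */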
16523 /* Return TRUE or FALSE depending on whether the ptest instruction
16524 INSN has source and destination with suitable matching CC modes. */
16526 bool
16527 ix86_match_ptest_ccmode (rtx insn)
16529 rtx set, src;
16530 machine_mode set_mode;
16532 set = PATTERN (insn);
16533 gcc_assert (GET_CODE (set) == SET);
16534 src = SET_SRC (set);
16535 gcc_assert (GET_CODE (src) == UNSPEC
16536 && XINT (src, 1) == UNSPEC_PTEST);
16538 set_mode = GET_MODE (src);
16539 if (set_mode != CCZmode
16540 && set_mode != CCCmode
16541 && set_mode != CCmode)
16542 return false;
16543 return GET_MODE (SET_DEST (set)) == set_mode;
16546 /* Return the fixed registers used for condition codes. */
16548 static bool
16549 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16551 *p1 = FLAGS_REG;
16552 *p2 = INVALID_REGNUM;
16553 return true;
16556 /* If two condition code modes are compatible, return a condition code
16557 mode which is compatible with both. Otherwise, return
16558 VOIDmode. */
16560 static machine_mode
16561 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
16563 if (m1 == m2)
16564 return m1;
16566 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16567 return VOIDmode;
16569 if ((m1 == CCGCmode && m2 == CCGOCmode)
16570 || (m1 == CCGOCmode && m2 == CCGCmode))
16571 return CCGCmode;
16573 if ((m1 == CCNOmode && m2 == CCGOCmode)
16574 || (m1 == CCGOCmode && m2 == CCNOmode))
16575 return CCNOmode;
16577 if (m1 == CCZmode
16578 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
16579 return m2;
16580 else if (m2 == CCZmode
16581 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
16582 return m1;
16584 switch (m1)
16586 default:
16587 gcc_unreachable ();
16589 case E_CCmode:
16590 case E_CCGCmode:
16591 case E_CCGOCmode:
16592 case E_CCNOmode:
16593 case E_CCAmode:
16594 case E_CCCmode:
16595 case E_CCOmode:
16596 case E_CCPmode:
16597 case E_CCSmode:
16598 case E_CCZmode:
16599 switch (m2)
16601 default:
16602 return VOIDmode;
16604 case E_CCmode:
16605 case E_CCGCmode:
16606 case E_CCGOCmode:
16607 case E_CCNOmode:
16608 case E_CCAmode:
16609 case E_CCCmode:
16610 case E_CCOmode:
16611 case E_CCPmode:
16612 case E_CCSmode:
16613 case E_CCZmode:
16614 return CCmode;
16617 case E_CCFPmode:
16618 /* These are only compatible with themselves, which we already
16619 checked above. */
16620 return VOIDmode;
16624 /* Return strategy to use for floating-point. We assume that fcomi is always
16625 preferable where available, since that is also true when looking at size
16626 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16628 enum ix86_fpcmp_strategy
16629 ix86_fp_comparison_strategy (enum rtx_code)
16631 /* Do fcomi/sahf based test when profitable. */
16633 if (TARGET_CMOVE)
16634 return IX86_FPCMP_COMI;
16636 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
16637 return IX86_FPCMP_SAHF;
16639 return IX86_FPCMP_ARITH;
16642 /* Convert comparison codes we use to represent FP comparison to integer
16643 code that will result in proper branch. Return UNKNOWN if no such code
16644 is available. */
16646 enum rtx_code
16647 ix86_fp_compare_code_to_integer (enum rtx_code code)
16649 switch (code)
16651 case GT:
16652 return GTU;
16653 case GE:
16654 return GEU;
16655 case ORDERED:
16656 case UNORDERED:
16657 return code;
16658 case UNEQ:
16659 return EQ;
16660 case UNLT:
16661 return LTU;
16662 case UNLE:
16663 return LEU;
16664 case LTGT:
16665 return NE;
16666 case EQ:
16667 case NE:
16668 if (TARGET_AVX10_2_256)
16669 return code;
16670 /* FALLTHRU. */
16671 default:
16672 return UNKNOWN;
16676 /* Zero extend possibly SImode EXP to Pmode register. */
16678 ix86_zero_extend_to_Pmode (rtx exp)
16680 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
16683 /* Return true if the function is called via PLT. */
16685 bool
16686 ix86_call_use_plt_p (rtx call_op)
16688 if (SYMBOL_REF_LOCAL_P (call_op))
16690 if (SYMBOL_REF_DECL (call_op)
16691 && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
16693 /* NB: All ifunc functions must be called via PLT. */
16694 cgraph_node *node
16695 = cgraph_node::get (SYMBOL_REF_DECL (call_op));
16696 if (node && node->ifunc_resolver)
16697 return true;
16699 return false;
16701 return true;
16704 /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
16705 the PLT entry will be used as the function address for local IFUNC
16706 functions. When the PIC register is needed for PLT call, indirect
16707 call via the PLT entry will fail since the PIC register may not be
16708 set up properly for indirect call. In this case, we should return
16709 false. */
16711 static bool
16712 ix86_ifunc_ref_local_ok (void)
16714 return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
16717 /* Return true if the function being called was marked with attribute
16718 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
16719 to handle the non-PIC case in the backend because there is no easy
16720 interface for the front-end to force non-PLT calls to use the GOT.
16721 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
16722 to call the function marked "noplt" indirectly. */
16724 static bool
16725 ix86_nopic_noplt_attribute_p (rtx call_op)
16727 if (flag_pic || ix86_cmodel == CM_LARGE
16728 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
16729 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
16730 || SYMBOL_REF_LOCAL_P (call_op))
16731 return false;
16733 tree symbol_decl = SYMBOL_REF_DECL (call_op);
16735 if (!flag_plt
16736 || (symbol_decl != NULL_TREE
16737 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
16738 return true;
16740 return false;
16743 /* Helper to output the jmp/call. */
16744 static void
16745 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
16747 if (thunk_name != NULL)
16749 if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
16750 && ix86_indirect_branch_cs_prefix)
16751 fprintf (asm_out_file, "\tcs\n");
16752 fprintf (asm_out_file, "\tjmp\t");
16753 assemble_name (asm_out_file, thunk_name);
16754 putc ('\n', asm_out_file);
16755 if ((ix86_harden_sls & harden_sls_indirect_jmp))
16756 fputs ("\tint3\n", asm_out_file);
16758 else
16759 output_indirect_thunk (regno);
16762 /* Output indirect branch via a call and return thunk. CALL_OP is a
16763 register which contains the branch target. XASM is the assembly
16764 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
16765 A normal call is converted to:
16767 call __x86_indirect_thunk_reg
16769 and a tail call is converted to:
16771 jmp __x86_indirect_thunk_reg
16774 static void
16775 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
16777 char thunk_name_buf[32];
16778 char *thunk_name;
16779 enum indirect_thunk_prefix need_prefix
16780 = indirect_thunk_need_prefix (current_output_insn);
16781 int regno = REGNO (call_op);
16783 if (cfun->machine->indirect_branch_type
16784 != indirect_branch_thunk_inline)
16786 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16787 SET_HARD_REG_BIT (indirect_thunks_used, regno);
16789 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16790 thunk_name = thunk_name_buf;
16792 else
16793 thunk_name = NULL;
16795 if (sibcall_p)
16796 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16797 else
16799 if (thunk_name != NULL)
16801 if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
16802 && ix86_indirect_branch_cs_prefix)
16803 fprintf (asm_out_file, "\tcs\n");
16804 fprintf (asm_out_file, "\tcall\t");
16805 assemble_name (asm_out_file, thunk_name);
16806 putc ('\n', asm_out_file);
16807 return;
16810 char indirectlabel1[32];
16811 char indirectlabel2[32];
16813 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16814 INDIRECT_LABEL,
16815 indirectlabelno++);
16816 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16817 INDIRECT_LABEL,
16818 indirectlabelno++);
16820 /* Jump. */
16821 fputs ("\tjmp\t", asm_out_file);
16822 assemble_name_raw (asm_out_file, indirectlabel2);
16823 fputc ('\n', asm_out_file);
16825 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16827 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16829 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16831 /* Call. */
16832 fputs ("\tcall\t", asm_out_file);
16833 assemble_name_raw (asm_out_file, indirectlabel1);
16834 fputc ('\n', asm_out_file);
16838 /* Output indirect branch via a call and return thunk. CALL_OP is
16839 the branch target. XASM is the assembly template for CALL_OP.
16840 Branch is a tail call if SIBCALL_P is true. A normal call is
16841 converted to:
16843 jmp L2
16845 push CALL_OP
16846 jmp __x86_indirect_thunk
16848 call L1
16850 and a tail call is converted to:
16852 push CALL_OP
16853 jmp __x86_indirect_thunk
16856 static void
16857 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
16858 bool sibcall_p)
16860 char thunk_name_buf[32];
16861 char *thunk_name;
16862 char push_buf[64];
16863 enum indirect_thunk_prefix need_prefix
16864 = indirect_thunk_need_prefix (current_output_insn);
16865 int regno = -1;
16867 if (cfun->machine->indirect_branch_type
16868 != indirect_branch_thunk_inline)
16870 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16871 indirect_thunk_needed = true;
16872 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16873 thunk_name = thunk_name_buf;
16875 else
16876 thunk_name = NULL;
16878 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
16879 TARGET_64BIT ? 'q' : 'l', xasm);
16881 if (sibcall_p)
16883 output_asm_insn (push_buf, &call_op);
16884 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16886 else
16888 char indirectlabel1[32];
16889 char indirectlabel2[32];
16891 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16892 INDIRECT_LABEL,
16893 indirectlabelno++);
16894 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16895 INDIRECT_LABEL,
16896 indirectlabelno++);
16898 /* Jump. */
16899 fputs ("\tjmp\t", asm_out_file);
16900 assemble_name_raw (asm_out_file, indirectlabel2);
16901 fputc ('\n', asm_out_file);
16903 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16905 /* An external function may be called via GOT, instead of PLT. */
16906 if (MEM_P (call_op))
16908 struct ix86_address parts;
16909 rtx addr = XEXP (call_op, 0);
16910 if (ix86_decompose_address (addr, &parts)
16911 && parts.base == stack_pointer_rtx)
16913 /* Since call will adjust stack by -UNITS_PER_WORD,
16914 we must convert "disp(stack, index, scale)" to
16915 "disp+UNITS_PER_WORD(stack, index, scale)". */
16916 if (parts.index)
16918 addr = gen_rtx_MULT (Pmode, parts.index,
16919 GEN_INT (parts.scale));
16920 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16921 addr);
16923 else
16924 addr = stack_pointer_rtx;
16926 rtx disp;
16927 if (parts.disp != NULL_RTX)
16928 disp = plus_constant (Pmode, parts.disp,
16929 UNITS_PER_WORD);
16930 else
16931 disp = GEN_INT (UNITS_PER_WORD);
16933 addr = gen_rtx_PLUS (Pmode, addr, disp);
16934 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
16938 output_asm_insn (push_buf, &call_op);
16940 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16942 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16944 /* Call. */
16945 fputs ("\tcall\t", asm_out_file);
16946 assemble_name_raw (asm_out_file, indirectlabel1);
16947 fputc ('\n', asm_out_file);
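/* Illustration of the stack-pointer adjustment above (32-bit case, so
   UNITS_PER_WORD is 4): if the branch target is loaded from the GOT
   slot 8(%esp), the trailing "call" to the first internal label pushes
   a 4-byte return address before the push insn executes, so the
   operand is rewritten as 12(%esp) to keep addressing the same slot.  */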
16951 /* Output indirect branch via a call and return thunk. CALL_OP is
16952 the branch target. XASM is the assembly template for CALL_OP.
16953 Branch is a tail call if SIBCALL_P is true. */
16955 static void
16956 ix86_output_indirect_branch (rtx call_op, const char *xasm,
16957 bool sibcall_p)
16959 if (REG_P (call_op))
16960 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
16961 else
16962 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
16965 /* Output indirect jump. CALL_OP is the jump target. */
16967 const char *
16968 ix86_output_indirect_jmp (rtx call_op)
16970 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
16972 /* We can't have a red zone, since "call" in the indirect thunk
16973 pushes the return address onto the stack, destroying the red zone. */
16974 if (ix86_red_zone_used)
16975 gcc_unreachable ();
16977 ix86_output_indirect_branch (call_op, "%0", true);
16979 else
16980 output_asm_insn ("%!jmp\t%A0", &call_op);
16981 return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
16984 /* Output return instrumentation for current function if needed. */
16986 static void
16987 output_return_instrumentation (void)
16989 if (ix86_instrument_return != instrument_return_none
16990 && flag_fentry
16991 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
16993 if (ix86_flag_record_return)
16994 fprintf (asm_out_file, "1:\n");
16995 switch (ix86_instrument_return)
16997 case instrument_return_call:
16998 fprintf (asm_out_file, "\tcall\t__return__\n");
16999 break;
17000 case instrument_return_nop5:
17001 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
17002 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
17003 break;
17004 case instrument_return_none:
17005 break;
17008 if (ix86_flag_record_return)
17010 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
17011 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
17012 fprintf (asm_out_file, "\t.previous\n");
17017 /* Output the function return. Add a REP prefix to RET if LONG_P
17018 is true and the function return is kept. */
17020 const char *
17021 ix86_output_function_return (bool long_p)
17023 output_return_instrumentation ();
17025 if (cfun->machine->function_return_type != indirect_branch_keep)
17027 char thunk_name[32];
17028 enum indirect_thunk_prefix need_prefix
17029 = indirect_thunk_need_prefix (current_output_insn);
17031 if (cfun->machine->function_return_type
17032 != indirect_branch_thunk_inline)
17034 bool need_thunk = (cfun->machine->function_return_type
17035 == indirect_branch_thunk);
17036 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
17037 true);
17038 indirect_return_needed |= need_thunk;
17039 fprintf (asm_out_file, "\tjmp\t");
17040 assemble_name (asm_out_file, thunk_name);
17041 putc ('\n', asm_out_file);
17043 else
17044 output_indirect_thunk (INVALID_REGNUM);
17046 return "";
17049 output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
17050 return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
17053 /* Output indirect function return. RET_OP is the function return
17054 target. */
17056 const char *
17057 ix86_output_indirect_function_return (rtx ret_op)
17059 if (cfun->machine->function_return_type != indirect_branch_keep)
17061 char thunk_name[32];
17062 enum indirect_thunk_prefix need_prefix
17063 = indirect_thunk_need_prefix (current_output_insn);
17064 unsigned int regno = REGNO (ret_op);
17065 gcc_assert (regno == CX_REG);
17067 if (cfun->machine->function_return_type
17068 != indirect_branch_thunk_inline)
17070 bool need_thunk = (cfun->machine->function_return_type
17071 == indirect_branch_thunk);
17072 indirect_thunk_name (thunk_name, regno, need_prefix, true);
17074 if (need_thunk)
17076 indirect_return_via_cx = true;
17077 SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
17079 fprintf (asm_out_file, "\tjmp\t");
17080 assemble_name (asm_out_file, thunk_name);
17081 putc ('\n', asm_out_file);
17083 else
17084 output_indirect_thunk (regno);
17086 else
17088 output_asm_insn ("%!jmp\t%A0", &ret_op);
17089 if (ix86_harden_sls & harden_sls_indirect_jmp)
17090 fputs ("\tint3\n", asm_out_file);
17092 return "";
17095 /* Output the assembly for a call instruction. */
17097 const char *
17098 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
17100 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
17101 bool output_indirect_p
17102 = (!TARGET_SEH
17103 && cfun->machine->indirect_branch_type != indirect_branch_keep);
17104 bool seh_nop_p = false;
17105 const char *xasm;
17107 if (SIBLING_CALL_P (insn))
17109 output_return_instrumentation ();
17110 if (direct_p)
17112 if (ix86_nopic_noplt_attribute_p (call_op))
17114 direct_p = false;
17115 if (TARGET_64BIT)
17117 if (output_indirect_p)
17118 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17119 else
17120 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17122 else
17124 if (output_indirect_p)
17125 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17126 else
17127 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17130 else
17131 xasm = "%!jmp\t%P0";
17133 /* SEH epilogue detection requires the indirect branch case
17134 to include REX.W. */
17135 else if (TARGET_SEH)
17136 xasm = "%!rex.W jmp\t%A0";
17137 else
17139 if (output_indirect_p)
17140 xasm = "%0";
17141 else
17142 xasm = "%!jmp\t%A0";
17145 if (output_indirect_p && !direct_p)
17146 ix86_output_indirect_branch (call_op, xasm, true);
17147 else
17149 output_asm_insn (xasm, &call_op);
17150 if (!direct_p
17151 && (ix86_harden_sls & harden_sls_indirect_jmp))
17152 return "int3";
17154 return "";
17157 /* SEH unwinding can require an extra nop to be emitted in several
17158 circumstances. Determine if we have one of those. */
17159 if (TARGET_SEH)
17161 rtx_insn *i;
17163 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
17165 /* Prevent a catch region from being adjacent to a jump that would
17166 be interpreted as an epilogue sequence by the unwinder. */
17167 if (JUMP_P(i) && CROSSING_JUMP_P (i))
17169 seh_nop_p = true;
17170 break;
17173 /* If we get to another real insn, we don't need the nop. */
17174 if (INSN_P (i))
17175 break;
17177 /* If we get to the epilogue note, prevent a catch region from
17178 being adjacent to the standard epilogue sequence. Note that,
17179 if non-call exceptions are enabled, we already did it during
17180 epilogue expansion, or else, if the insn can throw internally,
17181 we already did it during the reorg pass. */
17182 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
17183 && !flag_non_call_exceptions
17184 && !can_throw_internal (insn))
17186 seh_nop_p = true;
17187 break;
17191 /* If we didn't find a real insn following the call, prevent the
17192 unwinder from looking into the next function. */
17193 if (i == NULL)
17194 seh_nop_p = true;
17197 if (direct_p)
17199 if (ix86_nopic_noplt_attribute_p (call_op))
17201 direct_p = false;
17202 if (TARGET_64BIT)
17204 if (output_indirect_p)
17205 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17206 else
17207 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17209 else
17211 if (output_indirect_p)
17212 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17213 else
17214 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17217 else
17218 xasm = "%!call\t%P0";
17220 else
17222 if (output_indirect_p)
17223 xasm = "%0";
17224 else
17225 xasm = "%!call\t%A0";
17228 if (output_indirect_p && !direct_p)
17229 ix86_output_indirect_branch (call_op, xasm, false);
17230 else
17231 output_asm_insn (xasm, &call_op);
17233 if (seh_nop_p)
17234 return "nop";
17236 return "";
17239 /* Return a MEM corresponding to a stack slot with mode MODE.
17240 Allocate a new slot if necessary.
17242 The RTL for a function can have several slots available: N is
17243 which slot to use. */
17246 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
17248 struct stack_local_entry *s;
17250 gcc_assert (n < MAX_386_STACK_LOCALS);
17252 for (s = ix86_stack_locals; s; s = s->next)
17253 if (s->mode == mode && s->n == n)
17254 return validize_mem (copy_rtx (s->rtl));
17256 int align = 0;
17257 /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
17258 alignment with -m32 -mpreferred-stack-boundary=2. */
17259 if (mode == DImode
17260 && !TARGET_64BIT
17261 && n == SLOT_FLOATxFDI_387
17262 && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
17263 align = 32;
17264 s = ggc_alloc<stack_local_entry> ();
17265 s->n = n;
17266 s->mode = mode;
17267 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
17269 s->next = ix86_stack_locals;
17270 ix86_stack_locals = s;
17271 return validize_mem (copy_rtx (s->rtl));
17274 static void
17275 ix86_instantiate_decls (void)
17277 struct stack_local_entry *s;
17279 for (s = ix86_stack_locals; s; s = s->next)
17280 if (s->rtl != NULL_RTX)
17281 instantiate_decl_rtl (s->rtl);
17284 /* Check whether x86 address PARTS is a pc-relative address. */
17286 bool
17287 ix86_rip_relative_addr_p (struct ix86_address *parts)
17289 rtx base, index, disp;
17291 base = parts->base;
17292 index = parts->index;
17293 disp = parts->disp;
17295 if (disp && !base && !index)
17297 if (TARGET_64BIT)
17299 rtx symbol = disp;
17301 if (GET_CODE (disp) == CONST)
17302 symbol = XEXP (disp, 0);
17303 if (GET_CODE (symbol) == PLUS
17304 && CONST_INT_P (XEXP (symbol, 1)))
17305 symbol = XEXP (symbol, 0);
17307 if (GET_CODE (symbol) == LABEL_REF
17308 || (GET_CODE (symbol) == SYMBOL_REF
17309 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
17310 || (GET_CODE (symbol) == UNSPEC
17311 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
17312 || XINT (symbol, 1) == UNSPEC_PCREL
17313 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
17314 return true;
17317 return false;
17320 /* Calculate the length of the memory address in the instruction encoding.
17321 Includes addr32 prefix, does not include the one-byte modrm, opcode,
17322 or other prefixes. We never generate addr32 prefix for LEA insn. */
17325 memory_address_length (rtx addr, bool lea)
17327 struct ix86_address parts;
17328 rtx base, index, disp;
17329 int len;
17330 int ok;
17332 if (GET_CODE (addr) == PRE_DEC
17333 || GET_CODE (addr) == POST_INC
17334 || GET_CODE (addr) == PRE_MODIFY
17335 || GET_CODE (addr) == POST_MODIFY)
17336 return 0;
17338 ok = ix86_decompose_address (addr, &parts);
17339 gcc_assert (ok);
17341 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
17343 /* If this is not LEA instruction, add the length of addr32 prefix. */
17344 if (TARGET_64BIT && !lea
17345 && (SImode_address_operand (addr, VOIDmode)
17346 || (parts.base && GET_MODE (parts.base) == SImode)
17347 || (parts.index && GET_MODE (parts.index) == SImode)))
17348 len++;
17350 base = parts.base;
17351 index = parts.index;
17352 disp = parts.disp;
17354 if (base && SUBREG_P (base))
17355 base = SUBREG_REG (base);
17356 if (index && SUBREG_P (index))
17357 index = SUBREG_REG (index);
17359 gcc_assert (base == NULL_RTX || REG_P (base));
17360 gcc_assert (index == NULL_RTX || REG_P (index));
17362 /* Rule of thumb:
17363 - esp as the base always wants an index,
17364 - ebp as the base always wants a displacement,
17365 - r12 as the base always wants an index,
17366 - r13 as the base always wants a displacement. */
17368 /* Register Indirect. */
17369 if (base && !index && !disp)
17371 /* esp (for its index) and ebp (for its displacement) need
17372 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
17373 code. */
17374 if (base == arg_pointer_rtx
17375 || base == frame_pointer_rtx
17376 || REGNO (base) == SP_REG
17377 || REGNO (base) == BP_REG
17378 || REGNO (base) == R12_REG
17379 || REGNO (base) == R13_REG)
17380 len++;
17383 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
17384 is not disp32, but disp32(%rip), so for disp32
17385 SIB byte is needed, unless print_operand_address
17386 optimizes it into disp32(%rip) or (%rip) is implied
17387 by UNSPEC. */
17388 else if (disp && !base && !index)
17390 len += 4;
17391 if (!ix86_rip_relative_addr_p (&parts))
17392 len++;
17394 else
17396 /* Find the length of the displacement constant. */
17397 if (disp)
17399 if (base && satisfies_constraint_K (disp))
17400 len += 1;
17401 else
17402 len += 4;
17404 /* ebp always wants a displacement. Similarly r13. */
17405 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
17406 len++;
17408 /* An index requires the two-byte modrm form.... */
17409 if (index
17410 /* ...like esp (or r12), which always wants an index. */
17411 || base == arg_pointer_rtx
17412 || base == frame_pointer_rtx
17413 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
17414 len++;
17417 return len;
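/* A few examples of the rules above: "(%eax)" needs neither a
   displacement nor a SIB byte, so nothing is added beyond any segment
   or addr32 prefix; "(%esp)" and "(%r12)" need the two-byte modrm form
   and count one extra byte; "8(%ebp)" takes the one-byte displacement
   path because 8 satisfies constraint K; and a bare disp32 in 64-bit
   code counts 4 bytes plus 1 for the SIB byte unless the address turns
   out to be RIP-relative.  */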
17420 /* Compute default value for "length_immediate" attribute. When SHORTFORM
17421 is set, expect that the insn has an 8-bit immediate alternative. */
17423 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
17425 int len = 0;
17426 int i;
17427 extract_insn_cached (insn);
17428 for (i = recog_data.n_operands - 1; i >= 0; --i)
17429 if (CONSTANT_P (recog_data.operand[i]))
17431 enum attr_mode mode = get_attr_mode (insn);
17433 gcc_assert (!len);
17434 if (shortform && CONST_INT_P (recog_data.operand[i]))
17436 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
17437 switch (mode)
17439 case MODE_QI:
17440 len = 1;
17441 continue;
17442 case MODE_HI:
17443 ival = trunc_int_for_mode (ival, HImode);
17444 break;
17445 case MODE_SI:
17446 ival = trunc_int_for_mode (ival, SImode);
17447 break;
17448 default:
17449 break;
17451 if (IN_RANGE (ival, -128, 127))
17453 len = 1;
17454 continue;
17457 switch (mode)
17459 case MODE_QI:
17460 len = 1;
17461 break;
17462 case MODE_HI:
17463 len = 2;
17464 break;
17465 case MODE_SI:
17466 len = 4;
17467 break;
17468 /* Immediates for DImode instructions are encoded
17469 as 32-bit sign-extended values. */
17470 case MODE_DI:
17471 len = 4;
17472 break;
17473 default:
17474 fatal_insn ("unknown insn mode", insn);
17477 return len;
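/* For example, with SHORTFORM set and an insn whose mode is MODE_SI,
   an immediate of 100 fits the sign-extended 8-bit range [-128, 127]
   and is counted as 1 byte, whereas an immediate of 300 falls through
   to the full 4-byte MODE_SI encoding.  */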
17480 /* Compute default value for "length_address" attribute. */
17482 ix86_attr_length_address_default (rtx_insn *insn)
17484 int i;
17486 if (get_attr_type (insn) == TYPE_LEA)
17488 rtx set = PATTERN (insn), addr;
17490 if (GET_CODE (set) == PARALLEL)
17491 set = XVECEXP (set, 0, 0);
17493 gcc_assert (GET_CODE (set) == SET);
17495 addr = SET_SRC (set);
17497 return memory_address_length (addr, true);
17500 extract_insn_cached (insn);
17501 for (i = recog_data.n_operands - 1; i >= 0; --i)
17503 rtx op = recog_data.operand[i];
17504 if (MEM_P (op))
17506 constrain_operands_cached (insn, reload_completed);
17507 if (which_alternative != -1)
17509 const char *constraints = recog_data.constraints[i];
17510 int alt = which_alternative;
17512 while (*constraints == '=' || *constraints == '+')
17513 constraints++;
17514 while (alt-- > 0)
17515 while (*constraints++ != ',')
17517 /* Skip ignored operands. */
17518 if (*constraints == 'X')
17519 continue;
17522 int len = memory_address_length (XEXP (op, 0), false);
17524 /* Account for segment prefix for non-default addr spaces. */
17525 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
17526 len++;
17528 return len;
17531 return 0;
17534 /* Compute default value for "length_vex" attribute. It includes
17535 2 or 3 byte VEX prefix and 1 opcode byte. */
17538 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
17539 bool has_vex_w)
17541 int i, reg_only = 2 + 1;
17542 bool has_mem = false;
17544 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
17545 requires the 3-byte VEX prefix. */
17546 if (!has_0f_opcode || has_vex_w)
17547 return 3 + 1;
17549 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
17550 if (!TARGET_64BIT)
17551 return 2 + 1;
17553 extract_insn_cached (insn);
17555 for (i = recog_data.n_operands - 1; i >= 0; --i)
17556 if (REG_P (recog_data.operand[i]))
17558 /* The REX.W bit requires the 3-byte VEX prefix.
17559 REX2 registers with VEX use the extended EVEX prefix, which is 4 bytes. */
17560 if (GET_MODE (recog_data.operand[i]) == DImode
17561 && GENERAL_REG_P (recog_data.operand[i]))
17562 return 3 + 1;
17564 /* REX.B bit requires 3-byte VEX. Right here we don't know which
17565 operand will be encoded using VEX.B, so be conservative.
17566 REX2 registers with VEX use the extended EVEX prefix, which is 4 bytes. */
17567 if (REX_INT_REGNO_P (recog_data.operand[i])
17568 || REX2_INT_REGNO_P (recog_data.operand[i])
17569 || REX_SSE_REGNO_P (recog_data.operand[i]))
17570 reg_only = 3 + 1;
17572 else if (MEM_P (recog_data.operand[i]))
17574 /* REX2.X or REX2.B bits use 3 byte VEX prefix. */
17575 if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
17576 return 4;
17578 /* REX.X or REX.B bits use 3 byte VEX prefix. */
17579 if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
17580 return 3 + 1;
17582 has_mem = true;
17585 return has_mem ? 2 + 1 : reg_only;
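/* For example, a 0f-opcode VEX insn in 64-bit code whose register
   operands are all below %xmm8 (and not DImode general registers) is
   counted as a 2-byte VEX prefix plus 1 opcode byte, i.e. 3; an
   operand in %xmm8-%xmm15, or a DImode general register (which forces
   VEX.W), bumps the estimate to 3 + 1 = 4.  */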
17589 static bool
17590 ix86_class_likely_spilled_p (reg_class_t);
17592 /* Return true if the lhs of INSN is a HW function argument register, and set
17593 *IS_SPILLED to true if it is a likely spilled HW register. */
17594 static bool
17595 insn_is_function_arg (rtx insn, bool* is_spilled)
17597 rtx dst;
17599 if (!NONDEBUG_INSN_P (insn))
17600 return false;
17601 /* Call instructions are not movable; ignore them. */
17602 if (CALL_P (insn))
17603 return false;
17604 insn = PATTERN (insn);
17605 if (GET_CODE (insn) == PARALLEL)
17606 insn = XVECEXP (insn, 0, 0);
17607 if (GET_CODE (insn) != SET)
17608 return false;
17609 dst = SET_DEST (insn);
17610 if (REG_P (dst) && HARD_REGISTER_P (dst)
17611 && ix86_function_arg_regno_p (REGNO (dst)))
17613 /* Is it likely spilled HW register? */
17614 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
17615 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
17616 *is_spilled = true;
17617 return true;
17619 return false;
17622 /* Add output dependencies for a chain of adjacent function arguments, but
17623 only if there is a move to a likely spilled HW register. Return the first
17624 argument if at least one dependence was added, or NULL otherwise. */
17625 static rtx_insn *
17626 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
17628 rtx_insn *insn;
17629 rtx_insn *last = call;
17630 rtx_insn *first_arg = NULL;
17631 bool is_spilled = false;
17633 head = PREV_INSN (head);
17635 /* Find the argument-passing instruction nearest to the call. */
17636 while (true)
17638 last = PREV_INSN (last);
17639 if (last == head)
17640 return NULL;
17641 if (!NONDEBUG_INSN_P (last))
17642 continue;
17643 if (insn_is_function_arg (last, &is_spilled))
17644 break;
17645 return NULL;
17648 first_arg = last;
17649 while (true)
17651 insn = PREV_INSN (last);
17652 if (!INSN_P (insn))
17653 break;
17654 if (insn == head)
17655 break;
17656 if (!NONDEBUG_INSN_P (insn))
17658 last = insn;
17659 continue;
17661 if (insn_is_function_arg (insn, &is_spilled))
17663 /* Add an output dependence between two function arguments if the chain
17664 of output arguments contains likely spilled HW registers. */
17665 if (is_spilled)
17666 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17667 first_arg = last = insn;
17669 else
17670 break;
17672 if (!is_spilled)
17673 return NULL;
17674 return first_arg;
17677 /* Add output or anti dependency from insn to first_arg to restrict its code
17678 motion. */
17679 static void
17680 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
17682 rtx set;
17683 rtx tmp;
17685 set = single_set (insn);
17686 if (!set)
17687 return;
17688 tmp = SET_DEST (set);
17689 if (REG_P (tmp))
17691 /* Add output dependency to the first function argument. */
17692 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17693 return;
17695 /* Add anti dependency. */
17696 add_dependence (first_arg, insn, REG_DEP_ANTI);
17699 /* Avoid cross-block motion of a function argument by adding a dependency
17700 from the first non-jump instruction in BB. */
17701 static void
17702 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
17704 rtx_insn *insn = BB_END (bb);
17706 while (insn)
17708 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
17710 rtx set = single_set (insn);
17711 if (set)
17713 avoid_func_arg_motion (arg, insn);
17714 return;
17717 if (insn == BB_HEAD (bb))
17718 return;
17719 insn = PREV_INSN (insn);
17723 /* Hook for pre-reload schedule - avoid motion of function arguments
17724 passed in likely spilled HW registers. */
17725 static void
17726 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
17728 rtx_insn *insn;
17729 rtx_insn *first_arg = NULL;
17730 if (reload_completed)
17731 return;
17732 while (head != tail && DEBUG_INSN_P (head))
17733 head = NEXT_INSN (head);
17734 for (insn = tail; insn != head; insn = PREV_INSN (insn))
17735 if (INSN_P (insn) && CALL_P (insn))
17737 first_arg = add_parameter_dependencies (insn, head);
17738 if (first_arg)
17740 /* Add a dependee for the first argument to predecessors, but only if the
17741 region contains more than one block. */
17742 basic_block bb = BLOCK_FOR_INSN (insn);
17743 int rgn = CONTAINING_RGN (bb->index);
17744 int nr_blks = RGN_NR_BLOCKS (rgn);
17745 /* Skip trivial regions and region head blocks that can have
17746 predecessors outside of region. */
17747 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
17749 edge e;
17750 edge_iterator ei;
17752 /* Regions are SCCs with the exception of selective
17753 scheduling with pipelining of outer blocks enabled.
17754 So also check that immediate predecessors of a non-head
17755 block are in the same region. */
17756 FOR_EACH_EDGE (e, ei, bb->preds)
17758 /* Avoid creating of loop-carried dependencies through
17759 using topological ordering in the region. */
17760 if (rgn == CONTAINING_RGN (e->src->index)
17761 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
17762 add_dependee_for_func_arg (first_arg, e->src);
17765 insn = first_arg;
17766 if (insn == head)
17767 break;
17770 else if (first_arg)
17771 avoid_func_arg_motion (first_arg, insn);
17774 /* Hook for pre-reload schedule - set priority of moves from likely spilled
17775 HW registers to maximum, to schedule them as soon as possible. These are
17776 moves from function argument registers at the top of the function entry
17777 and moves from function return value registers after call. */
17778 static int
17779 ix86_adjust_priority (rtx_insn *insn, int priority)
17781 rtx set;
17783 if (reload_completed)
17784 return priority;
17786 if (!NONDEBUG_INSN_P (insn))
17787 return priority;
17789 set = single_set (insn);
17790 if (set)
17792 rtx tmp = SET_SRC (set);
17793 if (REG_P (tmp)
17794 && HARD_REGISTER_P (tmp)
17795 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
17796 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
17797 return current_sched_info->sched_max_insns_priority;
17800 return priority;
17803 /* Prepare for scheduling pass. */
17804 static void
17805 ix86_sched_init_global (FILE *, int, int)
17807 /* Install scheduling hooks for current CPU. Some of these hooks are used
17808 in time-critical parts of the scheduler, so we only set them up when
17809 they are actually used. */
17810 switch (ix86_tune)
17812 case PROCESSOR_CORE2:
17813 case PROCESSOR_NEHALEM:
17814 case PROCESSOR_SANDYBRIDGE:
17815 case PROCESSOR_HASWELL:
17816 case PROCESSOR_TREMONT:
17817 case PROCESSOR_ALDERLAKE:
17818 case PROCESSOR_GENERIC:
17819 /* Do not perform multipass scheduling for pre-reload schedule
17820 to save compile time. */
17821 if (reload_completed)
17823 ix86_core2i7_init_hooks ();
17824 break;
17826 /* Fall through. */
17827 default:
17828 targetm.sched.dfa_post_advance_cycle = NULL;
17829 targetm.sched.first_cycle_multipass_init = NULL;
17830 targetm.sched.first_cycle_multipass_begin = NULL;
17831 targetm.sched.first_cycle_multipass_issue = NULL;
17832 targetm.sched.first_cycle_multipass_backtrack = NULL;
17833 targetm.sched.first_cycle_multipass_end = NULL;
17834 targetm.sched.first_cycle_multipass_fini = NULL;
17835 break;
17840 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
17842 static HOST_WIDE_INT
17843 ix86_static_rtx_alignment (machine_mode mode)
17845 if (mode == DFmode)
17846 return 64;
17847 if (ALIGN_MODE_128 (mode))
17848 return MAX (128, GET_MODE_ALIGNMENT (mode));
17849 return GET_MODE_ALIGNMENT (mode);
17852 /* Implement TARGET_CONSTANT_ALIGNMENT. */
17854 static HOST_WIDE_INT
17855 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
17857 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17858 || TREE_CODE (exp) == INTEGER_CST)
17860 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
17861 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
17862 return MAX (mode_align, align);
17864 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17865 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17866 return BITS_PER_WORD;
17868 return align;
17871 /* Implement TARGET_EMPTY_RECORD_P. */
17873 static bool
17874 ix86_is_empty_record (const_tree type)
17876 if (!TARGET_64BIT)
17877 return false;
17878 return default_is_empty_record (type);
17881 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
17883 static void
17884 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
17886 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
17888 if (!cum->warn_empty)
17889 return;
17891 if (!TYPE_EMPTY_P (type))
17892 return;
17894 /* Don't warn if the function isn't visible outside of the TU. */
17895 if (cum->decl && !TREE_PUBLIC (cum->decl))
17896 return;
17898 const_tree ctx = get_ultimate_context (cum->decl);
17899 if (ctx != NULL_TREE
17900 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
17901 return;
17903 /* If the actual size of the type is zero, then there is no change
17904 in how objects of this size are passed. */
17905 if (int_size_in_bytes (type) == 0)
17906 return;
17908 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
17909 "changes in %<-fabi-version=12%> (GCC 8)", type);
17911 /* Only warn once. */
17912 cum->warn_empty = false;
17915 /* This hook returns the name of the multilib ABI. */
17917 static const char *
17918 ix86_get_multilib_abi_name (void)
17920 if (!(TARGET_64BIT_P (ix86_isa_flags)))
17921 return "i386";
17922 else if (TARGET_X32_P (ix86_isa_flags))
17923 return "x32";
17924 else
17925 return "x86_64";
17928 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
17929 the data type, and ALIGN is the alignment that the object would
17930 ordinarily have. */
17932 static int
17933 iamcu_alignment (tree type, int align)
17935 machine_mode mode;
17937 if (align < 32 || TYPE_USER_ALIGN (type))
17938 return align;
17940 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
17941 bytes. */
17942 type = strip_array_types (type);
17943 if (TYPE_ATOMIC (type))
17944 return align;
17946 mode = TYPE_MODE (type);
17947 switch (GET_MODE_CLASS (mode))
17949 case MODE_INT:
17950 case MODE_COMPLEX_INT:
17951 case MODE_COMPLEX_FLOAT:
17952 case MODE_FLOAT:
17953 case MODE_DECIMAL_FLOAT:
17954 return 32;
17955 default:
17956 return align;
17960 /* Compute the alignment for a static variable.
17961 TYPE is the data type, and ALIGN is the alignment that
17962 the object would ordinarily have. The value of this function is used
17963 instead of that alignment to align the object. */
17966 ix86_data_alignment (tree type, unsigned int align, bool opt)
17968 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
17969 for symbols from other compilation units or symbols that don't need
17970 to bind locally. In order to preserve some ABI compatibility with
17971 those compilers, ensure we don't decrease alignment from what we
17972 used to assume. */
17974 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
17976 /* A data structure equal to or greater than the size of a cache line
17977 (64 bytes in the Pentium 4 and other recent Intel processors, including
17978 processors based on Intel Core microarchitecture) should be aligned
17979 so that its base address is a multiple of a cache line size. */
17981 unsigned int max_align
17982 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
17984 if (max_align < BITS_PER_WORD)
17985 max_align = BITS_PER_WORD;
17987 switch (ix86_align_data_type)
17989 case ix86_align_data_type_abi: opt = false; break;
17990 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
17991 case ix86_align_data_type_cacheline: break;
17994 if (TARGET_IAMCU)
17995 align = iamcu_alignment (type, align);
17997 if (opt
17998 && AGGREGATE_TYPE_P (type)
17999 && TYPE_SIZE (type)
18000 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
18002 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
18003 && align < max_align_compat)
18004 align = max_align_compat;
18005 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
18006 && align < max_align)
18007 align = max_align;
18010 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
18011 to a 16-byte boundary. */
18012 if (TARGET_64BIT)
18014 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
18015 && TYPE_SIZE (type)
18016 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18017 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18018 && align < 128)
18019 return 128;
18022 if (!opt)
18023 return align;
18025 if (TREE_CODE (type) == ARRAY_TYPE)
18027 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18028 return 64;
18029 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18030 return 128;
18032 else if (TREE_CODE (type) == COMPLEX_TYPE)
18035 if (TYPE_MODE (type) == DCmode && align < 64)
18036 return 64;
18037 if ((TYPE_MODE (type) == XCmode
18038 || TYPE_MODE (type) == TCmode) && align < 128)
18039 return 128;
18041 else if (RECORD_OR_UNION_TYPE_P (type)
18042 && TYPE_FIELDS (type))
18044 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18045 return 64;
18046 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18047 return 128;
18049 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18050 || TREE_CODE (type) == INTEGER_TYPE)
18052 if (TYPE_MODE (type) == DFmode && align < 64)
18053 return 64;
18054 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18055 return 128;
18058 return align;
18061 /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
18062 static void
18063 ix86_lower_local_decl_alignment (tree decl)
18065 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
18066 DECL_ALIGN (decl), true);
18067 if (new_align < DECL_ALIGN (decl))
18068 SET_DECL_ALIGN (decl, new_align);
18071 /* Compute the alignment for a local variable or a stack slot. EXP is
18072 the data type or decl itself, MODE is the widest mode available and
18073 ALIGN is the alignment that the object would ordinarily have. The
18074 value of this macro is used instead of that alignment to align the
18075 object. */
18077 unsigned int
18078 ix86_local_alignment (tree exp, machine_mode mode,
18079 unsigned int align, bool may_lower)
18081 tree type, decl;
18083 if (exp && DECL_P (exp))
18085 type = TREE_TYPE (exp);
18086 decl = exp;
18088 else
18090 type = exp;
18091 decl = NULL;
18094 /* Don't do dynamic stack realignment for long long objects with
18095 -mpreferred-stack-boundary=2. */
18096 if (may_lower
18097 && !TARGET_64BIT
18098 && align == 64
18099 && ix86_preferred_stack_boundary < 64
18100 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
18101 && (!type || (!TYPE_USER_ALIGN (type)
18102 && !TYPE_ATOMIC (strip_array_types (type))))
18103 && (!decl || !DECL_USER_ALIGN (decl)))
18104 align = 32;
18106 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18107 register in MODE. We will return the largest alignment of XF
18108 and DF. */
18109 if (!type)
18111 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18112 align = GET_MODE_ALIGNMENT (DFmode);
18113 return align;
18116 /* Don't increase alignment for Intel MCU psABI. */
18117 if (TARGET_IAMCU)
18118 return align;
18120 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
18121 to a 16-byte boundary. The exact wording is:
18123 An array uses the same alignment as its elements, except that a local or
18124 global array variable of length at least 16 bytes or
18125 a C99 variable-length array variable always has alignment of at least 16 bytes.
18127 This was added to allow use of aligned SSE instructions on arrays. The
18128 rule is meant for static storage (where the compiler cannot do the analysis
18129 by itself). We follow it for automatic variables only when convenient:
18130 we fully control everything in the function being compiled, and functions
18131 from other units cannot rely on the alignment.
18133 Exclude the va_list type. It is the common case of a local array where
18134 we cannot benefit from the alignment.
18136 TODO: Probably one should optimize for size only when var is not escaping. */
18137 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
18138 && TARGET_SSE)
18140 if (AGGREGATE_TYPE_P (type)
18141 && (va_list_type_node == NULL_TREE
18142 || (TYPE_MAIN_VARIANT (type)
18143 != TYPE_MAIN_VARIANT (va_list_type_node)))
18144 && TYPE_SIZE (type)
18145 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18146 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18147 && align < 128)
18148 return 128;
18150 if (TREE_CODE (type) == ARRAY_TYPE)
18152 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18153 return 64;
18154 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18155 return 128;
18157 else if (TREE_CODE (type) == COMPLEX_TYPE)
18159 if (TYPE_MODE (type) == DCmode && align < 64)
18160 return 64;
18161 if ((TYPE_MODE (type) == XCmode
18162 || TYPE_MODE (type) == TCmode) && align < 128)
18163 return 128;
18165 else if (RECORD_OR_UNION_TYPE_P (type)
18166 && TYPE_FIELDS (type))
18168 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18169 return 64;
18170 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18171 return 128;
18173 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18174 || TREE_CODE (type) == INTEGER_TYPE)
18177 if (TYPE_MODE (type) == DFmode && align < 64)
18178 return 64;
18179 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18180 return 128;
18182 return align;
18185 /* Compute the minimum required alignment for dynamic stack realignment
18186 purposes for a local variable, parameter or a stack slot. EXP is
18187 the data type or decl itself, MODE is its mode and ALIGN is the
18188 alignment that the object would ordinarily have. */
18190 unsigned int
18191 ix86_minimum_alignment (tree exp, machine_mode mode,
18192 unsigned int align)
18194 tree type, decl;
18196 if (exp && DECL_P (exp))
18198 type = TREE_TYPE (exp);
18199 decl = exp;
18201 else
18203 type = exp;
18204 decl = NULL;
18207 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
18208 return align;
18210 /* Don't do dynamic stack realignment for long long objects with
18211 -mpreferred-stack-boundary=2. */
18212 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
18213 && (!type || (!TYPE_USER_ALIGN (type)
18214 && !TYPE_ATOMIC (strip_array_types (type))))
18215 && (!decl || !DECL_USER_ALIGN (decl)))
18217 gcc_checking_assert (!TARGET_STV);
18218 return 32;
18221 return align;
18224 /* Find a location for the static chain incoming to a nested function.
18225 This is a register, unless all free registers are used by arguments. */
18227 static rtx
18228 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
18230 unsigned regno;
18232 if (TARGET_64BIT)
18234 /* We always use R10 in 64-bit mode. */
18235 regno = R10_REG;
18237 else
18239 const_tree fntype, fndecl;
18240 unsigned int ccvt;
18242 /* By default in 32-bit mode we use ECX to pass the static chain. */
18243 regno = CX_REG;
18245 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
18247 fntype = TREE_TYPE (fndecl_or_type);
18248 fndecl = fndecl_or_type;
18250 else
18252 fntype = fndecl_or_type;
18253 fndecl = NULL;
18256 ccvt = ix86_get_callcvt (fntype);
18257 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
18259 /* Fastcall functions use ecx/edx for arguments, which leaves
18260 us with EAX for the static chain.
18261 Thiscall functions use ecx for arguments, which also
18262 leaves us with EAX for the static chain. */
18263 regno = AX_REG;
18265 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
18267 /* Thiscall functions use ecx for arguments, which leaves
18268 us with EAX and EDX for the static chain.
18269 For ABI compatibility we use EAX. */
18270 regno = AX_REG;
18272 else if (ix86_function_regparm (fntype, fndecl) == 3)
18274 /* For regparm 3, we have no free call-clobbered registers in
18275 which to store the static chain. In order to implement this,
18276 we have the trampoline push the static chain to the stack.
18277 However, we can't push a value below the return address when
18278 we call the nested function directly, so we have to use an
18279 alternate entry point. For this we use ESI, and have the
18280 alternate entry point push ESI, so that things appear the
18281 same once we're executing the nested function. */
18282 if (incoming_p)
18284 if (fndecl == current_function_decl
18285 && !ix86_static_chain_on_stack)
18287 gcc_assert (!reload_completed);
18288 ix86_static_chain_on_stack = true;
18290 return gen_frame_mem (SImode,
18291 plus_constant (Pmode,
18292 arg_pointer_rtx, -8));
18294 regno = SI_REG;
18298 return gen_rtx_REG (Pmode, regno);
18301 /* Emit RTL insns to initialize the variable parts of a trampoline.
18302 FNDECL is the decl of the target address; M_TRAMP is a MEM for
18303 the trampoline, and CHAIN_VALUE is an RTX for the static chain
18304 to be passed to the target function. */
18306 static void
18307 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
18309 rtx mem, fnaddr;
18310 int opcode;
18311 int offset = 0;
18312 bool need_endbr = (flag_cf_protection & CF_BRANCH);
18314 fnaddr = XEXP (DECL_RTL (fndecl), 0);
18316 if (TARGET_64BIT)
18318 int size;
18320 if (need_endbr)
18322 /* Insert ENDBR64. */
18323 mem = adjust_address (m_tramp, SImode, offset);
18324 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
18325 offset += 4;
18328 /* Load the function address to r11. Try to load address using
18329 the shorter movl instead of movabs. We may want to support
18330 movq for kernel mode, but kernel does not use trampolines at
18331 the moment. FNADDR is a 32-bit address and may not be in
18332 DImode when ptr_mode == SImode. Always use movl in this
18333 case. */
18334 if (ptr_mode == SImode
18335 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18337 fnaddr = copy_addr_to_reg (fnaddr);
18339 mem = adjust_address (m_tramp, HImode, offset);
18340 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
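/* Stored little-endian, 0xbb41 is the byte sequence 41 bb: a REX.B prefix
   plus the mov-immediate opcode, i.e. movl $imm32, %r11d.  */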
18342 mem = adjust_address (m_tramp, SImode, offset + 2);
18343 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
18344 offset += 6;
18346 else
18348 mem = adjust_address (m_tramp, HImode, offset);
18349 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
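/* Likewise, 0xbb49 is 49 bb: REX.W+B plus the mov-immediate opcode, i.e.
   movabsq $imm64, %r11.  */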
18351 mem = adjust_address (m_tramp, DImode, offset + 2);
18352 emit_move_insn (mem, fnaddr);
18353 offset += 10;
18356 /* Load static chain using movabs to r10. Use the shorter movl
18357 instead of movabs when ptr_mode == SImode. */
18358 if (ptr_mode == SImode)
18360 opcode = 0xba41;
18361 size = 6;
18363 else
18365 opcode = 0xba49;
18366 size = 10;
18369 mem = adjust_address (m_tramp, HImode, offset);
18370 emit_move_insn (mem, gen_int_mode (opcode, HImode));
18372 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
18373 emit_move_insn (mem, chain_value);
18374 offset += size;
18376 /* Jump to r11; the last (unused) byte is a nop, only there to
18377 pad the write out to a single 32-bit store. */
18378 mem = adjust_address (m_tramp, SImode, offset);
18379 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
18380 offset += 4;
18382 else
18384 rtx disp, chain;
18386 /* Depending on the static chain location, either load a register
18387 with a constant, or push the constant to the stack. All of the
18388 instructions are the same size. */
18389 chain = ix86_static_chain (fndecl, true);
18390 if (REG_P (chain))
18392 switch (REGNO (chain))
18394 case AX_REG:
18395 opcode = 0xb8; break;
18396 case CX_REG:
18397 opcode = 0xb9; break;
18398 default:
18399 gcc_unreachable ();
18402 else
18403 opcode = 0x68;
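/* 0xb8 and 0xb9 are movl $imm32 into %eax and %ecx respectively; 0x68 is
   pushl $imm32, used when the static chain is passed on the stack.  */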
18405 if (need_endbr)
18407 /* Insert ENDBR32. */
18408 mem = adjust_address (m_tramp, SImode, offset);
18409 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
18410 offset += 4;
18413 mem = adjust_address (m_tramp, QImode, offset);
18414 emit_move_insn (mem, gen_int_mode (opcode, QImode));
18416 mem = adjust_address (m_tramp, SImode, offset + 1);
18417 emit_move_insn (mem, chain_value);
18418 offset += 5;
18420 mem = adjust_address (m_tramp, QImode, offset);
18421 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
18423 mem = adjust_address (m_tramp, SImode, offset + 1);
18425 /* Compute offset from the end of the jmp to the target function.
18426 In the case in which the trampoline stores the static chain on
18427 the stack, we need to skip the first insn which pushes the
18428 (call-saved) register static chain; this push is 1 byte. */
18429 offset += 5;
18430 int skip = MEM_P (chain) ? 1 : 0;
18431 /* Skip ENDBR32 at the entry of the target function. */
18432 if (need_endbr
18433 && !cgraph_node::get (fndecl)->only_called_directly_p ())
18434 skip += 4;
18435 disp = expand_binop (SImode, sub_optab, fnaddr,
18436 plus_constant (Pmode, XEXP (m_tramp, 0),
18437 offset - skip),
18438 NULL_RTX, 1, OPTAB_DIRECT);
18439 emit_move_insn (mem, disp);
18442 gcc_assert (offset <= TRAMPOLINE_SIZE);
18444 #ifdef HAVE_ENABLE_EXECUTE_STACK
18445 #ifdef CHECK_EXECUTE_STACK_ENABLED
18446 if (CHECK_EXECUTE_STACK_ENABLED)
18447 #endif
18448 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18449 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
18450 #endif
18453 static bool
18454 ix86_allocate_stack_slots_for_args (void)
18456 /* Naked functions should not allocate stack slots for arguments. */
18457 return !ix86_function_naked (current_function_decl);
18460 static bool
18461 ix86_warn_func_return (tree decl)
18463 /* Naked functions are implemented entirely in assembly, including the
18464 return sequence, so suppress warnings about this. */
18465 return !ix86_function_naked (decl);
18468 /* Return the shift count of a vector-by-scalar shift builtin given its second argument
18469 ARG1. */
18470 static tree
18471 ix86_vector_shift_count (tree arg1)
18473 if (tree_fits_uhwi_p (arg1))
18474 return arg1;
18475 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
18477 /* The count argument is weird: it is passed in as various 128-bit
18478 (or 64-bit) vectors, and the low 64 bits of it are the count. */
18479 unsigned char buf[16];
18480 int len = native_encode_expr (arg1, buf, 16);
18481 if (len == 0)
18482 return NULL_TREE;
18483 tree t = native_interpret_expr (uint64_type_node, buf, len);
18484 if (t && tree_fits_uhwi_p (t))
18485 return t;
18487 return NULL_TREE;
18490 /* Return true if ARG_MASK is all ones. ELEMS is the number of elements of
18491 the corresponding vector. */
18492 static bool
18493 ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
18495 if (TREE_CODE (arg_mask) != INTEGER_CST)
18496 return false;
18498 unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
18499 if (elems == HOST_BITS_PER_WIDE_INT)
18500 return mask == HOST_WIDE_INT_M1U;
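/* Otherwise only the low ELEMS bits of the mask are significant, and all
   of them must be set.  */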
18501 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
18502 return false;
18504 return true;
18507 static tree
18508 ix86_fold_builtin (tree fndecl, int n_args,
18509 tree *args, bool ignore ATTRIBUTE_UNUSED)
18511 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
18513 enum ix86_builtins fn_code
18514 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
18515 enum rtx_code rcode;
18516 bool is_vshift;
18517 enum tree_code tcode;
18518 bool is_scalar;
18519 unsigned HOST_WIDE_INT mask;
18521 switch (fn_code)
18523 case IX86_BUILTIN_CPU_IS:
18524 case IX86_BUILTIN_CPU_SUPPORTS:
18525 gcc_assert (n_args == 1);
18526 return fold_builtin_cpu (fndecl, args);
18528 case IX86_BUILTIN_NANQ:
18529 case IX86_BUILTIN_NANSQ:
18531 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18532 const char *str = c_getstr (*args);
18533 int quiet = fn_code == IX86_BUILTIN_NANQ;
18534 REAL_VALUE_TYPE real;
18536 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
18537 return build_real (type, real);
18538 return NULL_TREE;
18541 case IX86_BUILTIN_INFQ:
18542 case IX86_BUILTIN_HUGE_VALQ:
18544 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18545 REAL_VALUE_TYPE inf;
18546 real_inf (&inf);
18547 return build_real (type, inf);
18550 case IX86_BUILTIN_TZCNT16:
18551 case IX86_BUILTIN_CTZS:
18552 case IX86_BUILTIN_TZCNT32:
18553 case IX86_BUILTIN_TZCNT64:
18554 gcc_assert (n_args == 1);
18555 if (TREE_CODE (args[0]) == INTEGER_CST)
18557 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18558 tree arg = args[0];
18559 if (fn_code == IX86_BUILTIN_TZCNT16
18560 || fn_code == IX86_BUILTIN_CTZS)
18561 arg = fold_convert (short_unsigned_type_node, arg);
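/* Unlike BSF/BSR, TZCNT and LZCNT are defined for a zero input: the
   result is the operand width in bits.  */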
18562 if (integer_zerop (arg))
18563 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18564 else
18565 return fold_const_call (CFN_CTZ, type, arg);
18567 break;
18569 case IX86_BUILTIN_LZCNT16:
18570 case IX86_BUILTIN_CLZS:
18571 case IX86_BUILTIN_LZCNT32:
18572 case IX86_BUILTIN_LZCNT64:
18573 gcc_assert (n_args == 1);
18574 if (TREE_CODE (args[0]) == INTEGER_CST)
18576 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18577 tree arg = args[0];
18578 if (fn_code == IX86_BUILTIN_LZCNT16
18579 || fn_code == IX86_BUILTIN_CLZS)
18580 arg = fold_convert (short_unsigned_type_node, arg);
18581 if (integer_zerop (arg))
18582 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18583 else
18584 return fold_const_call (CFN_CLZ, type, arg);
18586 break;
18588 case IX86_BUILTIN_BEXTR32:
18589 case IX86_BUILTIN_BEXTR64:
18590 case IX86_BUILTIN_BEXTRI32:
18591 case IX86_BUILTIN_BEXTRI64:
18592 gcc_assert (n_args == 2);
18593 if (tree_fits_uhwi_p (args[1]))
18595 unsigned HOST_WIDE_INT res = 0;
18596 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
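/* The BEXTR control operand packs the start bit position in bits [7:0]
   and the extracted field length in bits [15:8].  */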
18597 unsigned int start = tree_to_uhwi (args[1]);
18598 unsigned int len = (start & 0xff00) >> 8;
18599 tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
18600 start &= 0xff;
18601 if (start >= prec || len == 0)
18602 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
18603 args[0]);
18604 else if (!tree_fits_uhwi_p (args[0]))
18605 break;
18606 else
18607 res = tree_to_uhwi (args[0]) >> start;
18608 if (len > prec)
18609 len = prec;
18610 if (len < HOST_BITS_PER_WIDE_INT)
18611 res &= (HOST_WIDE_INT_1U << len) - 1;
18612 return build_int_cstu (lhs_type, res);
18614 break;
18616 case IX86_BUILTIN_BZHI32:
18617 case IX86_BUILTIN_BZHI64:
18618 gcc_assert (n_args == 2);
18619 if (tree_fits_uhwi_p (args[1]))
18621 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
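/* BZHI zeroes all bits at positions greater than or equal to the index
   given by the low 8 bits of the second operand.  */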
18622 tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
18623 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
18624 return args[0];
18625 if (idx == 0)
18626 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
18627 args[0]);
18628 if (!tree_fits_uhwi_p (args[0]))
18629 break;
18630 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
18631 res &= ~(HOST_WIDE_INT_M1U << idx);
18632 return build_int_cstu (lhs_type, res);
18634 break;
18636 case IX86_BUILTIN_PDEP32:
18637 case IX86_BUILTIN_PDEP64:
18638 gcc_assert (n_args == 2);
18639 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18641 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18642 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18643 unsigned HOST_WIDE_INT res = 0;
18644 unsigned HOST_WIDE_INT m, k = 1;
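/* PDEP deposits the contiguous low bits of SRC into the positions of the
   set bits in MASK: M scans the mask bits, K scans the low bits of SRC.  */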
18645 for (m = 1; m; m <<= 1)
18646 if ((mask & m) != 0)
18648 if ((src & k) != 0)
18649 res |= m;
18650 k <<= 1;
18652 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18654 break;
18656 case IX86_BUILTIN_PEXT32:
18657 case IX86_BUILTIN_PEXT64:
18658 gcc_assert (n_args == 2);
18659 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18661 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18662 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18663 unsigned HOST_WIDE_INT res = 0;
18664 unsigned HOST_WIDE_INT m, k = 1;
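/* PEXT is the inverse of PDEP: the bits of SRC selected by MASK are
   packed into the contiguous low bits of the result.  */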
18665 for (m = 1; m; m <<= 1)
18666 if ((mask & m) != 0)
18668 if ((src & m) != 0)
18669 res |= k;
18670 k <<= 1;
18672 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18674 break;
18676 case IX86_BUILTIN_MOVMSKPS:
18677 case IX86_BUILTIN_PMOVMSKB:
18678 case IX86_BUILTIN_MOVMSKPD:
18679 case IX86_BUILTIN_PMOVMSKB128:
18680 case IX86_BUILTIN_MOVMSKPD256:
18681 case IX86_BUILTIN_MOVMSKPS256:
18682 case IX86_BUILTIN_PMOVMSKB256:
18683 gcc_assert (n_args == 1);
18684 if (TREE_CODE (args[0]) == VECTOR_CST)
18686 HOST_WIDE_INT res = 0;
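/* MOVMSK collects the sign bit of each vector element into the
   corresponding bit of the scalar result, so fold by inspecting the sign
   of each constant element.  */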
18687 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
18689 tree e = VECTOR_CST_ELT (args[0], i);
18690 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
18692 if (wi::neg_p (wi::to_wide (e)))
18693 res |= HOST_WIDE_INT_1 << i;
18695 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
18697 if (TREE_REAL_CST (e).sign)
18698 res |= HOST_WIDE_INT_1 << i;
18700 else
18701 return NULL_TREE;
18703 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
18705 break;
18707 case IX86_BUILTIN_PSLLD:
18708 case IX86_BUILTIN_PSLLD128:
18709 case IX86_BUILTIN_PSLLD128_MASK:
18710 case IX86_BUILTIN_PSLLD256:
18711 case IX86_BUILTIN_PSLLD256_MASK:
18712 case IX86_BUILTIN_PSLLD512:
18713 case IX86_BUILTIN_PSLLDI:
18714 case IX86_BUILTIN_PSLLDI128:
18715 case IX86_BUILTIN_PSLLDI128_MASK:
18716 case IX86_BUILTIN_PSLLDI256:
18717 case IX86_BUILTIN_PSLLDI256_MASK:
18718 case IX86_BUILTIN_PSLLDI512:
18719 case IX86_BUILTIN_PSLLQ:
18720 case IX86_BUILTIN_PSLLQ128:
18721 case IX86_BUILTIN_PSLLQ128_MASK:
18722 case IX86_BUILTIN_PSLLQ256:
18723 case IX86_BUILTIN_PSLLQ256_MASK:
18724 case IX86_BUILTIN_PSLLQ512:
18725 case IX86_BUILTIN_PSLLQI:
18726 case IX86_BUILTIN_PSLLQI128:
18727 case IX86_BUILTIN_PSLLQI128_MASK:
18728 case IX86_BUILTIN_PSLLQI256:
18729 case IX86_BUILTIN_PSLLQI256_MASK:
18730 case IX86_BUILTIN_PSLLQI512:
18731 case IX86_BUILTIN_PSLLW:
18732 case IX86_BUILTIN_PSLLW128:
18733 case IX86_BUILTIN_PSLLW128_MASK:
18734 case IX86_BUILTIN_PSLLW256:
18735 case IX86_BUILTIN_PSLLW256_MASK:
18736 case IX86_BUILTIN_PSLLW512_MASK:
18737 case IX86_BUILTIN_PSLLWI:
18738 case IX86_BUILTIN_PSLLWI128:
18739 case IX86_BUILTIN_PSLLWI128_MASK:
18740 case IX86_BUILTIN_PSLLWI256:
18741 case IX86_BUILTIN_PSLLWI256_MASK:
18742 case IX86_BUILTIN_PSLLWI512_MASK:
18743 rcode = ASHIFT;
18744 is_vshift = false;
18745 goto do_shift;
18746 case IX86_BUILTIN_PSRAD:
18747 case IX86_BUILTIN_PSRAD128:
18748 case IX86_BUILTIN_PSRAD128_MASK:
18749 case IX86_BUILTIN_PSRAD256:
18750 case IX86_BUILTIN_PSRAD256_MASK:
18751 case IX86_BUILTIN_PSRAD512:
18752 case IX86_BUILTIN_PSRADI:
18753 case IX86_BUILTIN_PSRADI128:
18754 case IX86_BUILTIN_PSRADI128_MASK:
18755 case IX86_BUILTIN_PSRADI256:
18756 case IX86_BUILTIN_PSRADI256_MASK:
18757 case IX86_BUILTIN_PSRADI512:
18758 case IX86_BUILTIN_PSRAQ128_MASK:
18759 case IX86_BUILTIN_PSRAQ256_MASK:
18760 case IX86_BUILTIN_PSRAQ512:
18761 case IX86_BUILTIN_PSRAQI128_MASK:
18762 case IX86_BUILTIN_PSRAQI256_MASK:
18763 case IX86_BUILTIN_PSRAQI512:
18764 case IX86_BUILTIN_PSRAW:
18765 case IX86_BUILTIN_PSRAW128:
18766 case IX86_BUILTIN_PSRAW128_MASK:
18767 case IX86_BUILTIN_PSRAW256:
18768 case IX86_BUILTIN_PSRAW256_MASK:
18769 case IX86_BUILTIN_PSRAW512:
18770 case IX86_BUILTIN_PSRAWI:
18771 case IX86_BUILTIN_PSRAWI128:
18772 case IX86_BUILTIN_PSRAWI128_MASK:
18773 case IX86_BUILTIN_PSRAWI256:
18774 case IX86_BUILTIN_PSRAWI256_MASK:
18775 case IX86_BUILTIN_PSRAWI512:
18776 rcode = ASHIFTRT;
18777 is_vshift = false;
18778 goto do_shift;
18779 case IX86_BUILTIN_PSRLD:
18780 case IX86_BUILTIN_PSRLD128:
18781 case IX86_BUILTIN_PSRLD128_MASK:
18782 case IX86_BUILTIN_PSRLD256:
18783 case IX86_BUILTIN_PSRLD256_MASK:
18784 case IX86_BUILTIN_PSRLD512:
18785 case IX86_BUILTIN_PSRLDI:
18786 case IX86_BUILTIN_PSRLDI128:
18787 case IX86_BUILTIN_PSRLDI128_MASK:
18788 case IX86_BUILTIN_PSRLDI256:
18789 case IX86_BUILTIN_PSRLDI256_MASK:
18790 case IX86_BUILTIN_PSRLDI512:
18791 case IX86_BUILTIN_PSRLQ:
18792 case IX86_BUILTIN_PSRLQ128:
18793 case IX86_BUILTIN_PSRLQ128_MASK:
18794 case IX86_BUILTIN_PSRLQ256:
18795 case IX86_BUILTIN_PSRLQ256_MASK:
18796 case IX86_BUILTIN_PSRLQ512:
18797 case IX86_BUILTIN_PSRLQI:
18798 case IX86_BUILTIN_PSRLQI128:
18799 case IX86_BUILTIN_PSRLQI128_MASK:
18800 case IX86_BUILTIN_PSRLQI256:
18801 case IX86_BUILTIN_PSRLQI256_MASK:
18802 case IX86_BUILTIN_PSRLQI512:
18803 case IX86_BUILTIN_PSRLW:
18804 case IX86_BUILTIN_PSRLW128:
18805 case IX86_BUILTIN_PSRLW128_MASK:
18806 case IX86_BUILTIN_PSRLW256:
18807 case IX86_BUILTIN_PSRLW256_MASK:
18808 case IX86_BUILTIN_PSRLW512:
18809 case IX86_BUILTIN_PSRLWI:
18810 case IX86_BUILTIN_PSRLWI128:
18811 case IX86_BUILTIN_PSRLWI128_MASK:
18812 case IX86_BUILTIN_PSRLWI256:
18813 case IX86_BUILTIN_PSRLWI256_MASK:
18814 case IX86_BUILTIN_PSRLWI512:
18815 rcode = LSHIFTRT;
18816 is_vshift = false;
18817 goto do_shift;
18818 case IX86_BUILTIN_PSLLVV16HI:
18819 case IX86_BUILTIN_PSLLVV16SI:
18820 case IX86_BUILTIN_PSLLVV2DI:
18821 case IX86_BUILTIN_PSLLVV2DI_MASK:
18822 case IX86_BUILTIN_PSLLVV32HI:
18823 case IX86_BUILTIN_PSLLVV4DI:
18824 case IX86_BUILTIN_PSLLVV4DI_MASK:
18825 case IX86_BUILTIN_PSLLVV4SI:
18826 case IX86_BUILTIN_PSLLVV4SI_MASK:
18827 case IX86_BUILTIN_PSLLVV8DI:
18828 case IX86_BUILTIN_PSLLVV8HI:
18829 case IX86_BUILTIN_PSLLVV8SI:
18830 case IX86_BUILTIN_PSLLVV8SI_MASK:
18831 rcode = ASHIFT;
18832 is_vshift = true;
18833 goto do_shift;
18834 case IX86_BUILTIN_PSRAVQ128:
18835 case IX86_BUILTIN_PSRAVQ256:
18836 case IX86_BUILTIN_PSRAVV16HI:
18837 case IX86_BUILTIN_PSRAVV16SI:
18838 case IX86_BUILTIN_PSRAVV32HI:
18839 case IX86_BUILTIN_PSRAVV4SI:
18840 case IX86_BUILTIN_PSRAVV4SI_MASK:
18841 case IX86_BUILTIN_PSRAVV8DI:
18842 case IX86_BUILTIN_PSRAVV8HI:
18843 case IX86_BUILTIN_PSRAVV8SI:
18844 case IX86_BUILTIN_PSRAVV8SI_MASK:
18845 rcode = ASHIFTRT;
18846 is_vshift = true;
18847 goto do_shift;
18848 case IX86_BUILTIN_PSRLVV16HI:
18849 case IX86_BUILTIN_PSRLVV16SI:
18850 case IX86_BUILTIN_PSRLVV2DI:
18851 case IX86_BUILTIN_PSRLVV2DI_MASK:
18852 case IX86_BUILTIN_PSRLVV32HI:
18853 case IX86_BUILTIN_PSRLVV4DI:
18854 case IX86_BUILTIN_PSRLVV4DI_MASK:
18855 case IX86_BUILTIN_PSRLVV4SI:
18856 case IX86_BUILTIN_PSRLVV4SI_MASK:
18857 case IX86_BUILTIN_PSRLVV8DI:
18858 case IX86_BUILTIN_PSRLVV8HI:
18859 case IX86_BUILTIN_PSRLVV8SI:
18860 case IX86_BUILTIN_PSRLVV8SI_MASK:
18861 rcode = LSHIFTRT;
18862 is_vshift = true;
18863 goto do_shift;
18865 do_shift:
18866 gcc_assert (n_args >= 2);
18867 if (TREE_CODE (args[0]) != VECTOR_CST)
18868 break;
18869 mask = HOST_WIDE_INT_M1U;
18870 if (n_args > 2)
18872 /* This is a masked shift. */
18873 if (!tree_fits_uhwi_p (args[n_args - 1])
18874 || TREE_SIDE_EFFECTS (args[n_args - 2]))
18875 break;
18876 mask = tree_to_uhwi (args[n_args - 1]);
18877 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
18878 mask |= HOST_WIDE_INT_M1U << elems;
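/* Mask bits above the vector length do not select elements; treat them
   as set.  If the remaining mask selects no element at all, the result is
   simply the pass-through operand args[n_args - 2].  */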
18879 if (mask != HOST_WIDE_INT_M1U
18880 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
18881 break;
18882 if (mask == (HOST_WIDE_INT_M1U << elems))
18883 return args[n_args - 2];
18885 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
18886 break;
18887 if (tree tem = (is_vshift ? integer_one_node
18888 : ix86_vector_shift_count (args[1])))
18890 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
18891 unsigned HOST_WIDE_INT prec
18892 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
18893 if (count == 0 && mask == HOST_WIDE_INT_M1U)
18894 return args[0];
18895 if (count >= prec)
18897 if (rcode == ASHIFTRT)
18898 count = prec - 1;
18899 else if (mask == HOST_WIDE_INT_M1U)
18900 return build_zero_cst (TREE_TYPE (args[0]));
18902 tree countt = NULL_TREE;
18903 if (!is_vshift)
18905 if (count >= prec)
18906 countt = integer_zero_node;
18907 else
18908 countt = build_int_cst (integer_type_node, count);
18910 tree_vector_builder builder;
18911 if (mask != HOST_WIDE_INT_M1U || is_vshift)
18912 builder.new_vector (TREE_TYPE (args[0]),
18913 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
18915 else
18916 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
18917 false);
18918 unsigned int cnt = builder.encoded_nelts ();
18919 for (unsigned int i = 0; i < cnt; ++i)
18921 tree elt = VECTOR_CST_ELT (args[0], i);
18922 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
18923 return NULL_TREE;
18924 tree type = TREE_TYPE (elt);
18925 if (rcode == LSHIFTRT)
18926 elt = fold_convert (unsigned_type_for (type), elt);
18927 if (is_vshift)
18929 countt = VECTOR_CST_ELT (args[1], i);
18930 if (TREE_CODE (countt) != INTEGER_CST
18931 || TREE_OVERFLOW (countt))
18932 return NULL_TREE;
18933 if (wi::neg_p (wi::to_wide (countt))
18934 || wi::to_widest (countt) >= prec)
18936 if (rcode == ASHIFTRT)
18937 countt = build_int_cst (TREE_TYPE (countt),
18938 prec - 1);
18939 else
18941 elt = build_zero_cst (TREE_TYPE (elt));
18942 countt = build_zero_cst (TREE_TYPE (countt));
18946 else if (count >= prec)
18947 elt = build_zero_cst (TREE_TYPE (elt));
18948 elt = const_binop (rcode == ASHIFT
18949 ? LSHIFT_EXPR : RSHIFT_EXPR,
18950 TREE_TYPE (elt), elt, countt);
18951 if (!elt || TREE_CODE (elt) != INTEGER_CST)
18952 return NULL_TREE;
18953 if (rcode == LSHIFTRT)
18954 elt = fold_convert (type, elt);
18955 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
18957 elt = VECTOR_CST_ELT (args[n_args - 2], i);
18958 if (TREE_CODE (elt) != INTEGER_CST
18959 || TREE_OVERFLOW (elt))
18960 return NULL_TREE;
18962 builder.quick_push (elt);
18964 return builder.build ();
18966 break;
18968 case IX86_BUILTIN_MINSS:
18969 case IX86_BUILTIN_MINSH_MASK:
18970 tcode = LT_EXPR;
18971 is_scalar = true;
18972 goto do_minmax;
18974 case IX86_BUILTIN_MAXSS:
18975 case IX86_BUILTIN_MAXSH_MASK:
18976 tcode = GT_EXPR;
18977 is_scalar = true;
18978 goto do_minmax;
18980 case IX86_BUILTIN_MINPS:
18981 case IX86_BUILTIN_MINPD:
18982 case IX86_BUILTIN_MINPS256:
18983 case IX86_BUILTIN_MINPD256:
18984 case IX86_BUILTIN_MINPS512:
18985 case IX86_BUILTIN_MINPD512:
18986 case IX86_BUILTIN_MINPS128_MASK:
18987 case IX86_BUILTIN_MINPD128_MASK:
18988 case IX86_BUILTIN_MINPS256_MASK:
18989 case IX86_BUILTIN_MINPD256_MASK:
18990 case IX86_BUILTIN_MINPH128_MASK:
18991 case IX86_BUILTIN_MINPH256_MASK:
18992 case IX86_BUILTIN_MINPH512_MASK:
18993 tcode = LT_EXPR;
18994 is_scalar = false;
18995 goto do_minmax;
18997 case IX86_BUILTIN_MAXPS:
18998 case IX86_BUILTIN_MAXPD:
18999 case IX86_BUILTIN_MAXPS256:
19000 case IX86_BUILTIN_MAXPD256:
19001 case IX86_BUILTIN_MAXPS512:
19002 case IX86_BUILTIN_MAXPD512:
19003 case IX86_BUILTIN_MAXPS128_MASK:
19004 case IX86_BUILTIN_MAXPD128_MASK:
19005 case IX86_BUILTIN_MAXPS256_MASK:
19006 case IX86_BUILTIN_MAXPD256_MASK:
19007 case IX86_BUILTIN_MAXPH128_MASK:
19008 case IX86_BUILTIN_MAXPH256_MASK:
19009 case IX86_BUILTIN_MAXPH512_MASK:
19010 tcode = GT_EXPR;
19011 is_scalar = false;
19012 do_minmax:
19013 gcc_assert (n_args >= 2);
19014 if (TREE_CODE (args[0]) != VECTOR_CST
19015 || TREE_CODE (args[1]) != VECTOR_CST)
19016 break;
19017 mask = HOST_WIDE_INT_M1U;
19018 if (n_args > 2)
19020 gcc_assert (n_args >= 4);
19021 /* This is a masked minmax. */
19022 if (TREE_CODE (args[3]) != INTEGER_CST
19023 || TREE_SIDE_EFFECTS (args[2]))
19024 break;
19025 mask = TREE_INT_CST_LOW (args[3]);
19026 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19027 mask |= HOST_WIDE_INT_M1U << elems;
19028 if (mask != HOST_WIDE_INT_M1U
19029 && TREE_CODE (args[2]) != VECTOR_CST)
19030 break;
19031 if (n_args >= 5)
19033 if (!tree_fits_uhwi_p (args[4]))
19034 break;
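/* The only rounding-control values accepted here are 4
   (_MM_FROUND_CUR_DIRECTION) and 8 (_MM_FROUND_NO_EXC).  */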
19035 if (tree_to_uhwi (args[4]) != 4
19036 && tree_to_uhwi (args[4]) != 8)
19037 break;
19039 if (mask == (HOST_WIDE_INT_M1U << elems))
19040 return args[2];
19042 /* Punt on NaNs, unless exceptions are disabled. */
19043 if (HONOR_NANS (args[0])
19044 && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
19045 for (int i = 0; i < 2; ++i)
19047 unsigned count = vector_cst_encoded_nelts (args[i]);
19048 for (unsigned j = 0; j < count; ++j)
19049 if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
19050 return NULL_TREE;
19053 tree res = const_binop (tcode,
19054 truth_type_for (TREE_TYPE (args[0])),
19055 args[0], args[1]);
19056 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19057 break;
19058 res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
19059 args[0], args[1]);
19060 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19061 break;
19062 if (mask != HOST_WIDE_INT_M1U)
19064 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19065 vec_perm_builder sel (nelts, nelts, 1);
19066 for (unsigned int i = 0; i < nelts; i++)
19067 if (mask & (HOST_WIDE_INT_1U << i))
19068 sel.quick_push (i);
19069 else
19070 sel.quick_push (nelts + i);
19071 vec_perm_indices indices (sel, 2, nelts);
19072 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
19073 indices);
19074 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19075 break;
19077 if (is_scalar)
19079 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
19080 vec_perm_builder sel (nelts, nelts, 1);
19081 sel.quick_push (0);
19082 for (unsigned int i = 1; i < nelts; i++)
19083 sel.quick_push (nelts + i);
19084 vec_perm_indices indices (sel, 2, nelts);
19085 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
19086 indices);
19087 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
19088 break;
19090 return res;
19093 default:
19094 break;
19098 #ifdef SUBTARGET_FOLD_BUILTIN
19099 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
19100 #endif
19102 return NULL_TREE;
19105 /* Fold a MD builtin (use ix86_fold_builtin for folding into
19106 constant) in GIMPLE. */
19108 bool
19109 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
19111 gimple *stmt = gsi_stmt (*gsi), *g;
19112 gimple_seq stmts = NULL;
19113 tree fndecl = gimple_call_fndecl (stmt);
19114 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
19115 int n_args = gimple_call_num_args (stmt);
19116 enum ix86_builtins fn_code
19117 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19118 tree decl = NULL_TREE;
19119 tree arg0, arg1, arg2;
19120 enum rtx_code rcode;
19121 enum tree_code tcode;
19122 unsigned HOST_WIDE_INT count;
19123 bool is_vshift;
19124 unsigned HOST_WIDE_INT elems;
19125 location_t loc;
19127 /* Don't fold when there is an ISA mismatch. */
19128 if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
19129 return false;
19131 switch (fn_code)
19133 case IX86_BUILTIN_TZCNT32:
19134 decl = builtin_decl_implicit (BUILT_IN_CTZ);
19135 goto fold_tzcnt_lzcnt;
19137 case IX86_BUILTIN_TZCNT64:
19138 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
19139 goto fold_tzcnt_lzcnt;
19141 case IX86_BUILTIN_LZCNT32:
19142 decl = builtin_decl_implicit (BUILT_IN_CLZ);
19143 goto fold_tzcnt_lzcnt;
19145 case IX86_BUILTIN_LZCNT64:
19146 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
19147 goto fold_tzcnt_lzcnt;
19149 fold_tzcnt_lzcnt:
19150 gcc_assert (n_args == 1);
19151 arg0 = gimple_call_arg (stmt, 0);
19152 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
19154 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
19155 /* If arg0 is provably non-zero, optimize into generic
19156 __builtin_c[tl]z{,ll} functions, which the middle-end handles
19157 better. */
19158 if (!expr_not_equal_to (arg0, wi::zero (prec)))
19159 return false;
19161 loc = gimple_location (stmt);
19162 g = gimple_build_call (decl, 1, arg0);
19163 gimple_set_location (g, loc);
19164 tree lhs = make_ssa_name (integer_type_node);
19165 gimple_call_set_lhs (g, lhs);
19166 gsi_insert_before (gsi, g, GSI_SAME_STMT);
19167 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
19168 gimple_set_location (g, loc);
19169 gsi_replace (gsi, g, false);
19170 return true;
19172 break;
19174 case IX86_BUILTIN_BZHI32:
19175 case IX86_BUILTIN_BZHI64:
19176 gcc_assert (n_args == 2);
19177 arg1 = gimple_call_arg (stmt, 1);
19178 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
19180 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
19181 arg0 = gimple_call_arg (stmt, 0);
19182 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
19183 break;
19184 loc = gimple_location (stmt);
19185 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19186 gimple_set_location (g, loc);
19187 gsi_replace (gsi, g, false);
19188 return true;
19190 break;
19192 case IX86_BUILTIN_PDEP32:
19193 case IX86_BUILTIN_PDEP64:
19194 case IX86_BUILTIN_PEXT32:
19195 case IX86_BUILTIN_PEXT64:
19196 gcc_assert (n_args == 2);
19197 arg1 = gimple_call_arg (stmt, 1);
19198 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
19200 loc = gimple_location (stmt);
19201 arg0 = gimple_call_arg (stmt, 0);
19202 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19203 gimple_set_location (g, loc);
19204 gsi_replace (gsi, g, false);
19205 return true;
19207 break;
19209 case IX86_BUILTIN_PBLENDVB256:
19210 case IX86_BUILTIN_BLENDVPS256:
19211 case IX86_BUILTIN_BLENDVPD256:
19212 /* pcmpeqb/d/q is under avx2; w/o avx2, it's veclowered
19213 to scalar operations and not combined back. */
19214 if (!TARGET_AVX2)
19215 break;
19217 /* FALLTHRU. */
19218 case IX86_BUILTIN_BLENDVPD:
19219 /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
19220 w/o sse4.2, it's veclowered to scalar operations and
19221 not combined back. */
19222 if (!TARGET_SSE4_2)
19223 break;
19224 /* FALLTHRU. */
19225 case IX86_BUILTIN_PBLENDVB128:
19226 case IX86_BUILTIN_BLENDVPS:
19227 gcc_assert (n_args == 3);
19228 arg0 = gimple_call_arg (stmt, 0);
19229 arg1 = gimple_call_arg (stmt, 1);
19230 arg2 = gimple_call_arg (stmt, 2);
19231 if (gimple_call_lhs (stmt))
19233 loc = gimple_location (stmt);
19234 tree type = TREE_TYPE (arg2);
19235 if (VECTOR_FLOAT_TYPE_P (type))
19237 tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
19238 ? intSI_type_node : intDI_type_node;
19239 type = get_same_sized_vectype (itype, type);
19241 else
19242 type = signed_type_for (type);
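/* BLENDV selects on the sign bit of each mask element, so view the mask
   as signed integers and compare it against zero below.  */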
19243 arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
19244 tree zero_vec = build_zero_cst (type);
19245 tree cmp_type = truth_type_for (type);
19246 tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
19247 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19248 g = gimple_build_assign (gimple_call_lhs (stmt),
19249 VEC_COND_EXPR, cmp,
19250 arg1, arg0);
19251 gimple_set_location (g, loc);
19252 gsi_replace (gsi, g, false);
19254 else
19255 gsi_replace (gsi, gimple_build_nop (), false);
19256 return true;
19259 case IX86_BUILTIN_PCMPEQB128:
19260 case IX86_BUILTIN_PCMPEQW128:
19261 case IX86_BUILTIN_PCMPEQD128:
19262 case IX86_BUILTIN_PCMPEQQ:
19263 case IX86_BUILTIN_PCMPEQB256:
19264 case IX86_BUILTIN_PCMPEQW256:
19265 case IX86_BUILTIN_PCMPEQD256:
19266 case IX86_BUILTIN_PCMPEQQ256:
19267 tcode = EQ_EXPR;
19268 goto do_cmp;
19270 case IX86_BUILTIN_PCMPGTB128:
19271 case IX86_BUILTIN_PCMPGTW128:
19272 case IX86_BUILTIN_PCMPGTD128:
19273 case IX86_BUILTIN_PCMPGTQ:
19274 case IX86_BUILTIN_PCMPGTB256:
19275 case IX86_BUILTIN_PCMPGTW256:
19276 case IX86_BUILTIN_PCMPGTD256:
19277 case IX86_BUILTIN_PCMPGTQ256:
19278 tcode = GT_EXPR;
19280 do_cmp:
19281 gcc_assert (n_args == 2);
19282 arg0 = gimple_call_arg (stmt, 0);
19283 arg1 = gimple_call_arg (stmt, 1);
19284 if (gimple_call_lhs (stmt))
19286 loc = gimple_location (stmt);
19287 tree type = TREE_TYPE (arg0);
19288 tree zero_vec = build_zero_cst (type);
19289 tree minus_one_vec = build_minus_one_cst (type);
19290 tree cmp_type = truth_type_for (type);
19291 tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
19292 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19293 g = gimple_build_assign (gimple_call_lhs (stmt),
19294 VEC_COND_EXPR, cmp,
19295 minus_one_vec, zero_vec);
19296 gimple_set_location (g, loc);
19297 gsi_replace (gsi, g, false);
19299 else
19300 gsi_replace (gsi, gimple_build_nop (), false);
19301 return true;
19303 case IX86_BUILTIN_PSLLD:
19304 case IX86_BUILTIN_PSLLD128:
19305 case IX86_BUILTIN_PSLLD128_MASK:
19306 case IX86_BUILTIN_PSLLD256:
19307 case IX86_BUILTIN_PSLLD256_MASK:
19308 case IX86_BUILTIN_PSLLD512:
19309 case IX86_BUILTIN_PSLLDI:
19310 case IX86_BUILTIN_PSLLDI128:
19311 case IX86_BUILTIN_PSLLDI128_MASK:
19312 case IX86_BUILTIN_PSLLDI256:
19313 case IX86_BUILTIN_PSLLDI256_MASK:
19314 case IX86_BUILTIN_PSLLDI512:
19315 case IX86_BUILTIN_PSLLQ:
19316 case IX86_BUILTIN_PSLLQ128:
19317 case IX86_BUILTIN_PSLLQ128_MASK:
19318 case IX86_BUILTIN_PSLLQ256:
19319 case IX86_BUILTIN_PSLLQ256_MASK:
19320 case IX86_BUILTIN_PSLLQ512:
19321 case IX86_BUILTIN_PSLLQI:
19322 case IX86_BUILTIN_PSLLQI128:
19323 case IX86_BUILTIN_PSLLQI128_MASK:
19324 case IX86_BUILTIN_PSLLQI256:
19325 case IX86_BUILTIN_PSLLQI256_MASK:
19326 case IX86_BUILTIN_PSLLQI512:
19327 case IX86_BUILTIN_PSLLW:
19328 case IX86_BUILTIN_PSLLW128:
19329 case IX86_BUILTIN_PSLLW128_MASK:
19330 case IX86_BUILTIN_PSLLW256:
19331 case IX86_BUILTIN_PSLLW256_MASK:
19332 case IX86_BUILTIN_PSLLW512_MASK:
19333 case IX86_BUILTIN_PSLLWI:
19334 case IX86_BUILTIN_PSLLWI128:
19335 case IX86_BUILTIN_PSLLWI128_MASK:
19336 case IX86_BUILTIN_PSLLWI256:
19337 case IX86_BUILTIN_PSLLWI256_MASK:
19338 case IX86_BUILTIN_PSLLWI512_MASK:
19339 rcode = ASHIFT;
19340 is_vshift = false;
19341 goto do_shift;
19342 case IX86_BUILTIN_PSRAD:
19343 case IX86_BUILTIN_PSRAD128:
19344 case IX86_BUILTIN_PSRAD128_MASK:
19345 case IX86_BUILTIN_PSRAD256:
19346 case IX86_BUILTIN_PSRAD256_MASK:
19347 case IX86_BUILTIN_PSRAD512:
19348 case IX86_BUILTIN_PSRADI:
19349 case IX86_BUILTIN_PSRADI128:
19350 case IX86_BUILTIN_PSRADI128_MASK:
19351 case IX86_BUILTIN_PSRADI256:
19352 case IX86_BUILTIN_PSRADI256_MASK:
19353 case IX86_BUILTIN_PSRADI512:
19354 case IX86_BUILTIN_PSRAQ128_MASK:
19355 case IX86_BUILTIN_PSRAQ256_MASK:
19356 case IX86_BUILTIN_PSRAQ512:
19357 case IX86_BUILTIN_PSRAQI128_MASK:
19358 case IX86_BUILTIN_PSRAQI256_MASK:
19359 case IX86_BUILTIN_PSRAQI512:
19360 case IX86_BUILTIN_PSRAW:
19361 case IX86_BUILTIN_PSRAW128:
19362 case IX86_BUILTIN_PSRAW128_MASK:
19363 case IX86_BUILTIN_PSRAW256:
19364 case IX86_BUILTIN_PSRAW256_MASK:
19365 case IX86_BUILTIN_PSRAW512:
19366 case IX86_BUILTIN_PSRAWI:
19367 case IX86_BUILTIN_PSRAWI128:
19368 case IX86_BUILTIN_PSRAWI128_MASK:
19369 case IX86_BUILTIN_PSRAWI256:
19370 case IX86_BUILTIN_PSRAWI256_MASK:
19371 case IX86_BUILTIN_PSRAWI512:
19372 rcode = ASHIFTRT;
19373 is_vshift = false;
19374 goto do_shift;
19375 case IX86_BUILTIN_PSRLD:
19376 case IX86_BUILTIN_PSRLD128:
19377 case IX86_BUILTIN_PSRLD128_MASK:
19378 case IX86_BUILTIN_PSRLD256:
19379 case IX86_BUILTIN_PSRLD256_MASK:
19380 case IX86_BUILTIN_PSRLD512:
19381 case IX86_BUILTIN_PSRLDI:
19382 case IX86_BUILTIN_PSRLDI128:
19383 case IX86_BUILTIN_PSRLDI128_MASK:
19384 case IX86_BUILTIN_PSRLDI256:
19385 case IX86_BUILTIN_PSRLDI256_MASK:
19386 case IX86_BUILTIN_PSRLDI512:
19387 case IX86_BUILTIN_PSRLQ:
19388 case IX86_BUILTIN_PSRLQ128:
19389 case IX86_BUILTIN_PSRLQ128_MASK:
19390 case IX86_BUILTIN_PSRLQ256:
19391 case IX86_BUILTIN_PSRLQ256_MASK:
19392 case IX86_BUILTIN_PSRLQ512:
19393 case IX86_BUILTIN_PSRLQI:
19394 case IX86_BUILTIN_PSRLQI128:
19395 case IX86_BUILTIN_PSRLQI128_MASK:
19396 case IX86_BUILTIN_PSRLQI256:
19397 case IX86_BUILTIN_PSRLQI256_MASK:
19398 case IX86_BUILTIN_PSRLQI512:
19399 case IX86_BUILTIN_PSRLW:
19400 case IX86_BUILTIN_PSRLW128:
19401 case IX86_BUILTIN_PSRLW128_MASK:
19402 case IX86_BUILTIN_PSRLW256:
19403 case IX86_BUILTIN_PSRLW256_MASK:
19404 case IX86_BUILTIN_PSRLW512:
19405 case IX86_BUILTIN_PSRLWI:
19406 case IX86_BUILTIN_PSRLWI128:
19407 case IX86_BUILTIN_PSRLWI128_MASK:
19408 case IX86_BUILTIN_PSRLWI256:
19409 case IX86_BUILTIN_PSRLWI256_MASK:
19410 case IX86_BUILTIN_PSRLWI512:
19411 rcode = LSHIFTRT;
19412 is_vshift = false;
19413 goto do_shift;
19414 case IX86_BUILTIN_PSLLVV16HI:
19415 case IX86_BUILTIN_PSLLVV16SI:
19416 case IX86_BUILTIN_PSLLVV2DI:
19417 case IX86_BUILTIN_PSLLVV2DI_MASK:
19418 case IX86_BUILTIN_PSLLVV32HI:
19419 case IX86_BUILTIN_PSLLVV4DI:
19420 case IX86_BUILTIN_PSLLVV4DI_MASK:
19421 case IX86_BUILTIN_PSLLVV4SI:
19422 case IX86_BUILTIN_PSLLVV4SI_MASK:
19423 case IX86_BUILTIN_PSLLVV8DI:
19424 case IX86_BUILTIN_PSLLVV8HI:
19425 case IX86_BUILTIN_PSLLVV8SI:
19426 case IX86_BUILTIN_PSLLVV8SI_MASK:
19427 rcode = ASHIFT;
19428 is_vshift = true;
19429 goto do_shift;
19430 case IX86_BUILTIN_PSRAVQ128:
19431 case IX86_BUILTIN_PSRAVQ256:
19432 case IX86_BUILTIN_PSRAVV16HI:
19433 case IX86_BUILTIN_PSRAVV16SI:
19434 case IX86_BUILTIN_PSRAVV32HI:
19435 case IX86_BUILTIN_PSRAVV4SI:
19436 case IX86_BUILTIN_PSRAVV4SI_MASK:
19437 case IX86_BUILTIN_PSRAVV8DI:
19438 case IX86_BUILTIN_PSRAVV8HI:
19439 case IX86_BUILTIN_PSRAVV8SI:
19440 case IX86_BUILTIN_PSRAVV8SI_MASK:
19441 rcode = ASHIFTRT;
19442 is_vshift = true;
19443 goto do_shift;
19444 case IX86_BUILTIN_PSRLVV16HI:
19445 case IX86_BUILTIN_PSRLVV16SI:
19446 case IX86_BUILTIN_PSRLVV2DI:
19447 case IX86_BUILTIN_PSRLVV2DI_MASK:
19448 case IX86_BUILTIN_PSRLVV32HI:
19449 case IX86_BUILTIN_PSRLVV4DI:
19450 case IX86_BUILTIN_PSRLVV4DI_MASK:
19451 case IX86_BUILTIN_PSRLVV4SI:
19452 case IX86_BUILTIN_PSRLVV4SI_MASK:
19453 case IX86_BUILTIN_PSRLVV8DI:
19454 case IX86_BUILTIN_PSRLVV8HI:
19455 case IX86_BUILTIN_PSRLVV8SI:
19456 case IX86_BUILTIN_PSRLVV8SI_MASK:
19457 rcode = LSHIFTRT;
19458 is_vshift = true;
19459 goto do_shift;
19461 do_shift:
19462 gcc_assert (n_args >= 2);
19463 if (!gimple_call_lhs (stmt))
19465 gsi_replace (gsi, gimple_build_nop (), false);
19466 return true;
19468 arg0 = gimple_call_arg (stmt, 0);
19469 arg1 = gimple_call_arg (stmt, 1);
19470 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19471 /* For masked shift, only optimize if the mask is all ones. */
19472 if (n_args > 2
19473 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
19474 break;
19475 if (is_vshift)
19477 if (TREE_CODE (arg1) != VECTOR_CST)
19478 break;
19479 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
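/* For variable shifts we can fold only when the whole count vector is
   zero (the result is then ARG0) or when every lane's count is out of
   range, in which case a logical shift yields zero; arithmetic right
   shifts are punted on.  */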
19480 if (integer_zerop (arg1))
19481 count = 0;
19482 else if (rcode == ASHIFTRT)
19483 break;
19484 else
19485 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
19487 tree elt = VECTOR_CST_ELT (arg1, i);
19488 if (!wi::neg_p (wi::to_wide (elt))
19489 && wi::to_widest (elt) < count)
19490 return false;
19493 else
19495 arg1 = ix86_vector_shift_count (arg1);
19496 if (!arg1)
19497 break;
19498 count = tree_to_uhwi (arg1);
19500 if (count == 0)
19502 /* Just return the first argument for shift by 0. */
19503 loc = gimple_location (stmt);
19504 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19505 gimple_set_location (g, loc);
19506 gsi_replace (gsi, g, false);
19507 return true;
19509 if (rcode != ASHIFTRT
19510 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
19512 /* For shift counts equal to or greater than the precision, the result
19513 is zero, except for arithmetic right shifts. */
19514 loc = gimple_location (stmt);
19515 g = gimple_build_assign (gimple_call_lhs (stmt),
19516 build_zero_cst (TREE_TYPE (arg0)));
19517 gimple_set_location (g, loc);
19518 gsi_replace (gsi, g, false);
19519 return true;
19521 break;
19523 case IX86_BUILTIN_SHUFPD512:
19524 case IX86_BUILTIN_SHUFPS512:
19525 case IX86_BUILTIN_SHUFPD:
19526 case IX86_BUILTIN_SHUFPD256:
19527 case IX86_BUILTIN_SHUFPS:
19528 case IX86_BUILTIN_SHUFPS256:
19529 arg0 = gimple_call_arg (stmt, 0);
19530 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19531 /* This is a masked shuffle. Only optimize if the mask is all ones. */
19532 if (n_args > 3
19533 && !ix86_masked_all_ones (elems,
19534 gimple_call_arg (stmt, n_args - 1)))
19535 break;
19536 arg2 = gimple_call_arg (stmt, 2);
19537 if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
19539 unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
19540 /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
19541 if (shuffle_mask > 255)
19542 return false;
19544 machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
19545 loc = gimple_location (stmt);
19546 tree itype = (imode == E_DFmode
19547 ? long_long_integer_type_node : integer_type_node);
19548 tree vtype = build_vector_type (itype, elems);
19549 tree_vector_builder elts (vtype, elems, 1);
19552 /* Transform the integer shuffle_mask into the vector perm_mask used
19553 by vec_perm_expr; refer to shufp[sd]256/512 in sse.md. */
19554 for (unsigned i = 0; i != elems; i++)
19556 unsigned sel_idx;
19557 /* Imm[1:0] (if VL > 128, then use Imm[3:2], Imm[5:4], Imm[7:6])
19558 provides 2 select controls for each element of the
19559 destination. */
19560 if (imode == E_DFmode)
19561 sel_idx = (i & 1) * elems + (i & ~1)
19562 + ((shuffle_mask >> i) & 1);
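/* For example, for V2DF (ELEMS == 2) this gives destination element 0
   = ARG0[imm bit 0] and element 1 = ARG1[imm bit 1], matching shufpd.  */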
19563 else
19565 /* Imm[7:0] (if VL > 128, also use Imm[7:0]) provides 4 select
19566 controls for each element of the destination. */
19567 unsigned j = i % 4;
19568 sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
19569 + ((shuffle_mask >> 2 * j) & 3);
19571 elts.quick_push (build_int_cst (itype, sel_idx));
19574 tree perm_mask = elts.build ();
19575 arg1 = gimple_call_arg (stmt, 1);
19576 g = gimple_build_assign (gimple_call_lhs (stmt),
19577 VEC_PERM_EXPR,
19578 arg0, arg1, perm_mask);
19579 gimple_set_location (g, loc);
19580 gsi_replace (gsi, g, false);
19581 return true;
19583 // Do not error yet, the constant could be propagated later?
19584 break;
19586 case IX86_BUILTIN_PABSB:
19587 case IX86_BUILTIN_PABSW:
19588 case IX86_BUILTIN_PABSD:
19589 /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
19590 if (!TARGET_MMX_WITH_SSE)
19591 break;
19592 /* FALLTHRU. */
19593 case IX86_BUILTIN_PABSB128:
19594 case IX86_BUILTIN_PABSB256:
19595 case IX86_BUILTIN_PABSB512:
19596 case IX86_BUILTIN_PABSW128:
19597 case IX86_BUILTIN_PABSW256:
19598 case IX86_BUILTIN_PABSW512:
19599 case IX86_BUILTIN_PABSD128:
19600 case IX86_BUILTIN_PABSD256:
19601 case IX86_BUILTIN_PABSD512:
19602 case IX86_BUILTIN_PABSQ128:
19603 case IX86_BUILTIN_PABSQ256:
19604 case IX86_BUILTIN_PABSQ512:
19605 case IX86_BUILTIN_PABSB128_MASK:
19606 case IX86_BUILTIN_PABSB256_MASK:
19607 case IX86_BUILTIN_PABSW128_MASK:
19608 case IX86_BUILTIN_PABSW256_MASK:
19609 case IX86_BUILTIN_PABSD128_MASK:
19610 case IX86_BUILTIN_PABSD256_MASK:
19611 gcc_assert (n_args >= 1);
19612 if (!gimple_call_lhs (stmt))
19614 gsi_replace (gsi, gimple_build_nop (), false);
19615 return true;
19617 arg0 = gimple_call_arg (stmt, 0);
19618 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19619 /* For masked ABS, only optimize if the mask is all ones. */
19620 if (n_args > 1
19621 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
19622 break;
19624 tree utype, ures, vce;
19625 utype = unsigned_type_for (TREE_TYPE (arg0));
19626 /* PABSB/W/D/Q store the unsigned result in dst; use ABSU_EXPR
19627 instead of ABS_EXPR to handle the overflow case (TYPE_MIN). */
19628 ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
19629 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19630 loc = gimple_location (stmt);
19631 vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
19632 g = gimple_build_assign (gimple_call_lhs (stmt),
19633 VIEW_CONVERT_EXPR, vce);
19634 gsi_replace (gsi, g, false);
19636 return true;
19638 case IX86_BUILTIN_MINPS:
19639 case IX86_BUILTIN_MINPD:
19640 case IX86_BUILTIN_MINPS256:
19641 case IX86_BUILTIN_MINPD256:
19642 case IX86_BUILTIN_MINPS512:
19643 case IX86_BUILTIN_MINPD512:
19644 case IX86_BUILTIN_MINPS128_MASK:
19645 case IX86_BUILTIN_MINPD128_MASK:
19646 case IX86_BUILTIN_MINPS256_MASK:
19647 case IX86_BUILTIN_MINPD256_MASK:
19648 case IX86_BUILTIN_MINPH128_MASK:
19649 case IX86_BUILTIN_MINPH256_MASK:
19650 case IX86_BUILTIN_MINPH512_MASK:
19651 tcode = LT_EXPR;
19652 goto do_minmax;
19654 case IX86_BUILTIN_MAXPS:
19655 case IX86_BUILTIN_MAXPD:
19656 case IX86_BUILTIN_MAXPS256:
19657 case IX86_BUILTIN_MAXPD256:
19658 case IX86_BUILTIN_MAXPS512:
19659 case IX86_BUILTIN_MAXPD512:
19660 case IX86_BUILTIN_MAXPS128_MASK:
19661 case IX86_BUILTIN_MAXPD128_MASK:
19662 case IX86_BUILTIN_MAXPS256_MASK:
19663 case IX86_BUILTIN_MAXPD256_MASK:
19664 case IX86_BUILTIN_MAXPH128_MASK:
19665 case IX86_BUILTIN_MAXPH256_MASK:
19666 case IX86_BUILTIN_MAXPH512_MASK:
19667 tcode = GT_EXPR;
19668 do_minmax:
19669 gcc_assert (n_args >= 2);
19670 /* Without SSE4.1 we often aren't able to pattern match it back to the
19671 desired instruction. */
19672 if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
19673 break;
19674 arg0 = gimple_call_arg (stmt, 0);
19675 arg1 = gimple_call_arg (stmt, 1);
19676 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19677 /* For masked minmax, only optimize if the mask is all ones. */
19678 if (n_args > 2
19679 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
19680 break;
19681 if (n_args >= 5)
19683 tree arg4 = gimple_call_arg (stmt, 4);
19684 if (!tree_fits_uhwi_p (arg4))
19685 break;
19686 if (tree_to_uhwi (arg4) == 4)
19687 /* Ok. */;
19688 else if (tree_to_uhwi (arg4) != 8)
19689 /* Invalid round argument. */
19690 break;
19691 else if (HONOR_NANS (arg0))
19692 /* Lowering to comparison would raise exceptions which
19693 shouldn't be raised. */
19694 break;
19697 tree type = truth_type_for (TREE_TYPE (arg0));
19698 tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
19699 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19700 g = gimple_build_assign (gimple_call_lhs (stmt),
19701 VEC_COND_EXPR, cmpres, arg0, arg1);
19702 gsi_replace (gsi, g, false);
19704 return true;
19706 default:
19707 break;
19710 return false;
19713 /* Handler for an SVML-style interface to
19714 a library with vectorized intrinsics. */
19716 tree
19717 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
19719 char name[20];
19720 tree fntype, new_fndecl, args;
19721 unsigned arity;
19722 const char *bname;
19723 machine_mode el_mode, in_mode;
19724 int n, in_n;
19726 /* The SVML is suitable for unsafe math only. */
19727 if (!flag_unsafe_math_optimizations)
19728 return NULL_TREE;
19730 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19731 n = TYPE_VECTOR_SUBPARTS (type_out);
19732 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19733 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19734 if (el_mode != in_mode
19735 || n != in_n)
19736 return NULL_TREE;
19738 switch (fn)
19740 CASE_CFN_EXP:
19741 CASE_CFN_LOG:
19742 CASE_CFN_LOG10:
19743 CASE_CFN_POW:
19744 CASE_CFN_TANH:
19745 CASE_CFN_TAN:
19746 CASE_CFN_ATAN:
19747 CASE_CFN_ATAN2:
19748 CASE_CFN_ATANH:
19749 CASE_CFN_CBRT:
19750 CASE_CFN_SINH:
19751 CASE_CFN_SIN:
19752 CASE_CFN_ASINH:
19753 CASE_CFN_ASIN:
19754 CASE_CFN_COSH:
19755 CASE_CFN_COS:
19756 CASE_CFN_ACOSH:
19757 CASE_CFN_ACOS:
19758 if ((el_mode != DFmode || n != 2)
19759 && (el_mode != SFmode || n != 4))
19760 return NULL_TREE;
19761 break;
19763 default:
19764 return NULL_TREE;
19767 tree fndecl = mathfn_built_in (el_mode == DFmode
19768 ? double_type_node : float_type_node, fn);
19769 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19771 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
19772 strcpy (name, "vmlsLn4");
19773 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
19774 strcpy (name, "vmldLn2");
19775 else if (n == 4)
19777 sprintf (name, "vmls%s", bname+10);
19778 name[strlen (name)-1] = '4';
19780 else
19781 sprintf (name, "vmld%s2", bname+10);
19783 /* Convert to uppercase. */
19784 name[4] &= ~0x20;
19786 arity = 0;
19787 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19788 arity++;
19790 if (arity == 1)
19791 fntype = build_function_type_list (type_out, type_in, NULL);
19792 else
19793 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19795 /* Build a function declaration for the vectorized function. */
19796 new_fndecl = build_decl (BUILTINS_LOCATION,
19797 FUNCTION_DECL, get_identifier (name), fntype);
19798 TREE_PUBLIC (new_fndecl) = 1;
19799 DECL_EXTERNAL (new_fndecl) = 1;
19800 DECL_IS_NOVOPS (new_fndecl) = 1;
19801 TREE_READONLY (new_fndecl) = 1;
19803 return new_fndecl;
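/* Illustrative standalone example, not part of this file: under
   -mveclibabi=svml with -ffast-math, vectorizing the loop below with V4SF
   is expected to call the SVML entry point "vmlsSin4" (and "vmldSin2" for
   the V2DF variant), following the name mangling performed above.  */

#include <math.h>

void
svml_sin_example (float *dst, const float *src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = sinf (src[i]);
}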
19806 /* Handler for an ACML-style interface to
19807 a library with vectorized intrinsics. */
19809 tree
19810 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
19812 char name[20] = "__vr.._";
19813 tree fntype, new_fndecl, args;
19814 unsigned arity;
19815 const char *bname;
19816 machine_mode el_mode, in_mode;
19817 int n, in_n;
19819 /* ACML is 64-bit only and suitable for unsafe math only, as
19820 it does not correctly support parts of IEEE with the required
19821 precision such as denormals. */
19822 if (!TARGET_64BIT
19823 || !flag_unsafe_math_optimizations)
19824 return NULL_TREE;
19826 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19827 n = TYPE_VECTOR_SUBPARTS (type_out);
19828 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19829 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19830 if (el_mode != in_mode
19831 || n != in_n)
19832 return NULL_TREE;
19834 switch (fn)
19836 CASE_CFN_SIN:
19837 CASE_CFN_COS:
19838 CASE_CFN_EXP:
19839 CASE_CFN_LOG:
19840 CASE_CFN_LOG2:
19841 CASE_CFN_LOG10:
19842 if (el_mode == DFmode && n == 2)
19844 name[4] = 'd';
19845 name[5] = '2';
19847 else if (el_mode == SFmode && n == 4)
19849 name[4] = 's';
19850 name[5] = '4';
19852 else
19853 return NULL_TREE;
19854 break;
19856 default:
19857 return NULL_TREE;
19860 tree fndecl = mathfn_built_in (el_mode == DFmode
19861 ? double_type_node : float_type_node, fn);
19862 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19863 sprintf (name + 7, "%s", bname+10);
19865 arity = 0;
19866 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19867 arity++;
19869 if (arity == 1)
19870 fntype = build_function_type_list (type_out, type_in, NULL);
19871 else
19872 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19874 /* Build a function declaration for the vectorized function. */
19875 new_fndecl = build_decl (BUILTINS_LOCATION,
19876 FUNCTION_DECL, get_identifier (name), fntype);
19877 TREE_PUBLIC (new_fndecl) = 1;
19878 DECL_EXTERNAL (new_fndecl) = 1;
19879 DECL_IS_NOVOPS (new_fndecl) = 1;
19880 TREE_READONLY (new_fndecl) = 1;
19882 return new_fndecl;
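/* Illustrative standalone example, not part of this file: under
   -mveclibabi=acml with -ffast-math on x86-64, the mangling above produces
   names such as "__vrd2_sin" (V2DF) and "__vrs4_sinf" (V4SF), so the loop
   below is expected to be vectorized into calls to __vrd2_sin.  */

#include <math.h>

void
acml_sin_example (double *dst, const double *src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = sin (src[i]);
}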
19885 /* Returns a decl of a function that implements scatter store with
19886 register type VECTYPE and index type INDEX_TYPE and SCALE.
19887 Return NULL_TREE if it is not available. */
19889 static tree
19890 ix86_vectorize_builtin_scatter (const_tree vectype,
19891 const_tree index_type, int scale)
19893 bool si;
19894 enum ix86_builtins code;
19895 const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype));
19897 if (!TARGET_AVX512F)
19898 return NULL_TREE;
19900 if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64)
19901 return NULL_TREE;
19903 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
19904 ? !TARGET_USE_SCATTER_2PARTS
19905 : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
19906 ? !TARGET_USE_SCATTER_4PARTS
19907 : !TARGET_USE_SCATTER_8PARTS))
19908 return NULL_TREE;
19910 if ((TREE_CODE (index_type) != INTEGER_TYPE
19911 && !POINTER_TYPE_P (index_type))
19912 || (TYPE_MODE (index_type) != SImode
19913 && TYPE_MODE (index_type) != DImode))
19914 return NULL_TREE;
19916 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
19917 return NULL_TREE;
19919 /* v*scatter* insns sign-extend the index to pointer mode. */
19920 if (TYPE_PRECISION (index_type) < POINTER_SIZE
19921 && TYPE_UNSIGNED (index_type))
19922 return NULL_TREE;
19924 /* Scale can be 1, 2, 4 or 8. */
19925 if (scale <= 0
19926 || scale > 8
19927 || (scale & (scale - 1)) != 0)
19928 return NULL_TREE;
19930 si = TYPE_MODE (index_type) == SImode;
19931 switch (TYPE_MODE (vectype))
19933 case E_V8DFmode:
19934 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
19935 break;
19936 case E_V8DImode:
19937 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
19938 break;
19939 case E_V16SFmode:
19940 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
19941 break;
19942 case E_V16SImode:
19943 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
19944 break;
19945 case E_V4DFmode:
19946 if (TARGET_AVX512VL)
19947 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
19948 else
19949 return NULL_TREE;
19950 break;
19951 case E_V4DImode:
19952 if (TARGET_AVX512VL)
19953 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
19954 else
19955 return NULL_TREE;
19956 break;
19957 case E_V8SFmode:
19958 if (TARGET_AVX512VL)
19959 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
19960 else
19961 return NULL_TREE;
19962 break;
19963 case E_V8SImode:
19964 if (TARGET_AVX512VL)
19965 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
19966 else
19967 return NULL_TREE;
19968 break;
19969 case E_V2DFmode:
19970 if (TARGET_AVX512VL)
19971 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
19972 else
19973 return NULL_TREE;
19974 break;
19975 case E_V2DImode:
19976 if (TARGET_AVX512VL)
19977 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
19978 else
19979 return NULL_TREE;
19980 break;
19981 case E_V4SFmode:
19982 if (TARGET_AVX512VL)
19983 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
19984 else
19985 return NULL_TREE;
19986 break;
19987 case E_V4SImode:
19988 if (TARGET_AVX512VL)
19989 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
19990 else
19991 return NULL_TREE;
19992 break;
19993 default:
19994 return NULL_TREE;
19997 return get_ix86_builtin (code);
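/* Illustrative standalone example, not part of this file: for the scatter
   store below, vectorized with -O3 -mavx512f -mprefer-vector-width=512, the
   hook above is queried with vectype V16SF, an SImode index type and scale 4,
   and is expected to return the decl for IX86_BUILTIN_SCATTERSIV16SF
   (a vscatterdps instruction).  */

void
scatter_example (float *base, const int *idx, const float *val, int n)
{
  for (int i = 0; i < n; i++)
    base[idx[i]] = val[i];
}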
20000 /* Return true if it is safe to use the rsqrt optabs to optimize
20001 1.0/sqrt. */
20003 static bool
20004 use_rsqrt_p (machine_mode mode)
20006 return ((mode == HFmode
20007 || (TARGET_SSE && TARGET_SSE_MATH))
20008 && flag_finite_math_only
20009 && !flag_trapping_math
20010 && flag_unsafe_math_optimizations);
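/* Illustrative standalone example, not part of this file: compiling this
   with -Ofast (which enables -ffinite-math-only, -fno-trapping-math and
   -funsafe-math-optimizations) on an SSE target satisfies use_rsqrt_p, so
   the division is expected to expand to rsqrtss plus one Newton-Raphson
   refinement step rather than sqrtss followed by divss.  */

float
rsqrt_example (float x)
{
  return 1.0f / __builtin_sqrtf (x);
}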
20013 /* Helper for avx_vpermilps256_operand et al. This is also used by
20014 the expansion functions to turn the parallel back into a mask.
20015 The return value is 0 for no match and the imm8+1 for a match. */
20018 avx_vpermilp_parallel (rtx par, machine_mode mode)
20020 unsigned i, nelt = GET_MODE_NUNITS (mode);
20021 unsigned mask = 0;
20022 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
20024 if (XVECLEN (par, 0) != (int) nelt)
20025 return 0;
20027 /* Validate that all of the elements are constants, and not totally
20028 out of range. Copy the data into an integral array to make the
20029 subsequent checks easier. */
20030 for (i = 0; i < nelt; ++i)
20032 rtx er = XVECEXP (par, 0, i);
20033 unsigned HOST_WIDE_INT ei;
20035 if (!CONST_INT_P (er))
20036 return 0;
20037 ei = INTVAL (er);
20038 if (ei >= nelt)
20039 return 0;
20040 ipar[i] = ei;
20043 switch (mode)
20045 case E_V8DFmode:
20046 /* In the 512-bit DFmode case, we can only move elements within
20047 a 128-bit lane. First fill the second part of the mask,
20048 then fallthru. */
20049 for (i = 4; i < 6; ++i)
20051 if (ipar[i] < 4 || ipar[i] >= 6)
20052 return 0;
20053 mask |= (ipar[i] - 4) << i;
20055 for (i = 6; i < 8; ++i)
20057 if (ipar[i] < 6)
20058 return 0;
20059 mask |= (ipar[i] - 6) << i;
20061 /* FALLTHRU */
20063 case E_V4DFmode:
20064 /* In the 256-bit DFmode case, we can only move elements within
20065 a 128-bit lane. */
20066 for (i = 0; i < 2; ++i)
20068 if (ipar[i] >= 2)
20069 return 0;
20070 mask |= ipar[i] << i;
20072 for (i = 2; i < 4; ++i)
20074 if (ipar[i] < 2)
20075 return 0;
20076 mask |= (ipar[i] - 2) << i;
20078 break;
20080 case E_V16SFmode:
20081 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
20082 must mirror the permutation in the lower 256 bits. */
20083 for (i = 0; i < 8; ++i)
20084 if (ipar[i] + 8 != ipar[i + 8])
20085 return 0;
20086 /* FALLTHRU */
20088 case E_V8SFmode:
20089 /* In the 256-bit SFmode case, we have full freedom of
20090 movement within the low 128-bit lane, but the high 128-bit
20091 lane must mirror the exact same pattern. */
20092 for (i = 0; i < 4; ++i)
20093 if (ipar[i] + 4 != ipar[i + 4])
20094 return 0;
20095 nelt = 4;
20096 /* FALLTHRU */
20098 case E_V2DFmode:
20099 case E_V4SFmode:
20100 /* In the 128-bit case, we have full freedom in the placement of
20101 the elements from the source operand. */
20102 for (i = 0; i < nelt; ++i)
20103 mask |= ipar[i] << (i * (nelt / 2));
20104 break;
20106 default:
20107 gcc_unreachable ();
20110 /* Make sure success has a non-zero value by adding one. */
20111 return mask + 1;
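/* Worked example, illustrative only: for V8SFmode a vec_select parallel
   [1 0 3 2 5 4 7 6] passes the high-lane mirror check, nelt is reduced to 4
   and the low-lane selectors {1,0,3,2} encode as
     1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xB1,
   so the function returns 0xB2 (imm8 + 1).  The same shuffle written with
   intrinsics (assumes AVX):  */

#include <immintrin.h>

__m256
vpermilps_example (__m256 x)
{
  /* Swap adjacent elements within each 128-bit lane; imm8 == 0xB1.  */
  return _mm256_permute_ps (x, 0xB1);
}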
20114 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
20115 the expansion functions to turn the parallel back into a mask.
20116 The return value is 0 for no match and the imm8+1 for a match. */
20119 avx_vperm2f128_parallel (rtx par, machine_mode mode)
20121 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
20122 unsigned mask = 0;
20123 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
20125 if (XVECLEN (par, 0) != (int) nelt)
20126 return 0;
20128 /* Validate that all of the elements are constants, and not totally
20129 out of range. Copy the data into an integral array to make the
20130 subsequent checks easier. */
20131 for (i = 0; i < nelt; ++i)
20133 rtx er = XVECEXP (par, 0, i);
20134 unsigned HOST_WIDE_INT ei;
20136 if (!CONST_INT_P (er))
20137 return 0;
20138 ei = INTVAL (er);
20139 if (ei >= 2 * nelt)
20140 return 0;
20141 ipar[i] = ei;
20144 /* Validate that each half of the permute selects consecutive elements. */
20145 for (i = 0; i < nelt2 - 1; ++i)
20146 if (ipar[i] + 1 != ipar[i + 1])
20147 return 0;
20148 for (i = nelt2; i < nelt - 1; ++i)
20149 if (ipar[i] + 1 != ipar[i + 1])
20150 return 0;
20152 /* Reconstruct the mask. */
20153 for (i = 0; i < 2; ++i)
20155 unsigned e = ipar[i * nelt2];
20156 if (e % nelt2)
20157 return 0;
20158 e /= nelt2;
20159 mask |= e << (i * 4);
20162 /* Make sure success has a non-zero value by adding one. */
20163 return mask + 1;
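/* Worked example, illustrative only: for V4DFmode a parallel [2 3 4 5]
   selecting from the concatenation of two V4DF operands has consecutive
   halves {2,3} and {4,5}; with nelt2 == 2 they encode as 2/2 == 1 and
   4/2 == 2, giving mask = 1 | 2<<4 = 0x21, so the function returns 0x22
   (imm8 + 1).  The equivalent intrinsic (assumes AVX):  */

#include <immintrin.h>

__m256d
vperm2f128_example (__m256d a, __m256d b)
{
  /* Result = { high lane of a, low lane of b }; imm8 == 0x21.  */
  return _mm256_permute2f128_pd (a, b, 0x21);
}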
20166 /* Return a mask of VPTERNLOG operands that do not affect output. */
20169 vpternlog_redundant_operand_mask (rtx pternlog_imm)
20171 int mask = 0;
20172 int imm8 = INTVAL (pternlog_imm);
20174 if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
20175 mask |= 1;
20176 if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
20177 mask |= 2;
20178 if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
20179 mask |= 4;
20181 return mask;
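/* Worked example, illustrative only: the VPTERNLOG imm8 is the truth table
   of a three-input boolean function, where operand 1 selects between the
   high and low nibble, operand 2 between bit pairs and operand 3 between
   odd and even bits.  For imm8 == 0xF0 (result == operand 1) the nibble
   test fails but the other two pass, so the mask is 2 | 4 == 6: operands 2
   and 3 do not affect the output.  Likewise 0xCC (result == operand 2)
   yields 1 | 4 == 5 and 0xAA (result == operand 3) yields 1 | 2 == 3.  */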
20184 /* Eliminate false dependencies on operands that do not affect output
20185 by substituting other operands of a VPTERNLOG. */
20187 void
20188 substitute_vpternlog_operands (rtx *operands)
20190 int mask = vpternlog_redundant_operand_mask (operands[4]);
20192 if (mask & 1) /* The first operand is redundant. */
20193 operands[1] = operands[2];
20195 if (mask & 2) /* The second operand is redundant. */
20196 operands[2] = operands[1];
20198 if (mask & 4) /* The third operand is redundant. */
20199 operands[3] = operands[1];
20200 else if (REG_P (operands[3]))
20202 if (mask & 1)
20203 operands[1] = operands[3];
20204 if (mask & 2)
20205 operands[2] = operands[3];
20209 /* Return a register priority for hard reg REGNO. */
20210 static int
20211 ix86_register_priority (int hard_regno)
20213 /* ebp and r13 as the base always want a displacement, and r12 as the
20214 base always wants an index. So discourage their use in an
20215 address. */
20216 if (hard_regno == R12_REG || hard_regno == R13_REG)
20217 return 0;
20218 if (hard_regno == BP_REG)
20219 return 1;
20220 /* New x86-64 int registers result in bigger code size. Discourage them. */
20221 if (REX_INT_REGNO_P (hard_regno))
20222 return 2;
20223 if (REX2_INT_REGNO_P (hard_regno))
20224 return 2;
20225 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
20226 if (REX_SSE_REGNO_P (hard_regno))
20227 return 2;
20228 if (EXT_REX_SSE_REGNO_P (hard_regno))
20229 return 1;
20230 /* Usage of AX register results in smaller code. Prefer it. */
20231 if (hard_regno == AX_REG)
20232 return 4;
20233 return 3;
20236 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
20238 Put float CONST_DOUBLE in the constant pool instead of fp regs.
20239 QImode must go into class Q_REGS.
20240 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20241 movdf to do mem-to-mem moves through integer regs. */
20243 static reg_class_t
20244 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
20246 machine_mode mode = GET_MODE (x);
20248 /* We're only allowed to return a subclass of CLASS. Many of the
20249 following checks fail for NO_REGS, so eliminate that early. */
20250 if (regclass == NO_REGS)
20251 return NO_REGS;
20253 /* All classes can load zeros. */
20254 if (x == CONST0_RTX (mode))
20255 return regclass;
20257 /* Force constants into memory if we are loading a (nonzero) constant into
20258 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
20259 instructions to load from a constant. */
20260 if (CONSTANT_P (x)
20261 && (MAYBE_MMX_CLASS_P (regclass)
20262 || MAYBE_SSE_CLASS_P (regclass)
20263 || MAYBE_MASK_CLASS_P (regclass)))
20264 return NO_REGS;
20266 /* Floating-point constants need more complex checks. */
20267 if (CONST_DOUBLE_P (x))
20269 /* General regs can load everything. */
20270 if (INTEGER_CLASS_P (regclass))
20271 return regclass;
20273 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20274 zero above. We only want to wind up preferring 80387 registers if
20275 we plan on doing computation with them. */
20276 if (IS_STACK_MODE (mode)
20277 && standard_80387_constant_p (x) > 0)
20279 /* Limit class to FP regs. */
20280 if (FLOAT_CLASS_P (regclass))
20281 return FLOAT_REGS;
20284 return NO_REGS;
20287 /* Prefer SSE if we can use them for math. Also allow integer regs
20288 when moves between register units are cheap. */
20289 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20291 if (TARGET_INTER_UNIT_MOVES_FROM_VEC
20292 && TARGET_INTER_UNIT_MOVES_TO_VEC
20293 && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
20294 return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20295 else
20296 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20299 /* Generally when we see PLUS here, it's the function invariant
20300 (plus soft-fp const_int), which can only be computed into general
20301 regs. */
20302 if (GET_CODE (x) == PLUS)
20303 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
20305 /* QImode constants are easy to load, but non-constant QImode data
20306 must go into Q_REGS or ALL_MASK_REGS. */
20307 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20309 if (Q_CLASS_P (regclass))
20310 return regclass;
20311 else if (reg_class_subset_p (Q_REGS, regclass))
20312 return Q_REGS;
20313 else if (MASK_CLASS_P (regclass))
20314 return regclass;
20315 else
20316 return NO_REGS;
20319 return regclass;
20322 /* Discourage putting floating-point values in SSE registers unless
20323 SSE math is being used, and likewise for the 387 registers. */
20324 static reg_class_t
20325 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
20327 /* Restrict the output reload class to the register bank that we are doing
20328 math on. If we would like not to return a subset of CLASS, reject this
20329 alternative: if reload cannot do this, it will still use its choice. */
20330 machine_mode mode = GET_MODE (x);
20331 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20332 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
20334 if (IS_STACK_MODE (mode))
20335 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20337 return regclass;
20340 static reg_class_t
20341 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
20342 machine_mode mode, secondary_reload_info *sri)
20344 /* Double-word spills from general registers to non-offsettable memory
20345 references (zero-extended addresses) require special handling. */
20346 if (TARGET_64BIT
20347 && MEM_P (x)
20348 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
20349 && INTEGER_CLASS_P (rclass)
20350 && !offsettable_memref_p (x))
20352 sri->icode = (in_p
20353 ? CODE_FOR_reload_noff_load
20354 : CODE_FOR_reload_noff_store);
20355 /* Add the cost of moving address to a temporary. */
20356 sri->extra_cost = 1;
20358 return NO_REGS;
20361 /* QImode spills from non-QI registers require an
20362 intermediate register on 32-bit targets. */
20363 if (mode == QImode
20364 && ((!TARGET_64BIT && !in_p
20365 && INTEGER_CLASS_P (rclass)
20366 && MAYBE_NON_Q_CLASS_P (rclass))
20367 || (!TARGET_AVX512DQ
20368 && MAYBE_MASK_CLASS_P (rclass))))
20370 int regno = true_regnum (x);
20372 /* Return Q_REGS if the operand is in memory. */
20373 if (regno == -1)
20374 return Q_REGS;
20376 return NO_REGS;
20379 /* Require movement to gpr, and then store to memory. */
20380 if ((mode == HFmode || mode == HImode || mode == V2QImode
20381 || mode == BFmode)
20382 && !TARGET_SSE4_1
20383 && SSE_CLASS_P (rclass)
20384 && !in_p && MEM_P (x))
20386 sri->extra_cost = 1;
20387 return GENERAL_REGS;
20390 /* This condition handles corner case where an expression involving
20391 pointers gets vectorized. We're trying to use the address of a
20392 stack slot as a vector initializer.
20394 (set (reg:V2DI 74 [ vect_cst_.2 ])
20395 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
20397 Eventually frame gets turned into sp+offset like this:
20399 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20400 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
20401 (const_int 392 [0x188]))))
20403 That later gets turned into:
20405 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20406 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
20407 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
20409 We'll have the following reload recorded:
20411 Reload 0: reload_in (DI) =
20412 (plus:DI (reg/f:DI 7 sp)
20413 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
20414 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20415 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
20416 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
20417 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20418 reload_reg_rtx: (reg:V2DI 22 xmm1)
20420 Which isn't going to work since SSE instructions can't handle scalar
20421 additions. Returning GENERAL_REGS forces the addition into integer
20422 register and reload can handle subsequent reloads without problems. */
20424 if (in_p && GET_CODE (x) == PLUS
20425 && SSE_CLASS_P (rclass)
20426 && SCALAR_INT_MODE_P (mode))
20427 return GENERAL_REGS;
20429 return NO_REGS;
20432 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
20434 static bool
20435 ix86_class_likely_spilled_p (reg_class_t rclass)
20437 switch (rclass)
20439 case AREG:
20440 case DREG:
20441 case CREG:
20442 case BREG:
20443 case AD_REGS:
20444 case SIREG:
20445 case DIREG:
20446 case SSE_FIRST_REG:
20447 case FP_TOP_REG:
20448 case FP_SECOND_REG:
20449 return true;
20451 default:
20452 break;
20455 return false;
20458 /* Return true if a set of DST by the expression SRC should be allowed.
20459 This prevents complex sets of likely_spilled hard regs before split1. */
20461 bool
20462 ix86_hardreg_mov_ok (rtx dst, rtx src)
20464 /* Avoid complex sets of likely_spilled hard registers before reload. */
20465 if (REG_P (dst) && HARD_REGISTER_P (dst)
20466 && !REG_P (src) && !MEM_P (src)
20467 && !(VECTOR_MODE_P (GET_MODE (dst))
20468 ? standard_sse_constant_p (src, GET_MODE (dst))
20469 : x86_64_immediate_operand (src, GET_MODE (dst)))
20470 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
20471 && ix86_pre_reload_split ())
20472 return false;
20473 return true;
20476 /* If we are copying between registers from different register sets
20477 (e.g. FP and integer), we may need a memory location.
20479 The function can't work reliably when one of the CLASSES is a class
20480 containing registers from multiple sets. We avoid this by never combining
20481 different sets in a single alternative in the machine description.
20482 Ensure that this constraint holds to avoid unexpected surprises.
20484 When STRICT is false, we are being called from REGISTER_MOVE_COST,
20485 so do not enforce these sanity checks.
20487 To optimize register_move_cost performance, define inline variant. */
20489 static inline bool
20490 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
20491 reg_class_t class2, int strict)
20493 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
20494 return false;
20496 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20497 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20498 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20499 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20500 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20501 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
20502 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
20503 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
20505 gcc_assert (!strict || lra_in_progress);
20506 return true;
20509 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20510 return true;
20512 /* ??? This is a lie. We do have moves between mmx/general, and for
20513 mmx/sse2. But by saying we need secondary memory we discourage the
20514 register allocator from using the mmx registers unless needed. */
20515 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20516 return true;
20518 /* Between mask and general, we have moves no larger than word size. */
20519 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20521 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
20522 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20523 return true;
20526 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20528 /* SSE1 doesn't have any direct moves from other classes. */
20529 if (!TARGET_SSE2)
20530 return true;
20532 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
20533 return true;
20535 /* If the target says that inter-unit moves are more expensive
20536 than moving through memory, then don't generate them. */
20537 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
20538 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
20539 return true;
20541 /* With SSE4.1, *mov{ti,di}_internal supports moves between
20542 SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}. */
20543 if (TARGET_SSE4_1
20544 && (TARGET_64BIT ? mode == TImode : mode == DImode))
20545 return false;
20547 int msize = GET_MODE_SIZE (mode);
20549 /* Between SSE and general, we have moves no larger than word size. */
20550 if (msize > UNITS_PER_WORD)
20551 return true;
20553 /* In addition to SImode moves, HImode moves are supported for SSE2 and above.
20554 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
20555 int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
20557 if (msize < minsize)
20558 return true;
20561 return false;
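/* Worked example, illustrative only: on a 64-bit SSE2 target with the
   inter-unit move tunings enabled, a DImode move between SSE_REGS and
   GENERAL_REGS returns false (it fits in a word, so no secondary memory is
   needed), whereas any MMX_REGS <-> GENERAL_REGS move returns true, which
   deliberately steers the register allocator away from MMX registers.  */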
20564 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
20566 static bool
20567 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
20568 reg_class_t class2)
20570 return inline_secondary_memory_needed (mode, class1, class2, true);
20573 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
20575 get_secondary_mem widens integral modes to BITS_PER_WORD.
20576 There is no need to emit a full 64-bit move on 64-bit targets
20577 for integral modes that can be moved using a 32-bit move. */
20579 static machine_mode
20580 ix86_secondary_memory_needed_mode (machine_mode mode)
20582 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
20583 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
20584 return mode;
20587 /* Implement the TARGET_CLASS_MAX_NREGS hook.
20589 On the 80386, this is the size of MODE in words,
20590 except in the FP regs, where a single reg is always enough. */
20592 static unsigned char
20593 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
20595 if (MAYBE_INTEGER_CLASS_P (rclass))
20597 if (mode == XFmode)
20598 return (TARGET_64BIT ? 2 : 3);
20599 else if (mode == XCmode)
20600 return (TARGET_64BIT ? 4 : 6);
20601 else
20602 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
20604 else
20606 if (COMPLEX_MODE_P (mode))
20607 return 2;
20608 else
20609 return 1;
20613 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
20615 static bool
20616 ix86_can_change_mode_class (machine_mode from, machine_mode to,
20617 reg_class_t regclass)
20619 if (from == to)
20620 return true;
20622 /* x87 registers can't do subreg at all, as all values are reformatted
20623 to extended precision. */
20624 if (MAYBE_FLOAT_CLASS_P (regclass))
20625 return false;
20627 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20629 /* Vector registers do not support QI or HImode loads. If we don't
20630 disallow a change to these modes, reload will assume it's ok to
20631 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20632 the vec_dupv4hi pattern.
20633 NB: SSE2 can load 16-bit data to an SSE register via pinsrw. */
20634 int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
20635 if (GET_MODE_SIZE (from) < mov_size
20636 || GET_MODE_SIZE (to) < mov_size)
20637 return false;
20640 return true;
20643 /* Return index of MODE in the sse load/store tables. */
20645 static inline int
20646 sse_store_index (machine_mode mode)
20648 /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
20649 costs to processor_costs, which would require changes to all entries in
20650 the processor cost table. */
20651 if (mode == E_HFmode)
20652 mode = E_SFmode;
20654 switch (GET_MODE_SIZE (mode))
20656 case 4:
20657 return 0;
20658 case 8:
20659 return 1;
20660 case 16:
20661 return 2;
20662 case 32:
20663 return 3;
20664 case 64:
20665 return 4;
20666 default:
20667 return -1;
20671 /* Return the cost of moving data of mode M between a
20672 register and memory. A value of 2 is the default; this cost is
20673 relative to those in `REGISTER_MOVE_COST'.
20675 This function is used extensively by register_move_cost, which is used to
20676 build tables at startup. Make it inline in this case.
20677 When IN is 2, return the maximum of the in and out move costs.
20679 If moving between registers and memory is more expensive than
20680 between two registers, you should define this macro to express the
20681 relative cost.
20683 Also model the increased cost of moving QImode registers in non
20684 Q_REGS classes. */
20686 static inline int
20687 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
20689 int cost;
20691 if (FLOAT_CLASS_P (regclass))
20693 int index;
20694 switch (mode)
20696 case E_SFmode:
20697 index = 0;
20698 break;
20699 case E_DFmode:
20700 index = 1;
20701 break;
20702 case E_XFmode:
20703 index = 2;
20704 break;
20705 default:
20706 return 100;
20708 if (in == 2)
20709 return MAX (ix86_cost->hard_register.fp_load [index],
20710 ix86_cost->hard_register.fp_store [index]);
20711 return in ? ix86_cost->hard_register.fp_load [index]
20712 : ix86_cost->hard_register.fp_store [index];
20714 if (SSE_CLASS_P (regclass))
20716 int index = sse_store_index (mode);
20717 if (index == -1)
20718 return 100;
20719 if (in == 2)
20720 return MAX (ix86_cost->hard_register.sse_load [index],
20721 ix86_cost->hard_register.sse_store [index]);
20722 return in ? ix86_cost->hard_register.sse_load [index]
20723 : ix86_cost->hard_register.sse_store [index];
20725 if (MASK_CLASS_P (regclass))
20727 int index;
20728 switch (GET_MODE_SIZE (mode))
20730 case 1:
20731 index = 0;
20732 break;
20733 case 2:
20734 index = 1;
20735 break;
20736 /* DImode loads and stores assumed to cost the same as SImode. */
20737 case 4:
20738 case 8:
20739 index = 2;
20740 break;
20741 default:
20742 return 100;
20745 if (in == 2)
20746 return MAX (ix86_cost->hard_register.mask_load[index],
20747 ix86_cost->hard_register.mask_store[index]);
20748 return in ? ix86_cost->hard_register.mask_load[index]
20749 : ix86_cost->hard_register.mask_store[index];
20751 if (MMX_CLASS_P (regclass))
20753 int index;
20754 switch (GET_MODE_SIZE (mode))
20756 case 4:
20757 index = 0;
20758 break;
20759 case 8:
20760 index = 1;
20761 break;
20762 default:
20763 return 100;
20765 if (in == 2)
20766 return MAX (ix86_cost->hard_register.mmx_load [index],
20767 ix86_cost->hard_register.mmx_store [index]);
20768 return in ? ix86_cost->hard_register.mmx_load [index]
20769 : ix86_cost->hard_register.mmx_store [index];
20771 switch (GET_MODE_SIZE (mode))
20773 case 1:
20774 if (Q_CLASS_P (regclass) || TARGET_64BIT)
20776 if (!in)
20777 return ix86_cost->hard_register.int_store[0];
20778 if (TARGET_PARTIAL_REG_DEPENDENCY
20779 && optimize_function_for_speed_p (cfun))
20780 cost = ix86_cost->hard_register.movzbl_load;
20781 else
20782 cost = ix86_cost->hard_register.int_load[0];
20783 if (in == 2)
20784 return MAX (cost, ix86_cost->hard_register.int_store[0]);
20785 return cost;
20787 else
20789 if (in == 2)
20790 return MAX (ix86_cost->hard_register.movzbl_load,
20791 ix86_cost->hard_register.int_store[0] + 4);
20792 if (in)
20793 return ix86_cost->hard_register.movzbl_load;
20794 else
20795 return ix86_cost->hard_register.int_store[0] + 4;
20797 break;
20798 case 2:
20800 int cost;
20801 if (in == 2)
20802 cost = MAX (ix86_cost->hard_register.int_load[1],
20803 ix86_cost->hard_register.int_store[1]);
20804 else
20805 cost = in ? ix86_cost->hard_register.int_load[1]
20806 : ix86_cost->hard_register.int_store[1];
20808 if (mode == E_HFmode)
20810 /* Prefer SSE over GPR for HFmode. */
20811 int sse_cost;
20812 int index = sse_store_index (mode);
20813 if (in == 2)
20814 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
20815 ix86_cost->hard_register.sse_store[index]);
20816 else
20817 sse_cost = (in
20818 ? ix86_cost->hard_register.sse_load [index]
20819 : ix86_cost->hard_register.sse_store [index]);
20820 if (sse_cost >= cost)
20821 cost = sse_cost + 1;
20823 return cost;
20825 default:
20826 if (in == 2)
20827 cost = MAX (ix86_cost->hard_register.int_load[2],
20828 ix86_cost->hard_register.int_store[2]);
20829 else if (in)
20830 cost = ix86_cost->hard_register.int_load[2];
20831 else
20832 cost = ix86_cost->hard_register.int_store[2];
20833 /* Multiply by the number of GPR moves needed. */
20834 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
20838 static int
20839 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
20841 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
20845 /* Return the cost of moving data from a register in class CLASS1 to
20846 one in class CLASS2.
20848 It is not required that the cost always equal 2 when FROM is the same as TO;
20849 on some machines it is expensive to move between registers if they are not
20850 general registers. */
20852 static int
20853 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
20854 reg_class_t class2_i)
20856 enum reg_class class1 = (enum reg_class) class1_i;
20857 enum reg_class class2 = (enum reg_class) class2_i;
20859 /* In case we require secondary memory, compute cost of the store followed
20860 by load. In order to avoid bad register allocation choices, we need
20861 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20863 if (inline_secondary_memory_needed (mode, class1, class2, false))
20865 int cost = 1;
20867 cost += inline_memory_move_cost (mode, class1, 2);
20868 cost += inline_memory_move_cost (mode, class2, 2);
20870 /* When copying from a general purpose register we may emit multiple
20871 stores followed by a single load, causing a memory size mismatch stall.
20872 Count this with an arbitrarily high cost of 20. */
20873 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
20874 && TARGET_MEMORY_MISMATCH_STALL
20875 && targetm.class_max_nregs (class1, mode)
20876 > targetm.class_max_nregs (class2, mode))
20877 cost += 20;
20879 /* In the case of FP/MMX moves, the registers actually overlap, and we
20880 have to switch modes in order to treat them differently. */
20881 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20882 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20883 cost += 20;
20885 return cost;
20888 /* Moves between MMX and non-MMX units require secondary memory. */
20889 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20890 gcc_unreachable ();
20892 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20893 return (SSE_CLASS_P (class1)
20894 ? ix86_cost->hard_register.sse_to_integer
20895 : ix86_cost->hard_register.integer_to_sse);
20897 /* Moves between mask register and GPR. */
20898 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20900 return (MASK_CLASS_P (class1)
20901 ? ix86_cost->hard_register.mask_to_integer
20902 : ix86_cost->hard_register.integer_to_mask);
20904 /* Moving between mask registers. */
20905 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
20906 return ix86_cost->hard_register.mask_move;
20908 if (MAYBE_FLOAT_CLASS_P (class1))
20909 return ix86_cost->hard_register.fp_move;
20910 if (MAYBE_SSE_CLASS_P (class1))
20912 if (GET_MODE_BITSIZE (mode) <= 128)
20913 return ix86_cost->hard_register.xmm_move;
20914 if (GET_MODE_BITSIZE (mode) <= 256)
20915 return ix86_cost->hard_register.ymm_move;
20916 return ix86_cost->hard_register.zmm_move;
20918 if (MAYBE_MMX_CLASS_P (class1))
20919 return ix86_cost->hard_register.mmx_move;
20920 return 2;
20923 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
20924 words of a value of mode MODE but can be less for certain modes in
20925 special long registers.
20927 Actually there are no two-word move instructions for consecutive
20928 registers. And only registers 0-3 may have mov byte instructions
20929 applied to them. */
20931 static unsigned int
20932 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
20934 if (GENERAL_REGNO_P (regno))
20936 if (mode == XFmode)
20937 return TARGET_64BIT ? 2 : 3;
20938 if (mode == XCmode)
20939 return TARGET_64BIT ? 4 : 6;
20940 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
20942 if (COMPLEX_MODE_P (mode))
20943 return 2;
20944 /* Register pair for mask registers. */
20945 if (mode == P2QImode || mode == P2HImode)
20946 return 2;
20947 if (mode == V64SFmode || mode == V64SImode)
20948 return 4;
20949 return 1;
20952 /* Implement REGMODE_NATURAL_SIZE(MODE). */
20953 unsigned int
20954 ix86_regmode_natural_size (machine_mode mode)
20956 if (mode == P2HImode || mode == P2QImode)
20957 return GET_MODE_SIZE (mode) / 2;
20958 return UNITS_PER_WORD;
20961 /* Implement TARGET_HARD_REGNO_MODE_OK. */
20963 static bool
20964 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
20966 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
20967 if (CC_REGNO_P (regno))
20968 return GET_MODE_CLASS (mode) == MODE_CC;
20969 if (GET_MODE_CLASS (mode) == MODE_CC
20970 || GET_MODE_CLASS (mode) == MODE_RANDOM)
20971 return false;
20972 if (STACK_REGNO_P (regno))
20973 return VALID_FP_MODE_P (mode);
20974 if (MASK_REGNO_P (regno))
20976 /* Register pair only starts at even register number. */
20977 if ((mode == P2QImode || mode == P2HImode))
20978 return MASK_PAIR_REGNO_P(regno);
20980 return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
20981 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
20984 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20985 return false;
20987 if (SSE_REGNO_P (regno))
20989 /* We implement the move patterns for all vector modes into and
20990 out of SSE registers, even when no operation instructions
20991 are available. */
20993 /* For AVX-512 we allow, regardless of regno:
20994 - XI mode
20995 - any of 512-bit wide vector mode
20996 - any scalar mode. */
20997 if (TARGET_AVX512F
20998 && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
20999 || VALID_AVX512F_SCALAR_MODE (mode)))
21000 return true;
21002 /* TODO check for QI/HI scalars. */
21003 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
21004 if (TARGET_AVX512VL
21005 && (VALID_AVX256_REG_OR_OI_MODE (mode)
21006 || VALID_AVX512VL_128_REG_MODE (mode)))
21007 return true;
21009 /* xmm16-xmm31 are only available for AVX-512. */
21010 if (EXT_REX_SSE_REGNO_P (regno))
21011 return false;
21013 /* Use pinsrw/pextrw to move 16-bit data between SSE and integer registers. */
21014 if (TARGET_SSE2 && mode == HImode)
21015 return true;
21017 /* OImode and AVX modes are available only when AVX is enabled. */
21018 return ((TARGET_AVX
21019 && VALID_AVX256_REG_OR_OI_MODE (mode))
21020 || VALID_SSE_REG_MODE (mode)
21021 || VALID_SSE2_REG_MODE (mode)
21022 || VALID_MMX_REG_MODE (mode)
21023 || VALID_MMX_REG_MODE_3DNOW (mode));
21025 if (MMX_REGNO_P (regno))
21027 /* We implement the move patterns for 3DNOW modes even in MMX mode,
21028 so if the register is available at all, then we can move data of
21029 the given mode into or out of it. */
21030 return (VALID_MMX_REG_MODE (mode)
21031 || VALID_MMX_REG_MODE_3DNOW (mode));
21034 if (mode == QImode)
21036 /* Take care for QImode values - they can be in non-QI regs,
21037 but then they do cause partial register stalls. */
21038 if (ANY_QI_REGNO_P (regno))
21039 return true;
21040 if (!TARGET_PARTIAL_REG_STALL)
21041 return true;
21042 /* LRA checks if the hard register is OK for the given mode.
21043 QImode values can live in non-QI regs, so we allow all
21044 registers here. */
21045 if (lra_in_progress)
21046 return true;
21047 return !can_create_pseudo_p ();
21049 /* We handle both integer and floats in the general purpose registers. */
21050 else if (VALID_INT_MODE_P (mode)
21051 || VALID_FP_MODE_P (mode))
21052 return true;
21053 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
21054 on to use that value in smaller contexts, this can easily force a
21055 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
21056 supporting DImode, allow it. */
21057 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
21058 return true;
21060 return false;
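/* Worked example, illustrative only: without AVX512VL, xmm16..xmm31
   (EXT_REX_SSE_REGNO_P) reject V4SFmode here, so 128-bit values stay in
   xmm0..xmm15; with -mavx512vl the same regno/mode pair is accepted through
   VALID_AVX512VL_128_REG_MODE.  */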
21063 /* Implement TARGET_INSN_CALLEE_ABI. */
21065 const predefined_function_abi &
21066 ix86_insn_callee_abi (const rtx_insn *insn)
21068 unsigned int abi_id = 0;
21069 rtx pat = PATTERN (insn);
21070 if (vzeroupper_pattern (pat, VOIDmode))
21071 abi_id = ABI_VZEROUPPER;
21073 return function_abis[abi_id];
21076 /* Initialize function_abis with corresponding abi_id,
21077 currently only handle vzeroupper. */
21078 void
21079 ix86_initialize_callee_abi (unsigned int abi_id)
21081 gcc_assert (abi_id == ABI_VZEROUPPER);
21082 predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
21083 if (!vzeroupper_abi.initialized_p ())
21085 HARD_REG_SET full_reg_clobbers;
21086 CLEAR_HARD_REG_SET (full_reg_clobbers);
21087 vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
21091 void
21092 ix86_expand_avx_vzeroupper (void)
21094 /* Initialize vzeroupper_abi here. */
21095 ix86_initialize_callee_abi (ABI_VZEROUPPER);
21096 rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
21097 /* Return false for non-local goto in can_nonlocal_goto. */
21098 make_reg_eh_region_note (insn, 0, INT_MIN);
21099 /* Flag used for call_insn indicates it's a fake call. */
21100 RTX_FLAG (insn, used) = 1;
21104 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
21105 saves SSE registers across calls is Win64 (thus no need to check the
21106 current ABI here), and with AVX enabled Win64 only guarantees that
21107 the low 16 bytes are saved. */
21109 static bool
21110 ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
21111 machine_mode mode)
21113 /* Special ABI for vzeroupper, which only clobbers the upper part of the SSE regs. */
21114 if (abi_id == ABI_VZEROUPPER)
21115 return (GET_MODE_SIZE (mode) > 16
21116 && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
21117 || LEGACY_SSE_REGNO_P (regno)));
21119 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
21122 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21123 tieable integer mode. */
21125 static bool
21126 ix86_tieable_integer_mode_p (machine_mode mode)
21128 switch (mode)
21130 case E_HImode:
21131 case E_SImode:
21132 return true;
21134 case E_QImode:
21135 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21137 case E_DImode:
21138 return TARGET_64BIT;
21140 default:
21141 return false;
21145 /* Implement TARGET_MODES_TIEABLE_P.
21147 Return true if MODE1 is accessible in a register that can hold MODE2
21148 without copying. That is, all register classes that can hold MODE2
21149 can also hold MODE1. */
21151 static bool
21152 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
21154 if (mode1 == mode2)
21155 return true;
21157 if (ix86_tieable_integer_mode_p (mode1)
21158 && ix86_tieable_integer_mode_p (mode2))
21159 return true;
21161 /* MODE2 being XFmode implies fp stack or general regs, which means we
21162 can tie any smaller floating point modes to it. Note that we do not
21163 tie this with TFmode. */
21164 if (mode2 == XFmode)
21165 return mode1 == SFmode || mode1 == DFmode;
21167 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
21168 that we can tie it with SFmode. */
21169 if (mode2 == DFmode)
21170 return mode1 == SFmode;
21172 /* If MODE2 is only appropriate for an SSE register, then tie with
21173 any other mode acceptable to SSE registers. */
21174 if (GET_MODE_SIZE (mode2) == 64
21175 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21176 return (GET_MODE_SIZE (mode1) == 64
21177 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
21178 if (GET_MODE_SIZE (mode2) == 32
21179 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21180 return (GET_MODE_SIZE (mode1) == 32
21181 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
21182 if (GET_MODE_SIZE (mode2) == 16
21183 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21184 return (GET_MODE_SIZE (mode1) == 16
21185 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
21187 /* If MODE2 is appropriate for an MMX register, then tie
21188 with any other mode acceptable to MMX registers. */
21189 if (GET_MODE_SIZE (mode2) == 8
21190 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
21191 return (GET_MODE_SIZE (mode1) == 8
21192 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
21194 /* SCmode and DImode can be tied. */
21195 if ((mode1 == E_SCmode && mode2 == E_DImode)
21196 || (mode1 == E_DImode && mode2 == E_SCmode))
21197 return TARGET_64BIT;
21199 /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
21200 if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
21201 || (mode1 == E_V2SFmode && mode2 == E_SCmode)
21202 || (mode1 == E_DCmode && mode2 == E_V2DFmode)
21203 || (mode1 == E_V2DFmode && mode2 == E_DCmode))
21204 return true;
21206 return false;
21209 /* Return the cost of moving between two registers of mode MODE. */
21211 static int
21212 ix86_set_reg_reg_cost (machine_mode mode)
21214 unsigned int units = UNITS_PER_WORD;
21216 switch (GET_MODE_CLASS (mode))
21218 default:
21219 break;
21221 case MODE_CC:
21222 units = GET_MODE_SIZE (CCmode);
21223 break;
21225 case MODE_FLOAT:
21226 if ((TARGET_SSE && mode == TFmode)
21227 || (TARGET_80387 && mode == XFmode)
21228 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
21229 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
21230 units = GET_MODE_SIZE (mode);
21231 break;
21233 case MODE_COMPLEX_FLOAT:
21234 if ((TARGET_SSE && mode == TCmode)
21235 || (TARGET_80387 && mode == XCmode)
21236 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
21237 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
21238 units = GET_MODE_SIZE (mode);
21239 break;
21241 case MODE_VECTOR_INT:
21242 case MODE_VECTOR_FLOAT:
21243 if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
21244 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
21245 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21246 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21247 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
21248 && VALID_MMX_REG_MODE (mode)))
21249 units = GET_MODE_SIZE (mode);
21252 /* Return the cost of moving between two registers of mode MODE,
21253 assuming that the move will be in pieces of at most UNITS bytes. */
21254 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
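/* Worked example, illustrative only: for a plain integer mode UNITS stays
   at UNITS_PER_WORD, so a TImode register copy on x86-64 is costed as
   COSTS_N_INSNS (CEIL (16, 8)) == COSTS_N_INSNS (2), while a V4SFmode copy
   on an SSE target sets UNITS to 16 and is costed as a single instruction.  */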
21257 /* Return cost of vector operation in MODE given that scalar version has
21258 COST. */
21260 static int
21261 ix86_vec_cost (machine_mode mode, int cost)
21263 if (!VECTOR_MODE_P (mode))
21264 return cost;
21266 if (GET_MODE_BITSIZE (mode) == 128
21267 && TARGET_SSE_SPLIT_REGS)
21268 return cost * GET_MODE_BITSIZE (mode) / 64;
21269 else if (GET_MODE_BITSIZE (mode) > 128
21270 && TARGET_AVX256_SPLIT_REGS)
21271 return cost * GET_MODE_BITSIZE (mode) / 128;
21272 else if (GET_MODE_BITSIZE (mode) > 256
21273 && TARGET_AVX512_SPLIT_REGS)
21274 return cost * GET_MODE_BITSIZE (mode) / 256;
21275 return cost;
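/* Worked example, illustrative only: on a tuning with
   TARGET_AVX256_SPLIT_REGS (256-bit operations split into two 128-bit
   halves), a V8SFmode operation with scalar cost C is charged
   C * 256 / 128 == 2 * C; likewise TARGET_SSE_SPLIT_REGS doubles the cost
   of 128-bit modes.  Tunings without the split flags pay the scalar cost.  */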
21278 /* Return cost of vec_widen_<s>mult_hi/lo_<mode>;
21279 vec_widen_<s>mult_hi/lo_<mode> is only available for VI124_AVX2 modes. */
21280 static int
21281 ix86_widen_mult_cost (const struct processor_costs *cost,
21282 enum machine_mode mode, bool uns_p)
21284 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
21285 int extra_cost = 0;
21286 int basic_cost = 0;
21287 switch (mode)
21289 case V8HImode:
21290 case V16HImode:
21291 if (!uns_p || mode == V16HImode)
21292 extra_cost = cost->sse_op * 2;
21293 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
21294 break;
21295 case V4SImode:
21296 case V8SImode:
21297 /* pmulhw/pmullw can be used. */
21298 basic_cost = cost->mulss * 2 + cost->sse_op * 2;
21299 break;
21300 case V2DImode:
21301 /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
21302 require extra 4 mul, 4 add, 4 cmp and 2 shift. */
21303 if (!TARGET_SSE4_1 && !uns_p)
21304 extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
21305 + cost->sse_op * 2;
21306 /* Fallthru. */
21307 case V4DImode:
21308 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
21309 break;
21310 default:
21311 /* Not implemented. */
21312 return 100;
21314 return ix86_vec_cost (mode, basic_cost + extra_cost);
21317 /* Return cost of multiplication in MODE. */
21319 static int
21320 ix86_multiplication_cost (const struct processor_costs *cost,
21321 enum machine_mode mode)
21323 machine_mode inner_mode = mode;
21324 if (VECTOR_MODE_P (mode))
21325 inner_mode = GET_MODE_INNER (mode);
21327 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21328 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
21329 else if (X87_FLOAT_MODE_P (mode))
21330 return cost->fmul;
21331 else if (FLOAT_MODE_P (mode))
21332 return ix86_vec_cost (mode,
21333 inner_mode == DFmode ? cost->mulsd : cost->mulss);
21334 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21336 int nmults, nops;
21337 /* Cost of reading the memory. */
21338 int extra;
21340 switch (mode)
21342 case V4QImode:
21343 case V8QImode:
21344 /* Partial V*QImode is emulated with 4-6 insns. */
21345 nmults = 1;
21346 nops = 3;
21347 extra = 0;
21349 if (TARGET_AVX512BW && TARGET_AVX512VL)
21351 else if (TARGET_AVX2)
21352 nops += 2;
21353 else if (TARGET_XOP)
21354 extra += cost->sse_load[2];
21355 else
21357 nops += 1;
21358 extra += cost->sse_load[2];
21360 goto do_qimode;
21362 case V16QImode:
21363 /* V*QImode is emulated with 4-11 insns. */
21364 nmults = 1;
21365 nops = 3;
21366 extra = 0;
21368 if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
21370 if (!(TARGET_AVX512BW && TARGET_AVX512VL))
21371 nops += 3;
21373 else if (TARGET_XOP)
21375 nmults += 1;
21376 nops += 2;
21377 extra += cost->sse_load[2];
21379 else
21381 nmults += 1;
21382 nops += 4;
21383 extra += cost->sse_load[2];
21385 goto do_qimode;
21387 case V32QImode:
21388 nmults = 1;
21389 nops = 3;
21390 extra = 0;
21392 if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
21394 nmults += 1;
21395 nops += 4;
21396 extra += cost->sse_load[3] * 2;
21398 goto do_qimode;
21400 case V64QImode:
21401 nmults = 2;
21402 nops = 9;
21403 extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2;
21405 do_qimode:
21406 return ix86_vec_cost (mode, cost->mulss * nmults
21407 + cost->sse_op * nops) + extra;
21409 case V4SImode:
21410 /* pmulld is used in this case. No emulation is needed. */
21411 if (TARGET_SSE4_1)
21412 goto do_native;
21413 /* V4SImode is emulated with 7 insns. */
21414 else
21415 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
21417 case V2DImode:
21418 case V4DImode:
21419 /* vpmullq is used in this case. No emulation is needed. */
21420 if (TARGET_AVX512DQ && TARGET_AVX512VL)
21421 goto do_native;
21422 /* V*DImode is emulated with 6-8 insns. */
21423 else if (TARGET_XOP && mode == V2DImode)
21424 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
21425 /* FALLTHRU */
21426 case V8DImode:
21427 /* vpmullq is used in this case. No emulation is needed. */
21428 if (TARGET_AVX512DQ && mode == V8DImode)
21429 goto do_native;
21430 else
21431 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
21433 default:
21434 do_native:
21435 return ix86_vec_cost (mode, cost->mulss);
21438 else
21439 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
21442 /* Return cost of division in MODE. */
21444 static int
21445 ix86_division_cost (const struct processor_costs *cost,
21446 enum machine_mode mode)
21448 machine_mode inner_mode = mode;
21449 if (VECTOR_MODE_P (mode))
21450 inner_mode = GET_MODE_INNER (mode);
21452 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21453 return inner_mode == DFmode ? cost->divsd : cost->divss;
21454 else if (X87_FLOAT_MODE_P (mode))
21455 return cost->fdiv;
21456 else if (FLOAT_MODE_P (mode))
21457 return ix86_vec_cost (mode,
21458 inner_mode == DFmode ? cost->divsd : cost->divss);
21459 else
21460 return cost->divide[MODE_INDEX (mode)];
21463 /* Return cost of shift in MODE.
21464 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
21465 AND_IN_OP1 specifies whether op1 is the result of an AND; SHIFT_AND_TRUNCATE
21466 is set if op1 is such an AND wrapped in a subreg (a truncated shift count).
21468 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
21470 static int
21471 ix86_shift_rotate_cost (const struct processor_costs *cost,
21472 enum rtx_code code,
21473 enum machine_mode mode, bool constant_op1,
21474 HOST_WIDE_INT op1_val,
21475 bool and_in_op1,
21476 bool shift_and_truncate,
21477 bool *skip_op0, bool *skip_op1)
21479 if (skip_op0)
21480 *skip_op0 = *skip_op1 = false;
21482 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21484 int count;
21485 /* Cost of reading the memory. */
21486 int extra;
21488 switch (mode)
21490 case V4QImode:
21491 case V8QImode:
21492 if (TARGET_AVX2)
21493 /* Use vpbroadcast. */
21494 extra = cost->sse_op;
21495 else
21496 extra = cost->sse_load[2];
21498 if (constant_op1)
21500 if (code == ASHIFTRT)
21502 count = 4;
21503 extra *= 2;
21505 else
21506 count = 2;
21508 else if (TARGET_AVX512BW && TARGET_AVX512VL)
21509 return ix86_vec_cost (mode, cost->sse_op * 4);
21510 else if (TARGET_SSE4_1)
21511 count = 5;
21512 else if (code == ASHIFTRT)
21513 count = 6;
21514 else
21515 count = 5;
21516 return ix86_vec_cost (mode, cost->sse_op * count) + extra;
21518 case V16QImode:
21519 if (TARGET_XOP)
21521 /* For XOP we use vpshab, which requires a broadcast of the
21522 value to the variable shift insn. For constants this
21523 means a V16Q const in mem; even when we can perform the
21524 shift with one insn, set the cost to prefer paddb. */
21525 if (constant_op1)
21527 extra = cost->sse_load[2];
21528 return ix86_vec_cost (mode, cost->sse_op) + extra;
21530 else
21532 count = (code == ASHIFT) ? 3 : 4;
21533 return ix86_vec_cost (mode, cost->sse_op * count);
21536 /* FALLTHRU */
21537 case V32QImode:
21538 if (TARGET_AVX2)
21539 /* Use vpbroadcast. */
21540 extra = cost->sse_op;
21541 else
21542 extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
21544 if (constant_op1)
21546 if (code == ASHIFTRT)
21548 count = 4;
21549 extra *= 2;
21551 else
21552 count = 2;
21554 else if (TARGET_AVX512BW
21555 && ((mode == V32QImode && !TARGET_PREFER_AVX256)
21556 || (mode == V16QImode && TARGET_AVX512VL
21557 && !TARGET_PREFER_AVX128)))
21558 return ix86_vec_cost (mode, cost->sse_op * 4);
21559 else if (TARGET_AVX2
21560 && mode == V16QImode && !TARGET_PREFER_AVX128)
21561 count = 6;
21562 else if (TARGET_SSE4_1)
21563 count = 9;
21564 else if (code == ASHIFTRT)
21565 count = 10;
21566 else
21567 count = 9;
21568 return ix86_vec_cost (mode, cost->sse_op * count) + extra;
21570 case V2DImode:
21571 case V4DImode:
21572 /* V*DImode arithmetic right shift is emulated. */
21573 if (code == ASHIFTRT && !TARGET_AVX512VL)
21575 if (constant_op1)
21577 if (op1_val == 63)
21578 count = TARGET_SSE4_2 ? 1 : 2;
21579 else if (TARGET_XOP)
21580 count = 2;
21581 else if (TARGET_SSE4_1)
21582 count = 3;
21583 else
21584 count = 4;
21586 else if (TARGET_XOP)
21587 count = 3;
21588 else if (TARGET_SSE4_2)
21589 count = 4;
21590 else
21591 count = 5;
21593 return ix86_vec_cost (mode, cost->sse_op * count);
21595 /* FALLTHRU */
21596 default:
21597 return ix86_vec_cost (mode, cost->sse_op);
21601 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21603 if (constant_op1)
21605 if (op1_val > 32)
21606 return cost->shift_const + COSTS_N_INSNS (2);
21607 else
21608 return cost->shift_const * 2;
21610 else
21612 if (and_in_op1)
21613 return cost->shift_var * 2;
21614 else
21615 return cost->shift_var * 6 + COSTS_N_INSNS (2);
21618 else
21620 if (constant_op1)
21621 return cost->shift_const;
21622 else if (shift_and_truncate)
21624 if (skip_op0)
21625 *skip_op0 = *skip_op1 = true;
21626 /* Return the cost after shift-and truncation. */
21627 return cost->shift_var;
21629 else
21630 return cost->shift_var;
21634 static int
21635 ix86_insn_cost (rtx_insn *insn, bool speed)
21637 int insn_cost = 0;
21638 /* Add extra cost to avoid post_reload late_combine reverting
21639 the optimization done in pass_rpad. */
21640 if (reload_completed
21641 && ix86_rpad_gate ()
21642 && recog_memoized (insn) >= 0
21643 && get_attr_avx_partial_xmm_update (insn)
21644 == AVX_PARTIAL_XMM_UPDATE_TRUE)
21645 insn_cost += COSTS_N_INSNS (3);
21647 return insn_cost + pattern_cost (PATTERN (insn), speed);
21650 /* Compute a (partial) cost for rtx X. Return true if the complete
21651 cost has been computed, and false if subexpressions should be
21652 scanned. In either case, *TOTAL contains the cost result. */
21654 static bool
21655 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
21656 int *total, bool speed)
21658 rtx mask;
21659 enum rtx_code code = GET_CODE (x);
21660 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
21661 const struct processor_costs *cost
21662 = speed ? ix86_tune_cost : &ix86_size_cost;
21663 int src_cost;
21665 /* Handling different vternlog variants. */
21666 if ((GET_MODE_SIZE (mode) == 64
21667 ? (TARGET_AVX512F && TARGET_EVEX512)
21668 : (TARGET_AVX512VL
21669 || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)))
21670 && GET_MODE_SIZE (mode) >= 16
21671 && outer_code_i == SET
21672 && ternlog_operand (x, mode))
21674 rtx args[3];
21676 args[0] = NULL_RTX;
21677 args[1] = NULL_RTX;
21678 args[2] = NULL_RTX;
21679 int idx = ix86_ternlog_idx (x, args);
21680 gcc_assert (idx >= 0);
21682 *total = cost->sse_op;
21683 for (int i = 0; i != 3; i++)
21684 if (args[i])
21685 *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
21686 return true;
21690 switch (code)
21692 case SET:
21693 if (register_operand (SET_DEST (x), VOIDmode)
21694 && register_operand (SET_SRC (x), VOIDmode))
21696 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
21697 return true;
21700 if (register_operand (SET_SRC (x), VOIDmode))
21701 /* Avoid potentially incorrect high cost from rtx_costs
21702 for non-tieable SUBREGs. */
21703 src_cost = 0;
21704 else
21706 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
21708 if (CONSTANT_P (SET_SRC (x)))
21709 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
21710 a small value, possibly zero for cheap constants. */
21711 src_cost += COSTS_N_INSNS (1);
21714 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
21715 return true;
21717 case CONST_INT:
21718 case CONST:
21719 case LABEL_REF:
21720 case SYMBOL_REF:
21721 if (x86_64_immediate_operand (x, VOIDmode))
21722 *total = 0;
21723 else
21724 /* movabsq is slightly more expensive than a simple instruction. */
21725 *total = COSTS_N_INSNS (1) + 1;
21726 return true;
21728 case CONST_DOUBLE:
21729 if (IS_STACK_MODE (mode))
21730 switch (standard_80387_constant_p (x))
21732 case -1:
21733 case 0:
21734 break;
21735 case 1: /* 0.0 */
21736 *total = 1;
21737 return true;
21738 default: /* Other constants */
21739 *total = 2;
21740 return true;
21742 /* FALLTHRU */
21744 case CONST_VECTOR:
21745 switch (standard_sse_constant_p (x, mode))
21747 case 0:
21748 break;
21749 case 1: /* 0: xor eliminates false dependency */
21750 *total = 0;
21751 return true;
21752 default: /* -1: cmp contains false dependency */
21753 *total = 1;
21754 return true;
21756 /* FALLTHRU */
21758 case CONST_WIDE_INT:
21759 /* Fall back to (MEM (SYMBOL_REF)), since that's where
21760 it'll probably end up. Add a penalty for size. */
21761 *total = (COSTS_N_INSNS (1)
21762 + (!TARGET_64BIT && flag_pic)
21763 + (GET_MODE_SIZE (mode) <= 4
21764 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
21765 return true;
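/* Editorial worked example (not part of the GCC sources): with the formula
   above, a 16-byte wide-int constant in 64-bit non-PIC code costs
   COSTS_N_INSNS (1) + 0 + 2 (base load plus the largest size penalty),
   while a constant of at most 4 bytes costs just COSTS_N_INSNS (1).  */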
21767 case ZERO_EXTEND:
21768 /* The zero extension is often completely free on x86_64, so make
21769 it as cheap as possible. */
21770 if (TARGET_64BIT && mode == DImode
21771 && GET_MODE (XEXP (x, 0)) == SImode)
21772 *total = 1;
21773 else if (TARGET_ZERO_EXTEND_WITH_AND)
21774 *total = cost->add;
21775 else
21776 *total = cost->movzx;
21777 return false;
21779 case SIGN_EXTEND:
21780 *total = cost->movsx;
21781 return false;
21783 case ASHIFT:
21784 if (SCALAR_INT_MODE_P (mode)
21785 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
21786 && CONST_INT_P (XEXP (x, 1)))
21788 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
21789 if (value == 1)
21791 *total = cost->add;
21792 return false;
21794 if ((value == 2 || value == 3)
21795 && cost->lea <= cost->shift_const)
21797 *total = cost->lea;
21798 return false;
21801 /* FALLTHRU */
21803 case ROTATE:
21804 case ASHIFTRT:
21805 case LSHIFTRT:
21806 case ROTATERT:
21807 bool skip_op0, skip_op1;
21808 *total = ix86_shift_rotate_cost (cost, code, mode,
21809 CONSTANT_P (XEXP (x, 1)),
21810 CONST_INT_P (XEXP (x, 1))
21811 ? INTVAL (XEXP (x, 1)) : -1,
21812 GET_CODE (XEXP (x, 1)) == AND,
21813 SUBREG_P (XEXP (x, 1))
21814 && GET_CODE (XEXP (XEXP (x, 1),
21815 0)) == AND,
21816 &skip_op0, &skip_op1);
21817 if (skip_op0 || skip_op1)
21819 if (!skip_op0)
21820 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21821 if (!skip_op1)
21822 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
21823 return true;
21825 return false;
21827 case FMA:
21829 rtx sub;
21831 gcc_assert (FLOAT_MODE_P (mode));
21832 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
21834 *total = ix86_vec_cost (mode,
21835 GET_MODE_INNER (mode) == SFmode
21836 ? cost->fmass : cost->fmasd);
21837 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
21839 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
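/* Editorial note (not part of the GCC sources): writing the operands as
   a, b, c, the variants referred to are
     FMA  =  a * b + c     FMS  =  a * b - c
     FNMA = -a * b + c     FNMS = -a * b - c
   so a NEG around operand 0 or operand 2 folds into a single fused
   instruction, which is why only the inner operand is costed below.  */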
21840 sub = XEXP (x, 0);
21841 if (GET_CODE (sub) == NEG)
21842 sub = XEXP (sub, 0);
21843 *total += rtx_cost (sub, mode, FMA, 0, speed);
21845 sub = XEXP (x, 2);
21846 if (GET_CODE (sub) == NEG)
21847 sub = XEXP (sub, 0);
21848 *total += rtx_cost (sub, mode, FMA, 2, speed);
21849 return true;
21852 case MULT:
21853 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
21855 rtx op0 = XEXP (x, 0);
21856 rtx op1 = XEXP (x, 1);
21857 int nbits;
21858 if (CONST_INT_P (XEXP (x, 1)))
21860 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
21861 for (nbits = 0; value != 0; value &= value - 1)
21862 nbits++;
21864 else
21865 /* This is arbitrary. */
21866 nbits = 7;
21868 /* Compute costs correctly for widening multiplication. */
21869 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
21870 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
21871 == GET_MODE_SIZE (mode))
21873 int is_mulwiden = 0;
21874 machine_mode inner_mode = GET_MODE (op0);
21876 if (GET_CODE (op0) == GET_CODE (op1))
21877 is_mulwiden = 1, op1 = XEXP (op1, 0);
21878 else if (CONST_INT_P (op1))
21880 if (GET_CODE (op0) == SIGN_EXTEND)
21881 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
21882 == INTVAL (op1);
21883 else
21884 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
21887 if (is_mulwiden)
21888 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
21891 int mult_init;
21892 // Double word multiplication requires 3 mults and 2 adds.
21893 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21895 mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
21896 + 2 * cost->add;
21897 nbits *= 3;
21899 else mult_init = cost->mult_init[MODE_INDEX (mode)];
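/* Editorial worked equation (not part of the GCC sources): the 3-mult,
   2-add count above follows from splitting each operand into word-sized
   halves.  With W bits per word, modulo 2^(2*W):
     (a_hi * 2^W + a_lo) * (b_hi * 2^W + b_lo)
       = a_lo * b_lo + 2^W * (a_hi * b_lo + a_lo * b_hi)
   i.e. three word multiplications and two additions into the high part.  */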
21901 *total = (mult_init
21902 + nbits * cost->mult_bit
21903 + rtx_cost (op0, mode, outer_code, opno, speed)
21904 + rtx_cost (op1, mode, outer_code, opno, speed));
21906 return true;
21908 *total = ix86_multiplication_cost (cost, mode);
21909 return false;
21911 case DIV:
21912 case UDIV:
21913 case MOD:
21914 case UMOD:
21915 *total = ix86_division_cost (cost, mode);
21916 return false;
21918 case PLUS:
21919 if (GET_MODE_CLASS (mode) == MODE_INT
21920 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
21922 if (GET_CODE (XEXP (x, 0)) == PLUS
21923 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
21924 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
21925 && CONSTANT_P (XEXP (x, 1)))
21927 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
21928 if (val == 2 || val == 4 || val == 8)
21930 *total = cost->lea;
21931 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21932 outer_code, opno, speed);
21933 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
21934 outer_code, opno, speed);
21935 *total += rtx_cost (XEXP (x, 1), mode,
21936 outer_code, opno, speed);
21937 return true;
21940 else if (GET_CODE (XEXP (x, 0)) == MULT
21941 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
21943 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
21944 if (val == 2 || val == 4 || val == 8)
21946 *total = cost->lea;
21947 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21948 outer_code, opno, speed);
21949 *total += rtx_cost (XEXP (x, 1), mode,
21950 outer_code, opno, speed);
21951 return true;
21954 else if (GET_CODE (XEXP (x, 0)) == PLUS)
21956 rtx op = XEXP (XEXP (x, 0), 0);
21958 /* Add with carry, ignore the cost of adding a carry flag. */
21959 if (ix86_carry_flag_operator (op, mode)
21960 || ix86_carry_flag_unset_operator (op, mode))
21961 *total = cost->add;
21962 else
21964 *total = cost->lea;
21965 *total += rtx_cost (op, mode,
21966 outer_code, opno, speed);
21969 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21970 outer_code, opno, speed);
21971 *total += rtx_cost (XEXP (x, 1), mode,
21972 outer_code, opno, speed);
21973 return true;
21976 /* FALLTHRU */
21978 case MINUS:
21979 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
21980 if (GET_MODE_CLASS (mode) == MODE_INT
21981 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
21982 && GET_CODE (XEXP (x, 0)) == MINUS
21983 && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
21984 || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
21986 *total = cost->add;
21987 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21988 outer_code, opno, speed);
21989 *total += rtx_cost (XEXP (x, 1), mode,
21990 outer_code, opno, speed);
21991 return true;
21994 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21995 *total = cost->addss;
21996 else if (X87_FLOAT_MODE_P (mode))
21997 *total = cost->fadd;
21998 else if (FLOAT_MODE_P (mode))
21999 *total = ix86_vec_cost (mode, cost->addss);
22000 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22001 *total = ix86_vec_cost (mode, cost->sse_op);
22002 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22003 *total = cost->add * 2;
22004 else
22005 *total = cost->add;
22006 return false;
22008 case IOR:
22009 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22010 || SSE_FLOAT_MODE_P (mode))
22012 /* (ior (not ...) ...) can be a single insn in AVX512. */
22013 if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
22014 && ((TARGET_EVEX512
22015 && GET_MODE_SIZE (mode) == 64)
22016 || (TARGET_AVX512VL
22017 && (GET_MODE_SIZE (mode) == 32
22018 || GET_MODE_SIZE (mode) == 16))))
22020 rtx right = GET_CODE (XEXP (x, 1)) != NOT
22021 ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
22023 *total = ix86_vec_cost (mode, cost->sse_op)
22024 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22025 outer_code, opno, speed)
22026 + rtx_cost (right, mode, outer_code, opno, speed);
22027 return true;
22029 *total = ix86_vec_cost (mode, cost->sse_op);
22031 else if (TARGET_64BIT
22032 && mode == TImode
22033 && GET_CODE (XEXP (x, 0)) == ASHIFT
22034 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
22035 && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
22036 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
22037 && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
22038 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
22039 && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
22041 /* *concatditi3 is cheap. */
22042 rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
22043 rtx op1 = XEXP (XEXP (x, 1), 0);
22044 *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
22045 ? COSTS_N_INSNS (1) /* movq. */
22046 : set_src_cost (op0, DImode, speed);
22047 *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
22048 ? COSTS_N_INSNS (1) /* movq. */
22049 : set_src_cost (op1, DImode, speed);
22050 return true;
22052 else if (TARGET_64BIT
22053 && mode == TImode
22054 && GET_CODE (XEXP (x, 0)) == AND
22055 && REG_P (XEXP (XEXP (x, 0), 0))
22056 && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
22057 && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
22058 && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
22059 && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
22060 && GET_CODE (XEXP (x, 1)) == ASHIFT
22061 && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
22062 && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
22063 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
22064 && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
22066 /* *insvti_highpart is cheap. */
22067 rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
22068 *total = COSTS_N_INSNS (1) + 1;
22069 *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
22070 ? COSTS_N_INSNS (1) /* movq. */
22071 : set_src_cost (op, DImode, speed);
22072 return true;
22074 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22075 *total = cost->add * 2;
22076 else
22077 *total = cost->add;
22078 return false;
22080 case XOR:
22081 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22082 || SSE_FLOAT_MODE_P (mode))
22083 *total = ix86_vec_cost (mode, cost->sse_op);
22084 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22085 *total = cost->add * 2;
22086 else
22087 *total = cost->add;
22088 return false;
22090 case AND:
22091 if (address_no_seg_operand (x, mode))
22093 *total = cost->lea;
22094 return true;
22096 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
22097 || SSE_FLOAT_MODE_P (mode))
22099 /* pandn is a single instruction. */
22100 if (GET_CODE (XEXP (x, 0)) == NOT)
22102 rtx right = XEXP (x, 1);
22104 /* (and (not ...) (not ...)) can be a single insn in AVX512. */
22105 if (GET_CODE (right) == NOT && TARGET_AVX512F
22106 && ((TARGET_EVEX512
22107 && GET_MODE_SIZE (mode) == 64)
22108 || (TARGET_AVX512VL
22109 && (GET_MODE_SIZE (mode) == 32
22110 || GET_MODE_SIZE (mode) == 16))))
22111 right = XEXP (right, 0);
22113 *total = ix86_vec_cost (mode, cost->sse_op)
22114 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22115 outer_code, opno, speed)
22116 + rtx_cost (right, mode, outer_code, opno, speed);
22117 return true;
22119 else if (GET_CODE (XEXP (x, 1)) == NOT)
22121 *total = ix86_vec_cost (mode, cost->sse_op)
22122 + rtx_cost (XEXP (x, 0), mode,
22123 outer_code, opno, speed)
22124 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22125 outer_code, opno, speed);
22126 return true;
22128 *total = ix86_vec_cost (mode, cost->sse_op);
22130 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22132 if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22134 *total = cost->add * 2
22135 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22136 outer_code, opno, speed)
22137 + rtx_cost (XEXP (x, 1), mode,
22138 outer_code, opno, speed);
22139 return true;
22141 else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
22143 *total = cost->add * 2
22144 + rtx_cost (XEXP (x, 0), mode,
22145 outer_code, opno, speed)
22146 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22147 outer_code, opno, speed);
22148 return true;
22150 *total = cost->add * 2;
22152 else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
22154 *total = cost->add
22155 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22156 outer_code, opno, speed)
22157 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
22158 return true;
22160 else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
22162 *total = cost->add
22163 + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
22164 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
22165 outer_code, opno, speed);
22166 return true;
22168 else
22169 *total = cost->add;
22170 return false;
22172 case NOT:
22173 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22175 /* (not (xor ...)) can be a single insn in AVX512. */
22176 if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
22177 && ((TARGET_EVEX512
22178 && GET_MODE_SIZE (mode) == 64)
22179 || (TARGET_AVX512VL
22180 && (GET_MODE_SIZE (mode) == 32
22181 || GET_MODE_SIZE (mode) == 16))))
22183 *total = ix86_vec_cost (mode, cost->sse_op)
22184 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
22185 outer_code, opno, speed)
22186 + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
22187 outer_code, opno, speed);
22188 return true;
22191 // vnot is pxor -1.
22192 *total = ix86_vec_cost (mode, cost->sse_op) + 1;
22194 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22195 *total = cost->add * 2;
22196 else
22197 *total = cost->add;
22198 return false;
22200 case NEG:
22201 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22202 *total = cost->sse_op;
22203 else if (X87_FLOAT_MODE_P (mode))
22204 *total = cost->fchs;
22205 else if (FLOAT_MODE_P (mode))
22206 *total = ix86_vec_cost (mode, cost->sse_op);
22207 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22208 *total = ix86_vec_cost (mode, cost->sse_op);
22209 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22210 *total = cost->add * 3;
22211 else
22212 *total = cost->add;
22213 return false;
22215 case COMPARE:
22216 rtx op0, op1;
22217 op0 = XEXP (x, 0);
22218 op1 = XEXP (x, 1);
22219 if (GET_CODE (op0) == ZERO_EXTRACT
22220 && XEXP (op0, 1) == const1_rtx
22221 && CONST_INT_P (XEXP (op0, 2))
22222 && op1 == const0_rtx)
22224 /* This kind of construct is implemented using test[bwl].
22225 Treat it as if we had an AND. */
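/* Editorial example (not part of the GCC sources): for instance
     (compare (zero_extract x (const_int 1) (const_int 3)) (const_int 0))
   tests a single bit and is emitted as something like "testb $8, x",
   hence the AND-like costing below.  */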
22226 mode = GET_MODE (XEXP (op0, 0));
22227 *total = (cost->add
22228 + rtx_cost (XEXP (op0, 0), mode, outer_code,
22229 opno, speed)
22230 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
22231 return true;
22234 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
22236 /* This is an overflow detection, count it as a normal compare. */
22237 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
22238 return true;
22241 rtx geu;
22242 /* Match x
22243 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
22244 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
22245 if (mode == CCCmode
22246 && GET_CODE (op0) == NEG
22247 && GET_CODE (geu = XEXP (op0, 0)) == GEU
22248 && REG_P (XEXP (geu, 0))
22249 && (GET_MODE (XEXP (geu, 0)) == CCCmode
22250 || GET_MODE (XEXP (geu, 0)) == CCmode)
22251 && REGNO (XEXP (geu, 0)) == FLAGS_REG
22252 && XEXP (geu, 1) == const0_rtx
22253 && GET_CODE (op1) == LTU
22254 && REG_P (XEXP (op1, 0))
22255 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
22256 && REGNO (XEXP (op1, 0)) == FLAGS_REG
22257 && XEXP (op1, 1) == const0_rtx)
22259 /* These are the *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
22260 *total = 0;
22261 return true;
22263 /* Match x
22264 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
22265 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
22266 if (mode == CCCmode
22267 && GET_CODE (op0) == NEG
22268 && GET_CODE (XEXP (op0, 0)) == LTU
22269 && REG_P (XEXP (XEXP (op0, 0), 0))
22270 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
22271 && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
22272 && XEXP (XEXP (op0, 0), 1) == const0_rtx
22273 && GET_CODE (op1) == GEU
22274 && REG_P (XEXP (op1, 0))
22275 && GET_MODE (XEXP (op1, 0)) == CCCmode
22276 && REGNO (XEXP (op1, 0)) == FLAGS_REG
22277 && XEXP (op1, 1) == const0_rtx)
22279 /* This is *x86_cmc. */
22280 if (!speed)
22281 *total = COSTS_N_BYTES (1);
22282 else if (TARGET_SLOW_STC)
22283 *total = COSTS_N_INSNS (2);
22284 else
22285 *total = COSTS_N_INSNS (1);
22286 return true;
22289 if (SCALAR_INT_MODE_P (GET_MODE (op0))
22290 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
22292 if (op1 == const0_rtx)
22293 *total = cost->add
22294 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
22295 else
22296 *total = 3*cost->add
22297 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
22298 + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
22299 return true;
22302 /* The embedded comparison operand is completely free. */
22303 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
22304 *total = 0;
22306 return false;
22308 case FLOAT_EXTEND:
22309 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22310 *total = 0;
22311 else
22312 *total = ix86_vec_cost (mode, cost->addss);
22313 return false;
22315 case FLOAT_TRUNCATE:
22316 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22317 *total = cost->fadd;
22318 else
22319 *total = ix86_vec_cost (mode, cost->addss);
22320 return false;
22322 case ABS:
22323 /* SSE requires memory load for the constant operand. It may make
22324 sense to account for this. Of course the constant operand may or
22325 may not be reused. */
22326 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22327 *total = cost->sse_op;
22328 else if (X87_FLOAT_MODE_P (mode))
22329 *total = cost->fabs;
22330 else if (FLOAT_MODE_P (mode))
22331 *total = ix86_vec_cost (mode, cost->sse_op);
22332 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22333 *total = cost->sse_op;
22334 return false;
22336 case SQRT:
22337 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22338 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
22339 else if (X87_FLOAT_MODE_P (mode))
22340 *total = cost->fsqrt;
22341 else if (FLOAT_MODE_P (mode))
22342 *total = ix86_vec_cost (mode,
22343 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
22344 return false;
22346 case UNSPEC:
22347 if (XINT (x, 1) == UNSPEC_TP)
22348 *total = 0;
22349 else if (XINT (x, 1) == UNSPEC_VTERNLOG)
22351 *total = cost->sse_op;
22352 *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
22353 *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
22354 *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
22355 return true;
22357 else if (XINT (x, 1) == UNSPEC_PTEST)
22359 *total = cost->sse_op;
22360 rtx test_op0 = XVECEXP (x, 0, 0);
22361 if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
22362 return false;
22363 if (GET_CODE (test_op0) == AND)
22365 rtx and_op0 = XEXP (test_op0, 0);
22366 if (GET_CODE (and_op0) == NOT)
22367 and_op0 = XEXP (and_op0, 0);
22368 *total += rtx_cost (and_op0, GET_MODE (and_op0),
22369 AND, 0, speed)
22370 + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
22371 AND, 1, speed);
22373 else
22374 *total = rtx_cost (test_op0, GET_MODE (test_op0),
22375 UNSPEC, 0, speed);
22376 return true;
22378 return false;
22380 case VEC_SELECT:
22381 case VEC_CONCAT:
22382 /* ??? Assume all of these vector manipulation patterns are
22383 recognizable, in which case they all pretty much have the
22384 same cost. */
22385 *total = cost->sse_op;
22386 return true;
22387 case VEC_DUPLICATE:
22388 *total = rtx_cost (XEXP (x, 0),
22389 GET_MODE (XEXP (x, 0)),
22390 VEC_DUPLICATE, 0, speed);
22391 /* It's a broadcast instruction, not embedded broadcasting. */
22392 if (outer_code == SET)
22393 *total += cost->sse_op;
22395 return true;
22397 case VEC_MERGE:
22398 mask = XEXP (x, 2);
22399 /* This is a masked instruction; assume the same cost
22400 as the nonmasked variant. */
22401 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
22402 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
22403 else
22404 *total = cost->sse_op;
22405 return true;
22407 case MEM:
22408 /* A CONST_VECTOR_DUPLICATE_P in the constant pool is just a broadcast,
22409 or one of the variants in ix86_vector_duplicate_simode_const. */
22411 if (GET_MODE_SIZE (mode) >= 16
22412 && VECTOR_MODE_P (mode)
22413 && SYMBOL_REF_P (XEXP (x, 0))
22414 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
22415 && ix86_broadcast_from_constant (mode, x))
22417 *total = COSTS_N_INSNS (2) + speed;
22418 return true;
22421 /* An insn that accesses memory is slightly more expensive
22422 than one that does not. */
22423 if (speed)
22425 *total += 1;
22426 rtx addr = XEXP (x, 0);
22427 /* For MEM, rtx_cost iterates over each subrtx and adds up the costs,
22428 so for MEM (reg) and MEM (reg + 4) the former costs 5 and
22429 the latter 9, which is not accurate for x86. Ideally
22430 address_cost should be used, but it reduces the cost too much.
22431 So the current solution is to make a constant disp as cheap as possible. */
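/* Editorial note (not part of the GCC sources): the net effect of the code
   below is that a (reg + constant disp) address adds only 1 to the cost on
   top of the register itself, rather than the much larger cost the generic
   rtx_cost recursion would assign to the PLUS.  */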
22432 if (GET_CODE (addr) == PLUS
22433 && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
22434 /* Only handle (reg + disp), since other forms of addr are mostly LEA;
22435 there's no additional cost for the plus of disp. */
22436 && register_operand (XEXP (addr, 0), Pmode))
22438 *total += 1;
22439 *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
22440 return true;
22444 return false;
22446 case ZERO_EXTRACT:
22447 if (XEXP (x, 1) == const1_rtx
22448 && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
22449 && GET_MODE (XEXP (x, 2)) == SImode
22450 && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
22452 /* Ignore cost of zero extension and masking of last argument. */
22453 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22454 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22455 *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
22456 return true;
22458 return false;
22460 case IF_THEN_ELSE:
22461 if (TARGET_XOP
22462 && VECTOR_MODE_P (mode)
22463 && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
22465 /* vpcmov. */
22466 *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
22467 if (!REG_P (XEXP (x, 0)))
22468 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22469 if (!REG_P (XEXP (x, 1)))
22470 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22471 if (!REG_P (XEXP (x, 2)))
22472 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
22473 return true;
22475 else if (TARGET_CMOVE
22476 && SCALAR_INT_MODE_P (mode)
22477 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
22479 /* cmov. */
22480 *total = COSTS_N_INSNS (1);
22481 if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
22482 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22483 if (!REG_P (XEXP (x, 1)))
22484 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22485 if (!REG_P (XEXP (x, 2)))
22486 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
22487 return true;
22489 return false;
22491 default:
22492 return false;
22496 #if TARGET_MACHO
22498 static int current_machopic_label_num;
22500 /* Given a symbol name and its associated stub, write out the
22501 definition of the stub. */
22503 void
22504 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22506 unsigned int length;
22507 char *binder_name, *symbol_name, lazy_ptr_name[32];
22508 int label = ++current_machopic_label_num;
22510 /* For 64-bit we shouldn't get here. */
22511 gcc_assert (!TARGET_64BIT);
22513 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22514 symb = targetm.strip_name_encoding (symb);
22516 length = strlen (stub);
22517 binder_name = XALLOCAVEC (char, length + 32);
22518 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22520 length = strlen (symb);
22521 symbol_name = XALLOCAVEC (char, length + 32);
22522 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22524 sprintf (lazy_ptr_name, "L%d$lz", label);
22526 if (MACHOPIC_ATT_STUB)
22527 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
22528 else if (MACHOPIC_PURE)
22529 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
22530 else
22531 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22533 fprintf (file, "%s:\n", stub);
22534 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22536 if (MACHOPIC_ATT_STUB)
22538 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
22540 else if (MACHOPIC_PURE)
22542 /* PIC stub. */
22543 /* 25-byte PIC stub using "CALL get_pc_thunk". */
22544 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
22545 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
22546 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
22547 label, lazy_ptr_name, label);
22548 fprintf (file, "\tjmp\t*%%ecx\n");
22550 else
22551 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22553 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
22554 it needs no stub-binding-helper. */
22555 if (MACHOPIC_ATT_STUB)
22556 return;
22558 fprintf (file, "%s:\n", binder_name);
22560 if (MACHOPIC_PURE)
22562 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
22563 fprintf (file, "\tpushl\t%%ecx\n");
22565 else
22566 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22568 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
22570 /* N.B. Keep the correspondence of these
22571 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
22572 old-pic/new-pic/non-pic stubs; altering this will break
22573 compatibility with existing dylibs. */
22574 if (MACHOPIC_PURE)
22576 /* 25-byte PIC stub using "CALL get_pc_thunk". */
22577 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
22579 else
22580 /* 16-byte -mdynamic-no-pic stub. */
22581 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
22583 fprintf (file, "%s:\n", lazy_ptr_name);
22584 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22585 fprintf (file, ASM_LONG "%s\n", binder_name);
22587 #endif /* TARGET_MACHO */
22589 /* Order the registers for the register allocator. */
22591 void
22592 x86_order_regs_for_local_alloc (void)
22594 int pos = 0;
22595 int i;
22597 /* First allocate the local general purpose registers. */
22598 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22599 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
22600 reg_alloc_order [pos++] = i;
22602 /* Global general purpose registers. */
22603 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22604 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
22605 reg_alloc_order [pos++] = i;
22607 /* x87 registers come first in case we are doing FP math
22608 using them. */
22609 if (!TARGET_SSE_MATH)
22610 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22611 reg_alloc_order [pos++] = i;
22613 /* SSE registers. */
22614 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22615 reg_alloc_order [pos++] = i;
22616 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22617 reg_alloc_order [pos++] = i;
22619 /* Extended REX SSE registers. */
22620 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
22621 reg_alloc_order [pos++] = i;
22623 /* Mask register. */
22624 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
22625 reg_alloc_order [pos++] = i;
22627 /* x87 registers. */
22628 if (TARGET_SSE_MATH)
22629 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22630 reg_alloc_order [pos++] = i;
22632 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22633 reg_alloc_order [pos++] = i;
22635 /* Initialize the rest of the array, as we do not allocate some registers
22636 at all. */
22637 while (pos < FIRST_PSEUDO_REGISTER)
22638 reg_alloc_order [pos++] = 0;
22641 static bool
22642 ix86_ms_bitfield_layout_p (const_tree record_type)
22644 return ((TARGET_MS_BITFIELD_LAYOUT
22645 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22646 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
22649 /* Returns an expression indicating where the this parameter is
22650 located on entry to the FUNCTION. */
22652 static rtx
22653 x86_this_parameter (tree function)
22655 tree type = TREE_TYPE (function);
22656 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22657 int nregs;
22659 if (TARGET_64BIT)
22661 const int *parm_regs;
22663 if (ix86_function_type_abi (type) == MS_ABI)
22664 parm_regs = x86_64_ms_abi_int_parameter_registers;
22665 else
22666 parm_regs = x86_64_int_parameter_registers;
22667 return gen_rtx_REG (Pmode, parm_regs[aggr]);
22670 nregs = ix86_function_regparm (type, function);
22672 if (nregs > 0 && !stdarg_p (type))
22674 int regno;
22675 unsigned int ccvt = ix86_get_callcvt (type);
22677 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
22678 regno = aggr ? DX_REG : CX_REG;
22679 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
22681 regno = CX_REG;
22682 if (aggr)
22683 return gen_rtx_MEM (SImode,
22684 plus_constant (Pmode, stack_pointer_rtx, 4));
22686 else
22688 regno = AX_REG;
22689 if (aggr)
22691 regno = DX_REG;
22692 if (nregs == 1)
22693 return gen_rtx_MEM (SImode,
22694 plus_constant (Pmode,
22695 stack_pointer_rtx, 4));
22698 return gen_rtx_REG (SImode, regno);
22701 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
22702 aggr ? 8 : 4));
22705 /* Determine whether x86_output_mi_thunk can succeed. */
22707 static bool
22708 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
22709 const_tree function)
22711 /* 64-bit can handle anything. */
22712 if (TARGET_64BIT)
22713 return true;
22715 /* For 32-bit, everything's fine if we have one free register. */
22716 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22717 return true;
22719 /* Need a free register for vcall_offset. */
22720 if (vcall_offset)
22721 return false;
22723 /* Need a free register for GOT references. */
22724 if (flag_pic && !targetm.binds_local_p (function))
22725 return false;
22727 /* Otherwise ok. */
22728 return true;
22731 /* Output the assembler code for a thunk function. THUNK_DECL is the
22732 declaration for the thunk function itself, FUNCTION is the decl for
22733 the target function. DELTA is an immediate constant offset to be
22734 added to THIS. If VCALL_OFFSET is nonzero, the word at
22735 *(*this + vcall_offset) should be added to THIS. */
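/* Editorial sketch (not part of the GCC sources): in C-like pseudocode the
   thunk emitted below behaves as

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     return function (this, ...);   // emitted as a sibling call / jump

   where 'function' stands for the target FUNCTION described above.  */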
22737 static void
22738 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
22739 HOST_WIDE_INT vcall_offset, tree function)
22741 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
22742 rtx this_param = x86_this_parameter (function);
22743 rtx this_reg, tmp, fnaddr;
22744 unsigned int tmp_regno;
22745 rtx_insn *insn;
22746 int saved_flag_force_indirect_call = flag_force_indirect_call;
22748 if (TARGET_64BIT)
22749 tmp_regno = R10_REG;
22750 else
22752 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
22753 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
22754 tmp_regno = AX_REG;
22755 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
22756 tmp_regno = DX_REG;
22757 else
22758 tmp_regno = CX_REG;
22760 if (flag_pic)
22761 flag_force_indirect_call = 0;
22764 emit_note (NOTE_INSN_PROLOGUE_END);
22766 /* CET is enabled; insert an ENDBR instruction. */
22767 if ((flag_cf_protection & CF_BRANCH))
22768 emit_insn (gen_nop_endbr ());
22770 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22771 pull it in now and let DELTA benefit. */
22772 if (REG_P (this_param))
22773 this_reg = this_param;
22774 else if (vcall_offset)
22776 /* Put the this parameter into %eax. */
22777 this_reg = gen_rtx_REG (Pmode, AX_REG);
22778 emit_move_insn (this_reg, this_param);
22780 else
22781 this_reg = NULL_RTX;
22783 /* Adjust the this parameter by a fixed constant. */
22784 if (delta)
22786 rtx delta_rtx = GEN_INT (delta);
22787 rtx delta_dst = this_reg ? this_reg : this_param;
22789 if (TARGET_64BIT)
22791 if (!x86_64_general_operand (delta_rtx, Pmode))
22793 tmp = gen_rtx_REG (Pmode, tmp_regno);
22794 emit_move_insn (tmp, delta_rtx);
22795 delta_rtx = tmp;
22799 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
22802 /* Adjust the this parameter by a value stored in the vtable. */
22803 if (vcall_offset)
22805 rtx vcall_addr, vcall_mem, this_mem;
22807 tmp = gen_rtx_REG (Pmode, tmp_regno);
22809 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
22810 if (Pmode != ptr_mode)
22811 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
22812 emit_move_insn (tmp, this_mem);
22814 /* Adjust the this parameter. */
22815 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
22816 if (TARGET_64BIT
22817 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
22819 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
22820 emit_move_insn (tmp2, GEN_INT (vcall_offset));
22821 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
22824 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
22825 if (Pmode != ptr_mode)
22826 emit_insn (gen_addsi_1_zext (this_reg,
22827 gen_rtx_REG (ptr_mode,
22828 REGNO (this_reg)),
22829 vcall_mem));
22830 else
22831 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
22834 /* If necessary, drop THIS back to its stack slot. */
22835 if (this_reg && this_reg != this_param)
22836 emit_move_insn (this_param, this_reg);
22838 fnaddr = XEXP (DECL_RTL (function), 0);
22839 if (TARGET_64BIT)
22841 if (!flag_pic || targetm.binds_local_p (function)
22842 || TARGET_PECOFF)
22844 else
22846 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
22847 tmp = gen_rtx_CONST (Pmode, tmp);
22848 fnaddr = gen_const_mem (Pmode, tmp);
22851 else
22853 if (!flag_pic || targetm.binds_local_p (function))
22855 #if TARGET_MACHO
22856 else if (TARGET_MACHO)
22858 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
22859 fnaddr = XEXP (fnaddr, 0);
22861 #endif /* TARGET_MACHO */
22862 else
22864 tmp = gen_rtx_REG (Pmode, CX_REG);
22865 output_set_got (tmp, NULL_RTX);
22867 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
22868 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
22869 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
22870 fnaddr = gen_const_mem (Pmode, fnaddr);
22874 /* Our sibling call patterns do not allow memories, because we have no
22875 predicate that can distinguish between frame and non-frame memory.
22876 For our purposes here, we can get away with (ab)using a jump pattern,
22877 because we're going to do no optimization. */
22878 if (MEM_P (fnaddr))
22880 if (sibcall_insn_operand (fnaddr, word_mode))
22882 fnaddr = XEXP (DECL_RTL (function), 0);
22883 tmp = gen_rtx_MEM (QImode, fnaddr);
22884 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
22885 tmp = emit_call_insn (tmp);
22886 SIBLING_CALL_P (tmp) = 1;
22888 else
22889 emit_jump_insn (gen_indirect_jump (fnaddr));
22891 else
22893 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
22895 // CM_LARGE_PIC always uses a pseudo PIC register, which is
22896 // uninitialized. Since FUNCTION is local and calling it
22897 // doesn't go through the PLT, we use scratch register %r11 as
22898 // the PIC register and initialize it here.
22899 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
22900 ix86_init_large_pic_reg (tmp_regno);
22901 fnaddr = legitimize_pic_address (fnaddr,
22902 gen_rtx_REG (Pmode, tmp_regno));
22905 if (!sibcall_insn_operand (fnaddr, word_mode))
22907 tmp = gen_rtx_REG (word_mode, tmp_regno);
22908 if (GET_MODE (fnaddr) != word_mode)
22909 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
22910 emit_move_insn (tmp, fnaddr);
22911 fnaddr = tmp;
22914 tmp = gen_rtx_MEM (QImode, fnaddr);
22915 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
22916 tmp = emit_call_insn (tmp);
22917 SIBLING_CALL_P (tmp) = 1;
22919 emit_barrier ();
22921 /* Emit just enough of rest_of_compilation to get the insns emitted. */
22922 insn = get_insns ();
22923 shorten_branches (insn);
22924 assemble_start_function (thunk_fndecl, fnname);
22925 final_start_function (insn, file, 1);
22926 final (insn, file, 1);
22927 final_end_function ();
22928 assemble_end_function (thunk_fndecl, fnname);
22930 flag_force_indirect_call = saved_flag_force_indirect_call;
22933 static void
22934 x86_file_start (void)
22936 default_file_start ();
22937 if (TARGET_16BIT)
22938 fputs ("\t.code16gcc\n", asm_out_file);
22939 #if TARGET_MACHO
22940 darwin_file_start ();
22941 #endif
22942 if (X86_FILE_START_VERSION_DIRECTIVE)
22943 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22944 if (X86_FILE_START_FLTUSED)
22945 fputs ("\t.global\t__fltused\n", asm_out_file);
22946 if (ix86_asm_dialect == ASM_INTEL)
22947 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
22951 x86_field_alignment (tree type, int computed)
22953 machine_mode mode;
22955 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22956 return computed;
22957 if (TARGET_IAMCU)
22958 return iamcu_alignment (type, computed);
22959 type = strip_array_types (type);
22960 mode = TYPE_MODE (type);
22961 if (mode == DFmode || mode == DCmode
22962 || GET_MODE_CLASS (mode) == MODE_INT
22963 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22965 if (TYPE_ATOMIC (type) && computed > 32)
22967 static bool warned;
22969 if (!warned && warn_psabi)
22971 const char *url
22972 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
22974 warned = true;
22975 inform (input_location, "the alignment of %<_Atomic %T%> "
22976 "fields changed in %{GCC 11.1%}",
22977 TYPE_MAIN_VARIANT (type), url);
22980 else
22981 return MIN (32, computed);
22983 return computed;
22986 /* Print call to TARGET to FILE. */
22988 static void
22989 x86_print_call_or_nop (FILE *file, const char *target)
22991 if (flag_nop_mcount || !strcmp (target, "nop"))
22992 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
22993 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
22994 else
22995 fprintf (file, "1:\tcall\t%s\n", target);
22998 static bool
22999 current_fentry_name (const char **name)
23001 tree attr = lookup_attribute ("fentry_name",
23002 DECL_ATTRIBUTES (current_function_decl));
23003 if (!attr)
23004 return false;
23005 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
23006 return true;
23009 static bool
23010 current_fentry_section (const char **name)
23012 tree attr = lookup_attribute ("fentry_section",
23013 DECL_ATTRIBUTES (current_function_decl));
23014 if (!attr)
23015 return false;
23016 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
23017 return true;
23020 /* Return a caller-saved register which isn't live or a callee-saved
23021 register which has been saved on the stack in the prologue at entry,
23022 for use by the profiler. */
23024 static int
23025 x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
23027 /* Use %r10 if the profiler is emitted before the prologue or it isn't
23028 used by DRAP. */
23029 if (ix86_profile_before_prologue ()
23030 || !crtl->drap_reg
23031 || REGNO (crtl->drap_reg) != R10_REG)
23032 return R10_REG;
23034 /* The profiler is emitted after the prologue. If there is a
23035 caller-saved register which isn't live or a callee-saved
23036 register saved on stack in the prologue, use it. */
23038 bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
23040 int i;
23041 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23042 if (GENERAL_REGNO_P (i)
23043 && i != R10_REG
23044 #ifdef NO_PROFILE_COUNTERS
23045 && (r11_ok || i != R11_REG)
23046 #else
23047 && i != R11_REG
23048 #endif
23049 && TEST_HARD_REG_BIT (accessible_reg_set, i)
23050 && (ix86_save_reg (i, true, true)
23051 || (call_used_regs[i]
23052 && !fixed_regs[i]
23053 && !REGNO_REG_SET_P (reg_live, i))))
23054 return i;
23056 sorry ("no register available for profiling %<-mcmodel=large%s%>",
23057 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
23059 return R10_REG;
23062 /* Output assembler code to FILE to increment profiler label # LABELNO
23063 for profiling a function entry. */
23064 void
23065 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23067 if (cfun->machine->insn_queued_at_entrance)
23069 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
23070 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
23071 unsigned int patch_area_size
23072 = crtl->patch_area_size - crtl->patch_area_entry;
23073 if (patch_area_size)
23074 ix86_output_patchable_area (patch_area_size,
23075 crtl->patch_area_entry == 0);
23078 const char *mcount_name = MCOUNT_NAME;
23080 if (current_fentry_name (&mcount_name))
23082 else if (fentry_name)
23083 mcount_name = fentry_name;
23084 else if (flag_fentry)
23085 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
23087 if (TARGET_64BIT)
23089 #ifndef NO_PROFILE_COUNTERS
23090 if (ASSEMBLER_DIALECT == ASM_INTEL)
23091 fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
23092 else
23093 fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
23094 #endif
23096 int scratch;
23097 const char *reg;
23098 char legacy_reg[4] = { 0 };
23100 if (!TARGET_PECOFF)
23102 switch (ix86_cmodel)
23104 case CM_LARGE:
23105 scratch = x86_64_select_profile_regnum (true);
23106 reg = hi_reg_name[scratch];
23107 if (LEGACY_INT_REGNO_P (scratch))
23109 legacy_reg[0] = 'r';
23110 legacy_reg[1] = reg[0];
23111 legacy_reg[2] = reg[1];
23112 reg = legacy_reg;
23114 if (ASSEMBLER_DIALECT == ASM_INTEL)
23115 fprintf (file, "1:\tmovabs\t%s, OFFSET FLAT:%s\n"
23116 "\tcall\t%s\n", reg, mcount_name, reg);
23117 else
23118 fprintf (file, "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
23119 mcount_name, reg, reg);
23120 break;
23121 case CM_LARGE_PIC:
23122 #ifdef NO_PROFILE_COUNTERS
23123 scratch = x86_64_select_profile_regnum (false);
23124 reg = hi_reg_name[scratch];
23125 if (LEGACY_INT_REGNO_P (scratch))
23127 legacy_reg[0] = 'r';
23128 legacy_reg[1] = reg[0];
23129 legacy_reg[2] = reg[1];
23130 reg = legacy_reg;
23132 if (ASSEMBLER_DIALECT == ASM_INTEL)
23134 fprintf (file, "1:movabs\tr11, "
23135 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
23136 fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
23137 fprintf (file, "\tadd\t%s, r11\n", reg);
23138 fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
23139 mcount_name);
23140 fprintf (file, "\tadd\t%s, r11\n", reg);
23141 fprintf (file, "\tcall\t%s\n", reg);
23142 break;
23144 fprintf (file,
23145 "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
23146 fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
23147 fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
23148 fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
23149 fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
23150 fprintf (file, "\tcall\t*%%%s\n", reg);
23151 #else
23152 sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
23153 #endif
23154 break;
23155 case CM_SMALL_PIC:
23156 case CM_MEDIUM_PIC:
23157 if (!ix86_direct_extern_access)
23159 if (ASSEMBLER_DIALECT == ASM_INTEL)
23160 fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
23161 mcount_name);
23162 else
23163 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n",
23164 mcount_name);
23165 break;
23167 /* fall through */
23168 default:
23169 x86_print_call_or_nop (file, mcount_name);
23170 break;
23173 else
23174 x86_print_call_or_nop (file, mcount_name);
23176 else if (flag_pic)
23178 #ifndef NO_PROFILE_COUNTERS
23179 if (ASSEMBLER_DIALECT == ASM_INTEL)
23180 fprintf (file,
23181 "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
23182 LPREFIX, labelno);
23183 else
23184 fprintf (file,
23185 "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
23186 LPREFIX, labelno);
23187 #endif
23188 if (ASSEMBLER_DIALECT == ASM_INTEL)
23189 fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name);
23190 else
23191 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
23193 else
23195 #ifndef NO_PROFILE_COUNTERS
23196 if (ASSEMBLER_DIALECT == ASM_INTEL)
23197 fprintf (file,
23198 "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
23199 LPREFIX, labelno);
23200 else
23201 fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
23202 LPREFIX, labelno);
23203 #endif
23204 x86_print_call_or_nop (file, mcount_name);
23207 if (flag_record_mcount
23208 || lookup_attribute ("fentry_section",
23209 DECL_ATTRIBUTES (current_function_decl)))
23211 const char *sname = "__mcount_loc";
23213 if (current_fentry_section (&sname))
23215 else if (fentry_section)
23216 sname = fentry_section;
23218 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
23219 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
23220 fprintf (file, "\t.previous\n");
23224 /* We don't have exact information about the insn sizes, but we may assume
23225 quite safely that we are informed about all 1 byte insns and memory
23226 address sizes. This is enough to eliminate unnecessary padding in
23227 99% of cases. */
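/* Editorial note (not part of the GCC sources): for example, the function
   below counts every symbolic, non-sibling call as exactly 5 bytes
   (1 opcode byte + 4-byte rel32) and otherwise falls back to conservative
   estimates where get_attr_length cannot be fully trusted.  */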
23230 ix86_min_insn_size (rtx_insn *insn)
23232 int l = 0, len;
23234 if (!INSN_P (insn) || !active_insn_p (insn))
23235 return 0;
23237 /* Discard alignments we've emitted and jump instructions. */
23238 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23239 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23240 return 0;
23242 /* Important case - calls are always 5 bytes.
23243 It is common to have many calls in a row. */
23244 if (CALL_P (insn)
23245 && symbolic_reference_mentioned_p (PATTERN (insn))
23246 && !SIBLING_CALL_P (insn))
23247 return 5;
23248 len = get_attr_length (insn);
23249 if (len <= 1)
23250 return 1;
23252 /* For normal instructions we rely on get_attr_length being exact,
23253 with a few exceptions. */
23254 if (!JUMP_P (insn))
23256 enum attr_type type = get_attr_type (insn);
23258 switch (type)
23260 case TYPE_MULTI:
23261 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
23262 || asm_noperands (PATTERN (insn)) >= 0)
23263 return 0;
23264 break;
23265 case TYPE_OTHER:
23266 case TYPE_FCMP:
23267 break;
23268 default:
23269 /* Otherwise trust get_attr_length. */
23270 return len;
23273 l = get_attr_length_address (insn);
23274 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23275 l = 4;
23277 if (l)
23278 return 1+l;
23279 else
23280 return 2;
23283 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
23285 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
23286 window. */
23288 static void
23289 ix86_avoid_jump_mispredicts (void)
23291 rtx_insn *insn, *start = get_insns ();
23292 int nbytes = 0, njumps = 0;
23293 bool isjump = false;
23295 /* Look for all minimal intervals of instructions containing 4 jumps.
23296 The intervals are bounded by START and INSN. NBYTES is the total
23297 size of instructions in the interval including INSN and not including
23298 START. When NBYTES is smaller than 16 bytes, it is possible
23299 that the end of START and INSN ends up in the same 16 byte page.
23301 The smallest offset in the page at which INSN can start is the case
23302 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
23303 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
23305 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
23306 have to, since control transfer to the label(s) can be performed through other
23307 means, and we also estimate the minimum length of all asm stmts as 0. */
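/* Editorial worked example (not part of the GCC sources): with the padding
   formula used further below, if NBYTES is 10 and the final insn is
   estimated at 2 bytes, padsize = 15 - 10 + 2 = 7, enough padding that the
   offending insns can no longer share a single 16 byte window.  */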
23308 for (insn = start; insn; insn = NEXT_INSN (insn))
23310 int min_size;
23312 if (LABEL_P (insn))
23314 align_flags alignment = label_to_alignment (insn);
23315 int align = alignment.levels[0].log;
23316 int max_skip = alignment.levels[0].maxskip;
23318 if (max_skip > 15)
23319 max_skip = 15;
23320 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
23321 already in the current 16 byte page, because otherwise
23322 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
23323 bytes to reach 16 byte boundary. */
23324 if (align <= 0
23325 || (align <= 3 && max_skip != (1 << align) - 1))
23326 max_skip = 0;
23327 if (dump_file)
23328 fprintf (dump_file, "Label %i with max_skip %i\n",
23329 INSN_UID (insn), max_skip);
23330 if (max_skip)
23332 while (nbytes + max_skip >= 16)
23334 start = NEXT_INSN (start);
23335 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
23336 || CALL_P (start))
23337 njumps--, isjump = true;
23338 else
23339 isjump = false;
23340 nbytes -= ix86_min_insn_size (start);
23343 continue;
23346 min_size = ix86_min_insn_size (insn);
23347 nbytes += min_size;
23348 if (dump_file)
23349 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
23350 INSN_UID (insn), min_size);
23351 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
23352 || CALL_P (insn))
23353 njumps++;
23354 else
23355 continue;
23357 while (njumps > 3)
23359 start = NEXT_INSN (start);
23360 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
23361 || CALL_P (start))
23362 njumps--, isjump = true;
23363 else
23364 isjump = false;
23365 nbytes -= ix86_min_insn_size (start);
23367 gcc_assert (njumps >= 0);
23368 if (dump_file)
23369 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23370 INSN_UID (start), INSN_UID (insn), nbytes);
23372 if (njumps == 3 && isjump && nbytes < 16)
23374 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
23376 if (dump_file)
23377 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23378 INSN_UID (insn), padsize);
23379 emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
23383 #endif
23385 /* AMD Athlon works faster
23386 when RET is not the destination of a conditional jump or directly preceded
23387 by another jump instruction. We avoid the penalty by inserting a NOP just
23388 before the RET instruction in such cases. */
23389 static void
23390 ix86_pad_returns (void)
23392 edge e;
23393 edge_iterator ei;
23395 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23397 basic_block bb = e->src;
23398 rtx_insn *ret = BB_END (bb);
23399 rtx_insn *prev;
23400 bool replace = false;
23402 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
23403 || optimize_bb_for_size_p (bb))
23404 continue;
23405 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23406 if (active_insn_p (prev) || LABEL_P (prev))
23407 break;
23408 if (prev && LABEL_P (prev))
23410 edge e;
23411 edge_iterator ei;
23413 FOR_EACH_EDGE (e, ei, bb->preds)
23414 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23415 && !(e->flags & EDGE_FALLTHRU))
23417 replace = true;
23418 break;
23421 if (!replace)
23423 prev = prev_active_insn (ret);
23424 if (prev
23425 && ((JUMP_P (prev) && any_condjump_p (prev))
23426 || CALL_P (prev)))
23427 replace = true;
23428 /* Empty functions get a branch mispredict even when
23429 the jump destination is not visible to us. */
23430 if (!prev && !optimize_function_for_size_p (cfun))
23431 replace = true;
23433 if (replace)
23435 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
23436 delete_insn (ret);
23441 /* Count the minimum number of instructions in BB. Return 4 if the
23442 number of instructions >= 4. */
23444 static int
23445 ix86_count_insn_bb (basic_block bb)
23447 rtx_insn *insn;
23448 int insn_count = 0;
23450 /* Count number of instructions in this block. Return 4 if the number
23451 of instructions >= 4. */
23452 FOR_BB_INSNS (bb, insn)
23454 /* Only happens in exit blocks. */
23455 if (JUMP_P (insn)
23456 && ANY_RETURN_P (PATTERN (insn)))
23457 break;
23459 if (NONDEBUG_INSN_P (insn)
23460 && GET_CODE (PATTERN (insn)) != USE
23461 && GET_CODE (PATTERN (insn)) != CLOBBER)
23463 insn_count++;
23464 if (insn_count >= 4)
23465 return insn_count;
23469 return insn_count;
23473 /* Count the minimum number of instructions in code path in BB.
23474 Return 4 if the number of instructions >= 4. */
23476 static int
23477 ix86_count_insn (basic_block bb)
23479 edge e;
23480 edge_iterator ei;
23481 int min_prev_count;
23483 /* Only bother counting instructions along paths with no
23484 more than 2 basic blocks between entry and exit. Given
23485 that BB has an edge to exit, determine if a predecessor
23486 of BB has an edge from entry. If so, compute the number
23487 of instructions in the predecessor block. If there
23488 happen to be multiple such blocks, compute the minimum. */
23489 min_prev_count = 4;
23490 FOR_EACH_EDGE (e, ei, bb->preds)
23492 edge prev_e;
23493 edge_iterator prev_ei;
23495 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
23497 min_prev_count = 0;
23498 break;
23500 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
23502 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
23504 int count = ix86_count_insn_bb (e->src);
23505 if (count < min_prev_count)
23506 min_prev_count = count;
23507 break;
23512 if (min_prev_count < 4)
23513 min_prev_count += ix86_count_insn_bb (bb);
23515 return min_prev_count;
23518 /* Pad short function to 4 instructions. */
23520 static void
23521 ix86_pad_short_function (void)
23523 edge e;
23524 edge_iterator ei;
23526 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23528 rtx_insn *ret = BB_END (e->src);
23529 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
23531 int insn_count = ix86_count_insn (e->src);
23533 /* Pad short function. */
23534 if (insn_count < 4)
23536 rtx_insn *insn = ret;
23538 /* Find epilogue. */
23539 while (insn
23540 && (!NOTE_P (insn)
23541 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
23542 insn = PREV_INSN (insn);
23544 if (!insn)
23545 insn = ret;
23547 /* Two NOPs count as one instruction. */
23548 insn_count = 2 * (4 - insn_count);
23549 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
23555 /* Fix up a Windows system unwinder issue. If an EH region falls through into
23556 the epilogue, the Windows system unwinder will apply epilogue logic and
23557 produce incorrect offsets. This can be avoided by adding a nop between
23558 the last insn that can throw and the first insn of the epilogue. */
23560 static void
23561 ix86_seh_fixup_eh_fallthru (void)
23563 edge e;
23564 edge_iterator ei;
23566 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23568 rtx_insn *insn, *next;
23570 /* Find the beginning of the epilogue. */
23571 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
23572 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
23573 break;
23574 if (insn == NULL)
23575 continue;
23577 /* We only care about preceding insns that can throw. */
23578 insn = prev_active_insn (insn);
23579 if (insn == NULL || !can_throw_internal (insn))
23580 continue;
23582 /* Do not separate calls from their debug information. */
23583 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
23584 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
23585 insn = next;
23586 else
23587 break;
23589 emit_insn_after (gen_nops (const1_rtx), insn);
23592 /* Split a vector load from a parm_decl into elemental loads to avoid STLF
23593 stalls. */
23594 static void
23595 ix86_split_stlf_stall_load ()
23597 rtx_insn* insn, *start = get_insns ();
23598 unsigned window = 0;
23600 for (insn = start; insn; insn = NEXT_INSN (insn))
23602 if (!NONDEBUG_INSN_P (insn))
23603 continue;
23604 window++;
23605 /* Insert 64 vaddps %xmm18, %xmm19, %xmm20 (no dependence between each
23606 other, just to emulate the pipeline) before the stalled load; the STLF
23607 stall case is then as fast as the no-stall case on CLX.
23608 Since the CFG is freed before machine_reorg, just do a rough
23609 calculation of the window according to the layout. */
23610 if (window > (unsigned) x86_stlf_window_ninsns)
23611 return;
23613 if (any_uncondjump_p (insn)
23614 || ANY_RETURN_P (PATTERN (insn))
23615 || CALL_P (insn))
23616 return;
23618 rtx set = single_set (insn);
23619 if (!set)
23620 continue;
23621 rtx src = SET_SRC (set);
23622 if (!MEM_P (src)
23623 /* Only handle V2DFmode load since it doesn't need any scratch
23624 register. */
23625 || GET_MODE (src) != E_V2DFmode
23626 || !MEM_EXPR (src)
23627 || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
23628 continue;
23630 rtx zero = CONST0_RTX (V2DFmode);
23631 rtx dest = SET_DEST (set);
23632 rtx m = adjust_address (src, DFmode, 0);
23633 rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
23634 emit_insn_before (loadlpd, insn);
23635 m = adjust_address (src, DFmode, 8);
23636 rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
23637 if (dump_file && (dump_flags & TDF_DETAILS))
23639 fputs ("Due to potential STLF stall, split instruction:\n",
23640 dump_file);
23641 print_rtl_single (dump_file, insn);
23642 fputs ("To:\n", dump_file);
23643 print_rtl_single (dump_file, loadlpd);
23644 print_rtl_single (dump_file, loadhpd);
23646 PATTERN (insn) = loadhpd;
23647 INSN_CODE (insn) = -1;
23648 gcc_assert (recog_memoized (insn) != -1);
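/* Illustrative effect of the transform above: a stalled V2DF parameter
   load such as

     movapd 16(%rsp), %xmm0

   is instead emitted as two DFmode element loads, roughly

     movsd  16(%rsp), %xmm0    low half, upper half zeroed
     movhpd 24(%rsp), %xmm0    high half

   so each element can be store-forwarded individually; the exact mnemonics
   and the stack offset here are only for illustration.  */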
23652 /* Implement machine specific optimizations. We implement padding of returns
23653 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23654 static void
23655 ix86_reorg (void)
23657 /* We are freeing block_for_insn in the toplev to keep compatibility
23658 with old MDEP_REORGS that are not CFG based. Recompute it now. */
23659 compute_bb_for_insn ();
23661 if (TARGET_SEH && current_function_has_exception_handlers ())
23662 ix86_seh_fixup_eh_fallthru ();
23664 if (optimize && optimize_function_for_speed_p (cfun))
23666 if (TARGET_SSE2)
23667 ix86_split_stlf_stall_load ();
23668 if (TARGET_PAD_SHORT_FUNCTION)
23669 ix86_pad_short_function ();
23670 else if (TARGET_PAD_RETURNS)
23671 ix86_pad_returns ();
23672 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
23673 if (TARGET_FOUR_JUMP_LIMIT)
23674 ix86_avoid_jump_mispredicts ();
23675 #endif
23679 /* Return nonzero when a QImode register that must be represented via a REX
23680 prefix is used. */
23681 bool
23682 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
23684 int i;
23685 extract_insn_cached (insn);
23686 for (i = 0; i < recog_data.n_operands; i++)
23687 if (GENERAL_REG_P (recog_data.operand[i])
23688 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
23689 return true;
23690 return false;
23693 /* Return true when INSN mentions a register that must be encoded using a REX
23694 prefix. */
23695 bool
23696 x86_extended_reg_mentioned_p (rtx insn)
23698 subrtx_iterator::array_type array;
23699 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
23701 const_rtx x = *iter;
23702 if (REG_P (x)
23703 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
23704 || REX2_INT_REGNO_P (REGNO (x))))
23705 return true;
23707 return false;
23710 /* Return true when INSN mentions a register that must be encoded using a REX2
23711 prefix. */
23712 bool
23713 x86_extended_rex2reg_mentioned_p (rtx insn)
23715 subrtx_iterator::array_type array;
23716 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
23718 const_rtx x = *iter;
23719 if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
23720 return true;
23722 return false;
23725 /* Return true when the rtx operands mention a register that must be encoded
23726 using the EVEX prefix. */
23727 bool
23728 x86_evex_reg_mentioned_p (rtx operands[], int nops)
23730 int i;
23731 for (i = 0; i < nops; i++)
23732 if (EXT_REX_SSE_REG_P (operands[i])
23733 || x86_extended_rex2reg_mentioned_p (operands[i]))
23734 return true;
23735 return false;
23738 /* If profitable, negate (without causing overflow) integer constant
23739 of mode MODE at location LOC. Return true in this case. */
23740 bool
23741 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
23743 HOST_WIDE_INT val;
23745 if (!CONST_INT_P (*loc))
23746 return false;
23748 switch (mode)
23750 case E_DImode:
23751 /* DImode x86_64 constants must fit in 32 bits. */
23752 gcc_assert (x86_64_immediate_operand (*loc, mode));
23754 mode = SImode;
23755 break;
23757 case E_SImode:
23758 case E_HImode:
23759 case E_QImode:
23760 break;
23762 default:
23763 gcc_unreachable ();
23766 /* Avoid overflows. */
23767 if (mode_signbit_p (mode, *loc))
23768 return false;
23770 val = INTVAL (*loc);
23772 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
23773 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
23774 if ((val < 0 && val != -128)
23775 || val == 128)
23777 *loc = GEN_INT (-val);
23778 return true;
23781 return false;
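/* For example, a caller about to emit "addl $-4, %eax" can negate the
   immediate here (the function returns true) and emit "subl $4, %eax"
   instead.  The asymmetric cases: 128 is negated to -128 because -128 still
   fits in a sign-extended imm8, while -128 itself is deliberately left
   alone.  */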
23784 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23785 optabs would emit if we didn't have TFmode patterns. */
23787 void
23788 x86_emit_floatuns (rtx operands[2])
23790 rtx_code_label *neglab, *donelab;
23791 rtx i0, i1, f0, in, out;
23792 machine_mode mode, inmode;
23794 inmode = GET_MODE (operands[1]);
23795 gcc_assert (inmode == SImode || inmode == DImode);
23797 out = operands[0];
23798 in = force_reg (inmode, operands[1]);
23799 mode = GET_MODE (out);
23800 neglab = gen_label_rtx ();
23801 donelab = gen_label_rtx ();
23802 f0 = gen_reg_rtx (mode);
23804 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23806 expand_float (out, in, 0);
23808 emit_jump_insn (gen_jump (donelab));
23809 emit_barrier ();
23811 emit_label (neglab);
23813 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23814 1, OPTAB_DIRECT);
23815 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23816 1, OPTAB_DIRECT);
23817 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23819 expand_float (f0, i0, 0);
23821 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
23823 emit_label (donelab);
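/* Rough C sketch of the sequence emitted above for a DImode input
   converted to double (OUT/IN stand for operands[0]/operands[1]):

     if ((long long) IN >= 0)
       OUT = (double) (long long) IN;                   signed convert
     else
       {
         unsigned long long i = (IN >> 1) | (IN & 1);   halve, keep sticky bit
         OUT = (double) (long long) i;
         OUT = OUT + OUT;                               undo the halving
       }
*/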
23826 /* Return the diagnostic message string if conversion from FROMTYPE to
23827 TOTYPE is not allowed, NULL otherwise. */
23829 static const char *
23830 ix86_invalid_conversion (const_tree fromtype, const_tree totype)
23832 machine_mode from_mode = element_mode (fromtype);
23833 machine_mode to_mode = element_mode (totype);
23835 if (!TARGET_SSE2 && from_mode != to_mode)
23837 /* Do not allow conversions to/from BFmode/HFmode scalar types
23838 when TARGET_SSE2 is not available. */
23839 if (from_mode == BFmode)
23840 return N_("invalid conversion from type %<__bf16%> "
23841 "without option %<-msse2%>");
23842 if (from_mode == HFmode)
23843 return N_("invalid conversion from type %<_Float16%> "
23844 "without option %<-msse2%>");
23845 if (to_mode == BFmode)
23846 return N_("invalid conversion to type %<__bf16%> "
23847 "without option %<-msse2%>");
23848 if (to_mode == HFmode)
23849 return N_("invalid conversion to type %<_Float16%> "
23850 "without option %<-msse2%>");
23853 /* Warn for silent implicit conversions between __bf16 and short,
23854 since __bfloat16 has been redefined as the real __bf16 type instead
23855 of short since GCC 13. */
23856 if (element_mode (fromtype) != element_mode (totype)
23857 && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
23859 /* Warn for silent implicit conversion where user may expect
23860 a bitcast. */
23861 if ((TYPE_MODE (fromtype) == BFmode
23862 && TYPE_MODE (totype) == HImode)
23863 || (TYPE_MODE (totype) == BFmode
23864 && TYPE_MODE (fromtype) == HImode))
23865 warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
23866 "to real %<__bf16%> since GCC 13.1, be careful of "
23867 "implicit conversion between %<__bf16%> and %<short%>; "
23868 "an explicit bitcast may be needed here");
23871 /* Conversion allowed. */
23872 return NULL;
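/* For example, without -msse2 an initialization such as
     _Float16 f = 1.0f;
   is rejected with the "invalid conversion to type '_Float16' without
   option '-msse2'" message above, while with AVX512BF16 or AVX-NE-CONVERT
   enabled an implicit __bf16 <-> short conversion is still accepted but
   triggers the __bfloat16 redefinition warning, since such code most
   likely intended a bit cast of the old typedef.  */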
23875 /* Return the diagnostic message string if the unary operation OP is
23876 not permitted on TYPE, NULL otherwise. */
23878 static const char *
23879 ix86_invalid_unary_op (int op, const_tree type)
23881 machine_mode mmode = element_mode (type);
23882 /* Reject all single-operand operations on BFmode/HFmode except for &
23883 when TARGET_SSE2 is not available. */
23884 if (!TARGET_SSE2 && op != ADDR_EXPR)
23886 if (mmode == BFmode)
23887 return N_("operation not permitted on type %<__bf16%> "
23888 "without option %<-msse2%>");
23889 if (mmode == HFmode)
23890 return N_("operation not permitted on type %<_Float16%> "
23891 "without option %<-msse2%>");
23894 /* Operation allowed. */
23895 return NULL;
23898 /* Return the diagnostic message string if the binary operation OP is
23899 not permitted on TYPE1 and TYPE2, NULL otherwise. */
23901 static const char *
23902 ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
23903 const_tree type2)
23905 machine_mode type1_mode = element_mode (type1);
23906 machine_mode type2_mode = element_mode (type2);
23907 /* Reject all 2-operand operations on BFmode or HFmode
23908 when TARGET_SSE2 is not available. */
23909 if (!TARGET_SSE2)
23911 if (type1_mode == BFmode || type2_mode == BFmode)
23912 return N_("operation not permitted on type %<__bf16%> "
23913 "without option %<-msse2%>");
23915 if (type1_mode == HFmode || type2_mode == HFmode)
23916 return N_("operation not permitted on type %<_Float16%> "
23917 "without option %<-msse2%>");
23920 /* Operation allowed. */
23921 return NULL;
23925 /* Target hook for scalar_mode_supported_p. */
23926 static bool
23927 ix86_scalar_mode_supported_p (scalar_mode mode)
23929 if (DECIMAL_FLOAT_MODE_P (mode))
23930 return default_decimal_float_supported_p ();
23931 else if (mode == TFmode)
23932 return true;
23933 else if (mode == HFmode || mode == BFmode)
23934 return true;
23935 else
23936 return default_scalar_mode_supported_p (mode);
23939 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
23940 if MODE is HFmode or BFmode, and punt to the generic implementation otherwise. */
23942 static bool
23943 ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23945 /* NB: Always return TRUE for HFmode so that the _Float16 type will
23946 be defined by the C front-end for AVX512FP16 intrinsics. We will
23947 issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
23948 enabled. */
23949 return ((mode == HFmode || mode == BFmode)
23950 ? true
23951 : default_libgcc_floating_mode_supported_p (mode));
23954 /* Implements target hook vector_mode_supported_p. */
23955 static bool
23956 ix86_vector_mode_supported_p (machine_mode mode)
23958 /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
23959 either. */
23960 if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
23961 return false;
23962 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
23963 return true;
23964 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
23965 return true;
23966 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
23967 return true;
23968 if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
23969 return true;
23970 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
23971 && VALID_MMX_REG_MODE (mode))
23972 return true;
23973 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
23974 && VALID_MMX_REG_MODE_3DNOW (mode))
23975 return true;
23976 if (mode == V2QImode)
23977 return true;
23978 return false;
23981 /* Target hook for c_mode_for_suffix. */
23982 static machine_mode
23983 ix86_c_mode_for_suffix (char suffix)
23985 if (suffix == 'q')
23986 return TFmode;
23987 if (suffix == 'w')
23988 return XFmode;
23990 return VOIDmode;
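/* For example, the literal 1.0q (or 1.0Q) gets TFmode and therefore
   __float128 semantics, while 1.0w (or 1.0W) gets XFmode, i.e. the
   80-bit __float80 / x87 extended type.  */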
23993 /* Helper function to map common constraints to non-EGPR ones.
23994 All related constraints have a j prefix; j plus an upper-case letter
23995 means the constraint is strictly EGPR enabled, while j plus a
23996 lower-case letter indicates the constraint is strictly gpr16 only.
23998 Specially for the "g" constraint, split it into "rmi" as there is
23999 no corresponding general constraint defined for the backend.
24001 Here is the full list mapping constraints that may involve
24002 a gpr to the j-prefixed ones.
24004 "g" -> "jrjmi"
24005 "r" -> "jr"
24006 "m" -> "jm"
24007 "<" -> "j<"
24008 ">" -> "j>"
24009 "o" -> "jo"
24010 "V" -> "jV"
24011 "p" -> "jp"
24012 "Bm" -> "ja"
24015 static void map_egpr_constraints (vec<const char *> &constraints)
24017 for (size_t i = 0; i < constraints.length(); i++)
24019 const char *cur = constraints[i];
24021 if (startswith (cur, "=@cc"))
24022 continue;
24024 int len = strlen (cur);
24025 auto_vec<char> buf;
24027 for (int j = 0; j < len; j++)
24029 switch (cur[j])
24031 case 'g':
24032 buf.safe_push ('j');
24033 buf.safe_push ('r');
24034 buf.safe_push ('j');
24035 buf.safe_push ('m');
24036 buf.safe_push ('i');
24037 break;
24038 case 'r':
24039 case 'm':
24040 case '<':
24041 case '>':
24042 case 'o':
24043 case 'V':
24044 case 'p':
24045 buf.safe_push ('j');
24046 buf.safe_push (cur[j]);
24047 break;
24048 case 'B':
24049 if (cur[j + 1] == 'm')
24051 buf.safe_push ('j');
24052 buf.safe_push ('a');
24053 j++;
24055 else
24057 buf.safe_push (cur[j]);
24058 buf.safe_push (cur[j + 1]);
24059 j++;
24061 break;
24062 case 'T':
24063 case 'Y':
24064 case 'W':
24065 case 'j':
24066 buf.safe_push (cur[j]);
24067 buf.safe_push (cur[j + 1]);
24068 j++;
24069 break;
24070 default:
24071 buf.safe_push (cur[j]);
24072 break;
24075 buf.safe_push ('\0');
24076 constraints[i] = xstrdup (buf.address ());
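/* Illustrative example of the mapping above: when APX EGPRs are enabled but
   gpr32 use in inline asm is disabled, a constraint string like "=r,m" is
   rewritten to "=jr,jm" and a plain "g" becomes "jrjmi", restricting the
   operands to the legacy GPRs and non-EGPR address forms.  */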
24080 /* Worker function for TARGET_MD_ASM_ADJUST.
24082 We implement asm flag outputs, and maintain source compatibility
24083 with the old cc0-based compiler. */
24085 static rtx_insn *
24086 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
24087 vec<machine_mode> & /*input_modes*/,
24088 vec<const char *> &constraints, vec<rtx> &/*uses*/,
24089 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
24090 location_t loc)
24092 bool saw_asm_flag = false;
24094 start_sequence ();
24096 if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
24097 map_egpr_constraints (constraints);
24099 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
24101 const char *con = constraints[i];
24102 if (!startswith (con, "=@cc"))
24103 continue;
24104 con += 4;
24105 if (strchr (con, ',') != NULL)
24107 error_at (loc, "alternatives not allowed in %<asm%> flag output");
24108 continue;
24111 bool invert = false;
24112 if (con[0] == 'n')
24113 invert = true, con++;
24115 machine_mode mode = CCmode;
24116 rtx_code code = UNKNOWN;
24118 switch (con[0])
24120 case 'a':
24121 if (con[1] == 0)
24122 mode = CCAmode, code = EQ;
24123 else if (con[1] == 'e' && con[2] == 0)
24124 mode = CCCmode, code = NE;
24125 break;
24126 case 'b':
24127 if (con[1] == 0)
24128 mode = CCCmode, code = EQ;
24129 else if (con[1] == 'e' && con[2] == 0)
24130 mode = CCAmode, code = NE;
24131 break;
24132 case 'c':
24133 if (con[1] == 0)
24134 mode = CCCmode, code = EQ;
24135 break;
24136 case 'e':
24137 if (con[1] == 0)
24138 mode = CCZmode, code = EQ;
24139 break;
24140 case 'g':
24141 if (con[1] == 0)
24142 mode = CCGCmode, code = GT;
24143 else if (con[1] == 'e' && con[2] == 0)
24144 mode = CCGCmode, code = GE;
24145 break;
24146 case 'l':
24147 if (con[1] == 0)
24148 mode = CCGCmode, code = LT;
24149 else if (con[1] == 'e' && con[2] == 0)
24150 mode = CCGCmode, code = LE;
24151 break;
24152 case 'o':
24153 if (con[1] == 0)
24154 mode = CCOmode, code = EQ;
24155 break;
24156 case 'p':
24157 if (con[1] == 0)
24158 mode = CCPmode, code = EQ;
24159 break;
24160 case 's':
24161 if (con[1] == 0)
24162 mode = CCSmode, code = EQ;
24163 break;
24164 case 'z':
24165 if (con[1] == 0)
24166 mode = CCZmode, code = EQ;
24167 break;
24169 if (code == UNKNOWN)
24171 error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
24172 continue;
24174 if (invert)
24175 code = reverse_condition (code);
24177 rtx dest = outputs[i];
24178 if (!saw_asm_flag)
24180 /* This is the first asm flag output. Here we put the flags
24181 register in as the real output and adjust the condition to
24182 allow it. */
24183 constraints[i] = "=Bf";
24184 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
24185 saw_asm_flag = true;
24187 else
24189 /* We don't need the flags register as output twice. */
24190 constraints[i] = "=X";
24191 outputs[i] = gen_rtx_SCRATCH (SImode);
24194 rtx x = gen_rtx_REG (mode, FLAGS_REG);
24195 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
24197 machine_mode dest_mode = GET_MODE (dest);
24198 if (!SCALAR_INT_MODE_P (dest_mode))
24200 error_at (loc, "invalid type for %<asm%> flag output");
24201 continue;
24204 if (dest_mode == QImode)
24205 emit_insn (gen_rtx_SET (dest, x));
24206 else
24208 rtx reg = gen_reg_rtx (QImode);
24209 emit_insn (gen_rtx_SET (reg, x));
24211 reg = convert_to_mode (dest_mode, reg, 1);
24212 emit_move_insn (dest, reg);
24216 rtx_insn *seq = get_insns ();
24217 end_sequence ();
24219 if (saw_asm_flag)
24220 return seq;
24221 else
24223 /* If we had no asm flag outputs, clobber the flags. */
24224 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
24225 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
24226 return NULL;
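/* Illustrative use of the flag-output constraints handled above
   (mirroring the documented asm flag output syntax):

     int carry;
     asm ("btl %2, %1" : "=@ccc" (carry) : "r" (value), "r" (bit));

   "=@ccc" selects CCCmode/EQ, the asm's real output operand is rewritten
   to the flags register ("=Bf"), and roughly a setcc plus zero-extension
   is emitted afterwards to materialize CARRY in its declared mode.  */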
24230 /* Implements target vector targetm.asm.encode_section_info. */
24232 static void ATTRIBUTE_UNUSED
24233 ix86_encode_section_info (tree decl, rtx rtl, int first)
24235 default_encode_section_info (decl, rtl, first);
24237 if (ix86_in_large_data_p (decl))
24238 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24241 /* Worker function for REVERSE_CONDITION. */
24243 enum rtx_code
24244 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
24246 return (mode == CCFPmode
24247 ? reverse_condition_maybe_unordered (code)
24248 : reverse_condition (code));
24251 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24252 to OPERANDS[0]. */
24254 const char *
24255 output_387_reg_move (rtx_insn *insn, rtx *operands)
24257 if (REG_P (operands[0]))
24259 if (REG_P (operands[1])
24260 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24262 if (REGNO (operands[0]) == FIRST_STACK_REG)
24263 return output_387_ffreep (operands, 0);
24264 return "fstp\t%y0";
24266 if (STACK_TOP_P (operands[0]))
24267 return "fld%Z1\t%y1";
24268 return "fst\t%y0";
24270 else if (MEM_P (operands[0]))
24272 gcc_assert (REG_P (operands[1]));
24273 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24274 return "fstp%Z0\t%y0";
24275 else
24277 /* There is no non-popping store to memory for XFmode.
24278 So if we need one, follow the store with a load. */
24279 if (GET_MODE (operands[0]) == XFmode)
24280 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
24281 else
24282 return "fst%Z0\t%y0";
24285 else
24286 gcc_unreachable();
24288 #ifdef TARGET_SOLARIS
24289 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24291 static void
24292 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24293 tree decl)
24295 /* With Binutils 2.15, the "@unwind" marker must be specified on
24296 every occurrence of the ".eh_frame" section, not just the first
24297 one. */
24298 if (TARGET_64BIT
24299 && strcmp (name, ".eh_frame") == 0)
24301 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24302 flags & SECTION_WRITE ? "aw" : "a");
24303 return;
24306 #ifndef USE_GAS
24307 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
24309 solaris_elf_asm_comdat_section (name, flags, decl);
24310 return;
24313 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
24314 SPARC assembler. One cannot mix single-letter flags and #exclude, so
24315 only emit the latter here. */
24316 if (flags & SECTION_EXCLUDE)
24318 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
24319 return;
24321 #endif
24323 default_elf_asm_named_section (name, flags, decl);
24325 #endif /* TARGET_SOLARIS */
24327 /* Return the mangling of TYPE if it is an extended fundamental type. */
24329 static const char *
24330 ix86_mangle_type (const_tree type)
24332 type = TYPE_MAIN_VARIANT (type);
24334 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24335 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24336 return NULL;
24338 if (type == float128_type_node || type == float64x_type_node)
24339 return NULL;
24341 switch (TYPE_MODE (type))
24343 case E_BFmode:
24344 return "DF16b";
24345 case E_HFmode:
24346 /* _Float16 is "DF16_".
24347 Align with clang's decision in https://reviews.llvm.org/D33719. */
24348 return "DF16_";
24349 case E_TFmode:
24350 /* __float128 is "g". */
24351 return "g";
24352 case E_XFmode:
24353 /* "long double" or __float80 is "e". */
24354 return "e";
24355 default:
24356 return NULL;
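/* For example, under the Itanium C++ ABI a declaration such as
     void f (__bf16, _Float16, __float128, long double);
   mangles on x86-64 as _Z1fDF16bDF16_ge, built from the strings returned
   above.  */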
24360 /* Create C++ tinfo symbols for only conditionally available fundamental
24361 types. */
24363 static void
24364 ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
24366 extern tree ix86_float16_type_node;
24367 extern tree ix86_bf16_type_node;
24369 if (!TARGET_SSE2)
24371 if (!float16_type_node)
24372 float16_type_node = ix86_float16_type_node;
24373 if (!bfloat16_type_node)
24374 bfloat16_type_node = ix86_bf16_type_node;
24375 callback (float16_type_node);
24376 callback (bfloat16_type_node);
24377 float16_type_node = NULL_TREE;
24378 bfloat16_type_node = NULL_TREE;
24382 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
24384 static tree
24385 ix86_stack_protect_guard (void)
24387 if (TARGET_SSP_TLS_GUARD)
24389 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
24390 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
24391 tree type = build_qualified_type (type_node, qual);
24392 tree t;
24394 if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
24396 t = ix86_tls_stack_chk_guard_decl;
24398 if (t == NULL)
24400 rtx x;
24402 t = build_decl
24403 (UNKNOWN_LOCATION, VAR_DECL,
24404 get_identifier (ix86_stack_protector_guard_symbol_str),
24405 type);
24406 TREE_STATIC (t) = 1;
24407 TREE_PUBLIC (t) = 1;
24408 DECL_EXTERNAL (t) = 1;
24409 TREE_USED (t) = 1;
24410 TREE_THIS_VOLATILE (t) = 1;
24411 DECL_ARTIFICIAL (t) = 1;
24412 DECL_IGNORED_P (t) = 1;
24414 /* Do not share RTL as the declaration is visible outside of
24415 current function. */
24416 x = DECL_RTL (t);
24417 RTX_FLAG (x, used) = 1;
24419 ix86_tls_stack_chk_guard_decl = t;
24422 else
24424 tree asptrtype = build_pointer_type (type);
24426 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
24427 t = build2 (MEM_REF, asptrtype, t,
24428 build_int_cst (asptrtype, 0));
24429 TREE_THIS_VOLATILE (t) = 1;
24432 return t;
24435 return default_stack_protect_guard ();
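/* Illustrative effect: with -mstack-protector-guard=tls
   -mstack-protector-guard-reg=fs -mstack-protector-guard-offset=40 (the
   usual glibc TCB layout on x86-64) the guard built here is a volatile
   access to %fs:40, whereas additionally giving a (hypothetical)
   -mstack-protector-guard-symbol=__my_guard makes it the artificial TLS
   symbol declared above instead.  */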
24438 static bool
24439 ix86_stack_protect_runtime_enabled_p (void)
24441 /* Naked functions should not enable stack protector. */
24442 return !ix86_function_naked (current_function_decl);
24445 /* For 32-bit code we can save PIC register setup by using
24446 __stack_chk_fail_local hidden function instead of calling
24447 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
24448 register, so it is better to call __stack_chk_fail directly. */
24450 static tree ATTRIBUTE_UNUSED
24451 ix86_stack_protect_fail (void)
24453 return TARGET_64BIT
24454 ? default_external_stack_protect_fail ()
24455 : default_hidden_stack_protect_fail ();
24458 /* Select a format to encode pointers in exception handling data. CODE
24459 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24460 true if the symbol may be affected by dynamic relocations.
24462 ??? All x86 object file formats are capable of representing this.
24463 After all, the relocation needed is the same as for the call insn.
24464 Whether or not a particular assembler allows us to enter such, I
24465 guess we'll have to see. */
24468 asm_preferred_eh_data_format (int code, int global)
24470 /* PE-COFF is effectively always -fPIC because of the .reloc section. */
24471 if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
24473 int type = DW_EH_PE_sdata8;
24474 if (ptr_mode == SImode
24475 || ix86_cmodel == CM_SMALL_PIC
24476 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24477 type = DW_EH_PE_sdata4;
24478 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24481 if (ix86_cmodel == CM_SMALL
24482 || (ix86_cmodel == CM_MEDIUM && code))
24483 return DW_EH_PE_udata4;
24485 return DW_EH_PE_absptr;
24488 /* Implement targetm.vectorize.builtin_vectorization_cost. */
24489 static int
24490 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
24491 tree vectype, int)
24493 bool fp = false;
24494 machine_mode mode = TImode;
24495 int index;
24496 if (vectype != NULL)
24498 fp = FLOAT_TYPE_P (vectype);
24499 mode = TYPE_MODE (vectype);
24502 switch (type_of_cost)
24504 case scalar_stmt:
24505 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
24507 case scalar_load:
24508 /* Load/store costs are relative to a register move, which is 2. Recompute
24509 them with COSTS_N_INSNS so everything has the same base. */
24510 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
24511 : ix86_cost->int_load [2]) / 2;
24513 case scalar_store:
24514 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
24515 : ix86_cost->int_store [2]) / 2;
24517 case vector_stmt:
24518 return ix86_vec_cost (mode,
24519 fp ? ix86_cost->addss : ix86_cost->sse_op);
24521 case vector_load:
24522 index = sse_store_index (mode);
24523 /* See PR82713 - we may end up being called on non-vector type. */
24524 if (index < 0)
24525 index = 2;
24526 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
24528 case vector_store:
24529 index = sse_store_index (mode);
24530 /* See PR82713 - we may end up being called on non-vector type. */
24531 if (index < 0)
24532 index = 2;
24533 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
24535 case vec_to_scalar:
24536 case scalar_to_vec:
24537 return ix86_vec_cost (mode, ix86_cost->sse_op);
24539 /* We should have separate costs for unaligned loads and gather/scatter.
24540 Do that incrementally. */
24541 case unaligned_load:
24542 index = sse_store_index (mode);
24543 /* See PR82713 - we may end up being called on non-vector type. */
24544 if (index < 0)
24545 index = 2;
24546 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
24548 case unaligned_store:
24549 index = sse_store_index (mode);
24550 /* See PR82713 - we may end up being called on non-vector type. */
24551 if (index < 0)
24552 index = 2;
24553 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
24555 case vector_gather_load:
24556 return ix86_vec_cost (mode,
24557 COSTS_N_INSNS
24558 (ix86_cost->gather_static
24559 + ix86_cost->gather_per_elt
24560 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
24562 case vector_scatter_store:
24563 return ix86_vec_cost (mode,
24564 COSTS_N_INSNS
24565 (ix86_cost->scatter_static
24566 + ix86_cost->scatter_per_elt
24567 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
24569 case cond_branch_taken:
24570 return ix86_cost->cond_taken_branch_cost;
24572 case cond_branch_not_taken:
24573 return ix86_cost->cond_not_taken_branch_cost;
24575 case vec_perm:
24576 case vec_promote_demote:
24577 return ix86_vec_cost (mode, ix86_cost->sse_op);
24579 case vec_construct:
24581 int n = TYPE_VECTOR_SUBPARTS (vectype);
24582 /* N - 1 element inserts into an SSE vector, the possible
24583 GPR -> XMM move is accounted for in add_stmt_cost. */
24584 if (GET_MODE_BITSIZE (mode) <= 128)
24585 return (n - 1) * ix86_cost->sse_op;
24586 /* One vinserti128 for combining two SSE vectors for AVX256. */
24587 else if (GET_MODE_BITSIZE (mode) == 256)
24588 return ((n - 2) * ix86_cost->sse_op
24589 + ix86_vec_cost (mode, ix86_cost->addss));
24590 /* One vinserti64x4 and two vinserti128 for combining SSE
24591 and AVX256 vectors to AVX512. */
24592 else if (GET_MODE_BITSIZE (mode) == 512)
24593 return ((n - 4) * ix86_cost->sse_op
24594 + 3 * ix86_vec_cost (mode, ix86_cost->addss));
24595 gcc_unreachable ();
24598 default:
24599 gcc_unreachable ();
24604 /* This function returns the calling abi specific va_list type node.
24605 It returns the FNDECL specific va_list type. */
24607 static tree
24608 ix86_fn_abi_va_list (tree fndecl)
24610 if (!TARGET_64BIT)
24611 return va_list_type_node;
24612 gcc_assert (fndecl != NULL_TREE);
24614 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
24615 return ms_va_list_type_node;
24616 else
24617 return sysv_va_list_type_node;
24620 /* Returns the canonical va_list type specified by TYPE. If there
24621 is no valid TYPE provided, it returns NULL_TREE. */
24623 static tree
24624 ix86_canonical_va_list_type (tree type)
24626 if (TARGET_64BIT)
24628 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
24629 return ms_va_list_type_node;
24631 if ((TREE_CODE (type) == ARRAY_TYPE
24632 && integer_zerop (array_type_nelts (type)))
24633 || POINTER_TYPE_P (type))
24635 tree elem_type = TREE_TYPE (type);
24636 if (TREE_CODE (elem_type) == RECORD_TYPE
24637 && lookup_attribute ("sysv_abi va_list",
24638 TYPE_ATTRIBUTES (elem_type)))
24639 return sysv_va_list_type_node;
24642 return NULL_TREE;
24645 return std_canonical_va_list_type (type);
24648 /* Iterate through the target-specific builtin types for va_list.
24649 IDX denotes the iterator, *PTREE is set to the result type of
24650 the va_list builtin, and *PNAME to its name.
24651 Returns zero if there is no element for this index, otherwise
24652 IDX should be increased upon the next call.
24653 Note, do not iterate a base builtin's name like __builtin_va_list.
24654 Used from c_common_nodes_and_builtins. */
24656 static int
24657 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
24659 if (TARGET_64BIT)
24661 switch (idx)
24663 default:
24664 break;
24666 case 0:
24667 *ptree = ms_va_list_type_node;
24668 *pname = "__builtin_ms_va_list";
24669 return 1;
24671 case 1:
24672 *ptree = sysv_va_list_type_node;
24673 *pname = "__builtin_sysv_va_list";
24674 return 1;
24678 return 0;
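/* Illustrative consequence: in 64-bit code both list types are exposed to
   the C front end, so an __attribute__((ms_abi)) function can declare a
   __builtin_ms_va_list (a plain pointer) while a SysV-ABI function's
   va_list remains the usual one-element array of structs; which flavour a
   given function gets is decided by ix86_fn_abi_va_list above.  */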
24681 #undef TARGET_SCHED_DISPATCH
24682 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
24683 #undef TARGET_SCHED_DISPATCH_DO
24684 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
24685 #undef TARGET_SCHED_REASSOCIATION_WIDTH
24686 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
24687 #undef TARGET_SCHED_REORDER
24688 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
24689 #undef TARGET_SCHED_ADJUST_PRIORITY
24690 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
24691 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
24692 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
24693 ix86_dependencies_evaluation_hook
24696 /* Implementation of the reassociation_width target hook, used by the
24697 reassoc phase to identify the parallelism level in a reassociated
24698 tree. The statement's tree_code is passed in OP. The operands' type
24699 is passed in MODE. */
24701 static int
24702 ix86_reassociation_width (unsigned int op, machine_mode mode)
24704 int width = 1;
24705 /* Vector part. */
24706 if (VECTOR_MODE_P (mode))
24708 int div = 1;
24709 if (INTEGRAL_MODE_P (mode))
24710 width = ix86_cost->reassoc_vec_int;
24711 else if (FLOAT_MODE_P (mode))
24712 width = ix86_cost->reassoc_vec_fp;
24714 if (width == 1)
24715 return 1;
24717 /* On Znver1-4, integer vector instructions execute in the FP unit,
24718 which can execute 3 additions and one multiplication per cycle. */
24719 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
24720 || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
24721 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
24722 return 1;
24723 /* Znver5 can do 2 integer multiplications per cycle with latency
24724 of 3. */
24725 if (ix86_tune == PROCESSOR_ZNVER5
24726 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
24727 width = 6;
24729 /* Account for targets that split wide vectors into multiple parts. */
24730 if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
24731 div = GET_MODE_BITSIZE (mode) / 256;
24732 else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
24733 div = GET_MODE_BITSIZE (mode) / 128;
24734 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
24735 div = GET_MODE_BITSIZE (mode) / 64;
24736 width = (width + div - 1) / div;
24738 /* Scalar part. */
24739 else if (INTEGRAL_MODE_P (mode))
24740 width = ix86_cost->reassoc_int;
24741 else if (FLOAT_MODE_P (mode))
24742 width = ix86_cost->reassoc_fp;
24744 /* Avoid using too many registers in 32bit mode. */
24745 if (!TARGET_64BIT && width > 2)
24746 width = 2;
24747 return width;
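/* Worked example (the numbers depend on the active cost table): for a
   256-bit FP vector mode on a CPU whose reassoc_vec_fp is 4 and which
   splits 256-bit operations (TARGET_AVX256_SPLIT_REGS), div is 256/128 = 2
   and the returned width is (4 + 2 - 1) / 2 = 2.  */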
24750 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
24751 place emms and femms instructions. */
24753 static machine_mode
24754 ix86_preferred_simd_mode (scalar_mode mode)
24756 if (!TARGET_SSE)
24757 return word_mode;
24759 switch (mode)
24761 case E_QImode:
24762 if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24763 return V64QImode;
24764 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24765 return V32QImode;
24766 else
24767 return V16QImode;
24769 case E_HImode:
24770 if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24771 return V32HImode;
24772 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24773 return V16HImode;
24774 else
24775 return V8HImode;
24777 case E_SImode:
24778 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24779 return V16SImode;
24780 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24781 return V8SImode;
24782 else
24783 return V4SImode;
24785 case E_DImode:
24786 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24787 return V8DImode;
24788 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24789 return V4DImode;
24790 else
24791 return V2DImode;
24793 case E_HFmode:
24794 if (TARGET_AVX512FP16)
24796 if (TARGET_AVX512VL)
24798 if (TARGET_PREFER_AVX128)
24799 return V8HFmode;
24800 else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
24801 return V16HFmode;
24803 if (TARGET_EVEX512)
24804 return V32HFmode;
24806 return word_mode;
24808 case E_BFmode:
24809 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24810 return V32BFmode;
24811 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24812 return V16BFmode;
24813 else
24814 return V8BFmode;
24816 case E_SFmode:
24817 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24818 return V16SFmode;
24819 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24820 return V8SFmode;
24821 else
24822 return V4SFmode;
24824 case E_DFmode:
24825 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24826 return V8DFmode;
24827 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24828 return V4DFmode;
24829 else if (TARGET_SSE2)
24830 return V2DFmode;
24831 /* FALLTHRU */
24833 default:
24834 return word_mode;
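/* For example, with -mavx2 and no 128-bit preference SFmode maps to
   V8SFmode and QImode to V32QImode; with full 512-bit AVX512F/AVX512BW and
   no -mprefer-vector-width=256 they map to V16SFmode and V64QImode; and
   without any SSE the scalar word_mode is returned, so no SIMD mode is
   chosen at all.  */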
24838 /* If AVX is enabled then try vectorizing with both 256-bit and 128-bit
24839 vectors. If AVX512F is enabled then try vectorizing with 512-bit,
24840 256-bit and 128-bit vectors. */
24842 static unsigned int
24843 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
24845 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24847 modes->safe_push (V64QImode);
24848 modes->safe_push (V32QImode);
24849 modes->safe_push (V16QImode);
24851 else if (TARGET_AVX512F && TARGET_EVEX512 && all)
24853 modes->safe_push (V32QImode);
24854 modes->safe_push (V16QImode);
24855 modes->safe_push (V64QImode);
24857 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24859 modes->safe_push (V32QImode);
24860 modes->safe_push (V16QImode);
24862 else if (TARGET_AVX && all)
24864 modes->safe_push (V16QImode);
24865 modes->safe_push (V32QImode);
24867 else if (TARGET_SSE2)
24868 modes->safe_push (V16QImode);
24870 if (TARGET_MMX_WITH_SSE)
24871 modes->safe_push (V8QImode);
24873 if (TARGET_SSE2)
24874 modes->safe_push (V4QImode);
24876 return 0;
24879 /* Implementation of targetm.vectorize.get_mask_mode. */
24881 static opt_machine_mode
24882 ix86_get_mask_mode (machine_mode data_mode)
24884 unsigned vector_size = GET_MODE_SIZE (data_mode);
24885 unsigned nunits = GET_MODE_NUNITS (data_mode);
24886 unsigned elem_size = vector_size / nunits;
24888 /* Scalar mask case. */
24889 if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
24890 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
24891 /* AVX512FP16 only supports vector comparison
24892 to kmask for _Float16. */
24893 || (TARGET_AVX512VL && TARGET_AVX512FP16
24894 && GET_MODE_INNER (data_mode) == E_HFmode)
24895 || (TARGET_AVX10_2_256 && GET_MODE_INNER (data_mode) == E_BFmode))
24897 if (elem_size == 4
24898 || elem_size == 8
24899 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
24900 return smallest_int_mode_for_size (nunits).require ();
24903 scalar_int_mode elem_mode
24904 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require ();
24906 gcc_assert (elem_size * nunits == vector_size);
24908 return mode_for_vector (elem_mode, nunits);
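/* For example, V16SFmode data with 512-bit AVX512F gets a scalar HImode
   mask (one bit per lane), V8SImode with AVX512VL gets QImode, while a
   plain SSE/AVX target falls through to an integer vector mask such as
   V4SImode for V4SFmode data.  */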
24913 /* Return class of registers which could be used for pseudo of MODE
24914 and of class RCLASS for spilling instead of memory. Return NO_REGS
24915 if it is not possible or non-profitable. */
24917 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
24919 static reg_class_t
24920 ix86_spill_class (reg_class_t rclass, machine_mode mode)
24922 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
24923 && TARGET_SSE2
24924 && TARGET_INTER_UNIT_MOVES_TO_VEC
24925 && TARGET_INTER_UNIT_MOVES_FROM_VEC
24926 && (mode == SImode || (TARGET_64BIT && mode == DImode))
24927 && INTEGER_CLASS_P (rclass))
24928 return ALL_SSE_REGS;
24929 return NO_REGS;
24932 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
24933 but returns a lower bound. */
24935 static unsigned int
24936 ix86_max_noce_ifcvt_seq_cost (edge e)
24938 bool predictable_p = predictable_edge_p (e);
24939 if (predictable_p)
24941 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
24942 return param_max_rtl_if_conversion_predictable_cost;
24944 else
24946 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
24947 return param_max_rtl_if_conversion_unpredictable_cost;
24950 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
24953 /* Return true if SEQ is a good candidate as a replacement for the
24954 if-convertible sequence described in IF_INFO. */
24956 static bool
24957 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
24959 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
24961 int cmov_cnt = 0;
24962 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
24963 Maybe we should allow even more conditional moves as long as they
24964 are used far enough not to stall the CPU, or also consider
24965 IF_INFO->TEST_BB succ edge probabilities. */
24966 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
24968 rtx set = single_set (insn);
24969 if (!set)
24970 continue;
24971 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
24972 continue;
24973 rtx src = SET_SRC (set);
24974 machine_mode mode = GET_MODE (src);
24975 if (GET_MODE_CLASS (mode) != MODE_INT
24976 && GET_MODE_CLASS (mode) != MODE_FLOAT)
24977 continue;
24978 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
24979 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
24980 continue;
24981 /* insn is CMOV or FCMOV. */
24982 if (++cmov_cnt > 1)
24983 return false;
24987 /* Without TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
24988 for movdfcc/movsfcc, which could possibly fail the cost comparison.
24989 Increasing the branch cost would hurt performance for other modes, so
24990 instead add some specific preference for floating point ifcvt. */
24991 if (!TARGET_SSE4_1 && if_info->x
24992 && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
24993 && if_info->speed_p)
24995 unsigned cost = seq_cost (seq, true);
24997 if (cost <= if_info->original_cost)
24998 return true;
25000 return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
25003 return default_noce_conversion_profitable_p (seq, if_info);
25006 /* x86-specific vector costs. */
25007 class ix86_vector_costs : public vector_costs
25009 public:
25010 ix86_vector_costs (vec_info *, bool);
25012 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
25013 stmt_vec_info stmt_info, slp_tree node,
25014 tree vectype, int misalign,
25015 vect_cost_model_location where) override;
25016 void finish_cost (const vector_costs *) override;
25018 private:
25020 /* Estimate register pressure of the vectorized code. */
25021 void ix86_vect_estimate_reg_pressure ();
25022 /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer; it's used for
25023 estimating register pressure.
25024 ??? Currently it's only used by vec_construct/scalar_to_vec
25025 where we know it's not loaded from memory. */
25026 unsigned m_num_gpr_needed[3];
25027 unsigned m_num_sse_needed[3];
25028 /* Number of 256-bit vector permutations. */
25029 unsigned m_num_avx256_vec_perm[3];
25032 ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
25033 : vector_costs (vinfo, costing_for_scalar),
25034 m_num_gpr_needed (),
25035 m_num_sse_needed (),
25036 m_num_avx256_vec_perm ()
25040 /* Implement targetm.vectorize.create_costs. */
25042 static vector_costs *
25043 ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
25045 return new ix86_vector_costs (vinfo, costing_for_scalar);
25048 unsigned
25049 ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
25050 stmt_vec_info stmt_info, slp_tree node,
25051 tree vectype, int misalign,
25052 vect_cost_model_location where)
25054 unsigned retval = 0;
25055 bool scalar_p
25056 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
25057 int stmt_cost = - 1;
25059 bool fp = false;
25060 machine_mode mode = scalar_p ? SImode : TImode;
25062 if (vectype != NULL)
25064 fp = FLOAT_TYPE_P (vectype);
25065 mode = TYPE_MODE (vectype);
25066 if (scalar_p)
25067 mode = TYPE_MODE (TREE_TYPE (vectype));
25070 if ((kind == vector_stmt || kind == scalar_stmt)
25071 && stmt_info
25072 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
25074 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
25075 /*machine_mode inner_mode = mode;
25076 if (VECTOR_MODE_P (mode))
25077 inner_mode = GET_MODE_INNER (mode);*/
25079 switch (subcode)
25081 case PLUS_EXPR:
25082 case POINTER_PLUS_EXPR:
25083 case MINUS_EXPR:
25084 if (kind == scalar_stmt)
25086 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
25087 stmt_cost = ix86_cost->addss;
25088 else if (X87_FLOAT_MODE_P (mode))
25089 stmt_cost = ix86_cost->fadd;
25090 else
25091 stmt_cost = ix86_cost->add;
25093 else
25094 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
25095 : ix86_cost->sse_op);
25096 break;
25098 case MULT_EXPR:
25099 /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
25100 take it as MULT_EXPR. */
25101 case MULT_HIGHPART_EXPR:
25102 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
25103 break;
25104 /* There's no direct instruction for WIDEN_MULT_EXPR,
25105 take emulation into account. */
25106 case WIDEN_MULT_EXPR:
25107 stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
25108 TYPE_UNSIGNED (vectype));
25109 break;
25111 case NEGATE_EXPR:
25112 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
25113 stmt_cost = ix86_cost->sse_op;
25114 else if (X87_FLOAT_MODE_P (mode))
25115 stmt_cost = ix86_cost->fchs;
25116 else if (VECTOR_MODE_P (mode))
25117 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
25118 else
25119 stmt_cost = ix86_cost->add;
25120 break;
25121 case TRUNC_DIV_EXPR:
25122 case CEIL_DIV_EXPR:
25123 case FLOOR_DIV_EXPR:
25124 case ROUND_DIV_EXPR:
25125 case TRUNC_MOD_EXPR:
25126 case CEIL_MOD_EXPR:
25127 case FLOOR_MOD_EXPR:
25128 case RDIV_EXPR:
25129 case ROUND_MOD_EXPR:
25130 case EXACT_DIV_EXPR:
25131 stmt_cost = ix86_division_cost (ix86_cost, mode);
25132 break;
25134 case RSHIFT_EXPR:
25135 case LSHIFT_EXPR:
25136 case LROTATE_EXPR:
25137 case RROTATE_EXPR:
25139 tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
25140 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
25141 stmt_cost = ix86_shift_rotate_cost
25142 (ix86_cost,
25143 (subcode == RSHIFT_EXPR
25144 && !TYPE_UNSIGNED (TREE_TYPE (op1)))
25145 ? ASHIFTRT : LSHIFTRT, mode,
25146 TREE_CODE (op2) == INTEGER_CST,
25147 cst_and_fits_in_hwi (op2)
25148 ? int_cst_value (op2) : -1,
25149 false, false, NULL, NULL);
25151 break;
25152 case NOP_EXPR:
25153 /* Only sign-conversions are free. */
25154 if (tree_nop_conversion_p
25155 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
25156 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
25157 stmt_cost = 0;
25158 break;
25160 case BIT_IOR_EXPR:
25161 case ABS_EXPR:
25162 case ABSU_EXPR:
25163 case MIN_EXPR:
25164 case MAX_EXPR:
25165 case BIT_XOR_EXPR:
25166 case BIT_AND_EXPR:
25167 case BIT_NOT_EXPR:
25168 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
25169 stmt_cost = ix86_cost->sse_op;
25170 else if (VECTOR_MODE_P (mode))
25171 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
25172 else
25173 stmt_cost = ix86_cost->add;
25174 break;
25175 default:
25176 break;
25180 combined_fn cfn;
25181 if ((kind == vector_stmt || kind == scalar_stmt)
25182 && stmt_info
25183 && stmt_info->stmt
25184 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
25185 switch (cfn)
25187 case CFN_FMA:
25188 stmt_cost = ix86_vec_cost (mode,
25189 mode == SFmode ? ix86_cost->fmass
25190 : ix86_cost->fmasd);
25191 break;
25192 case CFN_MULH:
25193 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
25194 break;
25195 default:
25196 break;
25199 /* If we do elementwise loads into a vector then we are bound by
25200 latency and execution resources for the many scalar loads
25201 (AGU and load ports). Try to account for this by scaling the
25202 construction cost by the number of elements involved. */
25203 if ((kind == vec_construct || kind == vec_to_scalar)
25204 && ((stmt_info
25205 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
25206 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
25207 && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
25208 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
25209 != INTEGER_CST))
25210 || (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
25211 == VMAT_GATHER_SCATTER)))
25212 || (node
25213 && ((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
25214 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
25215 (SLP_TREE_REPRESENTATIVE (node))))
25216 != INTEGER_CST))
25217 || (SLP_TREE_MEMORY_ACCESS_TYPE (node)
25218 == VMAT_GATHER_SCATTER)))))
25220 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
25221 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
25223 else if ((kind == vec_construct || kind == scalar_to_vec)
25224 && node
25225 && SLP_TREE_DEF_TYPE (node) == vect_external_def)
25227 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
25228 unsigned i;
25229 tree op;
25230 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
25231 if (TREE_CODE (op) == SSA_NAME)
25232 TREE_VISITED (op) = 0;
25233 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
25235 if (TREE_CODE (op) != SSA_NAME
25236 || TREE_VISITED (op))
25237 continue;
25238 TREE_VISITED (op) = 1;
25239 gimple *def = SSA_NAME_DEF_STMT (op);
25240 tree tem;
25241 if (is_gimple_assign (def)
25242 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
25243 && ((tem = gimple_assign_rhs1 (def)), true)
25244 && TREE_CODE (tem) == SSA_NAME
25245 /* A sign-change expands to nothing. */
25246 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
25247 TREE_TYPE (tem)))
25248 def = SSA_NAME_DEF_STMT (tem);
25249 /* When the component is loaded from memory we can directly
25250 move it to a vector register, otherwise we have to go
25251 via a GPR or via vpinsr which involves similar cost.
25252 Likewise with a BIT_FIELD_REF extracting from a vector
25253 register we can hope to avoid using a GPR. */
25254 if (!is_gimple_assign (def)
25255 || ((!gimple_assign_load_p (def)
25256 || (!TARGET_SSE4_1
25257 && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1))
25258 && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
25259 || !VECTOR_TYPE_P (TREE_TYPE
25260 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
25262 if (fp)
25263 m_num_sse_needed[where]++;
25264 else
25266 m_num_gpr_needed[where]++;
25267 stmt_cost += ix86_cost->sse_to_integer;
25271 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
25272 if (TREE_CODE (op) == SSA_NAME)
25273 TREE_VISITED (op) = 0;
25275 if (stmt_cost == -1)
25276 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
25278 if (kind == vec_perm && vectype
25279 && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32)
25280 m_num_avx256_vec_perm[where]++;
25282 /* Penalize DFmode vector operations for Bonnell. */
25283 if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
25284 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
25285 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
25287 /* Statements in an inner loop relative to the loop being
25288 vectorized are weighted more heavily. The value here is
25289 arbitrary and could potentially be improved with analysis. */
25290 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
25292 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
25293 for Silvermont, as it has an out-of-order integer pipeline and can execute
25294 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
25295 if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
25296 || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
25297 && stmt_info && stmt_info->stmt)
25299 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
25300 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
25301 retval = (retval * 17) / 10;
25304 m_costs[where] += retval;
25306 return retval;
25309 void
25310 ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
25312 unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
25313 unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
25315 /* ??? Any better way to get the number of target-available FP registers?  Currently use SSE_REGS. */
25316 unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
25317 for (unsigned i = 0; i != 3; i++)
25319 if (m_num_gpr_needed[i] > target_avail_regs)
25320 m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
25321 /* Only measure SSE register pressure. */
25322 if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
25323 m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
25327 void
25328 ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
25330 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
25331 if (loop_vinfo && !m_costing_for_scalar)
25333 /* We are currently not asking the vectorizer to compare costs
25334 between different vector mode sizes. When using predication
25335 that will end up always choosing the preferred mode size even
25336 if there's a smaller mode covering all lanes. Test for this
25337 situation and artificially reject the larger mode attempt.
25338 ??? We currently lack masked ops for sub-SSE sized modes,
25339 so we could restrict this rejection to AVX and AVX512 modes
25340 but error on the safe side for now. */
25341 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
25342 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
25343 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
25344 && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
25345 > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
25346 m_costs[vect_body] = INT_MAX;
25349 ix86_vect_estimate_reg_pressure ();
25351 for (int i = 0; i != 3; i++)
25352 if (m_num_avx256_vec_perm[i]
25353 && TARGET_AVX256_AVOID_VEC_PERM)
25354 m_costs[i] = INT_MAX;
25356 vector_costs::finish_cost (scalar_costs);
25359 /* Validate target specific memory model bits in VAL. */
25361 static unsigned HOST_WIDE_INT
25362 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
25364 enum memmodel model = memmodel_from_int (val);
25365 bool strong;
25367 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
25368 |MEMMODEL_MASK)
25369 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
25371 warning (OPT_Winvalid_memory_model,
25372 "unknown architecture specific memory model");
25373 return MEMMODEL_SEQ_CST;
25375 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
25376 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
25378 warning (OPT_Winvalid_memory_model,
25379 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
25380 "memory model");
25381 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
25383 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
25385 warning (OPT_Winvalid_memory_model,
25386 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
25387 "memory model");
25388 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
25390 return val;
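/* Illustrative use of the HLE bits validated above:

     __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);

   is accepted as-is, whereas pairing __ATOMIC_HLE_RELEASE with a plain
   __ATOMIC_ACQUIRE model triggers the warning and forces the model to
   SEQ_CST (keeping the HLE bit).  */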
25393 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
25394 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
25395 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
25396 or number of vecsize_mangle variants that should be emitted. */
25398 static int
25399 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
25400 struct cgraph_simd_clone *clonei,
25401 tree base_type, int num,
25402 bool explicit_p)
25404 int ret = 1;
25406 if (clonei->simdlen
25407 && (clonei->simdlen < 2
25408 || clonei->simdlen > 1024
25409 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
25411 if (explicit_p)
25412 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25413 "unsupported simdlen %wd", clonei->simdlen.to_constant ());
25414 return 0;
25417 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
25418 if (TREE_CODE (ret_type) != VOID_TYPE)
25419 switch (TYPE_MODE (ret_type))
25421 case E_QImode:
25422 case E_HImode:
25423 case E_SImode:
25424 case E_DImode:
25425 case E_SFmode:
25426 case E_DFmode:
25427 /* case E_SCmode: */
25428 /* case E_DCmode: */
25429 if (!AGGREGATE_TYPE_P (ret_type))
25430 break;
25431 /* FALLTHRU */
25432 default:
25433 if (explicit_p)
25434 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25435 "unsupported return type %qT for simd", ret_type);
25436 return 0;
25439 tree t;
25440 int i;
25441 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
25442 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
25444 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
25445 t && t != void_list_node; t = TREE_CHAIN (t), i++)
25447 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
25448 switch (TYPE_MODE (arg_type))
25450 case E_QImode:
25451 case E_HImode:
25452 case E_SImode:
25453 case E_DImode:
25454 case E_SFmode:
25455 case E_DFmode:
25456 /* case E_SCmode: */
25457 /* case E_DCmode: */
25458 if (!AGGREGATE_TYPE_P (arg_type))
25459 break;
25460 /* FALLTHRU */
25461 default:
25462 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
25463 break;
25464 if (explicit_p)
25465 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25466 "unsupported argument type %qT for simd", arg_type);
25467 return 0;
25471 if (!TREE_PUBLIC (node->decl) || !explicit_p)
25473 /* If the function isn't exported, we can pick up just one ISA
25474 for the clones. */
25475 if (TARGET_AVX512F && TARGET_EVEX512)
25476 clonei->vecsize_mangle = 'e';
25477 else if (TARGET_AVX2)
25478 clonei->vecsize_mangle = 'd';
25479 else if (TARGET_AVX)
25480 clonei->vecsize_mangle = 'c';
25481 else
25482 clonei->vecsize_mangle = 'b';
25483 ret = 1;
25485 else
25487 clonei->vecsize_mangle = "bcde"[num];
25488 ret = 4;
25490 clonei->mask_mode = VOIDmode;
25491 switch (clonei->vecsize_mangle)
25493 case 'b':
25494 clonei->vecsize_int = 128;
25495 clonei->vecsize_float = 128;
25496 break;
25497 case 'c':
25498 clonei->vecsize_int = 128;
25499 clonei->vecsize_float = 256;
25500 break;
25501 case 'd':
25502 clonei->vecsize_int = 256;
25503 clonei->vecsize_float = 256;
25504 break;
25505 case 'e':
25506 clonei->vecsize_int = 512;
25507 clonei->vecsize_float = 512;
25508 if (TYPE_MODE (base_type) == QImode)
25509 clonei->mask_mode = DImode;
25510 else
25511 clonei->mask_mode = SImode;
25512 break;
25514 if (clonei->simdlen == 0)
25516 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
25517 clonei->simdlen = clonei->vecsize_int;
25518 else
25519 clonei->simdlen = clonei->vecsize_float;
25520 clonei->simdlen = clonei->simdlen
25521 / GET_MODE_BITSIZE (TYPE_MODE (base_type));
25523 else if (clonei->simdlen > 16)
25525 /* For compatibility with ICC, use the same upper bounds
25526 for simdlen. In particular, for CTYPE below, use the return type,
25527 unless the function returns void, in which case use the characteristic
25528 type. If it is possible for the given SIMDLEN to pass a CTYPE value
25529 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
25530 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
25531 emit the corresponding clone. */
25532 tree ctype = ret_type;
25533 if (VOID_TYPE_P (ret_type))
25534 ctype = base_type;
25535 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
25536 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
25537 cnt /= clonei->vecsize_int;
25538 else
25539 cnt /= clonei->vecsize_float;
25540 if (cnt > (TARGET_64BIT ? 16 : 8))
25542 if (explicit_p)
25543 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25544 "unsupported simdlen %wd",
25545 clonei->simdlen.to_constant ());
25546 return 0;
25549 return ret;
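/* Illustrative result: for an exported
     #pragma omp declare simd
     double f (double x);
   four clones are requested (ret == 4), and the default simdlen computed
   above is 2 for 'b' (128 bit), 4 for 'c' and 'd' (256-bit float vectors)
   and 8 for 'e' (512 bit), yielding symbols along the lines of _ZGVbN2v_f,
   _ZGVcN4v_f, _ZGVdN4v_f and _ZGVeN8v_f per the x86 vector ABI.  */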
25552 /* If SIMD clone NODE can't be used in a vectorized loop
25553 in current function, return -1, otherwise return a badness of using it
25554 (0 if it is most desirable from vecsize_mangle point of view, 1
25555 slightly less desirable, etc.). */
25557 static int
25558 ix86_simd_clone_usable (struct cgraph_node *node)
25560 switch (node->simdclone->vecsize_mangle)
25562 case 'b':
25563 if (!TARGET_SSE2)
25564 return -1;
25565 if (!TARGET_AVX)
25566 return 0;
25567 return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1;
25568 case 'c':
25569 if (!TARGET_AVX)
25570 return -1;
25571 return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0;
25572 case 'd':
25573 if (!TARGET_AVX2)
25574 return -1;
25575 return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0;
25576 case 'e':
25577 if (!TARGET_AVX512F || !TARGET_EVEX512)
25578 return -1;
25579 return 0;
25580 default:
25581 gcc_unreachable ();
25585 /* This function adjusts the unroll factor based on
25586 the hardware capabilities. For example, bdver3 has
25587 a loop buffer which makes unrolling of smaller
25588 loops less important. This function decides the
25589 unroll factor using the number of memory references
25590 (the value 32 is used) as a heuristic. */
25592 static unsigned
25593 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
25595 basic_block *bbs;
25596 rtx_insn *insn;
25597 unsigned i;
25598 unsigned mem_count = 0;
25600 /* Unroll small size loop when unroll factor is not explicitly
25601 specified. */
25602 if (ix86_unroll_only_small_loops && !loop->unroll)
25604 if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
25605 return MIN (nunroll, ix86_cost->small_unroll_factor);
25606 else
25607 return 1;
25610 if (!TARGET_ADJUST_UNROLL)
25611 return nunroll;
25613 /* Count the number of memory references within the loop body.
25614 This value determines the unrolling factor for bdver3 and bdver4
25615 architectures. */
25616 subrtx_iterator::array_type array;
25617 bbs = get_loop_body (loop);
25618 for (i = 0; i < loop->num_nodes; i++)
25619 FOR_BB_INSNS (bbs[i], insn)
25620 if (NONDEBUG_INSN_P (insn))
25621 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
25622 if (const_rtx x = *iter)
25623 if (MEM_P (x))
25625 machine_mode mode = GET_MODE (x);
25626 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
25627 if (n_words > 4)
25628 mem_count += 2;
25629 else
25630 mem_count += 1;
25632 free (bbs);
25634 if (mem_count && mem_count <= 32)
25635 return MIN (nunroll, 32 / mem_count);
25637 return nunroll;
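/* Editorial note, not part of the upstream source: assuming
   TARGET_ADJUST_UNROLL, a requested factor of 8 and a loop that does
   not take the small-loop early return above, a body with five
   word-sized memory references gives mem_count == 5 and the function
   returns MIN (8, 32 / 5) == 6; once more than 32 references are
   counted, the requested factor is returned unchanged.  */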
25641 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
25643 static bool
25644 ix86_float_exceptions_rounding_supported_p (void)
25646 /* For x87 floating point with standard excess precision handling,
25647 there is no adddf3 pattern (since x87 floating point only has
25648 XFmode operations) so the default hook implementation gets this
25649 wrong. */
25650 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
25653 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
25655 static void
25656 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25658 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
25659 return;
25660 tree exceptions_var = create_tmp_var_raw (integer_type_node);
25661 if (TARGET_80387)
25663 tree fenv_index_type = build_index_type (size_int (6));
25664 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
25665 tree fenv_var = create_tmp_var_raw (fenv_type);
25666 TREE_ADDRESSABLE (fenv_var) = 1;
25667 tree fenv_ptr = build_pointer_type (fenv_type);
25668 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
25669 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
25670 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
25671 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
25672 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
25673 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
25674 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
25675 tree hold_fnclex = build_call_expr (fnclex, 0);
25676 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
25677 NULL_TREE, NULL_TREE);
25678 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
25679 hold_fnclex);
25680 *clear = build_call_expr (fnclex, 0);
25681 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
25682 tree fnstsw_call = build_call_expr (fnstsw, 0);
25683 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
25684 fnstsw_call, NULL_TREE, NULL_TREE);
25685 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
25686 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
25687 exceptions_var, exceptions_x87,
25688 NULL_TREE, NULL_TREE);
25689 *update = build2 (COMPOUND_EXPR, integer_type_node,
25690 sw_mod, update_mod);
25691 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
25692 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
25694 if (TARGET_SSE && TARGET_SSE_MATH)
25696 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
25697 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
25698 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
25699 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
25700 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
25701 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
25702 mxcsr_orig_var, stmxcsr_hold_call,
25703 NULL_TREE, NULL_TREE);
25704 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
25705 mxcsr_orig_var,
25706 build_int_cst (unsigned_type_node, 0x1f80));
25707 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
25708 build_int_cst (unsigned_type_node, 0xffffffc0));
25709 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
25710 mxcsr_mod_var, hold_mod_val,
25711 NULL_TREE, NULL_TREE);
25712 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
25713 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
25714 hold_assign_orig, hold_assign_mod);
25715 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
25716 ldmxcsr_hold_call);
25717 if (*hold)
25718 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
25719 else
25720 *hold = hold_all;
25721 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
25722 if (*clear)
25723 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
25724 ldmxcsr_clear_call);
25725 else
25726 *clear = ldmxcsr_clear_call;
25727 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
25728 tree exceptions_sse = fold_convert (integer_type_node,
25729 stxmcsr_update_call);
25730 if (*update)
25732 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
25733 exceptions_var, exceptions_sse);
25734 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
25735 exceptions_var, exceptions_mod);
25736 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
25737 exceptions_assign);
25739 else
25740 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
25741 exceptions_sse, NULL_TREE, NULL_TREE);
25742 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
25743 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
25744 ldmxcsr_update_call);
25746 tree atomic_feraiseexcept
25747 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
25748 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
25749 1, exceptions_var);
25750 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
25751 atomic_feraiseexcept_call);
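/* Editorial note, not part of the upstream source: in the SSE sequence
   above, MXCSR bits 0-5 are the sticky exception flags and bits 7-12
   the exception mask bits, so (mxcsr | 0x1f80) & 0xffffffc0 masks all
   exceptions and clears any pending flags for the "hold" state; the
   __atomic_feraiseexcept call at the end then replays the flags
   accumulated in EXCEPTIONS_VAR.  */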
25754 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
25755 /* For i386, a common symbol is local only for non-PIE binaries. For
25756 x86-64, a common symbol is local only for non-PIE binaries or if the
25757 linker supports copy relocations in PIE binaries. */
25759 static bool
25760 ix86_binds_local_p (const_tree exp)
25762 bool direct_extern_access
25763 = (ix86_direct_extern_access
25764 && !(VAR_OR_FUNCTION_DECL_P (exp)
25765 && lookup_attribute ("nodirect_extern_access",
25766 DECL_ATTRIBUTES (exp))));
25767 if (!direct_extern_access)
25768 ix86_has_no_direct_extern_access = true;
25769 return default_binds_local_p_3 (exp, flag_shlib != 0, true,
25770 direct_extern_access,
25771 (direct_extern_access
25772 && (!flag_pic
25773 || (TARGET_64BIT
25774 && HAVE_LD_PIE_COPYRELOC != 0))));
25777 /* If flag_pic or ix86_direct_extern_access is false, then neither
25778 local nor global relocs should be placed in readonly memory. */
25780 static int
25781 ix86_reloc_rw_mask (void)
25783 return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
25785 #endif
25787 /* Return true iff ADDR can be used as a symbolic base address. */
25789 static bool
25790 symbolic_base_address_p (rtx addr)
25792 if (GET_CODE (addr) == SYMBOL_REF)
25793 return true;
25795 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
25796 return true;
25798 return false;
25801 /* Return true iff ADDR can be used as a base address. */
25803 static bool
25804 base_address_p (rtx addr)
25806 if (REG_P (addr))
25807 return true;
25809 if (symbolic_base_address_p (addr))
25810 return true;
25812 return false;
25815 /* If MEM is in the form of [(base+symbase)+offset], extract the three
25816 parts of address and set to BASE, SYMBASE and OFFSET, otherwise
25817 return false. */
25819 static bool
25820 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
25822 rtx addr;
25824 gcc_assert (MEM_P (mem));
25826 addr = XEXP (mem, 0);
25828 if (GET_CODE (addr) == CONST)
25829 addr = XEXP (addr, 0);
25831 if (base_address_p (addr))
25833 *base = addr;
25834 *symbase = const0_rtx;
25835 *offset = const0_rtx;
25836 return true;
25839 if (GET_CODE (addr) == PLUS
25840 && base_address_p (XEXP (addr, 0)))
25842 rtx addend = XEXP (addr, 1);
25844 if (GET_CODE (addend) == CONST)
25845 addend = XEXP (addend, 0);
25847 if (CONST_INT_P (addend))
25849 *base = XEXP (addr, 0);
25850 *symbase = const0_rtx;
25851 *offset = addend;
25852 return true;
25855 /* Also accept REG + symbolic ref, with or without a CONST_INT
25856 offset. */
25857 if (REG_P (XEXP (addr, 0)))
25859 if (symbolic_base_address_p (addend))
25861 *base = XEXP (addr, 0);
25862 *symbase = addend;
25863 *offset = const0_rtx;
25864 return true;
25867 if (GET_CODE (addend) == PLUS
25868 && symbolic_base_address_p (XEXP (addend, 0))
25869 && CONST_INT_P (XEXP (addend, 1)))
25871 *base = XEXP (addr, 0);
25872 *symbase = XEXP (addend, 0);
25873 *offset = XEXP (addend, 1);
25874 return true;
25879 return false;
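/* Editorial note, not part of the upstream source: the address shapes
   accepted above are, schematically,
     (reg)                                   base=reg, symbase=0,   offset=0
     (symbol_ref)                            base=sym, symbase=0,   offset=0
     (plus (reg) (const_int 8))              base=reg, symbase=0,   offset=8
     (plus (reg) (symbol_ref))               base=reg, symbase=sym, offset=0
     (plus (reg) (const (plus (symbol_ref) (const_int 4))))
                                             base=reg, symbase=sym, offset=4
   with a GOTOFF unspec also allowed wherever a SYMBOL_REF may appear.  */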
25882 /* Given OPERANDS of consecutive load/store, check if we can merge
25883 them into move multiple. LOAD is true if they are load instructions.
25884 MODE is the mode of memory operands. */
25886 bool
25887 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
25888 machine_mode mode)
25890 HOST_WIDE_INT offval_1, offval_2, msize;
25891 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
25892 symbase_1, symbase_2, offset_1, offset_2;
25894 if (load)
25896 mem_1 = operands[1];
25897 mem_2 = operands[3];
25898 reg_1 = operands[0];
25899 reg_2 = operands[2];
25901 else
25903 mem_1 = operands[0];
25904 mem_2 = operands[2];
25905 reg_1 = operands[1];
25906 reg_2 = operands[3];
25909 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
25911 if (REGNO (reg_1) != REGNO (reg_2))
25912 return false;
25914 /* Check if the addresses are in the form of [base+offset]. */
25915 if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
25916 return false;
25917 if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
25918 return false;
25920 /* Check if the bases are the same. */
25921 if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
25922 return false;
25924 offval_1 = INTVAL (offset_1);
25925 offval_2 = INTVAL (offset_2);
25926 msize = GET_MODE_SIZE (mode);
25927 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
25928 if (offval_1 + msize != offval_2)
25929 return false;
25931 return true;
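/* Editorial note, not part of the upstream source: as an example, two
   SImode stores of the same register to [base] and [base + 4] (or the
   corresponding loads, with the operand order mirrored) satisfy the
   checks above, since the register numbers, bases and symbolic parts
   match and the second offset equals the first plus
   GET_MODE_SIZE (SImode).  */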
25934 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
25936 static bool
25937 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
25938 optimization_type opt_type)
25940 switch (op)
25942 case asin_optab:
25943 case acos_optab:
25944 case log1p_optab:
25945 case exp_optab:
25946 case exp10_optab:
25947 case exp2_optab:
25948 case expm1_optab:
25949 case ldexp_optab:
25950 case scalb_optab:
25951 case round_optab:
25952 case lround_optab:
25953 return opt_type == OPTIMIZE_FOR_SPEED;
25955 case rint_optab:
25956 if (SSE_FLOAT_MODE_P (mode1)
25957 && TARGET_SSE_MATH
25958 && !flag_trapping_math
25959 && !TARGET_SSE4_1
25960 && mode1 != HFmode)
25961 return opt_type == OPTIMIZE_FOR_SPEED;
25962 return true;
25964 case floor_optab:
25965 case ceil_optab:
25966 case btrunc_optab:
25967 if (((SSE_FLOAT_MODE_P (mode1)
25968 && TARGET_SSE_MATH
25969 && TARGET_SSE4_1)
25970 || mode1 == HFmode)
25971 && !flag_trapping_math)
25972 return true;
25973 return opt_type == OPTIMIZE_FOR_SPEED;
25975 case rsqrt_optab:
25976 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
25978 default:
25979 return true;
25983 /* Address space support.
25985 This is not "far pointers" in the 16-bit sense, but an easy way
25986 to use %fs and %gs segment prefixes. Therefore:
25988 (a) All address spaces have the same modes,
25989 (b) All address spaces have the same address forms,
25990 (c) While %fs and %gs are technically subsets of the generic
25991 address space, they are probably not subsets of each other.
25992 (d) Since we have no access to the segment base register values
25993 without resorting to a system call, we cannot convert a
25994 non-default address space to a default address space.
25995 Therefore we do not claim %fs or %gs are subsets of generic.
25997 Therefore we can (mostly) use the default hooks. */
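/* Editorial sketch, not part of the upstream source: user code selects
   these address spaces with the __seg_fs / __seg_gs qualifiers of
   GCC's x86 named address space extension, as in the illustrative
   snippet below, which compiles to a %gs-prefixed load; the hooks here
   only need to record that address 0 is valid in the non-generic
   spaces.

     struct percpu { int counter; };
     int
     load_counter (const struct percpu __seg_gs *p)
     {
       return p->counter;
     }
*/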
25999 /* All use of segmentation is assumed to make address 0 valid. */
26001 static bool
26002 ix86_addr_space_zero_address_valid (addr_space_t as)
26004 return as != ADDR_SPACE_GENERIC;
26007 static void
26008 ix86_init_libfuncs (void)
26010 if (TARGET_64BIT)
26012 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
26013 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
26015 else
26017 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
26018 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
26021 #if TARGET_MACHO
26022 darwin_rename_builtins ();
26023 #endif
26026 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
26027 FPU, assume that the fpcw is set to extended precision; when using
26028 only SSE, rounding is correct; when using both SSE and the FPU,
26029 the rounding precision is indeterminate, since either may be chosen
26030 apparently at random. */
26032 static enum flt_eval_method
26033 ix86_get_excess_precision (enum excess_precision_type type)
26035 switch (type)
26037 case EXCESS_PRECISION_TYPE_FAST:
26038 /* The fastest type to promote to will always be the native type,
26039 whether that occurs with implicit excess precision or
26040 otherwise. */
26041 return TARGET_AVX512FP16
26042 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26043 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
26044 case EXCESS_PRECISION_TYPE_STANDARD:
26045 case EXCESS_PRECISION_TYPE_IMPLICIT:
26046 /* Otherwise, the excess precision we want when we are
26047 in a standards compliant mode, and the implicit precision we
26048 provide would be identical were it not for the unpredictable
26049 cases. */
26050 if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
26051 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26052 else if (!TARGET_80387)
26053 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
26054 else if (!TARGET_MIX_SSE_I387)
26056 if (!(TARGET_SSE && TARGET_SSE_MATH))
26057 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
26058 else if (TARGET_SSE2)
26059 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
26062 /* If we are in standards compliant mode, but we know we will
26063 calculate in unpredictable precision, return
26064 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
26065 excess precision if the target can't guarantee it will honor
26066 it. */
26067 return (type == EXCESS_PRECISION_TYPE_STANDARD
26068 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
26069 : FLT_EVAL_METHOD_UNPREDICTABLE);
26070 case EXCESS_PRECISION_TYPE_FLOAT16:
26071 if (TARGET_80387
26072 && !(TARGET_SSE_MATH && TARGET_SSE))
26073 error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
26074 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26075 default:
26076 gcc_unreachable ();
26079 return FLT_EVAL_METHOD_UNPREDICTABLE;
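/* Editorial note, not part of the upstream source: for instance, a
   standards-conforming mode with -mfpmath=387 and no SSE math yields
   FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE above, because the x87
   evaluates float and double in XFmode, whereas pure SSE2 math
   promotes only to float and AVX512-FP16 SSE math evaluates in
   _Float16.  */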
26082 /* Return true if _BitInt(N) is supported and fill its details into *INFO. */
26083 bool
26084 ix86_bitint_type_info (int n, struct bitint_info *info)
26086 if (n <= 8)
26087 info->limb_mode = QImode;
26088 else if (n <= 16)
26089 info->limb_mode = HImode;
26090 else if (n <= 32 || (!TARGET_64BIT && n > 64))
26091 info->limb_mode = SImode;
26092 else
26093 info->limb_mode = DImode;
26094 info->abi_limb_mode = info->limb_mode;
26095 info->big_endian = false;
26096 info->extended = false;
26097 return true;
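/* Editorial note, not part of the upstream source: for example,
   _BitInt(100) gets DImode limbs on a 64-bit target (two limbs),
   while a 32-bit target falls back to SImode limbs for anything wider
   than 64 bits; the ABI limb mode matches the limb mode, limbs are
   ordered little-endian and the excess bits of the top limb are not
   guaranteed to be sign- or zero-extended.  */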
26100 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return DFmode, TFmode
26101 or XFmode for TI_LONG_DOUBLE_TYPE (the long double type), depending
26102 on the number of long double bits; go with the default for the others. */
26104 static machine_mode
26105 ix86_c_mode_for_floating_type (enum tree_index ti)
26107 if (ti == TI_LONG_DOUBLE_TYPE)
26108 return (TARGET_LONG_DOUBLE_64 ? DFmode
26109 : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
26110 return default_mode_for_floating_type (ti);
26113 /* Returns modified FUNCTION_TYPE for cdtor callabi. */
26114 tree
26115 ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
26117 if (TARGET_64BIT
26118 || TARGET_RTD
26119 || ix86_function_type_abi (fntype) != MS_ABI)
26120 return fntype;
26121 /* For 32-bit MS ABI add thiscall attribute. */
26122 tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
26123 TYPE_ATTRIBUTES (fntype));
26124 return build_type_attribute_variant (fntype, attribs);
26127 /* Implement PUSH_ROUNDING. On 386, we have a pushw instruction that
26128 decrements by exactly 2 no matter what the position was; there is no pushb.
26130 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
26131 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
26132 are a multiple of 4 for 32-bit targets and 8 for 64-bit targets. */
26134 poly_int64
26135 ix86_push_rounding (poly_int64 bytes)
26137 return ROUND_UP (bytes, UNITS_PER_WORD);
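/* Editorial note, not part of the upstream source: e.g. a 2-byte
   (HImode) push is accounted as ROUND_UP (2, 4) == 4 bytes on a
   32-bit target and as ROUND_UP (2, 8) == 8 bytes on a 64-bit target,
   keeping every stack adjustment a multiple of the CIE data alignment
   factor.  */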
26140 /* Use 8 bits of metadata starting from bit 48 for LAM_U48,
26141 6 bits of metadata starting from bit 57 for LAM_U57. */
26142 #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \
26143 ? 48 \
26144 : (ix86_lam_type == lam_u57 ? 57 : 0))
26145 #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \
26146 ? 8 \
26147 : (ix86_lam_type == lam_u57 ? 6 : 0))
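/* Editorial note, not part of the upstream source: under lam_u57 the
   tag occupies pointer bits 57..62 (IX86_HWASAN_SHIFT == 57,
   IX86_HWASAN_TAG_SIZE == 6) with bit 63 left untouched, while
   lam_u48 places an 8-bit tag in bits 48..55.  */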
26149 /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
26150 bool
26151 ix86_memtag_can_tag_addresses ()
26153 return ix86_lam_type != lam_none && TARGET_LP64;
26156 /* Implement TARGET_MEMTAG_TAG_SIZE. */
26157 unsigned char
26158 ix86_memtag_tag_size ()
26160 return IX86_HWASAN_TAG_SIZE;
26163 /* Implement TARGET_MEMTAG_SET_TAG. */
26164 rtx
26165 ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
26167 /* default_memtag_insert_random_tag may
26168 generate a tag value wider than 6 bits. */
26169 if (ix86_lam_type == lam_u57)
26171 unsigned HOST_WIDE_INT and_imm
26172 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
26174 emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
26176 tag = expand_simple_binop (Pmode, ASHIFT, tag,
26177 GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
26178 /* unsignedp = */1, OPTAB_WIDEN);
26179 rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
26180 /* unsignedp = */1, OPTAB_DIRECT);
26181 return ret;
26184 /* Implement TARGET_MEMTAG_EXTRACT_TAG. */
26185 rtx
26186 ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
26188 rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
26189 GEN_INT (IX86_HWASAN_SHIFT), target,
26190 /* unsignedp = */0,
26191 OPTAB_DIRECT);
26192 rtx ret = gen_reg_rtx (QImode);
26193 /* Mask off bit63 when LAM_U57. */
26194 if (ix86_lam_type == lam_u57)
26196 unsigned HOST_WIDE_INT and_imm
26197 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
26198 emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
26199 gen_int_mode (and_imm, QImode)));
26201 else
26202 emit_move_insn (ret, gen_lowpart (QImode, tag));
26203 return ret;
26206 /* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */
26207 rtx
26208 ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
26210 /* Leave bit63 alone. */
26211 rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
26212 + (HOST_WIDE_INT_1U << 63) - 1),
26213 Pmode);
26214 rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
26215 tag_mask, target, true,
26216 OPTAB_DIRECT);
26217 gcc_assert (untagged_base);
26218 return untagged_base;
26221 /* Implement TARGET_MEMTAG_ADD_TAG. */
26222 rtx
26223 ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
26225 rtx base_tag = gen_reg_rtx (QImode);
26226 rtx base_addr = gen_reg_rtx (Pmode);
26227 rtx tagged_addr = gen_reg_rtx (Pmode);
26228 rtx new_tag = gen_reg_rtx (QImode);
26229 unsigned HOST_WIDE_INT and_imm
26230 = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
26232 /* When the tag addition "overflows",
26233 the most significant bit needs to be masked off. */
26234 emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
26235 emit_move_insn (base_addr,
26236 ix86_memtag_untagged_pointer (base, NULL_RTX));
26237 emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
26238 emit_move_insn (new_tag, base_tag);
26239 emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
26240 emit_move_insn (tagged_addr,
26241 ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
26242 return plus_constant (Pmode, tagged_addr, offset);
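/* Editorial note, not part of the upstream source: assuming lam_u57,
   adding TAG_OFFSET 3 to a pointer whose current tag is 0x3e produces
   0x41, which ix86_memtag_set_tag masks back into the 6-bit field, so
   the tag wraps to 0x01 before being re-inserted at bit 57.  */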
26245 /* Implement TARGET_HAVE_CCMP. */
26246 static bool
26247 ix86_have_ccmp ()
26249 return (bool) TARGET_APX_CCMP;
26252 /* Implement TARGET_MODE_CAN_TRANSFER_BITS. */
26253 static bool
26254 ix86_mode_can_transfer_bits (machine_mode mode)
26256 if (GET_MODE_CLASS (mode) == MODE_FLOAT
26257 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
26258 switch (GET_MODE_INNER (mode))
26260 case E_SFmode:
26261 case E_DFmode:
26262 /* These suffer from normalization upon load when not using SSE. */
26263 return !(ix86_fpmath & FPMATH_387);
26264 default:
26265 return true;
26268 return true;
26271 /* Target-specific selftests. */
26273 #if CHECKING_P
26275 namespace selftest {
26277 /* Verify that hard regs are dumped as expected (in compact mode). */
26279 static void
26280 ix86_test_dumping_hard_regs ()
26282 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
26283 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
26286 /* Test dumping an insn with repeated references to the same SCRATCH,
26287 to verify the rtx_reuse code. */
26289 static void
26290 ix86_test_dumping_memory_blockage ()
26292 set_new_first_and_last_insn (NULL, NULL);
26294 rtx pat = gen_memory_blockage ();
26295 rtx_reuse_manager r;
26296 r.preprocess (pat);
26298 /* Verify that the repeated references to the SCRATCH show use of
26299 reuse IDs. The first should be prefixed with a reuse ID,
26300 and the second should be dumped as a "reuse_rtx" of that ID.
26301 The expected string assumes Pmode == DImode. */
26302 if (Pmode == DImode)
26303 ASSERT_RTL_DUMP_EQ_WITH_REUSE
26304 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
26305 " (unspec:BLK [\n"
26306 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
26307 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
26310 /* Verify loading an RTL dump; specifically a dump of copying
26311 a param on x86_64 from a hard reg into the frame.
26312 This test is target-specific since the dump contains target-specific
26313 hard reg names. */
26315 static void
26316 ix86_test_loading_dump_fragment_1 ()
26318 rtl_dump_test t (SELFTEST_LOCATION,
26319 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
26321 rtx_insn *insn = get_insn_by_uid (1);
26323 /* The block structure and indentation here is purely for
26324 readability; it mirrors the structure of the rtx. */
26325 tree mem_expr;
26327 rtx pat = PATTERN (insn);
26328 ASSERT_EQ (SET, GET_CODE (pat));
26330 rtx dest = SET_DEST (pat);
26331 ASSERT_EQ (MEM, GET_CODE (dest));
26332 /* Verify the "/c" was parsed. */
26333 ASSERT_TRUE (RTX_FLAG (dest, call));
26334 ASSERT_EQ (SImode, GET_MODE (dest));
26336 rtx addr = XEXP (dest, 0);
26337 ASSERT_EQ (PLUS, GET_CODE (addr));
26338 ASSERT_EQ (DImode, GET_MODE (addr));
26340 rtx lhs = XEXP (addr, 0);
26341 /* Verify that the "frame" REG was consolidated. */
26342 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
26345 rtx rhs = XEXP (addr, 1);
26346 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
26347 ASSERT_EQ (-4, INTVAL (rhs));
26350 /* Verify the "[1 i+0 S4 A32]" was parsed. */
26351 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
26352 /* "i" should have been handled by synthesizing a global int
26353 variable named "i". */
26354 mem_expr = MEM_EXPR (dest);
26355 ASSERT_NE (mem_expr, NULL);
26356 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
26357 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
26358 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
26359 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
26360 /* "+0". */
26361 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
26362 ASSERT_EQ (0, MEM_OFFSET (dest));
26363 /* "S4". */
26364 ASSERT_EQ (4, MEM_SIZE (dest));
26365 /* "A32. */
26366 ASSERT_EQ (32, MEM_ALIGN (dest));
26369 rtx src = SET_SRC (pat);
26370 ASSERT_EQ (REG, GET_CODE (src));
26371 ASSERT_EQ (SImode, GET_MODE (src));
26372 ASSERT_EQ (5, REGNO (src));
26373 tree reg_expr = REG_EXPR (src);
26374 /* "i" here should point to the same var as for the MEM_EXPR. */
26375 ASSERT_EQ (reg_expr, mem_expr);
26380 /* Verify that the RTL loader copes with a call_insn dump.
26381 This test is target-specific since the dump contains a target-specific
26382 hard reg name. */
26384 static void
26385 ix86_test_loading_call_insn ()
26387 /* The test dump includes register "xmm0", which requires TARGET_SSE
26388 to exist. */
26389 if (!TARGET_SSE)
26390 return;
26392 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
26394 rtx_insn *insn = get_insns ();
26395 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
26397 /* "/j". */
26398 ASSERT_TRUE (RTX_FLAG (insn, jump));
26400 rtx pat = PATTERN (insn);
26401 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
26403 /* Verify REG_NOTES. */
26405 /* "(expr_list:REG_CALL_DECL". */
26406 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
26407 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
26408 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
26410 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
26411 rtx_expr_list *note1 = note0->next ();
26412 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
26414 ASSERT_EQ (NULL, note1->next ());
26417 /* Verify CALL_INSN_FUNCTION_USAGE. */
26419 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
26420 rtx_expr_list *usage
26421 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
26422 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
26423 ASSERT_EQ (DFmode, GET_MODE (usage));
26424 ASSERT_EQ (USE, GET_CODE (usage->element ()));
26425 ASSERT_EQ (NULL, usage->next ());
26429 /* Verify that the RTL loader copes with a dump from print_rtx_function.
26430 This test is target-specific since the dump contains target-specific
26431 hard reg names. */
26433 static void
26434 ix86_test_loading_full_dump ()
26436 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
26438 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
26440 rtx_insn *insn_1 = get_insn_by_uid (1);
26441 ASSERT_EQ (NOTE, GET_CODE (insn_1));
26443 rtx_insn *insn_7 = get_insn_by_uid (7);
26444 ASSERT_EQ (INSN, GET_CODE (insn_7));
26445 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
26447 rtx_insn *insn_15 = get_insn_by_uid (15);
26448 ASSERT_EQ (INSN, GET_CODE (insn_15));
26449 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
26451 /* Verify crtl->return_rtx. */
26452 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
26453 ASSERT_EQ (0, REGNO (crtl->return_rtx));
26454 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
26457 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
26458 In particular, verify that it correctly loads the 2nd operand.
26459 This test is target-specific since these are machine-specific
26460 operands (and enums). */
26462 static void
26463 ix86_test_loading_unspec ()
26465 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
26467 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
26469 ASSERT_TRUE (cfun);
26471 /* Test of an UNSPEC. */
26472 rtx_insn *insn = get_insns ();
26473 ASSERT_EQ (INSN, GET_CODE (insn));
26474 rtx set = single_set (insn);
26475 ASSERT_NE (NULL, set);
26476 rtx dst = SET_DEST (set);
26477 ASSERT_EQ (MEM, GET_CODE (dst));
26478 rtx src = SET_SRC (set);
26479 ASSERT_EQ (UNSPEC, GET_CODE (src));
26480 ASSERT_EQ (BLKmode, GET_MODE (src));
26481 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
26483 rtx v0 = XVECEXP (src, 0, 0);
26485 /* Verify that the two uses of the first SCRATCH have pointer
26486 equality. */
26487 rtx scratch_a = XEXP (dst, 0);
26488 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
26490 rtx scratch_b = XEXP (v0, 0);
26491 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
26493 ASSERT_EQ (scratch_a, scratch_b);
26495 /* Verify that the two mems are thus treated as equal. */
26496 ASSERT_TRUE (rtx_equal_p (dst, v0));
26498 /* Verify that the insn is recognized. */
26499 ASSERT_NE (-1, recog_memoized (insn));
26501 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
26502 insn = NEXT_INSN (insn);
26503 ASSERT_EQ (INSN, GET_CODE (insn));
26505 set = single_set (insn);
26506 ASSERT_NE (NULL, set);
26508 src = SET_SRC (set);
26509 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
26510 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
26513 /* Run all target-specific selftests. */
26515 static void
26516 ix86_run_selftests (void)
26518 ix86_test_dumping_hard_regs ();
26519 ix86_test_dumping_memory_blockage ();
26521 /* Various tests of loading RTL dumps, here because they contain
26522 ix86-isms (e.g. names of hard regs). */
26523 ix86_test_loading_dump_fragment_1 ();
26524 ix86_test_loading_call_insn ();
26525 ix86_test_loading_full_dump ();
26526 ix86_test_loading_unspec ();
26529 } // namespace selftest
26531 #endif /* CHECKING_P */
26533 static const scoped_attribute_specs *const ix86_attribute_table[] =
26535 &ix86_gnu_attribute_table
26538 /* Initialize the GCC target structure. */
26539 #undef TARGET_RETURN_IN_MEMORY
26540 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
26542 #undef TARGET_LEGITIMIZE_ADDRESS
26543 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
26545 #undef TARGET_ATTRIBUTE_TABLE
26546 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
26547 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
26548 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
26549 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26550 # undef TARGET_MERGE_DECL_ATTRIBUTES
26551 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
26552 #endif
26554 #undef TARGET_INVALID_CONVERSION
26555 #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
26557 #undef TARGET_INVALID_UNARY_OP
26558 #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
26560 #undef TARGET_INVALID_BINARY_OP
26561 #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
26563 #undef TARGET_COMP_TYPE_ATTRIBUTES
26564 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
26566 #undef TARGET_INIT_BUILTINS
26567 #define TARGET_INIT_BUILTINS ix86_init_builtins
26568 #undef TARGET_BUILTIN_DECL
26569 #define TARGET_BUILTIN_DECL ix86_builtin_decl
26570 #undef TARGET_EXPAND_BUILTIN
26571 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
26573 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
26574 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
26575 ix86_builtin_vectorized_function
26577 #undef TARGET_VECTORIZE_BUILTIN_GATHER
26578 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
26580 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
26581 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
26583 #undef TARGET_BUILTIN_RECIPROCAL
26584 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
26586 #undef TARGET_ASM_FUNCTION_EPILOGUE
26587 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
26589 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
26590 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
26591 ix86_print_patchable_function_entry
26593 #undef TARGET_ENCODE_SECTION_INFO
26594 #ifndef SUBTARGET_ENCODE_SECTION_INFO
26595 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
26596 #else
26597 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
26598 #endif
26600 #undef TARGET_ASM_OPEN_PAREN
26601 #define TARGET_ASM_OPEN_PAREN ""
26602 #undef TARGET_ASM_CLOSE_PAREN
26603 #define TARGET_ASM_CLOSE_PAREN ""
26605 #undef TARGET_ASM_BYTE_OP
26606 #define TARGET_ASM_BYTE_OP ASM_BYTE
26608 #undef TARGET_ASM_ALIGNED_HI_OP
26609 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
26610 #undef TARGET_ASM_ALIGNED_SI_OP
26611 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
26612 #ifdef ASM_QUAD
26613 #undef TARGET_ASM_ALIGNED_DI_OP
26614 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
26615 #endif
26617 #undef TARGET_PROFILE_BEFORE_PROLOGUE
26618 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
26620 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
26621 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
26623 #undef TARGET_ASM_UNALIGNED_HI_OP
26624 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
26625 #undef TARGET_ASM_UNALIGNED_SI_OP
26626 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
26627 #undef TARGET_ASM_UNALIGNED_DI_OP
26628 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
26630 #undef TARGET_PRINT_OPERAND
26631 #define TARGET_PRINT_OPERAND ix86_print_operand
26632 #undef TARGET_PRINT_OPERAND_ADDRESS
26633 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
26634 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
26635 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
26636 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
26637 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
26639 #undef TARGET_SCHED_INIT_GLOBAL
26640 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
26641 #undef TARGET_SCHED_ADJUST_COST
26642 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
26643 #undef TARGET_SCHED_ISSUE_RATE
26644 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
26645 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
26646 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
26647 ia32_multipass_dfa_lookahead
26648 #undef TARGET_SCHED_MACRO_FUSION_P
26649 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
26650 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
26651 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
26653 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
26654 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
26656 #undef TARGET_MEMMODEL_CHECK
26657 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
26659 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
26660 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
26662 #ifdef HAVE_AS_TLS
26663 #undef TARGET_HAVE_TLS
26664 #define TARGET_HAVE_TLS true
26665 #endif
26666 #undef TARGET_CANNOT_FORCE_CONST_MEM
26667 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
26668 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
26669 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
26671 #undef TARGET_DELEGITIMIZE_ADDRESS
26672 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
26674 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
26675 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
26677 #undef TARGET_MS_BITFIELD_LAYOUT_P
26678 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
26680 #if TARGET_MACHO
26681 #undef TARGET_BINDS_LOCAL_P
26682 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
26683 #else
26684 #undef TARGET_BINDS_LOCAL_P
26685 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
26686 #endif
26687 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26688 #undef TARGET_BINDS_LOCAL_P
26689 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
26690 #endif
26692 #undef TARGET_ASM_OUTPUT_MI_THUNK
26693 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
26694 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
26695 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
26697 #undef TARGET_ASM_FILE_START
26698 #define TARGET_ASM_FILE_START x86_file_start
26700 #undef TARGET_OPTION_OVERRIDE
26701 #define TARGET_OPTION_OVERRIDE ix86_option_override
26703 #undef TARGET_REGISTER_MOVE_COST
26704 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
26705 #undef TARGET_MEMORY_MOVE_COST
26706 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
26707 #undef TARGET_RTX_COSTS
26708 #define TARGET_RTX_COSTS ix86_rtx_costs
26709 #undef TARGET_INSN_COST
26710 #define TARGET_INSN_COST ix86_insn_cost
26711 #undef TARGET_ADDRESS_COST
26712 #define TARGET_ADDRESS_COST ix86_address_cost
26714 #undef TARGET_OVERLAP_OP_BY_PIECES_P
26715 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
26717 #undef TARGET_FLAGS_REGNUM
26718 #define TARGET_FLAGS_REGNUM FLAGS_REG
26719 #undef TARGET_FIXED_CONDITION_CODE_REGS
26720 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26721 #undef TARGET_CC_MODES_COMPATIBLE
26722 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26724 #undef TARGET_MACHINE_DEPENDENT_REORG
26725 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26727 #undef TARGET_BUILD_BUILTIN_VA_LIST
26728 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26730 #undef TARGET_FOLD_BUILTIN
26731 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
26733 #undef TARGET_GIMPLE_FOLD_BUILTIN
26734 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
26736 #undef TARGET_COMPARE_VERSION_PRIORITY
26737 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
26739 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
26740 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
26741 ix86_generate_version_dispatcher_body
26743 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
26744 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
26745 ix86_get_function_versions_dispatcher
26747 #undef TARGET_ENUM_VA_LIST_P
26748 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
26750 #undef TARGET_FN_ABI_VA_LIST
26751 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
26753 #undef TARGET_CANONICAL_VA_LIST_TYPE
26754 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
26756 #undef TARGET_EXPAND_BUILTIN_VA_START
26757 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26759 #undef TARGET_MD_ASM_ADJUST
26760 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
26762 #undef TARGET_C_EXCESS_PRECISION
26763 #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
26764 #undef TARGET_C_BITINT_TYPE_INFO
26765 #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
26766 #undef TARGET_C_MODE_FOR_FLOATING_TYPE
26767 #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
26768 #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
26769 #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
26770 #undef TARGET_PROMOTE_PROTOTYPES
26771 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26772 #undef TARGET_PUSH_ARGUMENT
26773 #define TARGET_PUSH_ARGUMENT ix86_push_argument
26774 #undef TARGET_SETUP_INCOMING_VARARGS
26775 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26776 #undef TARGET_MUST_PASS_IN_STACK
26777 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26778 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
26779 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
26780 #undef TARGET_FUNCTION_ARG_ADVANCE
26781 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
26782 #undef TARGET_FUNCTION_ARG
26783 #define TARGET_FUNCTION_ARG ix86_function_arg
26784 #undef TARGET_INIT_PIC_REG
26785 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
26786 #undef TARGET_USE_PSEUDO_PIC_REG
26787 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
26788 #undef TARGET_FUNCTION_ARG_BOUNDARY
26789 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
26790 #undef TARGET_PASS_BY_REFERENCE
26791 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26792 #undef TARGET_INTERNAL_ARG_POINTER
26793 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26794 #undef TARGET_UPDATE_STACK_BOUNDARY
26795 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
26796 #undef TARGET_GET_DRAP_RTX
26797 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
26798 #undef TARGET_STRICT_ARGUMENT_NAMING
26799 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26800 #undef TARGET_STATIC_CHAIN
26801 #define TARGET_STATIC_CHAIN ix86_static_chain
26802 #undef TARGET_TRAMPOLINE_INIT
26803 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
26804 #undef TARGET_RETURN_POPS_ARGS
26805 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
26807 #undef TARGET_WARN_FUNC_RETURN
26808 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
26810 #undef TARGET_LEGITIMATE_COMBINED_INSN
26811 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
26813 #undef TARGET_ASAN_SHADOW_OFFSET
26814 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
26816 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
26817 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26819 #undef TARGET_SCALAR_MODE_SUPPORTED_P
26820 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26822 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
26823 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
26824 ix86_libgcc_floating_mode_supported_p
26826 #undef TARGET_VECTOR_MODE_SUPPORTED_P
26827 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26829 #undef TARGET_C_MODE_FOR_SUFFIX
26830 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
26832 #ifdef HAVE_AS_TLS
26833 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
26834 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
26835 #endif
26837 #ifdef SUBTARGET_INSERT_ATTRIBUTES
26838 #undef TARGET_INSERT_ATTRIBUTES
26839 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
26840 #endif
26842 #undef TARGET_MANGLE_TYPE
26843 #define TARGET_MANGLE_TYPE ix86_mangle_type
26845 #undef TARGET_EMIT_SUPPORT_TINFOS
26846 #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
26848 #undef TARGET_STACK_PROTECT_GUARD
26849 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
26851 #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
26852 #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
26853 ix86_stack_protect_runtime_enabled_p
26855 #if !TARGET_MACHO
26856 #undef TARGET_STACK_PROTECT_FAIL
26857 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
26858 #endif
26860 #undef TARGET_FUNCTION_VALUE
26861 #define TARGET_FUNCTION_VALUE ix86_function_value
26863 #undef TARGET_FUNCTION_VALUE_REGNO_P
26864 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
26866 #undef TARGET_ZERO_CALL_USED_REGS
26867 #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
26869 #undef TARGET_PROMOTE_FUNCTION_MODE
26870 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
26872 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
26873 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
26875 #undef TARGET_MEMBER_TYPE_FORCES_BLK
26876 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
26878 #undef TARGET_INSTANTIATE_DECLS
26879 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
26881 #undef TARGET_SECONDARY_RELOAD
26882 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26883 #undef TARGET_SECONDARY_MEMORY_NEEDED
26884 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
26885 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
26886 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
26888 #undef TARGET_CLASS_MAX_NREGS
26889 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
26891 #undef TARGET_PREFERRED_RELOAD_CLASS
26892 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
26893 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
26894 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
26895 #undef TARGET_CLASS_LIKELY_SPILLED_P
26896 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
26898 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26899 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
26900 ix86_builtin_vectorization_cost
26901 #undef TARGET_VECTORIZE_VEC_PERM_CONST
26902 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
26903 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
26904 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
26905 ix86_preferred_simd_mode
26906 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
26907 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
26908 ix86_split_reduction
26909 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
26910 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
26911 ix86_autovectorize_vector_modes
26912 #undef TARGET_VECTORIZE_GET_MASK_MODE
26913 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
26914 #undef TARGET_VECTORIZE_CREATE_COSTS
26915 #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
26917 #undef TARGET_SET_CURRENT_FUNCTION
26918 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
26920 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
26921 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
26923 #undef TARGET_OPTION_SAVE
26924 #define TARGET_OPTION_SAVE ix86_function_specific_save
26926 #undef TARGET_OPTION_RESTORE
26927 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
26929 #undef TARGET_OPTION_POST_STREAM_IN
26930 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
26932 #undef TARGET_OPTION_PRINT
26933 #define TARGET_OPTION_PRINT ix86_function_specific_print
26935 #undef TARGET_OPTION_FUNCTION_VERSIONS
26936 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
26938 #undef TARGET_CAN_INLINE_P
26939 #define TARGET_CAN_INLINE_P ix86_can_inline_p
26941 #undef TARGET_LEGITIMATE_ADDRESS_P
26942 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
26944 #undef TARGET_REGISTER_PRIORITY
26945 #define TARGET_REGISTER_PRIORITY ix86_register_priority
26947 #undef TARGET_REGISTER_USAGE_LEVELING_P
26948 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
26950 #undef TARGET_LEGITIMATE_CONSTANT_P
26951 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
26953 #undef TARGET_COMPUTE_FRAME_LAYOUT
26954 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
26956 #undef TARGET_FRAME_POINTER_REQUIRED
26957 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
26959 #undef TARGET_CAN_ELIMINATE
26960 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
26962 #undef TARGET_EXTRA_LIVE_ON_ENTRY
26963 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
26965 #undef TARGET_ASM_CODE_END
26966 #define TARGET_ASM_CODE_END ix86_code_end
26968 #undef TARGET_CONDITIONAL_REGISTER_USAGE
26969 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
26971 #undef TARGET_CANONICALIZE_COMPARISON
26972 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
26974 #undef TARGET_LOOP_UNROLL_ADJUST
26975 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
26977 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
26978 #undef TARGET_SPILL_CLASS
26979 #define TARGET_SPILL_CLASS ix86_spill_class
26981 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
26982 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
26983 ix86_simd_clone_compute_vecsize_and_simdlen
26985 #undef TARGET_SIMD_CLONE_ADJUST
26986 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
26988 #undef TARGET_SIMD_CLONE_USABLE
26989 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
26991 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
26992 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
26994 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
26995 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
26996 ix86_float_exceptions_rounding_supported_p
26998 #undef TARGET_MODE_EMIT
26999 #define TARGET_MODE_EMIT ix86_emit_mode_set
27001 #undef TARGET_MODE_NEEDED
27002 #define TARGET_MODE_NEEDED ix86_mode_needed
27004 #undef TARGET_MODE_AFTER
27005 #define TARGET_MODE_AFTER ix86_mode_after
27007 #undef TARGET_MODE_ENTRY
27008 #define TARGET_MODE_ENTRY ix86_mode_entry
27010 #undef TARGET_MODE_EXIT
27011 #define TARGET_MODE_EXIT ix86_mode_exit
27013 #undef TARGET_MODE_PRIORITY
27014 #define TARGET_MODE_PRIORITY ix86_mode_priority
27016 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
27017 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
27019 #undef TARGET_OFFLOAD_OPTIONS
27020 #define TARGET_OFFLOAD_OPTIONS \
27021 ix86_offload_options
27023 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
27024 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
27026 #undef TARGET_OPTAB_SUPPORTED_P
27027 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
27029 #undef TARGET_HARD_REGNO_SCRATCH_OK
27030 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
27032 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
27033 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
27035 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
27036 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
27038 #undef TARGET_INIT_LIBFUNCS
27039 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
27041 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
27042 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
27044 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
27045 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
27047 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
27048 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
27050 #undef TARGET_HARD_REGNO_NREGS
27051 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
27052 #undef TARGET_HARD_REGNO_MODE_OK
27053 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
27055 #undef TARGET_MODES_TIEABLE_P
27056 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
27058 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
27059 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
27060 ix86_hard_regno_call_part_clobbered
27062 #undef TARGET_INSN_CALLEE_ABI
27063 #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
27065 #undef TARGET_CAN_CHANGE_MODE_CLASS
27066 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
27068 #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
27069 #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
27071 #undef TARGET_STATIC_RTX_ALIGNMENT
27072 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
27073 #undef TARGET_CONSTANT_ALIGNMENT
27074 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
27076 #undef TARGET_EMPTY_RECORD_P
27077 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
27079 #undef TARGET_WARN_PARAMETER_PASSING_ABI
27080 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
27082 #undef TARGET_GET_MULTILIB_ABI_NAME
27083 #define TARGET_GET_MULTILIB_ABI_NAME \
27084 ix86_get_multilib_abi_name
27086 #undef TARGET_IFUNC_REF_LOCAL_OK
27087 #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
27089 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
27090 # undef TARGET_ASM_RELOC_RW_MASK
27091 # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
27092 #endif
27094 #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
27095 #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
27097 #undef TARGET_MEMTAG_ADD_TAG
27098 #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
27100 #undef TARGET_MEMTAG_SET_TAG
27101 #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
27103 #undef TARGET_MEMTAG_EXTRACT_TAG
27104 #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
27106 #undef TARGET_MEMTAG_UNTAGGED_POINTER
27107 #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
27109 #undef TARGET_MEMTAG_TAG_SIZE
27110 #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
27112 #undef TARGET_GEN_CCMP_FIRST
27113 #define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
27115 #undef TARGET_GEN_CCMP_NEXT
27116 #define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
27118 #undef TARGET_HAVE_CCMP
27119 #define TARGET_HAVE_CCMP ix86_have_ccmp
27121 #undef TARGET_MODE_CAN_TRANSFER_BITS
27122 #define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits
27124 static bool
27125 ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
27127 #ifdef OPTION_GLIBC
27128 if (OPTION_GLIBC)
27129 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
27130 else
27131 return false;
27132 #else
27133 return false;
27134 #endif
27137 #undef TARGET_LIBC_HAS_FAST_FUNCTION
27138 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
27140 static unsigned
27141 ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
27142 bool boundary_p)
27144 #ifdef OPTION_GLIBC
27145 bool glibc_p = OPTION_GLIBC;
27146 #else
27147 bool glibc_p = false;
27148 #endif
27149 if (glibc_p)
27151 /* If __FAST_MATH__ is defined, glibc provides libmvec. */
27152 unsigned int libmvec_ret = 0;
27153 if (!flag_trapping_math
27154 && flag_unsafe_math_optimizations
27155 && flag_finite_math_only
27156 && !flag_signed_zeros
27157 && !flag_errno_math)
27158 switch (cfn)
27160 CASE_CFN_COS:
27161 CASE_CFN_COS_FN:
27162 CASE_CFN_SIN:
27163 CASE_CFN_SIN_FN:
27164 if (!boundary_p)
27166 /* With non-default rounding modes, libmvec provides
27167 complete garbage in results. E.g.
27168 _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
27169 returns 0.00333309174f rather than 1.40129846e-45f. */
27170 if (flag_rounding_math)
27171 return ~0U;
27172 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
27173 claims libmvec maximum error is 4ulps.
27174 My own random testing indicates 2ulps for SFmode and
27175 0.5ulps for DFmode, but let's go with the 4ulps. */
27176 libmvec_ret = 4;
27178 break;
27179 default:
27180 break;
27182 unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
27183 boundary_p);
27184 return MAX (ret, libmvec_ret);
27186 return default_libm_function_max_error (cfn, mode, boundary_p);
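/* Editorial note, not part of the upstream source: for example, a
   CASE_CFN_SIN query in SFmode under glibc with the fast-math style
   flags above reports MAX (ret, 4) ulps, RET being glibc's documented
   worst-case error for sinf; without glibc the generic
   default_libm_function_max_error result is returned unchanged.  */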
27189 #undef TARGET_LIBM_FUNCTION_MAX_ERROR
27190 #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
27192 #if TARGET_MACHO
27193 static bool
27194 ix86_cannot_copy_insn_p (rtx_insn *insn)
27196 if (TARGET_64BIT)
27197 return false;
27199 rtx set = single_set (insn);
27200 if (set)
27202 rtx src = SET_SRC (set);
27203 if (GET_CODE (src) == UNSPEC
27204 && XINT (src, 1) == UNSPEC_SET_GOT)
27205 return true;
27207 return false;
27210 #undef TARGET_CANNOT_COPY_INSN_P
27211 #define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p
27213 #endif
27215 #if CHECKING_P
27216 #undef TARGET_RUN_TARGET_SELFTESTS
27217 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
27218 #endif /* #if CHECKING_P */
27220 struct gcc_target targetm = TARGET_INITIALIZER;
27222 #include "gt-i386.h"