gcc/config/arm/arm.cc
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2025 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "gimple-iterator.h"
73 #include "selftest.h"
74 #include "tree-vectorizer.h"
75 #include "opts.h"
76 #include "aarch-common.h"
77 #include "aarch-common-protos.h"
78 #include "machmode.h"
80 /* This file should be included last. */
81 #include "target-def.h"
83 /* Forward definitions of types. */
84 typedef struct minipool_node Mnode;
85 typedef struct minipool_fixup Mfix;
87 void (*arm_lang_output_object_attributes_hook)(void);
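/* Presumably this holds the up-to-four immediate values chosen by
   optimal_immediate_sequence / optimal_immediate_sequence_1 below when a
   constant is synthesised as a short sequence of data-processing
   instructions.  */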
  89 struct four_ints
  90 {
  91   int i[4];
  92 };
94 /* Forward function declarations. */
95 static bool arm_const_not_ok_for_debug_p (rtx);
96 static int arm_needs_doubleword_align (machine_mode, const_tree);
97 static int arm_compute_static_chain_stack_bytes (void);
98 static arm_stack_offsets *arm_get_frame_offsets (void);
99 static void arm_compute_frame_layout (void);
100 static void arm_add_gc_roots (void);
101 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
102 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
103 static unsigned bit_count (unsigned long);
104 static unsigned bitmap_popcount (const sbitmap);
105 static int arm_address_register_rtx_p (rtx, int);
106 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
107 static bool is_called_in_ARM_mode (tree);
108 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
109 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
110 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
111 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
112 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
113 inline static int thumb1_index_register_rtx_p (rtx, int);
114 static int thumb_far_jump_used_p (void);
115 static bool thumb_force_lr_save (void);
116 static unsigned arm_size_return_regs (void);
117 static bool arm_assemble_integer (rtx, unsigned int, int);
118 static void arm_print_operand (FILE *, rtx, int);
119 static void arm_print_operand_address (FILE *, machine_mode, rtx);
120 static bool arm_print_operand_punct_valid_p (unsigned char code);
121 static arm_cc get_arm_condition_code (rtx);
122 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
123 static const char *output_multi_immediate (rtx *, const char *, const char *,
124 int, HOST_WIDE_INT);
125 static const char *shift_op (rtx, HOST_WIDE_INT *);
126 static struct machine_function *arm_init_machine_status (void);
127 static void thumb_exit (FILE *, int);
128 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
129 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_forward_ref (Mfix *);
131 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
132 static Mnode *add_minipool_backward_ref (Mfix *);
133 static void assign_minipool_offsets (Mfix *);
134 static void arm_print_value (FILE *, rtx);
135 static void dump_minipool (rtx_insn *);
136 static int arm_barrier_cost (rtx_insn *);
137 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
138 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
139 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
140 machine_mode, rtx);
141 static void arm_reorg (void);
142 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
143 static unsigned long arm_compute_save_reg0_reg12_mask (void);
144 static unsigned long arm_compute_save_core_reg_mask (void);
145 static unsigned long arm_isr_value (tree);
146 static unsigned long arm_compute_func_type (void);
147 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
149 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
151 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
152 #endif
153 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
154 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
155 static void arm_output_function_epilogue (FILE *);
156 static void arm_output_function_prologue (FILE *);
157 static int arm_comp_type_attributes (const_tree, const_tree);
158 static void arm_set_default_type_attributes (tree);
159 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence);
164 static int optimal_immediate_sequence_1 (enum rtx_code code,
165 unsigned HOST_WIDE_INT val,
166 struct four_ints *return_sequence,
167 int i);
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree, tree);
170 static machine_mode arm_promote_function_mode (const_tree,
171 machine_mode, int *,
172 const_tree, int);
173 static bool arm_return_in_memory (const_tree, const_tree);
174 static rtx arm_function_value (const_tree, const_tree, bool);
175 static rtx arm_libcall_value_1 (machine_mode);
176 static rtx arm_libcall_value (machine_mode, const_rtx);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
180 tree);
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode, rtx);
183 static bool arm_legitimate_constant_p (machine_mode, rtx);
184 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
185 static int arm_insn_cost (rtx_insn *, bool);
186 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
187 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
188 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
189 static void emit_constant_insn (rtx cond, rtx pattern);
190 static rtx_insn *emit_set_insn (rtx, rtx);
191 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
192 static rtx emit_multi_reg_push (unsigned long, unsigned long);
193 static void arm_emit_multi_reg_pop (unsigned long);
194 static int vfp_emit_fstmd (int, int);
195 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
196 static int arm_arg_partial_bytes (cumulative_args_t,
197 const function_arg_info &);
198 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
199 static void arm_function_arg_advance (cumulative_args_t,
200 const function_arg_info &);
201 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
202 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
203 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
204 const_tree);
205 static rtx aapcs_libcall_value (machine_mode);
206 static int aapcs_select_return_coproc (const_tree, const_tree);
208 #ifdef OBJECT_FORMAT_ELF
209 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
210 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
211 #endif
212 static void arm_encode_section_info (tree, rtx, int);
214 static void arm_file_end (void);
215 static void arm_file_start (void);
216 static void arm_insert_attributes (tree, tree *);
218 static void arm_setup_incoming_varargs (cumulative_args_t,
219 const function_arg_info &, int *, int);
220 static bool arm_pass_by_reference (cumulative_args_t,
221 const function_arg_info &);
222 static bool arm_promote_prototypes (const_tree);
223 static bool arm_default_short_enums (void);
224 static bool arm_align_anon_bitfield (void);
225 static bool arm_return_in_msb (const_tree);
226 static bool arm_must_pass_in_stack (const function_arg_info &);
227 static bool arm_return_in_memory (const_tree, const_tree);
228 #if ARM_UNWIND_INFO
229 static void arm_unwind_emit (FILE *, rtx_insn *);
230 static bool arm_output_ttype (rtx);
231 static void arm_asm_emit_except_personality (rtx);
232 #endif
233 static void arm_asm_init_sections (void);
234 static rtx arm_dwarf_register_span (rtx);
236 static tree arm_cxx_guard_type (void);
237 static bool arm_cxx_guard_mask_bit (void);
238 static tree arm_get_cookie_size (tree);
239 static bool arm_cookie_has_size (void);
240 static bool arm_cxx_cdtor_returns_this (void);
241 static bool arm_cxx_key_method_may_be_inline (void);
242 static void arm_cxx_determine_class_data_visibility (tree);
243 static bool arm_cxx_class_data_always_comdat (void);
244 static bool arm_cxx_use_aeabi_atexit (void);
245 static void arm_init_libfuncs (void);
246 static tree arm_build_builtin_va_list (void);
247 static void arm_expand_builtin_va_start (tree, rtx);
248 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
249 static void arm_option_override (void);
250 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
251 struct cl_target_option *);
252 static void arm_override_options_after_change (void);
253 static void arm_option_print (FILE *, int, struct cl_target_option *);
254 static void arm_set_current_function (tree);
255 static bool arm_can_inline_p (tree, tree);
256 static void arm_relayout_function (tree);
257 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
258 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
259 static bool arm_sched_can_speculate_insn (rtx_insn *);
260 static bool arm_macro_fusion_p (void);
261 static bool arm_cannot_copy_insn_p (rtx_insn *);
262 static int arm_issue_rate (void);
263 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
264 static int arm_first_cycle_multipass_dfa_lookahead (void);
265 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
266 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
267 static bool arm_output_addr_const_extra (FILE *, rtx);
268 static bool arm_allocate_stack_slots_for_args (void);
269 static bool arm_warn_func_return (tree);
270 static tree arm_promoted_type (const_tree t);
271 static bool arm_scalar_mode_supported_p (scalar_mode);
272 static bool arm_frame_pointer_required (void);
273 static bool arm_can_eliminate (const int, const int);
274 static void arm_asm_trampoline_template (FILE *);
275 static void arm_trampoline_init (rtx, tree, rtx);
276 static rtx arm_trampoline_adjust_address (rtx);
277 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
278 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static opt_machine_mode arm_array_mode (machine_mode, unsigned HOST_WIDE_INT);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (scalar_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
294 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
295 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
296 static int arm_default_branch_cost (bool, bool);
297 static int arm_cortex_a5_branch_cost (bool, bool);
298 static int arm_cortex_m_branch_cost (bool, bool);
299 static int arm_cortex_m7_branch_cost (bool, bool);
301 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
302 rtx, const vec_perm_indices &);
304 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
306 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
307 tree vectype,
308 int misalign ATTRIBUTE_UNUSED);
310 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
311 bool op0_preserve_value);
312 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
315 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
316 const_tree);
317 static section *arm_function_section (tree, enum node_frequency, bool, bool);
318 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
319 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
320 int reloc);
321 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
322 static opt_scalar_float_mode arm_floatn_mode (int, bool);
323 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
324 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
325 static bool arm_modes_tieable_p (machine_mode, machine_mode);
326 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
327 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
328 vec<machine_mode> &,
329 vec<const char *> &, vec<rtx> &,
330 vec<rtx> &, HARD_REG_SET &, location_t);
331 static const char *arm_identify_fpu_from_isa (sbitmap);
333 /* Table of machine attributes. */
 334 static const attribute_spec arm_gnu_attributes[] =
 335 {
336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337 affects_type_identity, handler, exclude } */
338 /* Function calls made to this symbol must be done indirectly, because
339 it may lie outside of the 26 bit addressing range of a normal function
340 call. */
341 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
342 /* Whereas these functions are always known to reside within the 26 bit
343 addressing range. */
344 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
345 /* Specify the procedure call conventions for a function. */
346 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
347 NULL },
348 /* Interrupt Service Routines have special prologue and epilogue requirements. */
349 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
352 NULL },
353 { "naked", 0, 0, true, false, false, false,
354 arm_handle_fndecl_attribute, NULL },
355 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
356 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
357 NULL },
358 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
359 NULL },
360 { "notshared", 0, 0, false, true, false, false,
361 arm_handle_notshared_attribute, NULL },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
365 arm_handle_cmse_nonsecure_entry, NULL },
366 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
367 arm_handle_cmse_nonsecure_call, NULL },
368 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
371 static const scoped_attribute_specs arm_gnu_attribute_table =
373 "gnu", { arm_gnu_attributes }
376 static const scoped_attribute_specs *const arm_attribute_table[] =
378 &arm_gnu_attribute_table
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
387 #undef TARGET_CHECK_BUILTIN_CALL
388 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
390 #undef TARGET_LEGITIMIZE_ADDRESS
391 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
393 #undef TARGET_ATTRIBUTE_TABLE
394 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
396 #undef TARGET_INSERT_ATTRIBUTES
397 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
399 #undef TARGET_ASM_FILE_START
400 #define TARGET_ASM_FILE_START arm_file_start
401 #undef TARGET_ASM_FILE_END
402 #define TARGET_ASM_FILE_END arm_file_end
404 #undef TARGET_ASM_ALIGNED_SI_OP
405 #define TARGET_ASM_ALIGNED_SI_OP NULL
406 #undef TARGET_ASM_INTEGER
407 #define TARGET_ASM_INTEGER arm_assemble_integer
409 #undef TARGET_PRINT_OPERAND
410 #define TARGET_PRINT_OPERAND arm_print_operand
411 #undef TARGET_PRINT_OPERAND_ADDRESS
412 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
413 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
414 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
416 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
417 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
419 #undef TARGET_ASM_FUNCTION_PROLOGUE
420 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
422 #undef TARGET_ASM_FUNCTION_EPILOGUE
423 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
425 #undef TARGET_CAN_INLINE_P
426 #define TARGET_CAN_INLINE_P arm_can_inline_p
428 #undef TARGET_RELAYOUT_FUNCTION
429 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
431 #undef TARGET_OPTION_OVERRIDE
432 #define TARGET_OPTION_OVERRIDE arm_option_override
434 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
435 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
476 #undef TARGET_ENCODE_SECTION_INFO
477 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
479 #undef TARGET_STRIP_NAME_ENCODING
480 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
482 #undef TARGET_ASM_INTERNAL_LABEL
483 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
485 #undef TARGET_FLOATN_MODE
486 #define TARGET_FLOATN_MODE arm_floatn_mode
488 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
489 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
491 #undef TARGET_FUNCTION_VALUE
492 #define TARGET_FUNCTION_VALUE arm_function_value
494 #undef TARGET_LIBCALL_VALUE
495 #define TARGET_LIBCALL_VALUE arm_libcall_value
497 #undef TARGET_FUNCTION_VALUE_REGNO_P
498 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
500 #undef TARGET_GIMPLE_FOLD_BUILTIN
501 #define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin
503 #undef TARGET_ASM_OUTPUT_MI_THUNK
504 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
505 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
506 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508 #undef TARGET_RTX_COSTS
509 #define TARGET_RTX_COSTS arm_rtx_costs
510 #undef TARGET_ADDRESS_COST
511 #define TARGET_ADDRESS_COST arm_address_cost
512 #undef TARGET_INSN_COST
513 #define TARGET_INSN_COST arm_insn_cost
515 #undef TARGET_SHIFT_TRUNCATION_MASK
516 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
517 #undef TARGET_VECTOR_MODE_SUPPORTED_P
518 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
519 #undef TARGET_ARRAY_MODE
520 #define TARGET_ARRAY_MODE arm_array_mode
521 #undef TARGET_ARRAY_MODE_SUPPORTED_P
522 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
523 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
524 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
525 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
526 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
527 arm_autovectorize_vector_modes
529 #undef TARGET_MACHINE_DEPENDENT_REORG
530 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
532 #undef TARGET_INIT_BUILTINS
533 #define TARGET_INIT_BUILTINS arm_init_builtins
534 #undef TARGET_EXPAND_BUILTIN
535 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
536 #undef TARGET_BUILTIN_DECL
537 #define TARGET_BUILTIN_DECL arm_builtin_decl
539 #undef TARGET_INIT_LIBFUNCS
540 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
542 #undef TARGET_PROMOTE_FUNCTION_MODE
543 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
544 #undef TARGET_PROMOTE_PROTOTYPES
545 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
546 #undef TARGET_PASS_BY_REFERENCE
547 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
548 #undef TARGET_ARG_PARTIAL_BYTES
549 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
550 #undef TARGET_FUNCTION_ARG
551 #define TARGET_FUNCTION_ARG arm_function_arg
552 #undef TARGET_FUNCTION_ARG_ADVANCE
553 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
554 #undef TARGET_FUNCTION_ARG_PADDING
555 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
556 #undef TARGET_FUNCTION_ARG_BOUNDARY
557 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
559 #undef TARGET_SETUP_INCOMING_VARARGS
560 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
562 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
563 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
565 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
566 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
567 #undef TARGET_TRAMPOLINE_INIT
568 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
569 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
570 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
572 #undef TARGET_WARN_FUNC_RETURN
573 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
575 #undef TARGET_DEFAULT_SHORT_ENUMS
576 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
578 #undef TARGET_ALIGN_ANON_BITFIELD
579 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
581 #undef TARGET_NARROW_VOLATILE_BITFIELD
582 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
584 #undef TARGET_CXX_GUARD_TYPE
585 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
587 #undef TARGET_CXX_GUARD_MASK_BIT
588 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
590 #undef TARGET_CXX_GET_COOKIE_SIZE
591 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
593 #undef TARGET_CXX_COOKIE_HAS_SIZE
594 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
596 #undef TARGET_CXX_CDTOR_RETURNS_THIS
597 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
599 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
600 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
602 #undef TARGET_CXX_USE_AEABI_ATEXIT
603 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
605 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
606 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
607 arm_cxx_determine_class_data_visibility
609 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
610 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
612 #undef TARGET_RETURN_IN_MSB
613 #define TARGET_RETURN_IN_MSB arm_return_in_msb
615 #undef TARGET_RETURN_IN_MEMORY
616 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
618 #undef TARGET_MUST_PASS_IN_STACK
619 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
621 #if ARM_UNWIND_INFO
622 #undef TARGET_ASM_UNWIND_EMIT
623 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
625 /* EABI unwinding tables use a different format for the typeinfo tables. */
626 #undef TARGET_ASM_TTYPE
627 #define TARGET_ASM_TTYPE arm_output_ttype
629 #undef TARGET_ARM_EABI_UNWINDER
630 #define TARGET_ARM_EABI_UNWINDER true
632 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
633 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
635 #endif /* ARM_UNWIND_INFO */
637 #undef TARGET_ASM_INIT_SECTIONS
638 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
640 #undef TARGET_DWARF_REGISTER_SPAN
641 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
643 #undef TARGET_CANNOT_COPY_INSN_P
644 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
646 #ifdef HAVE_AS_TLS
647 #undef TARGET_HAVE_TLS
648 #define TARGET_HAVE_TLS true
649 #endif
651 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
652 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
654 #undef TARGET_LOOP_UNROLL_ADJUST
655 #define TARGET_LOOP_UNROLL_ADJUST arm_loop_unroll_adjust
657 #undef TARGET_PREDICT_DOLOOP_P
658 #define TARGET_PREDICT_DOLOOP_P arm_predict_doloop_p
660 #undef TARGET_LEGITIMATE_CONSTANT_P
661 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
663 #undef TARGET_CANNOT_FORCE_CONST_MEM
664 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
666 #undef TARGET_MAX_ANCHOR_OFFSET
667 #define TARGET_MAX_ANCHOR_OFFSET 4095
669 /* The minimum is set such that the total size of the block
670 for a particular anchor is -4088 + 1 + 4095 bytes, which is
671 divisible by eight, ensuring natural spacing of anchors. */
672 #undef TARGET_MIN_ANCHOR_OFFSET
673 #define TARGET_MIN_ANCHOR_OFFSET -4088
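/* In other words, each anchor covers the byte range [-4088, +4095], i.e.
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.  */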
675 #undef TARGET_SCHED_ISSUE_RATE
676 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
678 #undef TARGET_SCHED_VARIABLE_ISSUE
679 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
683 arm_first_cycle_multipass_dfa_lookahead
685 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
686 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
687 arm_first_cycle_multipass_dfa_lookahead_guard
689 #undef TARGET_MANGLE_TYPE
690 #define TARGET_MANGLE_TYPE arm_mangle_type
692 #undef TARGET_INVALID_CONVERSION
693 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
695 #undef TARGET_INVALID_UNARY_OP
696 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
698 #undef TARGET_INVALID_BINARY_OP
699 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
701 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
702 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
704 #undef TARGET_BUILD_BUILTIN_VA_LIST
705 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
706 #undef TARGET_EXPAND_BUILTIN_VA_START
707 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
708 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
709 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
711 #ifdef HAVE_AS_TLS
712 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
713 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
714 #endif
716 #undef TARGET_LEGITIMATE_ADDRESS_P
717 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
719 #undef TARGET_PREFERRED_RELOAD_CLASS
720 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
722 #undef TARGET_PROMOTED_TYPE
723 #define TARGET_PROMOTED_TYPE arm_promoted_type
725 #undef TARGET_SCALAR_MODE_SUPPORTED_P
726 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
728 #undef TARGET_COMPUTE_FRAME_LAYOUT
729 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
731 #undef TARGET_FRAME_POINTER_REQUIRED
732 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
734 #undef TARGET_CAN_ELIMINATE
735 #define TARGET_CAN_ELIMINATE arm_can_eliminate
737 #undef TARGET_CONDITIONAL_REGISTER_USAGE
738 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
740 #undef TARGET_CLASS_LIKELY_SPILLED_P
741 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
743 #undef TARGET_VECTORIZE_BUILTINS
744 #define TARGET_VECTORIZE_BUILTINS
746 #undef TARGET_VECTOR_ALIGNMENT
747 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
749 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
750 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
751 arm_vector_alignment_reachable
753 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
754 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
755 arm_builtin_support_vector_misalignment
757 #undef TARGET_PREFERRED_RENAME_CLASS
758 #define TARGET_PREFERRED_RENAME_CLASS \
759 arm_preferred_rename_class
761 #undef TARGET_VECTORIZE_VEC_PERM_CONST
762 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
764 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
765 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
766 arm_builtin_vectorization_cost
768 #undef TARGET_CANONICALIZE_COMPARISON
769 #define TARGET_CANONICALIZE_COMPARISON \
770 arm_canonicalize_comparison
772 #undef TARGET_ASAN_SHADOW_OFFSET
773 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
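/* Presumably: a Thumb-2 IT block may cover at most four conditional
   instructions, but -mrestrict-it (matching ARMv8's deprecation of long IT
   blocks) limits it to a single instruction, hence the 1-vs-4 choice below.  */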
775 #undef MAX_INSN_PER_IT_BLOCK
776 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
778 #undef TARGET_CAN_USE_DOLOOP_P
779 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
781 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
782 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
784 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
785 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
787 #undef TARGET_SCHED_FUSION_PRIORITY
788 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
790 #undef TARGET_ASM_FUNCTION_SECTION
791 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
793 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
794 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
796 #undef TARGET_SECTION_TYPE_FLAGS
797 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
799 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
800 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
802 #undef TARGET_C_EXCESS_PRECISION
803 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
805 /* Although the architecture reserves bits 0 and 1, only the former is
806 used for ARM/Thumb ISA selection in v7 and earlier versions. */
807 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
808 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
810 #undef TARGET_FIXED_CONDITION_CODE_REGS
811 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
813 #undef TARGET_HARD_REGNO_NREGS
814 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
815 #undef TARGET_HARD_REGNO_MODE_OK
816 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
818 #undef TARGET_MODES_TIEABLE_P
819 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
821 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
822 #define TARGET_NOCE_CONVERSION_PROFITABLE_P arm_noce_conversion_profitable_p
824 #undef TARGET_CAN_CHANGE_MODE_CLASS
825 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
827 #undef TARGET_CONSTANT_ALIGNMENT
828 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
830 #undef TARGET_INVALID_WITHIN_DOLOOP
831 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
833 #undef TARGET_MD_ASM_ADJUST
834 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
836 #undef TARGET_STACK_PROTECT_GUARD
837 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
839 #undef TARGET_VECTORIZE_GET_MASK_MODE
840 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
842 /* Obstack for minipool constant handling. */
843 static struct obstack minipool_obstack;
844 static char * minipool_startobj;
846 /* The maximum number of insns skipped which
847 will be conditionalised if possible. */
848 static int max_insns_skipped = 5;
850 /* True if we are currently building a constant table. */
851 int making_const_table;
853 /* The processor for which instructions should be scheduled. */
854 enum processor_type arm_tune = TARGET_CPU_arm_none;
856 /* The current tuning set. */
857 const struct tune_params *current_tune;
859 /* Which floating point hardware to schedule for. */
860 int arm_fpu_attr;
862 /* Used for Thumb call_via trampolines. */
863 rtx thumb_call_via_label[14];
864 static int thumb_call_reg_needed;
866 /* The bits in this mask specify which instruction scheduling options should
867 be used. */
868 unsigned int tune_flags = 0;
870 /* The highest ARM architecture version supported by the
871 target. */
872 enum base_architecture arm_base_arch = BASE_ARCH_0;
874 /* Active target architecture and tuning. */
876 struct arm_build_target arm_active_target;
878 /* The following are used in the arm.md file as equivalents to bits
879 in the above two flag variables. */
881 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
882 int arm_arch4 = 0;
884 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
885 int arm_arch4t = 0;
887 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
888 int arm_arch5t = 0;
890 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
891 int arm_arch5te = 0;
893 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
894 int arm_arch6 = 0;
896 /* Nonzero if this chip supports the ARM 6K extensions. */
897 int arm_arch6k = 0;
899 /* Nonzero if this chip supports the ARM 6KZ extensions. */
900 int arm_arch6kz = 0;
902 /* Nonzero if instructions present in ARMv6-M can be used. */
903 int arm_arch6m = 0;
905 /* Nonzero if this chip supports the ARM 7 extensions. */
906 int arm_arch7 = 0;
908 /* Nonzero if this chip supports the Large Physical Address Extension. */
909 int arm_arch_lpae = 0;
911 /* Nonzero if instructions not present in the 'M' profile can be used. */
912 int arm_arch_notm = 0;
914 /* Nonzero if instructions present in ARMv7E-M can be used. */
915 int arm_arch7em = 0;
917 /* Nonzero if instructions present in ARMv8 can be used. */
918 int arm_arch8 = 0;
920 /* Nonzero if this chip supports the ARMv8.1 extensions. */
921 int arm_arch8_1 = 0;
923 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
924 int arm_arch8_2 = 0;
926 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
927 int arm_arch8_3 = 0;
929 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
930 int arm_arch8_4 = 0;
932 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
933 extensions. */
934 int arm_arch8m_main = 0;
936 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
937 extensions. */
938 int arm_arch8_1m_main = 0;
940 /* Nonzero if this chip supports the FP16 instructions extension of ARM
941 Architecture 8.2. */
942 int arm_fp16_inst = 0;
944 /* Nonzero if this chip can benefit from load scheduling. */
945 int arm_ld_sched = 0;
947 /* Nonzero if this chip is a StrongARM. */
948 int arm_tune_strongarm = 0;
950 /* Nonzero if this chip supports Intel Wireless MMX technology. */
951 int arm_arch_iwmmxt = 0;
953 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
954 int arm_arch_iwmmxt2 = 0;
956 /* Nonzero if this chip is an XScale. */
957 int arm_arch_xscale = 0;
 959 /* Nonzero if tuning for XScale.  */
960 int arm_tune_xscale = 0;
962 /* Nonzero if we want to tune for stores that access the write-buffer.
963 This typically means an ARM6 or ARM7 with MMU or MPU. */
964 int arm_tune_wbuf = 0;
966 /* Nonzero if tuning for Cortex-A9. */
967 int arm_tune_cortex_a9 = 0;
969 /* Nonzero if we should define __THUMB_INTERWORK__ in the
970 preprocessor.
971 XXX This is a bit of a hack, it's intended to help work around
972 problems in GLD which doesn't understand that armv5t code is
973 interworking clean. */
974 int arm_cpp_interwork = 0;
976 /* Nonzero if chip supports Thumb 1. */
977 int arm_arch_thumb1;
979 /* Nonzero if chip supports Thumb 2. */
980 int arm_arch_thumb2;
982 /* Nonzero if chip supports integer division instruction. */
983 int arm_arch_arm_hwdiv;
984 int arm_arch_thumb_hwdiv;
986 /* Nonzero if chip disallows volatile memory access in IT block. */
987 int arm_arch_no_volatile_ce;
989 /* Nonzero if we shouldn't use literal pools. */
990 bool arm_disable_literal_pool = false;
992 /* The register number to be used for the PIC offset register. */
993 unsigned arm_pic_register = INVALID_REGNUM;
995 enum arm_pcs arm_pcs_default;
997 /* For an explanation of these variables, see final_prescan_insn below. */
998 int arm_ccfsm_state;
999 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
1000 enum arm_cond_code arm_current_cc;
1002 rtx arm_target_insn;
1003 int arm_target_label;
1004 /* The number of conditionally executed insns, including the current insn. */
1005 int arm_condexec_count = 0;
1006 /* A bitmask specifying the patterns for the IT block.
1007 Zero means do not output an IT block before this insn. */
1008 int arm_condexec_mask = 0;
1009 /* The number of bits used in arm_condexec_mask. */
1010 int arm_condexec_masklen = 0;
1012 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1013 int arm_arch_crc = 0;
1015 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1016 int arm_arch_dotprod = 0;
1018 /* Nonzero if chip supports the ARMv8-M security extensions. */
1019 int arm_arch_cmse = 0;
1021 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1022 int arm_m_profile_small_mul = 0;
1024 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1025 int arm_arch_i8mm = 0;
1027 /* Nonzero if chip supports the BFloat16 instructions. */
1028 int arm_arch_bf16 = 0;
1030 /* Nonzero if chip supports the Custom Datapath Extension. */
1031 int arm_arch_cde = 0;
1032 int arm_arch_cde_coproc = 0;
1033 const int arm_arch_cde_coproc_bits[] = {
1034   0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1035 };
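/* Presumably arm_arch_cde_coproc_bits[N] is the bit recorded in
   arm_arch_cde_coproc when CDE is enabled for coprocessor N, e.g. +cdecp0
   would correspond to 0x1 and +cdecp7 to 0x80.  */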
1037 /* The condition codes of the ARM, and the inverse function. */
1038 static const char * const arm_condition_codes[] =
1039 {
1040   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1041   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1042 };
1044 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1045 int arm_regs_in_sequence[] =
1046 {
1047   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1048 };
1050 #define DEF_FP_SYSREG(reg) #reg,
1051 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1052   FP_SYSREGS
1053 };
1054 #undef DEF_FP_SYSREG
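/* This is an X-macro expansion: FP_SYSREGS (defined elsewhere, presumably in
   arm.h) expands to one DEF_FP_SYSREG (...) entry per floating-point system
   register, and the temporary definition above stringizes each name, so the
   array ends up holding one printable register-name string per entry.  */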
1056 #define ARM_LSL_NAME "lsl"
1057 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1059 #define THUMB2_WORK_REGS \
1060 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1061 | (1 << SP_REGNUM) \
1062 | (1 << PC_REGNUM) \
1063 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1064 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1065 : 0)))
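/* Read: start from the low registers r0-r7 (0xff), then drop the Thumb hard
   frame pointer, SP, PC and, when a PIC register is reserved, that register
   too; what remains is presumably the pool of registers Thumb-2 code may use
   as temporaries.  */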
1067 /* Initialization code. */
1069 struct cpu_tune
1070 {
1071   enum processor_type scheduler;
1072   unsigned int tune_flags;
1073   const struct tune_params *tune;
1074 };
1076 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1077 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1078   {              \
1079     num_slots,   \
1080     l1_size,     \
1081     l1_line_size \
1082   }
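/* These presumably populate the { num_slots, l1_cache_size,
   l1_cache_line_size } prefetch-tuning fields of tune_params, with -1
   meaning "no useful value / use the default".  */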
1084 /* arm generic vectorizer costs. */
1085 static const
1086 struct cpu_vec_costs arm_default_vec_cost = {
1087 1, /* scalar_stmt_cost. */
1088 1, /* scalar load_cost. */
1089 1, /* scalar_store_cost. */
1090 1, /* vec_stmt_cost. */
1091 1, /* vec_to_scalar_cost. */
1092 1, /* scalar_to_vec_cost. */
1093 1, /* vec_align_load_cost. */
1094 1, /* vec_unalign_load_cost. */
1095 1, /* vec_unalign_store_cost. */
1096 1, /* vec_store_cost. */
1097 3, /* cond_taken_branch_cost. */
1098   1,	/* cond_not_taken_branch_cost.  */
1099 };
1101 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1102 #include "aarch-cost-tables.h"
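/* A reminder for the tables that follow: COSTS_N_INSNS (N) (from rtl.h)
   expresses a cost of N instruction-equivalents, and these are "extra cost"
   tables, so an entry of 0 means no cost beyond the baseline instruction.  */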
1106 const struct cpu_cost_table cortexa9_extra_costs =
1108 /* ALU */
1110 0, /* arith. */
1111 0, /* logical. */
1112 0, /* shift. */
1113 COSTS_N_INSNS (1), /* shift_reg. */
1114 COSTS_N_INSNS (1), /* arith_shift. */
1115 COSTS_N_INSNS (2), /* arith_shift_reg. */
1116 0, /* log_shift. */
1117 COSTS_N_INSNS (1), /* log_shift_reg. */
1118 COSTS_N_INSNS (1), /* extend. */
1119 COSTS_N_INSNS (2), /* extend_arith. */
1120 COSTS_N_INSNS (1), /* bfi. */
1121 COSTS_N_INSNS (1), /* bfx. */
1122 0, /* clz. */
1123 0, /* rev. */
1124 0, /* non_exec. */
1125 true /* non_exec_costs_exec. */
1128 /* MULT SImode */
1130 COSTS_N_INSNS (3), /* simple. */
1131 COSTS_N_INSNS (3), /* flag_setting. */
1132 COSTS_N_INSNS (2), /* extend. */
1133 COSTS_N_INSNS (3), /* add. */
1134 COSTS_N_INSNS (2), /* extend_add. */
1135 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1137 /* MULT DImode */
1139 0, /* simple (N/A). */
1140 0, /* flag_setting (N/A). */
1141 COSTS_N_INSNS (4), /* extend. */
1142 0, /* add (N/A). */
1143 COSTS_N_INSNS (4), /* extend_add. */
1144 0 /* idiv (N/A). */
1147 /* LD/ST */
1149 COSTS_N_INSNS (2), /* load. */
1150 COSTS_N_INSNS (2), /* load_sign_extend. */
1151 COSTS_N_INSNS (2), /* ldrd. */
1152 COSTS_N_INSNS (2), /* ldm_1st. */
1153 1, /* ldm_regs_per_insn_1st. */
1154 2, /* ldm_regs_per_insn_subsequent. */
1155 COSTS_N_INSNS (5), /* loadf. */
1156 COSTS_N_INSNS (5), /* loadd. */
1157 COSTS_N_INSNS (1), /* load_unaligned. */
1158 COSTS_N_INSNS (2), /* store. */
1159 COSTS_N_INSNS (2), /* strd. */
1160 COSTS_N_INSNS (2), /* stm_1st. */
1161 1, /* stm_regs_per_insn_1st. */
1162 2, /* stm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (1), /* storef. */
1164 COSTS_N_INSNS (1), /* stored. */
1165 COSTS_N_INSNS (1), /* store_unaligned. */
1166 COSTS_N_INSNS (1), /* loadv. */
1167 COSTS_N_INSNS (1) /* storev. */
1170 /* FP SFmode */
1172 COSTS_N_INSNS (14), /* div. */
1173 COSTS_N_INSNS (4), /* mult. */
1174 COSTS_N_INSNS (7), /* mult_addsub. */
1175 COSTS_N_INSNS (30), /* fma. */
1176 COSTS_N_INSNS (3), /* addsub. */
1177 COSTS_N_INSNS (1), /* fpconst. */
1178 COSTS_N_INSNS (1), /* neg. */
1179 COSTS_N_INSNS (3), /* compare. */
1180 COSTS_N_INSNS (3), /* widen. */
1181 COSTS_N_INSNS (3), /* narrow. */
1182 COSTS_N_INSNS (3), /* toint. */
1183 COSTS_N_INSNS (3), /* fromint. */
1184 COSTS_N_INSNS (3) /* roundint. */
1186 /* FP DFmode */
1188 COSTS_N_INSNS (24), /* div. */
1189 COSTS_N_INSNS (5), /* mult. */
1190 COSTS_N_INSNS (8), /* mult_addsub. */
1191 COSTS_N_INSNS (30), /* fma. */
1192 COSTS_N_INSNS (3), /* addsub. */
1193 COSTS_N_INSNS (1), /* fpconst. */
1194 COSTS_N_INSNS (1), /* neg. */
1195 COSTS_N_INSNS (3), /* compare. */
1196 COSTS_N_INSNS (3), /* widen. */
1197 COSTS_N_INSNS (3), /* narrow. */
1198 COSTS_N_INSNS (3), /* toint. */
1199 COSTS_N_INSNS (3), /* fromint. */
1200 COSTS_N_INSNS (3) /* roundint. */
1203 /* Vector */
1205 COSTS_N_INSNS (1), /* alu. */
1206 COSTS_N_INSNS (4), /* mult. */
1207 COSTS_N_INSNS (1), /* movi. */
1208 COSTS_N_INSNS (2), /* dup. */
1209 COSTS_N_INSNS (2) /* extract. */
1213 const struct cpu_cost_table cortexa8_extra_costs =
1215 /* ALU */
1217 0, /* arith. */
1218 0, /* logical. */
1219 COSTS_N_INSNS (1), /* shift. */
1220 0, /* shift_reg. */
1221 COSTS_N_INSNS (1), /* arith_shift. */
1222 0, /* arith_shift_reg. */
1223 COSTS_N_INSNS (1), /* log_shift. */
1224 0, /* log_shift_reg. */
1225 0, /* extend. */
1226 0, /* extend_arith. */
1227 0, /* bfi. */
1228 0, /* bfx. */
1229 0, /* clz. */
1230 0, /* rev. */
1231 0, /* non_exec. */
1232 true /* non_exec_costs_exec. */
1235 /* MULT SImode */
1237 COSTS_N_INSNS (1), /* simple. */
1238 COSTS_N_INSNS (1), /* flag_setting. */
1239 COSTS_N_INSNS (1), /* extend. */
1240 COSTS_N_INSNS (1), /* add. */
1241 COSTS_N_INSNS (1), /* extend_add. */
1242 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1244 /* MULT DImode */
1246 0, /* simple (N/A). */
1247 0, /* flag_setting (N/A). */
1248 COSTS_N_INSNS (2), /* extend. */
1249 0, /* add (N/A). */
1250 COSTS_N_INSNS (2), /* extend_add. */
1251 0 /* idiv (N/A). */
1254 /* LD/ST */
1256 COSTS_N_INSNS (1), /* load. */
1257 COSTS_N_INSNS (1), /* load_sign_extend. */
1258 COSTS_N_INSNS (1), /* ldrd. */
1259 COSTS_N_INSNS (1), /* ldm_1st. */
1260 1, /* ldm_regs_per_insn_1st. */
1261 2, /* ldm_regs_per_insn_subsequent. */
1262 COSTS_N_INSNS (1), /* loadf. */
1263 COSTS_N_INSNS (1), /* loadd. */
1264 COSTS_N_INSNS (1), /* load_unaligned. */
1265 COSTS_N_INSNS (1), /* store. */
1266 COSTS_N_INSNS (1), /* strd. */
1267 COSTS_N_INSNS (1), /* stm_1st. */
1268 1, /* stm_regs_per_insn_1st. */
1269 2, /* stm_regs_per_insn_subsequent. */
1270 COSTS_N_INSNS (1), /* storef. */
1271 COSTS_N_INSNS (1), /* stored. */
1272 COSTS_N_INSNS (1), /* store_unaligned. */
1273 COSTS_N_INSNS (1), /* loadv. */
1274 COSTS_N_INSNS (1) /* storev. */
1277 /* FP SFmode */
1279 COSTS_N_INSNS (36), /* div. */
1280 COSTS_N_INSNS (11), /* mult. */
1281 COSTS_N_INSNS (20), /* mult_addsub. */
1282 COSTS_N_INSNS (30), /* fma. */
1283 COSTS_N_INSNS (9), /* addsub. */
1284 COSTS_N_INSNS (3), /* fpconst. */
1285 COSTS_N_INSNS (3), /* neg. */
1286 COSTS_N_INSNS (6), /* compare. */
1287 COSTS_N_INSNS (4), /* widen. */
1288 COSTS_N_INSNS (4), /* narrow. */
1289 COSTS_N_INSNS (8), /* toint. */
1290 COSTS_N_INSNS (8), /* fromint. */
1291 COSTS_N_INSNS (8) /* roundint. */
1293 /* FP DFmode */
1295 COSTS_N_INSNS (64), /* div. */
1296 COSTS_N_INSNS (16), /* mult. */
1297 COSTS_N_INSNS (25), /* mult_addsub. */
1298 COSTS_N_INSNS (30), /* fma. */
1299 COSTS_N_INSNS (9), /* addsub. */
1300 COSTS_N_INSNS (3), /* fpconst. */
1301 COSTS_N_INSNS (3), /* neg. */
1302 COSTS_N_INSNS (6), /* compare. */
1303 COSTS_N_INSNS (6), /* widen. */
1304 COSTS_N_INSNS (6), /* narrow. */
1305 COSTS_N_INSNS (8), /* toint. */
1306 COSTS_N_INSNS (8), /* fromint. */
1307 COSTS_N_INSNS (8) /* roundint. */
1310 /* Vector */
1312 COSTS_N_INSNS (1), /* alu. */
1313 COSTS_N_INSNS (4), /* mult. */
1314 COSTS_N_INSNS (1), /* movi. */
1315 COSTS_N_INSNS (2), /* dup. */
1316 COSTS_N_INSNS (2) /* extract. */
1320 const struct cpu_cost_table cortexa5_extra_costs =
1322 /* ALU */
1324 0, /* arith. */
1325 0, /* logical. */
1326 COSTS_N_INSNS (1), /* shift. */
1327 COSTS_N_INSNS (1), /* shift_reg. */
1328 COSTS_N_INSNS (1), /* arith_shift. */
1329 COSTS_N_INSNS (1), /* arith_shift_reg. */
1330 COSTS_N_INSNS (1), /* log_shift. */
1331 COSTS_N_INSNS (1), /* log_shift_reg. */
1332 COSTS_N_INSNS (1), /* extend. */
1333 COSTS_N_INSNS (1), /* extend_arith. */
1334 COSTS_N_INSNS (1), /* bfi. */
1335 COSTS_N_INSNS (1), /* bfx. */
1336 COSTS_N_INSNS (1), /* clz. */
1337 COSTS_N_INSNS (1), /* rev. */
1338 0, /* non_exec. */
1339 true /* non_exec_costs_exec. */
1343 /* MULT SImode */
1345 0, /* simple. */
1346 COSTS_N_INSNS (1), /* flag_setting. */
1347 COSTS_N_INSNS (1), /* extend. */
1348 COSTS_N_INSNS (1), /* add. */
1349 COSTS_N_INSNS (1), /* extend_add. */
1350 COSTS_N_INSNS (7) /* idiv. */
1352 /* MULT DImode */
1354 0, /* simple (N/A). */
1355 0, /* flag_setting (N/A). */
1356 COSTS_N_INSNS (1), /* extend. */
1357 0, /* add. */
1358 COSTS_N_INSNS (2), /* extend_add. */
1359 0 /* idiv (N/A). */
1362 /* LD/ST */
1364 COSTS_N_INSNS (1), /* load. */
1365 COSTS_N_INSNS (1), /* load_sign_extend. */
1366 COSTS_N_INSNS (6), /* ldrd. */
1367 COSTS_N_INSNS (1), /* ldm_1st. */
1368 1, /* ldm_regs_per_insn_1st. */
1369 2, /* ldm_regs_per_insn_subsequent. */
1370 COSTS_N_INSNS (2), /* loadf. */
1371 COSTS_N_INSNS (4), /* loadd. */
1372 COSTS_N_INSNS (1), /* load_unaligned. */
1373 COSTS_N_INSNS (1), /* store. */
1374 COSTS_N_INSNS (3), /* strd. */
1375 COSTS_N_INSNS (1), /* stm_1st. */
1376 1, /* stm_regs_per_insn_1st. */
1377 2, /* stm_regs_per_insn_subsequent. */
1378 COSTS_N_INSNS (2), /* storef. */
1379 COSTS_N_INSNS (2), /* stored. */
1380 COSTS_N_INSNS (1), /* store_unaligned. */
1381 COSTS_N_INSNS (1), /* loadv. */
1382 COSTS_N_INSNS (1) /* storev. */
1385 /* FP SFmode */
1387 COSTS_N_INSNS (15), /* div. */
1388 COSTS_N_INSNS (3), /* mult. */
1389 COSTS_N_INSNS (7), /* mult_addsub. */
1390 COSTS_N_INSNS (7), /* fma. */
1391 COSTS_N_INSNS (3), /* addsub. */
1392 COSTS_N_INSNS (3), /* fpconst. */
1393 COSTS_N_INSNS (3), /* neg. */
1394 COSTS_N_INSNS (3), /* compare. */
1395 COSTS_N_INSNS (3), /* widen. */
1396 COSTS_N_INSNS (3), /* narrow. */
1397 COSTS_N_INSNS (3), /* toint. */
1398 COSTS_N_INSNS (3), /* fromint. */
1399 COSTS_N_INSNS (3) /* roundint. */
1401 /* FP DFmode */
1403 COSTS_N_INSNS (30), /* div. */
1404 COSTS_N_INSNS (6), /* mult. */
1405 COSTS_N_INSNS (10), /* mult_addsub. */
1406 COSTS_N_INSNS (7), /* fma. */
1407 COSTS_N_INSNS (3), /* addsub. */
1408 COSTS_N_INSNS (3), /* fpconst. */
1409 COSTS_N_INSNS (3), /* neg. */
1410 COSTS_N_INSNS (3), /* compare. */
1411 COSTS_N_INSNS (3), /* widen. */
1412 COSTS_N_INSNS (3), /* narrow. */
1413 COSTS_N_INSNS (3), /* toint. */
1414 COSTS_N_INSNS (3), /* fromint. */
1415 COSTS_N_INSNS (3) /* roundint. */
1418 /* Vector */
1420 COSTS_N_INSNS (1), /* alu. */
1421 COSTS_N_INSNS (4), /* mult. */
1422 COSTS_N_INSNS (1), /* movi. */
1423 COSTS_N_INSNS (2), /* dup. */
1424 COSTS_N_INSNS (2) /* extract. */
1429 const struct cpu_cost_table cortexa7_extra_costs =
1431 /* ALU */
1433 0, /* arith. */
1434 0, /* logical. */
1435 COSTS_N_INSNS (1), /* shift. */
1436 COSTS_N_INSNS (1), /* shift_reg. */
1437 COSTS_N_INSNS (1), /* arith_shift. */
1438 COSTS_N_INSNS (1), /* arith_shift_reg. */
1439 COSTS_N_INSNS (1), /* log_shift. */
1440 COSTS_N_INSNS (1), /* log_shift_reg. */
1441 COSTS_N_INSNS (1), /* extend. */
1442 COSTS_N_INSNS (1), /* extend_arith. */
1443 COSTS_N_INSNS (1), /* bfi. */
1444 COSTS_N_INSNS (1), /* bfx. */
1445 COSTS_N_INSNS (1), /* clz. */
1446 COSTS_N_INSNS (1), /* rev. */
1447 0, /* non_exec. */
1448 true /* non_exec_costs_exec. */
1452 /* MULT SImode */
1454 0, /* simple. */
1455 COSTS_N_INSNS (1), /* flag_setting. */
1456 COSTS_N_INSNS (1), /* extend. */
1457 COSTS_N_INSNS (1), /* add. */
1458 COSTS_N_INSNS (1), /* extend_add. */
1459 COSTS_N_INSNS (7) /* idiv. */
1461 /* MULT DImode */
1463 0, /* simple (N/A). */
1464 0, /* flag_setting (N/A). */
1465 COSTS_N_INSNS (1), /* extend. */
1466 0, /* add. */
1467 COSTS_N_INSNS (2), /* extend_add. */
1468 0 /* idiv (N/A). */
1471 /* LD/ST */
1473 COSTS_N_INSNS (1), /* load. */
1474 COSTS_N_INSNS (1), /* load_sign_extend. */
1475 COSTS_N_INSNS (3), /* ldrd. */
1476 COSTS_N_INSNS (1), /* ldm_1st. */
1477 1, /* ldm_regs_per_insn_1st. */
1478 2, /* ldm_regs_per_insn_subsequent. */
1479 COSTS_N_INSNS (2), /* loadf. */
1480 COSTS_N_INSNS (2), /* loadd. */
1481 COSTS_N_INSNS (1), /* load_unaligned. */
1482 COSTS_N_INSNS (1), /* store. */
1483 COSTS_N_INSNS (3), /* strd. */
1484 COSTS_N_INSNS (1), /* stm_1st. */
1485 1, /* stm_regs_per_insn_1st. */
1486 2, /* stm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (2), /* storef. */
1488 COSTS_N_INSNS (2), /* stored. */
1489 COSTS_N_INSNS (1), /* store_unaligned. */
1490 COSTS_N_INSNS (1), /* loadv. */
1491 COSTS_N_INSNS (1) /* storev. */
1494 /* FP SFmode */
1496 COSTS_N_INSNS (15), /* div. */
1497 COSTS_N_INSNS (3), /* mult. */
1498 COSTS_N_INSNS (7), /* mult_addsub. */
1499 COSTS_N_INSNS (7), /* fma. */
1500 COSTS_N_INSNS (3), /* addsub. */
1501 COSTS_N_INSNS (3), /* fpconst. */
1502 COSTS_N_INSNS (3), /* neg. */
1503 COSTS_N_INSNS (3), /* compare. */
1504 COSTS_N_INSNS (3), /* widen. */
1505 COSTS_N_INSNS (3), /* narrow. */
1506 COSTS_N_INSNS (3), /* toint. */
1507 COSTS_N_INSNS (3), /* fromint. */
1508 COSTS_N_INSNS (3) /* roundint. */
1510 /* FP DFmode */
1512 COSTS_N_INSNS (30), /* div. */
1513 COSTS_N_INSNS (6), /* mult. */
1514 COSTS_N_INSNS (10), /* mult_addsub. */
1515 COSTS_N_INSNS (7), /* fma. */
1516 COSTS_N_INSNS (3), /* addsub. */
1517 COSTS_N_INSNS (3), /* fpconst. */
1518 COSTS_N_INSNS (3), /* neg. */
1519 COSTS_N_INSNS (3), /* compare. */
1520 COSTS_N_INSNS (3), /* widen. */
1521 COSTS_N_INSNS (3), /* narrow. */
1522 COSTS_N_INSNS (3), /* toint. */
1523 COSTS_N_INSNS (3), /* fromint. */
1524 COSTS_N_INSNS (3) /* roundint. */
1527 /* Vector */
1529 COSTS_N_INSNS (1), /* alu. */
1530 COSTS_N_INSNS (4), /* mult. */
1531 COSTS_N_INSNS (1), /* movi. */
1532 COSTS_N_INSNS (2), /* dup. */
1533 COSTS_N_INSNS (2) /* extract. */
1537 const struct cpu_cost_table cortexa12_extra_costs =
1539 /* ALU */
1541 0, /* arith. */
1542 0, /* logical. */
1543 0, /* shift. */
1544 COSTS_N_INSNS (1), /* shift_reg. */
1545 COSTS_N_INSNS (1), /* arith_shift. */
1546 COSTS_N_INSNS (1), /* arith_shift_reg. */
1547 COSTS_N_INSNS (1), /* log_shift. */
1548 COSTS_N_INSNS (1), /* log_shift_reg. */
1549 0, /* extend. */
1550 COSTS_N_INSNS (1), /* extend_arith. */
1551 0, /* bfi. */
1552 COSTS_N_INSNS (1), /* bfx. */
1553 COSTS_N_INSNS (1), /* clz. */
1554 COSTS_N_INSNS (1), /* rev. */
1555 0, /* non_exec. */
1556 true /* non_exec_costs_exec. */
1558 /* MULT SImode */
1561 COSTS_N_INSNS (2), /* simple. */
1562 COSTS_N_INSNS (3), /* flag_setting. */
1563 COSTS_N_INSNS (2), /* extend. */
1564 COSTS_N_INSNS (3), /* add. */
1565 COSTS_N_INSNS (2), /* extend_add. */
1566 COSTS_N_INSNS (18) /* idiv. */
1568 /* MULT DImode */
1570 0, /* simple (N/A). */
1571 0, /* flag_setting (N/A). */
1572 COSTS_N_INSNS (3), /* extend. */
1573 0, /* add (N/A). */
1574 COSTS_N_INSNS (3), /* extend_add. */
1575 0 /* idiv (N/A). */
1578 /* LD/ST */
1580 COSTS_N_INSNS (3), /* load. */
1581 COSTS_N_INSNS (3), /* load_sign_extend. */
1582 COSTS_N_INSNS (3), /* ldrd. */
1583 COSTS_N_INSNS (3), /* ldm_1st. */
1584 1, /* ldm_regs_per_insn_1st. */
1585 2, /* ldm_regs_per_insn_subsequent. */
1586 COSTS_N_INSNS (3), /* loadf. */
1587 COSTS_N_INSNS (3), /* loadd. */
1588 0, /* load_unaligned. */
1589 0, /* store. */
1590 0, /* strd. */
1591 0, /* stm_1st. */
1592 1, /* stm_regs_per_insn_1st. */
1593 2, /* stm_regs_per_insn_subsequent. */
1594 COSTS_N_INSNS (2), /* storef. */
1595 COSTS_N_INSNS (2), /* stored. */
1596 0, /* store_unaligned. */
1597 COSTS_N_INSNS (1), /* loadv. */
1598 COSTS_N_INSNS (1) /* storev. */
1601 /* FP SFmode */
1603 COSTS_N_INSNS (17), /* div. */
1604 COSTS_N_INSNS (4), /* mult. */
1605 COSTS_N_INSNS (8), /* mult_addsub. */
1606 COSTS_N_INSNS (8), /* fma. */
1607 COSTS_N_INSNS (4), /* addsub. */
1608 COSTS_N_INSNS (2), /* fpconst. */
1609 COSTS_N_INSNS (2), /* neg. */
1610 COSTS_N_INSNS (2), /* compare. */
1611 COSTS_N_INSNS (4), /* widen. */
1612 COSTS_N_INSNS (4), /* narrow. */
1613 COSTS_N_INSNS (4), /* toint. */
1614 COSTS_N_INSNS (4), /* fromint. */
1615 COSTS_N_INSNS (4) /* roundint. */
1617 /* FP DFmode */
1619 COSTS_N_INSNS (31), /* div. */
1620 COSTS_N_INSNS (4), /* mult. */
1621 COSTS_N_INSNS (8), /* mult_addsub. */
1622 COSTS_N_INSNS (8), /* fma. */
1623 COSTS_N_INSNS (4), /* addsub. */
1624 COSTS_N_INSNS (2), /* fpconst. */
1625 COSTS_N_INSNS (2), /* neg. */
1626 COSTS_N_INSNS (2), /* compare. */
1627 COSTS_N_INSNS (4), /* widen. */
1628 COSTS_N_INSNS (4), /* narrow. */
1629 COSTS_N_INSNS (4), /* toint. */
1630 COSTS_N_INSNS (4), /* fromint. */
1631 COSTS_N_INSNS (4) /* roundint. */
1634 /* Vector */
1636 COSTS_N_INSNS (1), /* alu. */
1637 COSTS_N_INSNS (4), /* mult. */
1638 COSTS_N_INSNS (1), /* movi. */
1639 COSTS_N_INSNS (2), /* dup. */
1640 COSTS_N_INSNS (2) /* extract. */
1644 const struct cpu_cost_table cortexa15_extra_costs =
1646 /* ALU */
1648 0, /* arith. */
1649 0, /* logical. */
1650 0, /* shift. */
1651 0, /* shift_reg. */
1652 COSTS_N_INSNS (1), /* arith_shift. */
1653 COSTS_N_INSNS (1), /* arith_shift_reg. */
1654 COSTS_N_INSNS (1), /* log_shift. */
1655 COSTS_N_INSNS (1), /* log_shift_reg. */
1656 0, /* extend. */
1657 COSTS_N_INSNS (1), /* extend_arith. */
1658 COSTS_N_INSNS (1), /* bfi. */
1659 0, /* bfx. */
1660 0, /* clz. */
1661 0, /* rev. */
1662 0, /* non_exec. */
1663 true /* non_exec_costs_exec. */
1665 /* MULT SImode */
1668 COSTS_N_INSNS (2), /* simple. */
1669 COSTS_N_INSNS (3), /* flag_setting. */
1670 COSTS_N_INSNS (2), /* extend. */
1671 COSTS_N_INSNS (2), /* add. */
1672 COSTS_N_INSNS (2), /* extend_add. */
1673 COSTS_N_INSNS (18) /* idiv. */
1675 /* MULT DImode */
1677 0, /* simple (N/A). */
1678 0, /* flag_setting (N/A). */
1679 COSTS_N_INSNS (3), /* extend. */
1680 0, /* add (N/A). */
1681 COSTS_N_INSNS (3), /* extend_add. */
1682 0 /* idiv (N/A). */
1685 /* LD/ST */
1687 COSTS_N_INSNS (3), /* load. */
1688 COSTS_N_INSNS (3), /* load_sign_extend. */
1689 COSTS_N_INSNS (3), /* ldrd. */
1690 COSTS_N_INSNS (4), /* ldm_1st. */
1691 1, /* ldm_regs_per_insn_1st. */
1692 2, /* ldm_regs_per_insn_subsequent. */
1693 COSTS_N_INSNS (4), /* loadf. */
1694 COSTS_N_INSNS (4), /* loadd. */
1695 0, /* load_unaligned. */
1696 0, /* store. */
1697 0, /* strd. */
1698 COSTS_N_INSNS (1), /* stm_1st. */
1699 1, /* stm_regs_per_insn_1st. */
1700 2, /* stm_regs_per_insn_subsequent. */
1701 0, /* storef. */
1702 0, /* stored. */
1703 0, /* store_unaligned. */
1704 COSTS_N_INSNS (1), /* loadv. */
1705 COSTS_N_INSNS (1) /* storev. */
1708 /* FP SFmode */
1710 COSTS_N_INSNS (17), /* div. */
1711 COSTS_N_INSNS (4), /* mult. */
1712 COSTS_N_INSNS (8), /* mult_addsub. */
1713 COSTS_N_INSNS (8), /* fma. */
1714 COSTS_N_INSNS (4), /* addsub. */
1715 COSTS_N_INSNS (2), /* fpconst. */
1716 COSTS_N_INSNS (2), /* neg. */
1717 COSTS_N_INSNS (5), /* compare. */
1718 COSTS_N_INSNS (4), /* widen. */
1719 COSTS_N_INSNS (4), /* narrow. */
1720 COSTS_N_INSNS (4), /* toint. */
1721 COSTS_N_INSNS (4), /* fromint. */
1722 COSTS_N_INSNS (4) /* roundint. */
1724 /* FP DFmode */
1726 COSTS_N_INSNS (31), /* div. */
1727 COSTS_N_INSNS (4), /* mult. */
1728 COSTS_N_INSNS (8), /* mult_addsub. */
1729 COSTS_N_INSNS (8), /* fma. */
1730 COSTS_N_INSNS (4), /* addsub. */
1731 COSTS_N_INSNS (2), /* fpconst. */
1732 COSTS_N_INSNS (2), /* neg. */
1733 COSTS_N_INSNS (2), /* compare. */
1734 COSTS_N_INSNS (4), /* widen. */
1735 COSTS_N_INSNS (4), /* narrow. */
1736 COSTS_N_INSNS (4), /* toint. */
1737 COSTS_N_INSNS (4), /* fromint. */
1738 COSTS_N_INSNS (4) /* roundint. */
1741 /* Vector */
1743 COSTS_N_INSNS (1), /* alu. */
1744 COSTS_N_INSNS (4), /* mult. */
1745 COSTS_N_INSNS (1), /* movi. */
1746 COSTS_N_INSNS (2), /* dup. */
1747 COSTS_N_INSNS (2) /* extract. */
1751 const struct cpu_cost_table v7m_extra_costs =
1753 /* ALU */
1755 0, /* arith. */
1756 0, /* logical. */
1757 0, /* shift. */
1758 0, /* shift_reg. */
1759 0, /* arith_shift. */
1760 COSTS_N_INSNS (1), /* arith_shift_reg. */
1761 0, /* log_shift. */
1762 COSTS_N_INSNS (1), /* log_shift_reg. */
1763 0, /* extend. */
1764 COSTS_N_INSNS (1), /* extend_arith. */
1765 0, /* bfi. */
1766 0, /* bfx. */
1767 0, /* clz. */
1768 0, /* rev. */
1769 COSTS_N_INSNS (1), /* non_exec. */
1770 false /* non_exec_costs_exec. */
1773 /* MULT SImode */
1775 COSTS_N_INSNS (1), /* simple. */
1776 COSTS_N_INSNS (1), /* flag_setting. */
1777 COSTS_N_INSNS (2), /* extend. */
1778 COSTS_N_INSNS (1), /* add. */
1779 COSTS_N_INSNS (3), /* extend_add. */
1780 COSTS_N_INSNS (8) /* idiv. */
1782 /* MULT DImode */
1784 0, /* simple (N/A). */
1785 0, /* flag_setting (N/A). */
1786 COSTS_N_INSNS (2), /* extend. */
1787 0, /* add (N/A). */
1788 COSTS_N_INSNS (3), /* extend_add. */
1789 0 /* idiv (N/A). */
1792 /* LD/ST */
1794 COSTS_N_INSNS (2), /* load. */
1795 0, /* load_sign_extend. */
1796 COSTS_N_INSNS (3), /* ldrd. */
1797 COSTS_N_INSNS (2), /* ldm_1st. */
1798 1, /* ldm_regs_per_insn_1st. */
1799 1, /* ldm_regs_per_insn_subsequent. */
1800 COSTS_N_INSNS (2), /* loadf. */
1801 COSTS_N_INSNS (3), /* loadd. */
1802 COSTS_N_INSNS (1), /* load_unaligned. */
1803 COSTS_N_INSNS (2), /* store. */
1804 COSTS_N_INSNS (3), /* strd. */
1805 COSTS_N_INSNS (2), /* stm_1st. */
1806 1, /* stm_regs_per_insn_1st. */
1807 1, /* stm_regs_per_insn_subsequent. */
1808 COSTS_N_INSNS (2), /* storef. */
1809 COSTS_N_INSNS (3), /* stored. */
1810 COSTS_N_INSNS (1), /* store_unaligned. */
1811 COSTS_N_INSNS (1), /* loadv. */
1812 COSTS_N_INSNS (1) /* storev. */
1815 /* FP SFmode */
1817 COSTS_N_INSNS (7), /* div. */
1818 COSTS_N_INSNS (2), /* mult. */
1819 COSTS_N_INSNS (5), /* mult_addsub. */
1820 COSTS_N_INSNS (3), /* fma. */
1821 COSTS_N_INSNS (1), /* addsub. */
1822 0, /* fpconst. */
1823 0, /* neg. */
1824 0, /* compare. */
1825 0, /* widen. */
1826 0, /* narrow. */
1827 0, /* toint. */
1828 0, /* fromint. */
1829 0 /* roundint. */
1831 /* FP DFmode */
1833 COSTS_N_INSNS (15), /* div. */
1834 COSTS_N_INSNS (5), /* mult. */
1835 COSTS_N_INSNS (7), /* mult_addsub. */
1836 COSTS_N_INSNS (7), /* fma. */
1837 COSTS_N_INSNS (3), /* addsub. */
1838 0, /* fpconst. */
1839 0, /* neg. */
1840 0, /* compare. */
1841 0, /* widen. */
1842 0, /* narrow. */
1843 0, /* toint. */
1844 0, /* fromint. */
1845 0 /* roundint. */
1848 /* Vector */
1850 COSTS_N_INSNS (1), /* alu. */
1851 COSTS_N_INSNS (4), /* mult. */
1852 COSTS_N_INSNS (1), /* movi. */
1853 COSTS_N_INSNS (2), /* dup. */
1854 COSTS_N_INSNS (2) /* extract. */
1858 const struct addr_mode_cost_table generic_addr_mode_costs =
1860 /* int. */
1862 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1863 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1864 COSTS_N_INSNS (0) /* AMO_WB. */
1866 /* float. */
1868 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1869 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1870 COSTS_N_INSNS (0) /* AMO_WB. */
1872 /* vector. */
1874 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1875 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1876 COSTS_N_INSNS (0) /* AMO_WB. */
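/* A note on the cost units used in the tables above and below: with the
   usual rtl.h definition, COSTS_N_INSNS (N) expands to N times the cost of
   one simple instruction, so an entry such as COSTS_N_INSNS (30) for a
   DFmode "div" models the divide as thirty times as expensive as a single
   ALU operation, while a plain 0 marks the operation as free relative to
   that baseline.  The check below is only a sketch of that assumption and
   relies on nothing beyond the linearity of the macro.  */
static_assert (COSTS_N_INSNS (3) == 3 * COSTS_N_INSNS (1),
	       "COSTS_N_INSNS is expected to scale linearly");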
1880 const struct tune_params arm_slowmul_tune =
1882 &generic_extra_costs, /* Insn extra costs. */
1883 &generic_addr_mode_costs, /* Addressing mode costs. */
1884 NULL, /* Sched adj cost. */
1885 arm_default_branch_cost,
1886 &arm_default_vec_cost,
1887 3, /* Constant limit. */
1888 5, /* Max cond insns. */
1889 8, /* Memset max inline. */
1890 1, /* Issue rate. */
1891 ARM_PREFETCH_NOT_BENEFICIAL,
1892 tune_params::PREF_CONST_POOL_TRUE,
1893 tune_params::PREF_LDRD_FALSE,
1894 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1895 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1896 tune_params::DISPARAGE_FLAGS_NEITHER,
1897 tune_params::PREF_NEON_STRINGOPS_FALSE,
1898 tune_params::FUSE_NOTHING,
1899 tune_params::SCHED_AUTOPREF_OFF
1902 const struct tune_params arm_fastmul_tune =
1904 &generic_extra_costs, /* Insn extra costs. */
1905 &generic_addr_mode_costs, /* Addressing mode costs. */
1906 NULL, /* Sched adj cost. */
1907 arm_default_branch_cost,
1908 &arm_default_vec_cost,
1909 1, /* Constant limit. */
1910 5, /* Max cond insns. */
1911 8, /* Memset max inline. */
1912 1, /* Issue rate. */
1913 ARM_PREFETCH_NOT_BENEFICIAL,
1914 tune_params::PREF_CONST_POOL_TRUE,
1915 tune_params::PREF_LDRD_FALSE,
1916 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1917 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1918 tune_params::DISPARAGE_FLAGS_NEITHER,
1919 tune_params::PREF_NEON_STRINGOPS_FALSE,
1920 tune_params::FUSE_NOTHING,
1921 tune_params::SCHED_AUTOPREF_OFF
1924 /* StrongARM has early execution of branches, so a sequence that is worth
1925 skipping is shorter. Set max_insns_skipped to a lower value. */
1927 const struct tune_params arm_strongarm_tune =
1929 &generic_extra_costs, /* Insn extra costs. */
1930 &generic_addr_mode_costs, /* Addressing mode costs. */
1931 NULL, /* Sched adj cost. */
1932 arm_default_branch_cost,
1933 &arm_default_vec_cost,
1934 1, /* Constant limit. */
1935 3, /* Max cond insns. */
1936 8, /* Memset max inline. */
1937 1, /* Issue rate. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 tune_params::PREF_CONST_POOL_TRUE,
1940 tune_params::PREF_LDRD_FALSE,
1941 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1942 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1943 tune_params::DISPARAGE_FLAGS_NEITHER,
1944 tune_params::PREF_NEON_STRINGOPS_FALSE,
1945 tune_params::FUSE_NOTHING,
1946 tune_params::SCHED_AUTOPREF_OFF
1949 const struct tune_params arm_xscale_tune =
1951 &generic_extra_costs, /* Insn extra costs. */
1952 &generic_addr_mode_costs, /* Addressing mode costs. */
1953 xscale_sched_adjust_cost,
1954 arm_default_branch_cost,
1955 &arm_default_vec_cost,
1956 2, /* Constant limit. */
1957 3, /* Max cond insns. */
1958 8, /* Memset max inline. */
1959 1, /* Issue rate. */
1960 ARM_PREFETCH_NOT_BENEFICIAL,
1961 tune_params::PREF_CONST_POOL_TRUE,
1962 tune_params::PREF_LDRD_FALSE,
1963 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1964 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1965 tune_params::DISPARAGE_FLAGS_NEITHER,
1966 tune_params::PREF_NEON_STRINGOPS_FALSE,
1967 tune_params::FUSE_NOTHING,
1968 tune_params::SCHED_AUTOPREF_OFF
1971 const struct tune_params arm_9e_tune =
1973 &generic_extra_costs, /* Insn extra costs. */
1974 &generic_addr_mode_costs, /* Addressing mode costs. */
1975 NULL, /* Sched adj cost. */
1976 arm_default_branch_cost,
1977 &arm_default_vec_cost,
1978 1, /* Constant limit. */
1979 5, /* Max cond insns. */
1980 8, /* Memset max inline. */
1981 1, /* Issue rate. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 tune_params::PREF_CONST_POOL_TRUE,
1984 tune_params::PREF_LDRD_FALSE,
1985 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1986 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1987 tune_params::DISPARAGE_FLAGS_NEITHER,
1988 tune_params::PREF_NEON_STRINGOPS_FALSE,
1989 tune_params::FUSE_NOTHING,
1990 tune_params::SCHED_AUTOPREF_OFF
1993 const struct tune_params arm_marvell_pj4_tune =
1995 &generic_extra_costs, /* Insn extra costs. */
1996 &generic_addr_mode_costs, /* Addressing mode costs. */
1997 NULL, /* Sched adj cost. */
1998 arm_default_branch_cost,
1999 &arm_default_vec_cost,
2000 1, /* Constant limit. */
2001 5, /* Max cond insns. */
2002 8, /* Memset max inline. */
2003 2, /* Issue rate. */
2004 ARM_PREFETCH_NOT_BENEFICIAL,
2005 tune_params::PREF_CONST_POOL_TRUE,
2006 tune_params::PREF_LDRD_FALSE,
2007 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2008 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2009 tune_params::DISPARAGE_FLAGS_NEITHER,
2010 tune_params::PREF_NEON_STRINGOPS_FALSE,
2011 tune_params::FUSE_NOTHING,
2012 tune_params::SCHED_AUTOPREF_OFF
2015 const struct tune_params arm_v6t2_tune =
2017 &generic_extra_costs, /* Insn extra costs. */
2018 &generic_addr_mode_costs, /* Addressing mode costs. */
2019 NULL, /* Sched adj cost. */
2020 arm_default_branch_cost,
2021 &arm_default_vec_cost,
2022 1, /* Constant limit. */
2023 5, /* Max cond insns. */
2024 8, /* Memset max inline. */
2025 1, /* Issue rate. */
2026 ARM_PREFETCH_NOT_BENEFICIAL,
2027 tune_params::PREF_CONST_POOL_FALSE,
2028 tune_params::PREF_LDRD_FALSE,
2029 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2030 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2031 tune_params::DISPARAGE_FLAGS_NEITHER,
2032 tune_params::PREF_NEON_STRINGOPS_FALSE,
2033 tune_params::FUSE_NOTHING,
2034 tune_params::SCHED_AUTOPREF_OFF
2038 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2039 const struct tune_params arm_cortex_tune =
2041 &generic_extra_costs,
2042 &generic_addr_mode_costs, /* Addressing mode costs. */
2043 NULL, /* Sched adj cost. */
2044 arm_default_branch_cost,
2045 &arm_default_vec_cost,
2046 1, /* Constant limit. */
2047 5, /* Max cond insns. */
2048 8, /* Memset max inline. */
2049 2, /* Issue rate. */
2050 ARM_PREFETCH_NOT_BENEFICIAL,
2051 tune_params::PREF_CONST_POOL_FALSE,
2052 tune_params::PREF_LDRD_FALSE,
2053 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2054 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2055 tune_params::DISPARAGE_FLAGS_NEITHER,
2056 tune_params::PREF_NEON_STRINGOPS_FALSE,
2057 tune_params::FUSE_NOTHING,
2058 tune_params::SCHED_AUTOPREF_OFF
2061 const struct tune_params arm_cortex_a8_tune =
2063 &cortexa8_extra_costs,
2064 &generic_addr_mode_costs, /* Addressing mode costs. */
2065 NULL, /* Sched adj cost. */
2066 arm_default_branch_cost,
2067 &arm_default_vec_cost,
2068 1, /* Constant limit. */
2069 5, /* Max cond insns. */
2070 8, /* Memset max inline. */
2071 2, /* Issue rate. */
2072 ARM_PREFETCH_NOT_BENEFICIAL,
2073 tune_params::PREF_CONST_POOL_FALSE,
2074 tune_params::PREF_LDRD_FALSE,
2075 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2076 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2077 tune_params::DISPARAGE_FLAGS_NEITHER,
2078 tune_params::PREF_NEON_STRINGOPS_TRUE,
2079 tune_params::FUSE_NOTHING,
2080 tune_params::SCHED_AUTOPREF_OFF
2083 const struct tune_params arm_cortex_a7_tune =
2085 &cortexa7_extra_costs,
2086 &generic_addr_mode_costs, /* Addressing mode costs. */
2087 NULL, /* Sched adj cost. */
2088 arm_default_branch_cost,
2089 &arm_default_vec_cost,
2090 1, /* Constant limit. */
2091 5, /* Max cond insns. */
2092 8, /* Memset max inline. */
2093 2, /* Issue rate. */
2094 ARM_PREFETCH_NOT_BENEFICIAL,
2095 tune_params::PREF_CONST_POOL_FALSE,
2096 tune_params::PREF_LDRD_FALSE,
2097 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2098 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2099 tune_params::DISPARAGE_FLAGS_NEITHER,
2100 tune_params::PREF_NEON_STRINGOPS_TRUE,
2101 tune_params::FUSE_NOTHING,
2102 tune_params::SCHED_AUTOPREF_OFF
2105 const struct tune_params arm_cortex_a15_tune =
2107 &cortexa15_extra_costs,
2108 &generic_addr_mode_costs, /* Addressing mode costs. */
2109 NULL, /* Sched adj cost. */
2110 arm_default_branch_cost,
2111 &arm_default_vec_cost,
2112 1, /* Constant limit. */
2113 2, /* Max cond insns. */
2114 8, /* Memset max inline. */
2115 3, /* Issue rate. */
2116 ARM_PREFETCH_NOT_BENEFICIAL,
2117 tune_params::PREF_CONST_POOL_FALSE,
2118 tune_params::PREF_LDRD_TRUE,
2119 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2120 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2121 tune_params::DISPARAGE_FLAGS_ALL,
2122 tune_params::PREF_NEON_STRINGOPS_TRUE,
2123 tune_params::FUSE_NOTHING,
2124 tune_params::SCHED_AUTOPREF_FULL
2127 const struct tune_params arm_cortex_a35_tune =
2129 &cortexa53_extra_costs,
2130 &generic_addr_mode_costs, /* Addressing mode costs. */
2131 NULL, /* Sched adj cost. */
2132 arm_default_branch_cost,
2133 &arm_default_vec_cost,
2134 1, /* Constant limit. */
2135 5, /* Max cond insns. */
2136 8, /* Memset max inline. */
2137 1, /* Issue rate. */
2138 ARM_PREFETCH_NOT_BENEFICIAL,
2139 tune_params::PREF_CONST_POOL_FALSE,
2140 tune_params::PREF_LDRD_FALSE,
2141 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2142 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2143 tune_params::DISPARAGE_FLAGS_NEITHER,
2144 tune_params::PREF_NEON_STRINGOPS_TRUE,
2145 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2146 tune_params::SCHED_AUTOPREF_OFF
2149 const struct tune_params arm_cortex_a53_tune =
2151 &cortexa53_extra_costs,
2152 &generic_addr_mode_costs, /* Addressing mode costs. */
2153 NULL, /* Sched adj cost. */
2154 arm_default_branch_cost,
2155 &arm_default_vec_cost,
2156 1, /* Constant limit. */
2157 5, /* Max cond insns. */
2158 8, /* Memset max inline. */
2159 2, /* Issue rate. */
2160 ARM_PREFETCH_NOT_BENEFICIAL,
2161 tune_params::PREF_CONST_POOL_FALSE,
2162 tune_params::PREF_LDRD_FALSE,
2163 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2164 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2165 tune_params::DISPARAGE_FLAGS_NEITHER,
2166 tune_params::PREF_NEON_STRINGOPS_TRUE,
2167 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2168 tune_params::SCHED_AUTOPREF_OFF
2171 const struct tune_params arm_cortex_a57_tune =
2173 &cortexa57_extra_costs,
2174   &generic_addr_mode_costs,		/* Addressing mode costs. */
2175 NULL, /* Sched adj cost. */
2176 arm_default_branch_cost,
2177 &arm_default_vec_cost,
2178 1, /* Constant limit. */
2179 2, /* Max cond insns. */
2180 8, /* Memset max inline. */
2181 3, /* Issue rate. */
2182 ARM_PREFETCH_NOT_BENEFICIAL,
2183 tune_params::PREF_CONST_POOL_FALSE,
2184 tune_params::PREF_LDRD_TRUE,
2185 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2186 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2187 tune_params::DISPARAGE_FLAGS_ALL,
2188 tune_params::PREF_NEON_STRINGOPS_TRUE,
2189 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2190 tune_params::SCHED_AUTOPREF_FULL
2193 const struct tune_params arm_exynosm1_tune =
2195 &exynosm1_extra_costs,
2196 &generic_addr_mode_costs, /* Addressing mode costs. */
2197 NULL, /* Sched adj cost. */
2198 arm_default_branch_cost,
2199 &arm_default_vec_cost,
2200 1, /* Constant limit. */
2201 2, /* Max cond insns. */
2202 8, /* Memset max inline. */
2203 3, /* Issue rate. */
2204 ARM_PREFETCH_NOT_BENEFICIAL,
2205 tune_params::PREF_CONST_POOL_FALSE,
2206 tune_params::PREF_LDRD_TRUE,
2207 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2208 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2209 tune_params::DISPARAGE_FLAGS_ALL,
2210 tune_params::PREF_NEON_STRINGOPS_TRUE,
2211 tune_params::FUSE_NOTHING,
2212 tune_params::SCHED_AUTOPREF_OFF
2215 const struct tune_params arm_xgene1_tune =
2217 &xgene1_extra_costs,
2218 &generic_addr_mode_costs, /* Addressing mode costs. */
2219 NULL, /* Sched adj cost. */
2220 arm_default_branch_cost,
2221 &arm_default_vec_cost,
2222 1, /* Constant limit. */
2223 2, /* Max cond insns. */
2224 32, /* Memset max inline. */
2225 4, /* Issue rate. */
2226 ARM_PREFETCH_NOT_BENEFICIAL,
2227 tune_params::PREF_CONST_POOL_FALSE,
2228 tune_params::PREF_LDRD_TRUE,
2229 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2230 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2231 tune_params::DISPARAGE_FLAGS_ALL,
2232 tune_params::PREF_NEON_STRINGOPS_FALSE,
2233 tune_params::FUSE_NOTHING,
2234 tune_params::SCHED_AUTOPREF_OFF
2237 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2238 less appealing. Set max_insns_skipped to a low value. */
2240 const struct tune_params arm_cortex_a5_tune =
2242 &cortexa5_extra_costs,
2243 &generic_addr_mode_costs, /* Addressing mode costs. */
2244 NULL, /* Sched adj cost. */
2245 arm_cortex_a5_branch_cost,
2246 &arm_default_vec_cost,
2247 1, /* Constant limit. */
2248 1, /* Max cond insns. */
2249 8, /* Memset max inline. */
2250 2, /* Issue rate. */
2251 ARM_PREFETCH_NOT_BENEFICIAL,
2252 tune_params::PREF_CONST_POOL_FALSE,
2253 tune_params::PREF_LDRD_FALSE,
2254 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2255 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2256 tune_params::DISPARAGE_FLAGS_NEITHER,
2257 tune_params::PREF_NEON_STRINGOPS_TRUE,
2258 tune_params::FUSE_NOTHING,
2259 tune_params::SCHED_AUTOPREF_OFF
2262 const struct tune_params arm_cortex_a9_tune =
2264 &cortexa9_extra_costs,
2265 &generic_addr_mode_costs, /* Addressing mode costs. */
2266 cortex_a9_sched_adjust_cost,
2267 arm_default_branch_cost,
2268 &arm_default_vec_cost,
2269 1, /* Constant limit. */
2270 5, /* Max cond insns. */
2271 8, /* Memset max inline. */
2272 2, /* Issue rate. */
2273 ARM_PREFETCH_BENEFICIAL(4,32,32),
2274 tune_params::PREF_CONST_POOL_FALSE,
2275 tune_params::PREF_LDRD_FALSE,
2276 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2277 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2278 tune_params::DISPARAGE_FLAGS_NEITHER,
2279 tune_params::PREF_NEON_STRINGOPS_FALSE,
2280 tune_params::FUSE_NOTHING,
2281 tune_params::SCHED_AUTOPREF_OFF
2284 const struct tune_params arm_cortex_a12_tune =
2286 &cortexa12_extra_costs,
2287 &generic_addr_mode_costs, /* Addressing mode costs. */
2288 NULL, /* Sched adj cost. */
2289 arm_default_branch_cost,
2290 &arm_default_vec_cost, /* Vectorizer costs. */
2291 1, /* Constant limit. */
2292 2, /* Max cond insns. */
2293 8, /* Memset max inline. */
2294 2, /* Issue rate. */
2295 ARM_PREFETCH_NOT_BENEFICIAL,
2296 tune_params::PREF_CONST_POOL_FALSE,
2297 tune_params::PREF_LDRD_TRUE,
2298 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2299 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2300 tune_params::DISPARAGE_FLAGS_ALL,
2301 tune_params::PREF_NEON_STRINGOPS_TRUE,
2302 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2303 tune_params::SCHED_AUTOPREF_OFF
2306 const struct tune_params arm_cortex_a73_tune =
2308 &cortexa57_extra_costs,
2309 &generic_addr_mode_costs, /* Addressing mode costs. */
2310 NULL, /* Sched adj cost. */
2311 arm_default_branch_cost,
2312 &arm_default_vec_cost, /* Vectorizer costs. */
2313 1, /* Constant limit. */
2314 2, /* Max cond insns. */
2315 8, /* Memset max inline. */
2316 2, /* Issue rate. */
2317 ARM_PREFETCH_NOT_BENEFICIAL,
2318 tune_params::PREF_CONST_POOL_FALSE,
2319 tune_params::PREF_LDRD_TRUE,
2320 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2321 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2322 tune_params::DISPARAGE_FLAGS_ALL,
2323 tune_params::PREF_NEON_STRINGOPS_TRUE,
2324 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2325 tune_params::SCHED_AUTOPREF_FULL
2328 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2329 cycle to execute each. An LDR from the constant pool also takes two cycles
2330 to execute, but mildly increases pipelining opportunity (consecutive
2331 loads/stores can be pipelined together, saving one cycle), and may also
2332 improve icache utilisation. Hence we prefer the constant pool for such
2333 processors. */
2335 const struct tune_params arm_v7m_tune =
2337 &v7m_extra_costs,
2338 &generic_addr_mode_costs, /* Addressing mode costs. */
2339 NULL, /* Sched adj cost. */
2340 arm_cortex_m_branch_cost,
2341 &arm_default_vec_cost,
2342 1, /* Constant limit. */
2343 2, /* Max cond insns. */
2344 8, /* Memset max inline. */
2345 1, /* Issue rate. */
2346 ARM_PREFETCH_NOT_BENEFICIAL,
2347 tune_params::PREF_CONST_POOL_TRUE,
2348 tune_params::PREF_LDRD_FALSE,
2349 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2350 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2351 tune_params::DISPARAGE_FLAGS_NEITHER,
2352 tune_params::PREF_NEON_STRINGOPS_FALSE,
2353 tune_params::FUSE_NOTHING,
2354 tune_params::SCHED_AUTOPREF_OFF
2357 /* Cortex-M7 tuning. */
2359 const struct tune_params arm_cortex_m7_tune =
2361 &v7m_extra_costs,
2362 &generic_addr_mode_costs, /* Addressing mode costs. */
2363 NULL, /* Sched adj cost. */
2364 arm_cortex_m7_branch_cost,
2365 &arm_default_vec_cost,
2366 0, /* Constant limit. */
2367 1, /* Max cond insns. */
2368 8, /* Memset max inline. */
2369 2, /* Issue rate. */
2370 ARM_PREFETCH_NOT_BENEFICIAL,
2371 tune_params::PREF_CONST_POOL_TRUE,
2372 tune_params::PREF_LDRD_FALSE,
2373 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2374 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2375 tune_params::DISPARAGE_FLAGS_NEITHER,
2376 tune_params::PREF_NEON_STRINGOPS_FALSE,
2377 tune_params::FUSE_NOTHING,
2378 tune_params::SCHED_AUTOPREF_OFF
2381 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2382 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2383 cortex-m23. */
2384 const struct tune_params arm_v6m_tune =
2386 &generic_extra_costs, /* Insn extra costs. */
2387 &generic_addr_mode_costs, /* Addressing mode costs. */
2388 NULL, /* Sched adj cost. */
2389 arm_default_branch_cost,
2390 &arm_default_vec_cost, /* Vectorizer costs. */
2391 1, /* Constant limit. */
2392 5, /* Max cond insns. */
2393 8, /* Memset max inline. */
2394 1, /* Issue rate. */
2395 ARM_PREFETCH_NOT_BENEFICIAL,
2396 tune_params::PREF_CONST_POOL_FALSE,
2397 tune_params::PREF_LDRD_FALSE,
2398 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2399 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2400 tune_params::DISPARAGE_FLAGS_NEITHER,
2401 tune_params::PREF_NEON_STRINGOPS_FALSE,
2402 tune_params::FUSE_NOTHING,
2403 tune_params::SCHED_AUTOPREF_OFF
2406 const struct tune_params arm_fa726te_tune =
2408 &generic_extra_costs, /* Insn extra costs. */
2409 &generic_addr_mode_costs, /* Addressing mode costs. */
2410 fa726te_sched_adjust_cost,
2411 arm_default_branch_cost,
2412 &arm_default_vec_cost,
2413 1, /* Constant limit. */
2414 5, /* Max cond insns. */
2415 8, /* Memset max inline. */
2416 2, /* Issue rate. */
2417 ARM_PREFETCH_NOT_BENEFICIAL,
2418 tune_params::PREF_CONST_POOL_TRUE,
2419 tune_params::PREF_LDRD_FALSE,
2420 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2421 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2422 tune_params::DISPARAGE_FLAGS_NEITHER,
2423 tune_params::PREF_NEON_STRINGOPS_FALSE,
2424 tune_params::FUSE_NOTHING,
2425 tune_params::SCHED_AUTOPREF_OFF
2428 /* Auto-generated CPU, FPU and architecture tables. */
2429 #include "arm-cpu-data.h"
2431 /* The name of the preprocessor macro to define for this architecture. PROFILE
2432 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2433 is thus chosen to be big enough to hold the longest architecture name. */
2435 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2437 /* Supported TLS relocations. */
2439 enum tls_reloc {
2440 TLS_GD32,
2441 TLS_GD32_FDPIC,
2442 TLS_LDM32,
2443 TLS_LDM32_FDPIC,
2444 TLS_LDO32,
2445 TLS_IE32,
2446 TLS_IE32_FDPIC,
2447 TLS_LE32,
2448 TLS_DESCSEQ /* GNU scheme */
2451 /* The maximum number of insns to be used when loading a constant. */
2452 inline static int
2453 arm_constant_limit (bool size_p)
2455 return size_p ? 1 : current_tune->constant_limit;
2458 /* Emit an insn that's a simple single-set. Both the operands must be known
2459 to be valid. */
2460 inline static rtx_insn *
2461 emit_set_insn (rtx x, rtx y)
2463 return emit_insn (gen_rtx_SET (x, y));
2466 /* Return the number of bits set in VALUE. */
2467 static unsigned
2468 bit_count (unsigned long value)
2470 unsigned long count = 0;
2472 while (value)
2474 count++;
2475 value &= value - 1; /* Clear the least-significant set bit. */
2478 return count;
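/* As a concrete example of the loop above (Kernighan's trick), for
   value = 0b1011000 the successive values are 0b1010000, 0b1000000 and
   0b0000000, so bit_count returns 3: each "value &= value - 1" clears
   exactly one set bit, and the loop therefore iterates once per set bit
   rather than once per bit position.  */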
2481 /* Return the number of bits set in BMAP. */
2482 static unsigned
2483 bitmap_popcount (const sbitmap bmap)
2485 unsigned int count = 0;
2486 unsigned int n = 0;
2487 sbitmap_iterator sbi;
2489 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2490 count++;
2491 return count;
2494 typedef struct
2496 machine_mode mode;
2497 const char *name;
2498 } arm_fixed_mode_set;
2500 /* A small helper for setting the libfuncs for fixed-point operations. */
2502 static void
2503 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2504 const char *funcname, const char *modename,
2505 int num_suffix)
2507 char buffer[50];
2509 if (num_suffix == 0)
2510 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2511 else
2512 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2514 set_optab_libfunc (optable, mode, buffer);
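/* For example, following the sprintf format above, a call such as

     arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3);

   registers the libcall name "__gnu_addqq3" for QQmode addition, while a
   num_suffix of 0 (not used by the callers below, which pass 2 or 3) would
   drop the trailing digit entirely.  */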
2517 static void
2518 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2519 machine_mode from, const char *funcname,
2520 const char *toname, const char *fromname)
2522 char buffer[50];
2523 const char *maybe_suffix_2 = "";
2525 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2526 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2527 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2528 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2529 maybe_suffix_2 = "2";
2531 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2532 maybe_suffix_2);
2534 set_conv_libfunc (optable, to, from, buffer);
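/* For example, a conversion from QQmode to HQmode keeps the signedness and
   stays within the fract modes, so the "2" suffix is selected and the
   registered name would be "__gnu_fractqqhq2"; a conversion from QQmode to
   SImode involves a non-fixed-point mode, so the plain "__gnu_fractqqsi" is
   used instead.  Names follow the sprintf format above: the function name,
   then the "from" mode, then the "to" mode.  */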
2537 static GTY(()) rtx speculation_barrier_libfunc;
2539 /* Record that we have no arithmetic or comparison libfuncs for
2540 machine mode MODE. */
2542 static void
2543 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2545 /* Arithmetic. */
2546 set_optab_libfunc (add_optab, mode, NULL);
2547 set_optab_libfunc (sdiv_optab, mode, NULL);
2548 set_optab_libfunc (smul_optab, mode, NULL);
2549 set_optab_libfunc (neg_optab, mode, NULL);
2550 set_optab_libfunc (sub_optab, mode, NULL);
2552 /* Comparisons. */
2553 set_optab_libfunc (eq_optab, mode, NULL);
2554 set_optab_libfunc (ne_optab, mode, NULL);
2555 set_optab_libfunc (lt_optab, mode, NULL);
2556 set_optab_libfunc (le_optab, mode, NULL);
2557 set_optab_libfunc (ge_optab, mode, NULL);
2558 set_optab_libfunc (gt_optab, mode, NULL);
2559 set_optab_libfunc (unord_optab, mode, NULL);
2562 /* Set up library functions unique to ARM. */
2563 static void
2564 arm_init_libfuncs (void)
2566 machine_mode mode_iter;
2568 /* For Linux, we have access to kernel support for atomic operations. */
2569 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2570 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2572 /* There are no special library functions unless we are using the
2573 ARM BPABI. */
2574 if (!TARGET_BPABI)
2575 return;
2577 /* The functions below are described in Section 4 of the "Run-Time
2578 ABI for the ARM architecture", Version 1.0. */
2580 /* Double-precision floating-point arithmetic. Table 2. */
2581 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2582 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2583 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2584 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2585 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2587 /* Double-precision comparisons. Table 3. */
2588 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2589 set_optab_libfunc (ne_optab, DFmode, NULL);
2590 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2591 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2592 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2593 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2594 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2596 /* Single-precision floating-point arithmetic. Table 4. */
2597 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2598 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2599 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2600 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2601 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2603 /* Single-precision comparisons. Table 5. */
2604 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2605 set_optab_libfunc (ne_optab, SFmode, NULL);
2606 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2607 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2608 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2609 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2610 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2612 /* Floating-point to integer conversions. Table 6. */
2613 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2614 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2615 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2616 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2617 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2618 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2619 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2620 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2622 /* Conversions between floating types. Table 7. */
2623 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2624 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2626 /* Integer to floating-point conversions. Table 8. */
2627 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2628 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2629 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2630 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2631 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2632 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2633 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2634 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2636 /* Long long. Table 9. */
2637 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2638 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2639 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2640 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2641 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2642 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2643 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2644 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2646 /* Integer (32/32->32) division. \S 4.3.1. */
2647 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2648 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2650 /* The divmod functions are designed so that they can be used for
2651 plain division, even though they return both the quotient and the
2652 remainder. The quotient is returned in the usual location (i.e.,
2653 r0 for SImode, {r0, r1} for DImode), just as would be expected
2654 for an ordinary division routine. Because the AAPCS calling
2655 conventions specify that all of { r0, r1, r2, r3 } are
2656 callee-saved registers, there is no need to tell the compiler
2657 explicitly that those registers are clobbered by these
2658 routines. */
2659 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2660 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2662 /* For SImode division the ABI provides div-without-mod routines,
2663 which are faster. */
2664 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2665 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2667 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2668 divmod libcalls instead. */
2669 set_optab_libfunc (smod_optab, DImode, NULL);
2670 set_optab_libfunc (umod_optab, DImode, NULL);
2671 set_optab_libfunc (smod_optab, SImode, NULL);
2672 set_optab_libfunc (umod_optab, SImode, NULL);
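/* To make the effect of the settings above concrete: on a core without a
   hardware divide instruction, a plain "a / b" in SImode expands to a call
   to "__aeabi_idiv", whereas "a % b" has no dedicated libcall and is
   instead implemented via "__aeabi_idivmod", which per the run-time ABI
   returns the quotient in r0 and the remainder in r1, so the compiler
   simply reads the remainder from r1.  */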
2674 /* Half-precision float operations. The compiler handles all operations
2675 with NULL libfuncs by converting to SFmode. */
2676 switch (arm_fp16_format)
2678 case ARM_FP16_FORMAT_IEEE:
2679 case ARM_FP16_FORMAT_ALTERNATIVE:
2681 /* Conversions. */
2682 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2683 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2684 ? "__gnu_f2h_ieee"
2685 : "__gnu_f2h_alternative"));
2686 set_conv_libfunc (sext_optab, SFmode, HFmode,
2687 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2688 ? "__gnu_h2f_ieee"
2689 : "__gnu_h2f_alternative"));
2691 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2692 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2693 ? "__gnu_d2h_ieee"
2694 : "__gnu_d2h_alternative"));
2696 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2697 break;
2699 default:
2700 break;
2703 /* For all possible libcalls in BFmode, record NULL. */
2704 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2706 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2707 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2708 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2709 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2711 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2713 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2715 const arm_fixed_mode_set fixed_arith_modes[] =
2717 { E_QQmode, "qq" },
2718 { E_UQQmode, "uqq" },
2719 { E_HQmode, "hq" },
2720 { E_UHQmode, "uhq" },
2721 { E_SQmode, "sq" },
2722 { E_USQmode, "usq" },
2723 { E_DQmode, "dq" },
2724 { E_UDQmode, "udq" },
2725 { E_TQmode, "tq" },
2726 { E_UTQmode, "utq" },
2727 { E_HAmode, "ha" },
2728 { E_UHAmode, "uha" },
2729 { E_SAmode, "sa" },
2730 { E_USAmode, "usa" },
2731 { E_DAmode, "da" },
2732 { E_UDAmode, "uda" },
2733 { E_TAmode, "ta" },
2734 { E_UTAmode, "uta" }
2736 const arm_fixed_mode_set fixed_conv_modes[] =
2738 { E_QQmode, "qq" },
2739 { E_UQQmode, "uqq" },
2740 { E_HQmode, "hq" },
2741 { E_UHQmode, "uhq" },
2742 { E_SQmode, "sq" },
2743 { E_USQmode, "usq" },
2744 { E_DQmode, "dq" },
2745 { E_UDQmode, "udq" },
2746 { E_TQmode, "tq" },
2747 { E_UTQmode, "utq" },
2748 { E_HAmode, "ha" },
2749 { E_UHAmode, "uha" },
2750 { E_SAmode, "sa" },
2751 { E_USAmode, "usa" },
2752 { E_DAmode, "da" },
2753 { E_UDAmode, "uda" },
2754 { E_TAmode, "ta" },
2755 { E_UTAmode, "uta" },
2756 { E_QImode, "qi" },
2757 { E_HImode, "hi" },
2758 { E_SImode, "si" },
2759 { E_DImode, "di" },
2760 { E_TImode, "ti" },
2761 { E_SFmode, "sf" },
2762 { E_DFmode, "df" }
2764 unsigned int i, j;
2766 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2768 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2769 "add", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2771 "ssadd", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2773 "usadd", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2775 "sub", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2777 "sssub", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2779 "ussub", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2781 "mul", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2783 "ssmul", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2785 "usmul", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2787 "div", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2789 "udiv", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2791 "ssdiv", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2793 "usdiv", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2795 "neg", fixed_arith_modes[i].name, 2);
2796 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2797 "ssneg", fixed_arith_modes[i].name, 2);
2798 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2799 "usneg", fixed_arith_modes[i].name, 2);
2800 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2801 "ashl", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2803 "ashr", fixed_arith_modes[i].name, 3);
2804 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2805 "lshr", fixed_arith_modes[i].name, 3);
2806 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2807 "ssashl", fixed_arith_modes[i].name, 3);
2808 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2809 "usashl", fixed_arith_modes[i].name, 3);
2810 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2811 "cmp", fixed_arith_modes[i].name, 2);
2814 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2815 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2817 if (i == j
2818 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2819 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2820 continue;
2822 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2823 fixed_conv_modes[j].mode, "fract",
2824 fixed_conv_modes[i].name,
2825 fixed_conv_modes[j].name);
2826 arm_set_fixed_conv_libfunc (satfract_optab,
2827 fixed_conv_modes[i].mode,
2828 fixed_conv_modes[j].mode, "satfract",
2829 fixed_conv_modes[i].name,
2830 fixed_conv_modes[j].name);
2831 arm_set_fixed_conv_libfunc (fractuns_optab,
2832 fixed_conv_modes[i].mode,
2833 fixed_conv_modes[j].mode, "fractuns",
2834 fixed_conv_modes[i].name,
2835 fixed_conv_modes[j].name);
2836 arm_set_fixed_conv_libfunc (satfractuns_optab,
2837 fixed_conv_modes[i].mode,
2838 fixed_conv_modes[j].mode, "satfractuns",
2839 fixed_conv_modes[i].name,
2840 fixed_conv_modes[j].name);
2844 if (TARGET_AAPCS_BASED)
2845 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2847 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2850 /* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
2851 static bool
2852 arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
2854 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
2855 tree fndecl = gimple_call_fndecl (stmt);
2856 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
2857 unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
2858 gimple *new_stmt = NULL;
2859 switch (code & ARM_BUILTIN_CLASS)
2861 case ARM_BUILTIN_GENERAL:
2862 break;
2863 case ARM_BUILTIN_MVE:
2864 new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
2866 if (!new_stmt)
2867 return false;
2869 gsi_replace (gsi, new_stmt, true);
2870 return true;
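/* The split above mirrors how ARM builtin function codes are packed: the
   low-order bits selected by ARM_BUILTIN_CLASS identify the builtin class
   (general, MVE, ...), and the remaining bits, obtained by shifting right
   by ARM_BUILTIN_SHIFT, give the per-class subcode handed to the
   class-specific folder.  Roughly, an MVE builtin's code behaves as if it
   were (subcode << ARM_BUILTIN_SHIFT) | ARM_BUILTIN_MVE.  */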
2873 /* On AAPCS systems, this is the "struct __va_list". */
2874 static GTY(()) tree va_list_type;
2876 /* Return the type to use as __builtin_va_list. */
2877 static tree
2878 arm_build_builtin_va_list (void)
2880 tree va_list_name;
2881 tree ap_field;
2883 if (!TARGET_AAPCS_BASED)
2884 return std_build_builtin_va_list ();
2886 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2887 defined as:
2889 struct __va_list
2891 void *__ap;
2894 The C Library ABI further reinforces this definition in \S
2895 4.1.
2897 We must follow this definition exactly. The structure tag
2898 name is visible in C++ mangled names, and thus forms a part
2899 of the ABI. The field name may be used by people who
2900 #include <stdarg.h>. */
2901 /* Create the type. */
2902 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2903 /* Give it the required name. */
2904 va_list_name = build_decl (BUILTINS_LOCATION,
2905 TYPE_DECL,
2906 get_identifier ("__va_list"),
2907 va_list_type);
2908 DECL_ARTIFICIAL (va_list_name) = 1;
2909 TREE_PUBLIC (va_list_name) = 1;
2910 TYPE_NAME (va_list_type) = va_list_name;
2911 TYPE_STUB_DECL (va_list_type) = va_list_name;
2912 /* Create the __ap field. */
2913 ap_field = build_decl (BUILTINS_LOCATION,
2914 FIELD_DECL,
2915 get_identifier ("__ap"),
2916 ptr_type_node);
2917 DECL_ARTIFICIAL (ap_field) = 1;
2918 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2919 TYPE_FIELDS (va_list_type) = ap_field;
2920 /* Compute its layout. */
2921 layout_type (va_list_type);
2923 return va_list_type;
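/* The tree built above corresponds, in C terms, to roughly

     struct __va_list { void *__ap; };
     typedef struct __va_list va_list;

   which is why the tag and field names are fixed: "__va_list" appears in
   C++ mangled names, and "__ap" may be referenced by code that peeks
   behind <stdarg.h>.  */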
2926 /* Return an expression of type "void *" pointing to the next
2927 available argument in a variable-argument list. VALIST is the
2928 user-level va_list object, of type __builtin_va_list. */
2929 static tree
2930 arm_extract_valist_ptr (tree valist)
2932 if (TREE_TYPE (valist) == error_mark_node)
2933 return error_mark_node;
2935 /* On an AAPCS target, the pointer is stored within "struct
2936 va_list". */
2937 if (TARGET_AAPCS_BASED)
2939 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2940 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2941 valist, ap_field, NULL_TREE);
2944 return valist;
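/* On AAPCS targets, the COMPONENT_REF built above is the tree equivalent
   of writing "valist.__ap" in C, i.e. it selects the single pointer field
   out of the struct sketched earlier.  */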
2947 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2948 static void
2949 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2951 valist = arm_extract_valist_ptr (valist);
2952 std_expand_builtin_va_start (valist, nextarg);
2955 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2956 static tree
2957 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2958 gimple_seq *post_p)
2960 valist = arm_extract_valist_ptr (valist);
2961 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2964 /* Check any incompatible options that the user has specified. */
2965 static void
2966 arm_option_check_internal (struct gcc_options *opts)
2968 int flags = opts->x_target_flags;
2970 /* iWMMXt and NEON are incompatible. */
2971 if (TARGET_IWMMXT
2972 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2973 error ("iWMMXt and NEON are incompatible");
2975 /* Make sure that the processor choice does not conflict with any of the
2976 other command line choices. */
2977 if (TARGET_ARM_P (flags)
2978 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2979 error ("target CPU does not support ARM mode");
2981 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2982 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2983 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2985 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2986 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2988 /* If this target is normally configured to use APCS frames, warn if they
2989 are turned off and debugging is turned on. */
2990 if (TARGET_ARM_P (flags)
2991 && write_symbols != NO_DEBUG
2992 && !TARGET_APCS_FRAME
2993 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2994 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2995 "debugging");
2997 /* iWMMXt unsupported under Thumb mode. */
2998 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2999 error ("iWMMXt unsupported under Thumb mode");
3001 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
3002 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
3004 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
3006 error ("RTP PIC is incompatible with Thumb");
3007 flag_pic = 0;
3010 if (target_pure_code || target_slow_flash_data)
3012 const char *flag = (target_pure_code ? "-mpure-code" :
3013 "-mslow-flash-data");
3014 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
3016 /* We only support -mslow-flash-data on M-profile targets with
3017 MOVT. */
3018 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
3019 error ("%qs only supports non-pic code on M-profile targets with the "
3020 "MOVT instruction", flag);
3022 /* We only support -mpure-code on M-profile targets. */
3023 if (target_pure_code && common_unsupported_modes)
3024 error ("%qs only supports non-pic code on M-profile targets", flag);
3026 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3027 -mword-relocations forbids relocation of MOVT/MOVW. */
3028 if (target_word_relocations)
3029 error ("%qs is incompatible with %<-mword-relocations%>", flag);
3033 /* Recompute the global settings depending on target attribute options. */
3035 static void
3036 arm_option_params_internal (void)
3038 /* If we are not using the default (ARM mode) section anchor offset
3039 ranges, then set the correct ranges now. */
3040 if (TARGET_THUMB1)
3042 /* Thumb-1 LDR instructions cannot have negative offsets.
3043 Permissible positive offset ranges are 5-bit (for byte loads),
3044 6-bit (for halfword loads), or 7-bit (for word loads).
3045 Empirical results suggest a 7-bit anchor range gives the best
3046 overall code size. */
3047 targetm.min_anchor_offset = 0;
3048 targetm.max_anchor_offset = 127;
3050 else if (TARGET_THUMB2)
3052 /* The minimum is set such that the total size of the block
3053 for a particular anchor is 248 + 1 + 4095 bytes, which is
3054 divisible by eight, ensuring natural spacing of anchors. */
3055 targetm.min_anchor_offset = -248;
3056 targetm.max_anchor_offset = 4095;
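/* Checking the arithmetic in the comment above: 248 + 1 + 4095 == 4344
   == 8 * 543, so the block covered by one anchor really is a multiple of
   eight bytes.  */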
3058 else
3060 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3061 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3064 /* Increase the number of conditional instructions with -Os. */
3065 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3067 /* For THUMB2, we limit the conditional sequence to one IT block. */
3068 if (TARGET_THUMB2)
3069 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3071 if (TARGET_THUMB1)
3072 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3073 else
3074 targetm.md_asm_adjust = arm_md_asm_adjust;
3077 /* True if -mflip-thumb should next add an attribute for the default
3078 mode, false if it should next add an attribute for the opposite mode. */
3079 static GTY(()) bool thumb_flipper;
3081 /* Options after initial target override. */
3082 static GTY(()) tree init_optimize;
3084 static void
3085 arm_override_options_after_change_1 (struct gcc_options *opts,
3086 struct gcc_options *opts_set)
3088 /* -falign-functions without argument: supply one. */
3089 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3090 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3091 && opts->x_optimize_size ? "2" : "4";
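/* For instance, compiling with -mthumb -Os and a bare -falign-functions
   ends up with str_align_functions set to "2" (2-byte alignment, matching
   the 16-bit Thumb instruction size), while any other combination defaults
   to "4".  */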
3094 /* Implement targetm.override_options_after_change. */
3096 static void
3097 arm_override_options_after_change (void)
3099 arm_override_options_after_change_1 (&global_options, &global_options_set);
3102 /* Implement TARGET_OPTION_RESTORE. */
3103 static void
3104 arm_option_restore (struct gcc_options */* opts */,
3105 struct gcc_options */* opts_set */,
3106 struct cl_target_option *ptr)
3108 arm_configure_build_target (&arm_active_target, ptr, false);
3109 arm_option_reconfigure_globals ();
3112 /* Reset options between modes that the user has specified. */
3113 static void
3114 arm_option_override_internal (struct gcc_options *opts,
3115 struct gcc_options *opts_set)
3117 arm_override_options_after_change_1 (opts, opts_set);
3119 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3121 /* The default is to enable interworking, so this warning message would
3122 be confusing to users who have just compiled with
3123 eg, -march=armv4. */
3124 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3125 opts->x_target_flags &= ~MASK_INTERWORK;
3128 if (TARGET_THUMB_P (opts->x_target_flags)
3129 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3131 warning (0, "target CPU does not support THUMB instructions");
3132 opts->x_target_flags &= ~MASK_THUMB;
3135 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3137 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3138 opts->x_target_flags &= ~MASK_APCS_FRAME;
3141 /* Callee super interworking implies thumb interworking. Adding
3142 this to the flags here simplifies the logic elsewhere. */
3143 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3144 opts->x_target_flags |= MASK_INTERWORK;
3146 /* We need to remember the initial values so that combinations of options like
3147 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3148 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3150 if (! opts_set->x_arm_restrict_it)
3151 opts->x_arm_restrict_it = arm_arch8;
3153 /* ARM execution state and M profile don't have [restrict] IT. */
3154 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3155 opts->x_arm_restrict_it = 0;
3157 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3158 if (!opts_set->x_arm_restrict_it
3159 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3160 opts->x_arm_restrict_it = 0;
3162 /* Enable -munaligned-access by default for
3163 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3164 i.e. Thumb2 and ARM state only.
3165 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3166 - ARMv8 architecture-based processors.
3168 Disable -munaligned-access by default for
3169 - all pre-ARMv6 architecture-based processors
3170 - ARMv6-M architecture-based processors
3171 - ARMv8-M Baseline processors. */
3173 if (! opts_set->x_unaligned_access)
3175 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3176 && arm_arch6 && (arm_arch_notm || arm_arch7));
3178 else if (opts->x_unaligned_access == 1
3179 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3181 warning (0, "target CPU does not support unaligned accesses");
3182 opts->x_unaligned_access = 0;
3185 /* Don't warn since it's on by default in -O2. */
3186 if (TARGET_THUMB1_P (opts->x_target_flags))
3187 opts->x_flag_schedule_insns = 0;
3188 else
3189 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3191 /* Disable shrink-wrap when optimizing function for size, since it tends to
3192 generate additional returns. */
3193 if (optimize_function_for_size_p (cfun)
3194 && TARGET_THUMB2_P (opts->x_target_flags))
3195 opts->x_flag_shrink_wrap = false;
3196 else
3197 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3199 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3200 - epilogue_insns - does not accurately model the corresponding insns
3201 emitted in the asm file. In particular, see the comment in thumb_exit
3202 'Find out how many of the (return) argument registers we can corrupt'.
3203 As a consequence, the epilogue may clobber registers without fipa-ra
3204 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3205 TODO: Accurately model clobbers for epilogue_insns and reenable
3206 fipa-ra. */
3207 if (TARGET_THUMB1_P (opts->x_target_flags))
3208 opts->x_flag_ipa_ra = 0;
3209 else
3210 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3212 /* Thumb2 inline assembly code should always use unified syntax.
3213 This will apply to ARM and Thumb1 eventually. */
3214 if (TARGET_THUMB2_P (opts->x_target_flags))
3215 opts->x_inline_asm_unified = true;
3217 if (arm_stack_protector_guard == SSP_GLOBAL
3218 && opts->x_arm_stack_protector_guard_offset_str)
3220 error ("incompatible options %<-mstack-protector-guard=global%> and "
3221 "%<-mstack-protector-guard-offset=%s%>",
3222 arm_stack_protector_guard_offset_str);
3225 if (opts->x_arm_stack_protector_guard_offset_str)
3227 char *end;
3228 const char *str = arm_stack_protector_guard_offset_str;
3229 errno = 0;
3230 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3231 if (!*str || *end || errno)
3232 error ("%qs is not a valid offset in %qs", str,
3233 "-mstack-protector-guard-offset=");
3234 arm_stack_protector_guard_offset = offs;
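/* Because strtol is called with base 0, the offset accepts decimal, octal
   and hexadecimal spellings alike: for example
   -mstack-protector-guard-offset=0x20 and -mstack-protector-guard-offset=32
   both set arm_stack_protector_guard_offset to 32, while an empty string or
   trailing junk is rejected by the *str/*end/errno checks above.  */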
3237 if (arm_current_function_pac_enabled_p ())
3239 if (!arm_arch8m_main)
3240 error ("This architecture does not support branch protection "
3241 "instructions");
3242 if (TARGET_TPCS_FRAME)
3243 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3246 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3247 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3248 #endif
3251 static sbitmap isa_all_fpubits_internal;
3252 static sbitmap isa_all_fpbits;
3253 static sbitmap isa_quirkbits;
3255 static void
3256 arm_handle_no_branch_protection (void)
3258 aarch_ra_sign_scope = AARCH_FUNCTION_NONE;
3259 aarch_enable_bti = 0;
3262 static void
3263 arm_handle_standard_branch_protection (void)
3265 aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF;
3266 aarch_enable_bti = 1;
3269 static void
3270 arm_handle_pac_ret_protection (void)
3272 aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF;
3275 static void
3276 arm_handle_pac_ret_leaf (void)
3278 aarch_ra_sign_scope = AARCH_FUNCTION_ALL;
3281 static void
3282 arm_handle_bti_protection (void)
3284 aarch_enable_bti = 1;
3287 static const struct aarch_branch_protect_type arm_pac_ret_subtypes[] = {
3288 { "leaf", false, arm_handle_pac_ret_leaf, NULL, 0 },
3289 { NULL, false, NULL, NULL, 0 }
3292 static const struct aarch_branch_protect_type arm_branch_protect_types[] = {
3293 { "none", true, arm_handle_no_branch_protection, NULL, 0 },
3294 { "standard", true, arm_handle_standard_branch_protection, NULL, 0 },
3295 { "pac-ret", false, arm_handle_pac_ret_protection, arm_pac_ret_subtypes,
3296 ARRAY_SIZE (arm_pac_ret_subtypes) },
3297 { "bti", false, arm_handle_bti_protection, NULL, 0 },
3298 { NULL, false, NULL, NULL, 0 }
3301 /* Configure a build target TARGET from the user-specified options OPTS and
3302 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3303 architecture have been specified, but the two are not identical. */
3304 void
3305 arm_configure_build_target (struct arm_build_target *target,
3306 struct cl_target_option *opts,
3307 bool warn_compatible)
3309 const cpu_option *arm_selected_tune = NULL;
3310 const arch_option *arm_selected_arch = NULL;
3311 const cpu_option *arm_selected_cpu = NULL;
3312 const arm_fpu_desc *arm_selected_fpu = NULL;
3313 const char *tune_opts = NULL;
3314 const char *arch_opts = NULL;
3315 const char *cpu_opts = NULL;
3317 bitmap_clear (target->isa);
3318 target->core_name = NULL;
3319 target->arch_name = NULL;
3321 if (opts->x_arm_arch_string)
3323 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3324 "-march",
3325 opts->x_arm_arch_string);
3326 arch_opts = strchr (opts->x_arm_arch_string, '+');
3329 if (opts->x_arm_cpu_string)
3331 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3332 opts->x_arm_cpu_string);
3333 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3334 arm_selected_tune = arm_selected_cpu;
3335 /* If taking the tuning from -mcpu, we don't need to rescan the
3336 options for tuning. */
3339 if (opts->x_arm_tune_string)
3341 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3342 opts->x_arm_tune_string);
3343 tune_opts = strchr (opts->x_arm_tune_string, '+');
3346 if (opts->x_arm_branch_protection_string)
3348 aarch_validate_mbranch_protection (arm_branch_protect_types,
3349 opts->x_arm_branch_protection_string,
3350 "-mbranch-protection=");
3353 if (arm_selected_arch)
3355 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3356 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3357 arch_opts);
3359 if (arm_selected_cpu)
3361 auto_sbitmap cpu_isa (isa_num_bits);
3362 auto_sbitmap isa_delta (isa_num_bits);
3364 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3365 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3366 cpu_opts);
3367 bitmap_xor (isa_delta, cpu_isa, target->isa);
3368 /* Ignore any bits that are quirk bits. */
3369 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3370 /* If the user (or the default configuration) has specified a
3371 specific FPU, then ignore any bits that depend on the FPU
3372 configuration. Do similarly if using the soft-float
3373 ABI. */
3374 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3375 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3376 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3378 if (!bitmap_empty_p (isa_delta))
3380 if (warn_compatible)
3381 warning (0, "switch %<-mcpu=%s%> conflicts "
3382 "with switch %<-march=%s%>",
3383 opts->x_arm_cpu_string,
3384 opts->x_arm_arch_string);
3386 /* -march wins for code generation.
3387 -mcpu wins for default tuning. */
3388 if (!arm_selected_tune)
3389 arm_selected_tune = arm_selected_cpu;
3391 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3392 target->arch_name = arm_selected_arch->common.name;
3394 else
3396 /* Architecture and CPU are essentially the same.
3397 Prefer the CPU setting. */
3398 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3399 target->core_name = arm_selected_cpu->common.name;
3400 /* Copy the CPU's capabilities, so that we inherit the
3401 appropriate extensions and quirks. */
3402 bitmap_copy (target->isa, cpu_isa);
3405 else
3407 /* Pick a CPU based on the architecture. */
3408 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3409 target->arch_name = arm_selected_arch->common.name;
3410 /* Note: target->core_name is left unset in this path. */
3413 else if (arm_selected_cpu)
3415 target->core_name = arm_selected_cpu->common.name;
3416 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3417 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3418 cpu_opts);
3419 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3421 /* If the user did not specify a processor or architecture, choose
3422 one for them. */
3423 else
3425 const cpu_option *sel;
3426 auto_sbitmap sought_isa (isa_num_bits);
3427 bitmap_clear (sought_isa);
3428 auto_sbitmap default_isa (isa_num_bits);
3430 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3431 TARGET_CPU_DEFAULT);
3432 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3433 gcc_assert (arm_selected_cpu->common.name);
3435 /* RWE: All of the selection logic below (to the end of this
3436 'if' clause) looks somewhat suspect. It appears to be mostly
3437 there to support forcing thumb support when the default CPU
3438 does not have thumb (somewhat dubious in terms of what the
3439 user might be expecting). I think it should be removed once
3440 support for the pre-thumb era cores is removed. */
3441 sel = arm_selected_cpu;
3442 arm_initialize_isa (default_isa, sel->common.isa_bits);
3443 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3444 cpu_opts);
3446 /* Now check to see if the user has specified any command line
3447 switches that require certain abilities from the cpu. */
3449 if (TARGET_INTERWORK || TARGET_THUMB)
3450 bitmap_set_bit (sought_isa, isa_bit_thumb);
3452 /* If there are such requirements and the default CPU does not
3453 satisfy them, we need to run over the complete list of
3454 cores looking for one that is satisfactory. */
3455 if (!bitmap_empty_p (sought_isa)
3456 && !bitmap_subset_p (sought_isa, default_isa))
3458 auto_sbitmap candidate_isa (isa_num_bits);
3459 /* We're only interested in a CPU with at least the
3460 capabilities of the default CPU and the required
3461 additional features. */
3462 bitmap_ior (default_isa, default_isa, sought_isa);
3464 /* Try to locate a CPU type that supports all of the abilities
3465 of the default CPU, plus the extra abilities requested by
3466 the user. */
3467 for (sel = all_cores; sel->common.name != NULL; sel++)
3469 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3470 /* An exact match? */
3471 if (bitmap_equal_p (default_isa, candidate_isa))
3472 break;
3475 if (sel->common.name == NULL)
3477 unsigned current_bit_count = isa_num_bits;
3478 const cpu_option *best_fit = NULL;
3480 /* Ideally we would like to issue an error message here
3481 saying that it was not possible to find a CPU compatible
3482 with the default CPU, but which also supports the command
3483 line options specified by the programmer, and so they
3484 ought to use the -mcpu=<name> command line option to
3485 override the default CPU type.
3487 If we cannot find a CPU that has exactly the
3488 characteristics of the default CPU and the given
3489 command line options we scan the array again looking
3490 for a best match. The best match must have at least
3491 the capabilities of the perfect match. */
3492 for (sel = all_cores; sel->common.name != NULL; sel++)
3494 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3496 if (bitmap_subset_p (default_isa, candidate_isa))
3498 unsigned count;
3500 bitmap_and_compl (candidate_isa, candidate_isa,
3501 default_isa);
3502 count = bitmap_popcount (candidate_isa);
3504 if (count < current_bit_count)
3506 best_fit = sel;
3507 current_bit_count = count;
3511 gcc_assert (best_fit);
3512 sel = best_fit;
3515 arm_selected_cpu = sel;
3518 /* Now we know the CPU, we can finally initialize the target
3519 structure. */
3520 target->core_name = arm_selected_cpu->common.name;
3521 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3522 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3523 cpu_opts);
3524 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3527 gcc_assert (arm_selected_cpu);
3528 gcc_assert (arm_selected_arch);
3530 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3532 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3533 auto_sbitmap fpu_bits (isa_num_bits);
3535 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3536 /* This should clear out ALL bits relating to the FPU/simd
3537 extensions, to avoid potentially invalid combinations later on
3538 that we can't match. At present we only clear out those bits
3539 that can be set by -mfpu. This should be fixed in GCC-12. */
3540 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3541 bitmap_ior (target->isa, target->isa, fpu_bits);
3544 /* If we have the soft-float ABI, clear any feature bits relating to use of
3545 floating-point operations. They'll just confuse things later on. */
3546 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3547 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3549 /* There may be implied bits which we still need to enable. These are
3550 non-named features which are needed to complete other sets of features,
3551 but cannot be enabled from arm-cpus.in due to being shared between
3552 multiple fgroups. Each entry in all_implied_fbits is of the form
3553 ante -> cons, meaning that if the feature "ante" is enabled, we should
3554 implicitly enable "cons". */
3555 const struct fbit_implication *impl = all_implied_fbits;
3556 while (impl->ante)
3558 if (bitmap_bit_p (target->isa, impl->ante))
3559 bitmap_set_bit (target->isa, impl->cons);
3560 impl++;
3563 if (!arm_selected_tune)
3564 arm_selected_tune = arm_selected_cpu;
3565 else /* Validate the features passed to -mtune. */
3566 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3568 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3570 /* Finish initializing the target structure. */
3571 if (!target->arch_name)
3572 target->arch_name = arm_selected_arch->common.name;
3573 target->arch_pp_name = arm_selected_arch->arch;
3574 target->base_arch = arm_selected_arch->base_arch;
3575 target->profile = arm_selected_arch->profile;
3577 target->tune_flags = tune_data->tune_flags;
3578 target->tune = tune_data->tune;
3579 target->tune_core = tune_data->scheduler;
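/* For instance (a rough summary, not an exhaustive description of the logic
   above): with only -mcpu=<name> given, the architecture is derived from the
   CPU entry and target->core_name is set; with only -march=<name>, a default
   tuning CPU is picked from the architecture's tune_id and target->core_name
   is left unset; if both are given and their (non-quirk, non-FPU) ISA bits
   differ, a warning is issued and -march wins for code generation while
   -mcpu provides the default tuning.  */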
3582 /* Fix up any incompatible options that the user has specified. */
3583 static void
3584 arm_option_override (void)
3586 static const enum isa_feature fpu_bitlist_internal[]
3587 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3588 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3589 static const enum isa_feature fp_bitlist[]
3590 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3591 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3592 cl_target_option opts;
3594 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3595 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3597 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3598 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3599 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3600 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3602 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3604 if (!OPTION_SET_P (arm_fpu_index))
3606 bool ok;
3607 int fpu_index;
3609 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3610 CL_TARGET);
3611 gcc_assert (ok);
3612 arm_fpu_index = (enum fpu_type) fpu_index;
3615 cl_target_option_save (&opts, &global_options, &global_options_set);
3616 arm_configure_build_target (&arm_active_target, &opts, true);
3618 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3619 SUBTARGET_OVERRIDE_OPTIONS;
3620 #endif
3622 /* Initialize boolean versions of the architectural flags, for use
3623 in the arm.md file and for enabling feature flags. */
3624 arm_option_reconfigure_globals ();
3626 arm_tune = arm_active_target.tune_core;
3627 tune_flags = arm_active_target.tune_flags;
3628 current_tune = arm_active_target.tune;
3630 /* TBD: DWARF info for the APCS frame is not handled yet. */
3631 if (TARGET_APCS_FRAME)
3632 flag_shrink_wrap = false;
3634 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3636 warning (0, "%<-mapcs-stack-check%> incompatible with "
3637 "%<-mno-apcs-frame%>");
3638 target_flags |= MASK_APCS_FRAME;
3641 if (TARGET_POKE_FUNCTION_NAME)
3642 target_flags |= MASK_APCS_FRAME;
3644 if (TARGET_APCS_REENT && flag_pic)
3645 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3647 if (TARGET_APCS_REENT)
3648 warning (0, "APCS reentrant code not supported. Ignored");
3650 /* Set up some tuning parameters. */
3651 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3652 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3653 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3654 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3655 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3656 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3658 /* For arm2/3 there is no need to do any scheduling if we are doing
3659 software floating-point. */
3660 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3661 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3663 /* Override the default structure alignment for AAPCS ABI. */
3664 if (!OPTION_SET_P (arm_structure_size_boundary))
3666 if (TARGET_AAPCS_BASED)
3667 arm_structure_size_boundary = 8;
3669 else
3671 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3673 if (arm_structure_size_boundary != 8
3674 && arm_structure_size_boundary != 32
3675 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3677 if (ARM_DOUBLEWORD_ALIGN)
3678 warning (0,
3679 "structure size boundary can only be set to 8, 32 or 64");
3680 else
3681 warning (0, "structure size boundary can only be set to 8 or 32");
3682 arm_structure_size_boundary
3683 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3687 if (TARGET_VXWORKS_RTP)
3689 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3690 arm_pic_data_is_text_relative = 0;
3692 else if (flag_pic
3693 && !arm_pic_data_is_text_relative
3694 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3695 /* When text & data segments don't have a fixed displacement, the
3696 intended use is with a single, read-only, PIC base register.
3697 Unless the user explicitly requested not to do that, set
3698 it. */
3699 target_flags |= MASK_SINGLE_PIC_BASE;
3701 /* If stack checking is disabled, we can use r10 as the PIC register,
3702 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3703 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3705 if (TARGET_VXWORKS_RTP)
3706 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3707 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3710 if (flag_pic && TARGET_VXWORKS_RTP)
3711 arm_pic_register = 9;
3713 /* If in FDPIC mode then force arm_pic_register to be r9. */
3714 if (TARGET_FDPIC)
3716 arm_pic_register = FDPIC_REGNUM;
3717 if (TARGET_THUMB1)
3718 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3721 if (arm_pic_register_string != NULL)
3723 int pic_register = decode_reg_name (arm_pic_register_string);
3725 if (!flag_pic)
3726 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3728 /* Prevent the user from choosing an obviously stupid PIC register. */
3729 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3730 || pic_register == HARD_FRAME_POINTER_REGNUM
3731 || pic_register == STACK_POINTER_REGNUM
3732 || pic_register >= PC_REGNUM
3733 || (TARGET_VXWORKS_RTP
3734 && (unsigned int) pic_register != arm_pic_register))
3735 error ("unable to use %qs for PIC register", arm_pic_register_string);
3736 else
3737 arm_pic_register = pic_register;
3740 if (flag_pic)
3741 target_word_relocations = 1;
3743 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3744 if (fix_cm3_ldrd == 2)
3746 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3747 fix_cm3_ldrd = 1;
3748 else
3749 fix_cm3_ldrd = 0;
3752 /* Enable fix_vlldm by default if required. */
3753 if (fix_vlldm == 2)
3755 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3756 fix_vlldm = 1;
3757 else
3758 fix_vlldm = 0;
3761 /* Enable fix_aes by default if required. */
3762 if (fix_aes_erratum_1742098 == 2)
3764 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3765 fix_aes_erratum_1742098 = 1;
3766 else
3767 fix_aes_erratum_1742098 = 0;
3770 /* Hot/Cold partitioning is not currently supported, since we can't
3771 handle literal pool placement in that case. */
3772 if (flag_reorder_blocks_and_partition)
3774 inform (input_location,
3775 "%<-freorder-blocks-and-partition%> not supported "
3776 "on this architecture");
3777 flag_reorder_blocks_and_partition = 0;
3778 flag_reorder_blocks = 1;
3781 if (flag_pic)
3782 /* Hoisting PIC address calculations more aggressively provides a small,
3783 but measurable, size reduction for PIC code. Therefore, we decrease
3784 the bar for unrestricted expression hoisting to the cost of PIC address
3785 calculation, which is 2 instructions. */
3786 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3787 param_gcse_unrestricted_cost, 2);
3789 /* ARM EABI defaults to strict volatile bitfields. */
3790 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3791 && abi_version_at_least(2))
3792 flag_strict_volatile_bitfields = 1;
3794 /* Enable software prefetching at -O3 for CPUs that have prefetch and for
3795 which we have deemed it beneficial (signified by setting
3796 prefetch.num_slots to 1 or more). */
3797 if (flag_prefetch_loop_arrays < 0
3798 && HAVE_prefetch
3799 && optimize >= 3
3800 && current_tune->prefetch.num_slots > 0)
3801 flag_prefetch_loop_arrays = 1;
3803 /* Set up parameters to be used in prefetching algorithm. Do not
3804 override the defaults unless we are tuning for a core we have
3805 researched values for. */
3806 if (current_tune->prefetch.num_slots > 0)
3807 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3808 param_simultaneous_prefetches,
3809 current_tune->prefetch.num_slots);
3810 if (current_tune->prefetch.l1_cache_line_size >= 0)
3811 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3812 param_l1_cache_line_size,
3813 current_tune->prefetch.l1_cache_line_size);
3814 if (current_tune->prefetch.l1_cache_line_size >= 0)
3816 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3817 param_destruct_interfere_size,
3818 current_tune->prefetch.l1_cache_line_size);
3819 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3820 param_construct_interfere_size,
3821 current_tune->prefetch.l1_cache_line_size);
3823 else
3825 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3826 /* ??? Cortex-A9 has a 32-byte cache line, so why not 32 for the
3827 constructive interference size? */
3828 /* More recent Cortex chips have a 64-byte cache line, but are marked
3829 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3830 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3831 param_destruct_interfere_size, 64);
3832 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3833 param_construct_interfere_size, 64);
3836 if (current_tune->prefetch.l1_cache_size >= 0)
3837 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3838 param_l1_cache_size,
3839 current_tune->prefetch.l1_cache_size);
3841 /* Look through ready list and all of queue for instructions
3842 relevant for L2 auto-prefetcher. */
3843 int sched_autopref_queue_depth;
3845 switch (current_tune->sched_autopref)
3847 case tune_params::SCHED_AUTOPREF_OFF:
3848 sched_autopref_queue_depth = -1;
3849 break;
3851 case tune_params::SCHED_AUTOPREF_RANK:
3852 sched_autopref_queue_depth = 0;
3853 break;
3855 case tune_params::SCHED_AUTOPREF_FULL:
3856 sched_autopref_queue_depth = max_insn_queue_index + 1;
3857 break;
3859 default:
3860 gcc_unreachable ();
3863 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3864 param_sched_autopref_queue_depth,
3865 sched_autopref_queue_depth);
3867 /* Currently, for slow flash data, we just disable literal pools. We also
3868 disable them for pure-code. */
3869 if (target_slow_flash_data || target_pure_code)
3870 arm_disable_literal_pool = true;
3872 /* Disable scheduling fusion by default if it's not armv7 processor
3873 or doesn't prefer ldrd/strd. */
3874 if (flag_schedule_fusion == 2
3875 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3876 flag_schedule_fusion = 0;
3878 /* Need to remember the initial options before they are overridden. */
3879 init_optimize = build_optimization_node (&global_options,
3880 &global_options_set);
3882 arm_options_perform_arch_sanity_checks ();
3883 arm_option_override_internal (&global_options, &global_options_set);
3884 arm_option_check_internal (&global_options);
3885 arm_option_params_internal ();
3887 /* Create the default target_options structure. */
3888 target_option_default_node = target_option_current_node
3889 = build_target_option_node (&global_options, &global_options_set);
3891 /* Register global variables with the garbage collector. */
3892 arm_add_gc_roots ();
3894 /* Record the initial ARM/Thumb mode for testing. */
3895 thumb_flipper = TARGET_THUMB;
3899 /* Reconfigure global status flags from the active_target.isa. */
3900 void
3901 arm_option_reconfigure_globals (void)
3903 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3904 arm_base_arch = arm_active_target.base_arch;
3906 /* Initialize boolean versions of the architectural flags, for use
3907 in the arm.md file. */
3908 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3909 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3910 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3911 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3912 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3913 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3914 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3915 arm_arch6m = arm_arch6 && !arm_arch_notm;
3916 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3917 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3918 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3919 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3920 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3921 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3922 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3923 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3924 isa_bit_armv8_1m_main);
3925 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3926 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3927 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3928 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3929 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3930 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3931 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3932 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3933 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3934 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3935 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3936 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3937 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3939 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3940 if (arm_fp16_inst)
3942 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3943 error ("selected fp16 options are incompatible");
3944 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3947 arm_arch_cde = 0;
3948 arm_arch_cde_coproc = 0;
3949 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3950 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3951 isa_bit_cdecp6, isa_bit_cdecp7};
3952 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3954 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3955 if (cde_bit)
3957 arm_arch_cde |= cde_bit;
3958 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3962 /* And finally, set up some quirks. */
3963 arm_arch_no_volatile_ce
3964 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3965 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3966 isa_bit_quirk_armv6kz);
3968 /* Use the cp15 method if it is available. */
3969 if (target_thread_pointer == TP_AUTO)
3971 if (arm_arch6k && !TARGET_THUMB1)
3972 target_thread_pointer = TP_TPIDRURO;
3973 else
3974 target_thread_pointer = TP_SOFT;
3977 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3978 error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
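/* Roughly, the TP_AUTO resolution above means that, for example, an Armv7-A
   target ends up using the cp15 TPIDRURO register for the thread pointer,
   while a Thumb-1-only target such as Armv6-M falls back to the software
   thread pointer (TP_SOFT), which in turn makes
   -mstack-protector-guard=tls unusable there.  */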
3981 /* Perform some validation between the desired architecture and the rest of the
3982 options. */
3983 void
3984 arm_options_perform_arch_sanity_checks (void)
3986 /* V5T code we generate is completely interworking capable, so we turn off
3987 TARGET_INTERWORK here to avoid many tests later on. */
3989 /* XXX However, we must pass the right pre-processor defines to CPP
3990 or GLD can get confused. This is a hack. */
3991 if (TARGET_INTERWORK)
3992 arm_cpp_interwork = 1;
3994 if (arm_arch5t)
3995 target_flags &= ~MASK_INTERWORK;
3997 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3998 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
4000 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
4001 error ("iwmmxt abi requires an iwmmxt capable cpu");
4003 /* BPABI targets use linker tricks to allow interworking on cores
4004 without thumb support. */
4005 if (TARGET_INTERWORK
4006 && !TARGET_BPABI
4007 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
4009 warning (0, "target CPU does not support interworking" );
4010 target_flags &= ~MASK_INTERWORK;
4013 /* If soft-float is specified then don't use FPU. */
4014 if (TARGET_SOFT_FLOAT)
4015 arm_fpu_attr = FPU_NONE;
4016 else
4017 arm_fpu_attr = FPU_VFP;
4019 if (TARGET_AAPCS_BASED)
4021 if (TARGET_CALLER_INTERWORKING)
4022 error ("AAPCS does not support %<-mcaller-super-interworking%>");
4023 else
4024 if (TARGET_CALLEE_INTERWORKING)
4025 error ("AAPCS does not support %<-mcallee-super-interworking%>");
4028 /* __fp16 support currently assumes the core has ldrh. */
4029 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
4030 sorry ("%<__fp16%> and no ldrh");
4032 if (use_cmse && !arm_arch_cmse)
4033 error ("target CPU does not support ARMv8-M Security Extensions");
4035 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
4036 and ARMv8-M Baseline and Mainline do not allow such configuration. */
4037 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
4038 error ("ARMv8-M Security Extensions incompatible with selected FPU");
4041 if (TARGET_AAPCS_BASED)
4043 if (arm_abi == ARM_ABI_IWMMXT)
4044 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
4045 else if (TARGET_HARD_FLOAT_ABI)
4047 arm_pcs_default = ARM_PCS_AAPCS_VFP;
4048 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
4049 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
4050 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
4052 else
4053 arm_pcs_default = ARM_PCS_AAPCS;
4055 else
4057 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
4058 sorry ("%<-mfloat-abi=hard%> and VFP");
4060 if (arm_abi == ARM_ABI_APCS)
4061 arm_pcs_default = ARM_PCS_APCS;
4062 else
4063 arm_pcs_default = ARM_PCS_ATPCS;
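/* By way of example (a rough summary of the PCS selection above): an AAPCS
   target with -mfloat-abi=hard selects ARM_PCS_AAPCS_VFP, provided the
   selected architecture actually has VFP or MVE; with the IWMMXT ABI it is
   ARM_PCS_AAPCS_IWMMXT; otherwise plain ARM_PCS_AAPCS.  Non-AAPCS targets
   fall back to ARM_PCS_APCS or ARM_PCS_ATPCS depending on -mabi.  */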
4067 /* Test whether a local function descriptor is canonical, i.e.,
4068 whether we can use GOTOFFFUNCDESC to compute the address of the
4069 function. */
4070 static bool
4071 arm_fdpic_local_funcdesc_p (rtx fnx)
4073 tree fn;
4074 enum symbol_visibility vis;
4075 bool ret;
4077 if (!TARGET_FDPIC)
4078 return true;
4080 if (! SYMBOL_REF_LOCAL_P (fnx))
4081 return false;
4083 fn = SYMBOL_REF_DECL (fnx);
4085 if (! fn)
4086 return false;
4088 vis = DECL_VISIBILITY (fn);
4090 if (vis == VISIBILITY_PROTECTED)
4091 /* Private function descriptors for protected functions are not
4092 canonical. Temporarily change the visibility to global so that
4093 we can ensure uniqueness of funcdesc pointers. */
4094 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4096 ret = default_binds_local_p_1 (fn, flag_pic);
4098 DECL_VISIBILITY (fn) = vis;
4100 return ret;
4103 static void
4104 arm_add_gc_roots (void)
4106 gcc_obstack_init (&minipool_obstack);
4107 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4110 /* A table of known ARM exception types.
4111 For use with the interrupt function attribute. */
4113 typedef struct
4115 const char *const arg;
4116 const unsigned long return_value;
4118 isr_attribute_arg;
4120 static const isr_attribute_arg isr_attribute_args [] =
4122 { "IRQ", ARM_FT_ISR },
4123 { "irq", ARM_FT_ISR },
4124 { "FIQ", ARM_FT_FIQ },
4125 { "fiq", ARM_FT_FIQ },
4126 { "ABORT", ARM_FT_ISR },
4127 { "abort", ARM_FT_ISR },
4128 { "UNDEF", ARM_FT_EXCEPTION },
4129 { "undef", ARM_FT_EXCEPTION },
4130 { "SWI", ARM_FT_EXCEPTION },
4131 { "swi", ARM_FT_EXCEPTION },
4132 { NULL, ARM_FT_NORMAL }
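/* So, for example, a handler declared with
   __attribute__ ((interrupt ("FIQ"))) is typed ARM_FT_FIQ, a bare
   __attribute__ ((interrupt)) defaults to ARM_FT_ISR, and an unrecognized
   string yields ARM_FT_UNKNOWN (see arm_isr_value below).  */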
4135 /* Returns the (interrupt) function type of the current
4136 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4138 static unsigned long
4139 arm_isr_value (tree argument)
4141 const isr_attribute_arg * ptr;
4142 const char * arg;
4144 if (!arm_arch_notm)
4145 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4147 /* No argument - default to IRQ. */
4148 if (argument == NULL_TREE)
4149 return ARM_FT_ISR;
4151 /* Get the value of the argument. */
4152 if (TREE_VALUE (argument) == NULL_TREE
4153 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4154 return ARM_FT_UNKNOWN;
4156 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4158 /* Check it against the list of known arguments. */
4159 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4160 if (streq (arg, ptr->arg))
4161 return ptr->return_value;
4163 /* An unrecognized interrupt type. */
4164 return ARM_FT_UNKNOWN;
4167 /* Computes the type of the current function. */
4169 static unsigned long
4170 arm_compute_func_type (void)
4172 unsigned long type = ARM_FT_UNKNOWN;
4173 tree a;
4174 tree attr;
4176 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4178 /* Decide if the current function is volatile. Such functions
4179 never return, and many memory cycles can be saved by not storing
4180 register values that will never be needed again. This optimization
4181 was added to speed up context switching in a kernel application. */
4182 if (optimize > 0
4183 && (TREE_NOTHROW (current_function_decl)
4184 || !(flag_unwind_tables
4185 || (flag_exceptions
4186 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4187 && TREE_THIS_VOLATILE (current_function_decl))
4188 type |= ARM_FT_VOLATILE;
4190 if (cfun->static_chain_decl != NULL)
4191 type |= ARM_FT_NESTED;
4193 attr = DECL_ATTRIBUTES (current_function_decl);
4195 a = lookup_attribute ("naked", attr);
4196 if (a != NULL_TREE)
4197 type |= ARM_FT_NAKED;
4199 a = lookup_attribute ("isr", attr);
4200 if (a == NULL_TREE)
4201 a = lookup_attribute ("interrupt", attr);
4203 if (a == NULL_TREE)
4204 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4205 else
4206 type |= arm_isr_value (TREE_VALUE (a));
4208 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4209 type |= ARM_FT_CMSE_ENTRY;
4211 return type;
4214 /* Returns the type of the current function. */
4216 unsigned long
4217 arm_current_func_type (void)
4219 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4220 cfun->machine->func_type = arm_compute_func_type ();
4222 return cfun->machine->func_type;
4225 bool
4226 arm_allocate_stack_slots_for_args (void)
4228 /* Naked functions should not allocate stack slots for arguments. */
4229 return !IS_NAKED (arm_current_func_type ());
4232 static bool
4233 arm_warn_func_return (tree decl)
4235 /* Naked functions are implemented entirely in assembly, including the
4236 return sequence, so suppress warnings about this. */
4237 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4241 /* Output assembler code for a block containing the constant parts
4242 of a trampoline, leaving space for the variable parts.
4244 On the ARM, (if r8 is the static chain regnum, and remembering that
4245 referencing pc adds an offset of 8) the trampoline looks like:
4246 ldr r8, [pc, #0]
4247 ldr pc, [pc]
4248 .word static chain value
4249 .word function's address
4250 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4252 In FDPIC mode, the trampoline looks like:
4253 .word trampoline address
4254 .word trampoline GOT address
4255 ldr r12, [pc, #8] ; #4 for Arm mode
4256 ldr r9, [pc, #8] ; #4 for Arm mode
4257 ldr pc, [pc, #8] ; #4 for Arm mode
4258 .word static chain value
4259 .word GOT address
4260 .word function's address
4263 static void
4264 arm_asm_trampoline_template (FILE *f)
4266 fprintf (f, "\t.syntax unified\n");
4268 if (TARGET_FDPIC)
4270 /* The first two words are a function descriptor pointing to the
4271 trampoline code just below. */
4272 if (TARGET_ARM)
4273 fprintf (f, "\t.arm\n");
4274 else if (TARGET_THUMB2)
4275 fprintf (f, "\t.thumb\n");
4276 else
4277 /* Only ARM and Thumb-2 are supported. */
4278 gcc_unreachable ();
4280 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4281 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4282 /* Trampoline code which sets the static chain register but also
4283 PIC register before jumping into real code. */
4284 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4285 STATIC_CHAIN_REGNUM, PC_REGNUM,
4286 TARGET_THUMB2 ? 8 : 4);
4287 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4288 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4289 TARGET_THUMB2 ? 8 : 4);
4290 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4291 PC_REGNUM, PC_REGNUM,
4292 TARGET_THUMB2 ? 8 : 4);
4293 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4295 else if (TARGET_ARM)
4297 fprintf (f, "\t.arm\n");
4298 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4299 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4301 else if (TARGET_THUMB2)
4303 fprintf (f, "\t.thumb\n");
4304 /* The Thumb-2 trampoline is similar to the ARM implementation.
4305 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
4306 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4307 STATIC_CHAIN_REGNUM, PC_REGNUM);
4308 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4310 else
4312 ASM_OUTPUT_ALIGN (f, 2);
4313 fprintf (f, "\t.code\t16\n");
4314 fprintf (f, ".Ltrampoline_start:\n");
4315 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4316 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4317 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4318 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4319 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4320 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4322 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4323 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4326 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4328 static void
4329 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4331 rtx fnaddr, mem, a_tramp;
4333 emit_block_move (m_tramp, assemble_trampoline_template (),
4334 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4336 if (TARGET_FDPIC)
4338 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4339 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4340 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4341 /* The function start address is at offset 8, but in Thumb mode
4342 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4343 below. */
4344 rtx trampoline_code_start
4345 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4347 /* Write initial funcdesc which points to the trampoline. */
4348 mem = adjust_address (m_tramp, SImode, 0);
4349 emit_move_insn (mem, trampoline_code_start);
4350 mem = adjust_address (m_tramp, SImode, 4);
4351 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4352 /* Set up the static chain. */
4353 mem = adjust_address (m_tramp, SImode, 20);
4354 emit_move_insn (mem, chain_value);
4355 /* GOT + real function entry point. */
4356 mem = adjust_address (m_tramp, SImode, 24);
4357 emit_move_insn (mem, gotaddr);
4358 mem = adjust_address (m_tramp, SImode, 28);
4359 emit_move_insn (mem, fnaddr);
4361 else
4363 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4364 emit_move_insn (mem, chain_value);
4366 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4367 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4368 emit_move_insn (mem, fnaddr);
4371 a_tramp = XEXP (m_tramp, 0);
4372 maybe_emit_call_builtin___clear_cache (a_tramp,
4373 plus_constant (ptr_mode,
4374 a_tramp,
4375 TRAMPOLINE_SIZE));
4378 /* Thumb trampolines should be entered in thumb mode, so set
4379 the bottom bit of the address. */
4381 static rtx
4382 arm_trampoline_adjust_address (rtx addr)
4384 /* For FDPIC don't fix trampoline address since it's a function
4385 descriptor and not a function address. */
4386 if (TARGET_THUMB && !TARGET_FDPIC)
4387 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4388 NULL, 0, OPTAB_LIB_WIDEN);
4389 return addr;
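/* For instance, a Thumb trampoline placed at 0x20001000 would be entered
   through the adjusted address 0x20001001; the set low bit selects Thumb
   state on the indirect transfer, matching the ORing-in of const1_rtx
   above (skipped for FDPIC, where the value is a function descriptor
   rather than a code address).  */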
4392 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4393 includes call-clobbered registers too. If this is a leaf function
4394 we can just examine the registers used by the RTL, but otherwise we
4395 have to assume that whatever function is called might clobber
4396 anything, and so we have to save all the call-clobbered registers
4397 as well. */
4398 static inline bool reg_needs_saving_p (unsigned reg)
4400 unsigned long func_type = arm_current_func_type ();
4402 if (IS_INTERRUPT (func_type))
4403 if (df_regs_ever_live_p (reg)
4404 /* Save call-clobbered core registers. */
4405 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4406 return true;
4407 else
4408 return false;
4409 else
4410 if (!df_regs_ever_live_p (reg)
4411 || call_used_or_fixed_reg_p (reg))
4412 return false;
4413 else
4414 return true;
4417 /* Return 1 if it is possible to return using a single instruction.
4418 If SIBLING is non-null, this is a test for a return before a sibling
4419 call. SIBLING is the call insn, so we can examine its register usage. */
4422 use_return_insn (int iscond, rtx sibling)
4424 int regno;
4425 unsigned int func_type;
4426 unsigned long saved_int_regs;
4427 unsigned HOST_WIDE_INT stack_adjust;
4428 arm_stack_offsets *offsets;
4430 /* Never use a return instruction before reload has run. */
4431 if (!reload_completed)
4432 return 0;
4434 /* Never use a return instruction when return address signing
4435 mechanism is enabled as it requires more than one
4436 instruction. */
4437 if (arm_current_function_pac_enabled_p ())
4438 return 0;
4440 func_type = arm_current_func_type ();
4442 /* Naked, volatile and stack alignment functions need special
4443 consideration. */
4444 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4445 return 0;
4447 /* So do interrupt functions that use the frame pointer and Thumb
4448 interrupt functions. */
4449 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4450 return 0;
4452 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4453 && !optimize_function_for_size_p (cfun))
4454 return 0;
4456 offsets = arm_get_frame_offsets ();
4457 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4459 /* As do variadic functions. */
4460 if (crtl->args.pretend_args_size
4461 || cfun->machine->uses_anonymous_args
4462 /* Or if the function calls __builtin_eh_return () */
4463 || crtl->calls_eh_return
4464 /* Or if the function calls alloca */
4465 || cfun->calls_alloca
4466 /* Or if there is a stack adjustment. However, if the stack pointer
4467 is saved on the stack, we can use a pre-incrementing stack load. */
4468 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4469 && stack_adjust == 4))
4470 /* Or if the static chain register was saved above the frame, under the
4471 assumption that the stack pointer isn't saved on the stack. */
4472 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4473 && arm_compute_static_chain_stack_bytes() != 0))
4474 return 0;
4476 saved_int_regs = offsets->saved_regs_mask;
4478 /* Unfortunately, the insn
4480 ldmib sp, {..., sp, ...}
4482 triggers a bug on most SA-110 based devices, such that the stack
4483 pointer won't be correctly restored if the instruction takes a
4484 page fault. We work around this problem by popping r3 along with
4485 the other registers, since that is never slower than executing
4486 another instruction.
4488 We test for !arm_arch5t here, because code for any architecture
4489 less than this could potentially be run on one of the buggy
4490 chips. */
4491 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4493 /* Validate that r3 is a call-clobbered register (always true in
4494 the default abi) ... */
4495 if (!call_used_or_fixed_reg_p (3))
4496 return 0;
4498 /* ... that it isn't being used for a return value ... */
4499 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4500 return 0;
4502 /* ... or for a tail-call argument ... */
4503 if (sibling)
4505 gcc_assert (CALL_P (sibling));
4507 if (find_regno_fusage (sibling, USE, 3))
4508 return 0;
4511 /* ... and that there are no call-saved registers in r0-r2
4512 (always true in the default ABI). */
4513 if (saved_int_regs & 0x7)
4514 return 0;
4517 /* Can't be done if interworking with Thumb, and any registers have been
4518 stacked. */
4519 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4520 return 0;
4522 /* On StrongARM, conditional returns are expensive if they aren't
4523 taken and multiple registers have been stacked. */
4524 if (iscond && arm_tune_strongarm)
4526 /* Conditional return when just the LR is stored is a simple
4527 conditional-load instruction, that's not expensive. */
4528 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4529 return 0;
4531 if (flag_pic
4532 && arm_pic_register != INVALID_REGNUM
4533 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4534 return 0;
4537 /* ARMv8-M non-secure entry functions need to use bxns to return and thus need
4538 several instructions if anything needs to be popped. Armv8.1-M Mainline
4539 also needs several instructions to save and restore the FP context. */
4540 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4541 return 0;
4543 /* If there are saved registers but the LR isn't saved, then we need
4544 two instructions for the return. */
4545 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4546 return 0;
4548 /* Can't be done if any of the VFP regs are pushed,
4549 since this also requires an insn. */
4550 if (TARGET_VFP_BASE)
4551 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4552 if (reg_needs_saving_p (regno))
4553 return 0;
4555 if (TARGET_REALLY_IWMMXT)
4556 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4557 if (reg_needs_saving_p (regno))
4558 return 0;
4560 return 1;
4563 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4564 shrink-wrapping if possible. This is the case if we need to emit a
4565 prologue, which we can test by looking at the offsets. */
4566 bool
4567 use_simple_return_p (void)
4569 arm_stack_offsets *offsets;
4571 /* Note this function can be called before or after reload. */
4572 if (!reload_completed)
4573 arm_compute_frame_layout ();
4575 offsets = arm_get_frame_offsets ();
4576 return offsets->outgoing_args != 0;
4579 /* Return TRUE if int I is a valid immediate ARM constant. */
4582 const_ok_for_arm (HOST_WIDE_INT i)
4584 int lowbit;
4586 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4587 be all zero, or all one. */
4588 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4589 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4590 != ((~(unsigned HOST_WIDE_INT) 0)
4591 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4592 return FALSE;
4594 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4596 /* Fast return for 0 and small values. We must do this for zero, since
4597 the code below can't handle that one case. */
4598 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4599 return TRUE;
4601 /* Get the number of trailing zeros. */
4602 lowbit = ffs((int) i) - 1;
4604 /* Only even shifts are allowed in ARM mode so round down to the
4605 nearest even number. */
4606 if (TARGET_ARM)
4607 lowbit &= ~1;
4609 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4610 return TRUE;
4612 if (TARGET_ARM)
4614 /* Allow rotated constants in ARM mode. */
4615 if (lowbit <= 4
4616 && ((i & ~0xc000003f) == 0
4617 || (i & ~0xf000000f) == 0
4618 || (i & ~0xfc000003) == 0))
4619 return TRUE;
4621 else if (TARGET_THUMB2)
4623 HOST_WIDE_INT v;
4625 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4626 v = i & 0xff;
4627 v |= v << 16;
4628 if (i == v || i == (v | (v << 8)))
4629 return TRUE;
4631 /* Allow repeated pattern 0xXY00XY00. */
4632 v = i & 0xff00;
4633 v |= v << 16;
4634 if (i == v)
4635 return TRUE;
4637 else if (TARGET_HAVE_MOVT)
4639 /* Thumb-1 Targets with MOVT. */
4640 if (i > 0xffff)
4641 return FALSE;
4642 else
4643 return TRUE;
4646 return FALSE;
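/* A few illustrative values: 0xFF, 0xFF000000 and 0x0003FC00 are all valid
   ARM immediates (an 8-bit value rotated right by an even amount), whereas
   0x00FFFF00 is not.  Thumb-2 additionally accepts replicated patterns such
   as 0x00AB00AB, 0xAB00AB00 and 0xABABABAB.  */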
4649 /* Return true if I is a valid constant for the operation CODE. */
4651 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4653 if (const_ok_for_arm (i))
4654 return 1;
4656 switch (code)
4658 case SET:
4659 /* See if we can use movw. */
4660 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4661 return 1;
4662 else
4663 /* Otherwise, try mvn. */
4664 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4666 case PLUS:
4667 /* See if we can use addw or subw. */
4668 if (TARGET_THUMB2
4669 && ((i & 0xfffff000) == 0
4670 || ((-i) & 0xfffff000) == 0))
4671 return 1;
4672 /* Fall through. */
4673 case COMPARE:
4674 case EQ:
4675 case NE:
4676 case GT:
4677 case LE:
4678 case LT:
4679 case GE:
4680 case GEU:
4681 case LTU:
4682 case GTU:
4683 case LEU:
4684 case UNORDERED:
4685 case ORDERED:
4686 case UNEQ:
4687 case UNGE:
4688 case UNLT:
4689 case UNGT:
4690 case UNLE:
4691 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4693 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4694 case XOR:
4695 return 0;
4697 case IOR:
4698 if (TARGET_THUMB2)
4699 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4700 return 0;
4702 case AND:
4703 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4705 default:
4706 gcc_unreachable ();
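/* Some example outcomes of the switch above: for PLUS, -1 is accepted
   because its negation, 1, is a valid immediate (and Thumb-2 can also use
   subw); for AND, 0xFFFFFF00 is accepted because its complement 0xFF is
   valid; for XOR, only directly-encodable constants are allowed.  */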
4710 /* Return true if I is a valid di mode constant for the operation CODE. */
4712 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4714 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4715 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4716 rtx hi = GEN_INT (hi_val);
4717 rtx lo = GEN_INT (lo_val);
4719 if (TARGET_THUMB1)
4720 return 0;
4722 switch (code)
4724 case AND:
4725 case IOR:
4726 case XOR:
4727 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4728 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4729 case PLUS:
4730 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4732 default:
4733 return 0;
4737 /* Emit a sequence of insns to handle a large constant.
4738 CODE is the code of the operation required, it can be any of SET, PLUS,
4739 IOR, AND, XOR, MINUS;
4740 MODE is the mode in which the operation is being performed;
4741 VAL is the integer to operate on;
4742 SOURCE is the other operand (a register, or a null-pointer for SET);
4743 SUBTARGETS means it is safe to create scratch registers if that will
4744 either produce a simpler sequence, or we will want to cse the values.
4745 Return value is the number of insns emitted. */
4747 /* ??? Tweak this for thumb2. */
4749 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4750 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4752 rtx cond;
4754 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4755 cond = COND_EXEC_TEST (PATTERN (insn));
4756 else
4757 cond = NULL_RTX;
4759 if (subtargets || code == SET
4760 || (REG_P (target) && REG_P (source)
4761 && REGNO (target) != REGNO (source)))
4763 /* After arm_reorg has been called, we can't fix up expensive
4764 constants by pushing them into memory so we must synthesize
4765 them in-line, regardless of the cost. This is only likely to
4766 be more costly on chips that have load delay slots and we are
4767 compiling without running the scheduler (so no splitting
4768 occurred before the final instruction emission).
4770 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4772 if (!cfun->machine->after_arm_reorg
4773 && !cond
4774 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4775 1, 0)
4776 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4777 + (code != SET))))
4779 if (code == SET)
4781 /* Currently SET is the only monadic value for CODE; all
4782 the rest are dyadic. */
4783 if (TARGET_USE_MOVT)
4784 arm_emit_movpair (target, GEN_INT (val));
4785 else
4786 emit_set_insn (target, GEN_INT (val));
4788 return 1;
4790 else
4792 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4794 if (TARGET_USE_MOVT)
4795 arm_emit_movpair (temp, GEN_INT (val));
4796 else
4797 emit_set_insn (temp, GEN_INT (val));
4799 /* For MINUS, the constant is the value being subtracted from,
4800 since we never have subtraction of a constant. */
4801 if (code == MINUS)
4802 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4803 else
4804 emit_set_insn (target,
4805 gen_rtx_fmt_ee (code, mode, source, temp));
4806 return 2;
4811 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4815 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4816 ARM/Thumb-2 immediates and add up to VAL.
4817 The function's return value gives the number of insns required. */
4818 static int
4819 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4820 struct four_ints *return_sequence)
4822 int best_consecutive_zeros = 0;
4823 int i;
4824 int best_start = 0;
4825 int insns1, insns2;
4826 struct four_ints tmp_sequence;
4828 /* If we aren't targeting ARM, the best place to start is always at
4829 the bottom, otherwise look more closely. */
4830 if (TARGET_ARM)
4832 for (i = 0; i < 32; i += 2)
4834 int consecutive_zeros = 0;
4836 if (!(val & (3 << i)))
4838 while ((i < 32) && !(val & (3 << i)))
4840 consecutive_zeros += 2;
4841 i += 2;
4843 if (consecutive_zeros > best_consecutive_zeros)
4845 best_consecutive_zeros = consecutive_zeros;
4846 best_start = i - consecutive_zeros;
4848 i -= 2;
4853 /* So long as it won't require any more insns to do so, it's
4854 desirable to emit a small constant (in bits 0...9) in the last
4855 insn. This way there is more chance that it can be combined with
4856 a later addressing insn to form a pre-indexed load or store
4857 operation. Consider:
4859 *((volatile int *)0xe0000100) = 1;
4860 *((volatile int *)0xe0000110) = 2;
4862 We want this to wind up as:
4864 mov rA, #0xe0000000
4865 mov rB, #1
4866 str rB, [rA, #0x100]
4867 mov rB, #2
4868 str rB, [rA, #0x110]
4870 rather than having to synthesize both large constants from scratch.
4872 Therefore, we calculate how many insns would be required to emit
4873 the constant starting from `best_start', and also starting from
4874 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4875 yield a shorter sequence, we may as well use zero. */
4876 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4877 if (best_start != 0
4878 && ((HOST_WIDE_INT_1U << best_start) < val))
4880 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4881 if (insns2 <= insns1)
4883 *return_sequence = tmp_sequence;
4884 insns1 = insns2;
4888 return insns1;
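/* As a rough illustration of the splitting this drives: a SET of
   0x000F0F00, which is not itself a valid immediate, might be synthesized
   as a mov of #0x000F0000 followed by an add (or orr) of #0x00000F00,
   i.e. two 8-bit rotated immediates whose union covers the constant; the
   best_start heuristic above decides where the chopping into 8-bit chunks
   begins.  */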
4891 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4892 static int
4893 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4894 struct four_ints *return_sequence, int i)
4896 int remainder = val & 0xffffffff;
4897 int insns = 0;
4899 /* Try and find a way of doing the job in either two or three
4900 instructions.
4902 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4903 location. We start at position I. This may be the MSB, or
4904 optimal_immediate_sequence may have positioned it at the largest block
4905 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4906 wrapping around to the top of the word when we drop off the bottom.
4907 In the worst case this code should produce no more than four insns.
4909 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4910 constants, shifted to any arbitrary location. We should always start
4911 at the MSB. */
4914 int end;
4915 unsigned int b1, b2, b3, b4;
4916 unsigned HOST_WIDE_INT result;
4917 int loc;
4919 gcc_assert (insns < 4);
4921 if (i <= 0)
4922 i += 32;
4924 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4925 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4927 loc = i;
4928 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4929 /* We can use addw/subw for the last 12 bits. */
4930 result = remainder;
4931 else
4933 /* Use an 8-bit shifted/rotated immediate. */
4934 end = i - 8;
4935 if (end < 0)
4936 end += 32;
4937 result = remainder & ((0x0ff << end)
4938 | ((i < end) ? (0xff >> (32 - end))
4939 : 0));
4940 i -= 8;
4943 else
4945 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4946 arbitrary shifts. */
4947 i -= TARGET_ARM ? 2 : 1;
4948 continue;
4951 /* Next, see if we can do a better job with a thumb2 replicated
4952 constant.
4954 We do it this way around to catch the cases like 0x01F001E0 where
4955 two 8-bit immediates would work, but a replicated constant would
4956 make it worse.
4958 TODO: 16-bit constants that don't clear all the bits, but still win.
4959 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4960 if (TARGET_THUMB2)
4962 b1 = (remainder & 0xff000000) >> 24;
4963 b2 = (remainder & 0x00ff0000) >> 16;
4964 b3 = (remainder & 0x0000ff00) >> 8;
4965 b4 = remainder & 0xff;
4967 if (loc > 24)
4969 /* The 8-bit immediate already found clears b1 (and maybe b2),
4970 but must leave b3 and b4 alone. */
4972 /* First try to find a 32-bit replicated constant that clears
4973 almost everything. We can assume that we can't do it in one,
4974 or else we wouldn't be here. */
4975 unsigned int tmp = b1 & b2 & b3 & b4;
4976 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4977 + (tmp << 24);
4978 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4979 + (tmp == b3) + (tmp == b4);
4980 if (tmp
4981 && (matching_bytes >= 3
4982 || (matching_bytes == 2
4983 && const_ok_for_op (remainder & ~tmp2, code))))
4985 /* At least 3 of the bytes match, and the fourth has at
4986 least as many bits set, or two of the bytes match
4987 and it will only require one more insn to finish. */
4988 result = tmp2;
4989 i = tmp != b1 ? 32
4990 : tmp != b2 ? 24
4991 : tmp != b3 ? 16
4992 : 8;
4995 /* Second, try to find a 16-bit replicated constant that can
4996 leave three of the bytes clear. If b2 or b4 is already
4997 zero, then we can. If the 8-bit from above would not
4998 clear b2 anyway, then we still win. */
4999 else if (b1 == b3 && (!b2 || !b4
5000 || (remainder & 0x00ff0000 & ~result)))
5002 result = remainder & 0xff00ff00;
5003 i = 24;
5006 else if (loc > 16)
5008 /* The 8-bit immediate already found clears b2 (and maybe b3)
5009 and we don't get here unless b1 is already clear, but it will
5010 leave b4 unchanged. */
5012 /* If we can clear b2 and b4 at once, then we win, since the
5013 8-bit immediate couldn't possibly reach that far. */
5014 if (b2 == b4)
5016 result = remainder & 0x00ff00ff;
5017 i = 16;
5022 return_sequence->i[insns++] = result;
5023 remainder &= ~result;
5025 if (code == SET || code == MINUS)
5026 code = PLUS;
5028 while (remainder);
5030 return insns;
5033 /* Emit an instruction with the indicated PATTERN. If COND is
5034 non-NULL, conditionalize the execution of the instruction on COND
5035 being true. */
5037 static void
5038 emit_constant_insn (rtx cond, rtx pattern)
5040 if (cond)
5041 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
5042 emit_insn (pattern);
5045 /* As above, but extra parameter GENERATE which, if clear, suppresses
5046 RTL generation. */
5048 static int
5049 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
5050 unsigned HOST_WIDE_INT val, rtx target, rtx source,
5051 int subtargets, int generate)
5053 int can_invert = 0;
5054 int can_negate = 0;
5055 int final_invert = 0;
5056 int i;
5057 int set_sign_bit_copies = 0;
5058 int clear_sign_bit_copies = 0;
5059 int clear_zero_bit_copies = 0;
5060 int set_zero_bit_copies = 0;
5061 int insns = 0, neg_insns, inv_insns;
5062 unsigned HOST_WIDE_INT temp1, temp2;
5063 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
5064 struct four_ints *immediates;
5065 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5067 /* Find out which operations are safe for a given CODE. Also do a quick
5068 check for degenerate cases; these can occur when DImode operations
5069 are split. */
5070 switch (code)
5072 case SET:
5073 can_invert = 1;
5074 break;
5076 case PLUS:
5077 can_negate = 1;
5078 break;
5080 case IOR:
5081 if (remainder == 0xffffffff)
5083 if (generate)
5084 emit_constant_insn (cond,
5085 gen_rtx_SET (target,
5086 GEN_INT (ARM_SIGN_EXTEND (val))));
5087 return 1;
5090 if (remainder == 0)
5092 if (reload_completed && rtx_equal_p (target, source))
5093 return 0;
5095 if (generate)
5096 emit_constant_insn (cond, gen_rtx_SET (target, source));
5097 return 1;
5099 break;
5101 case AND:
5102 if (remainder == 0)
5104 if (generate)
5105 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5106 return 1;
5108 if (remainder == 0xffffffff)
5110 if (reload_completed && rtx_equal_p (target, source))
5111 return 0;
5112 if (generate)
5113 emit_constant_insn (cond, gen_rtx_SET (target, source));
5114 return 1;
5116 can_invert = 1;
5117 break;
5119 case XOR:
5120 if (remainder == 0)
5122 if (reload_completed && rtx_equal_p (target, source))
5123 return 0;
5124 if (generate)
5125 emit_constant_insn (cond, gen_rtx_SET (target, source));
5126 return 1;
5129 if (remainder == 0xffffffff)
5131 if (generate)
5132 emit_constant_insn (cond,
5133 gen_rtx_SET (target,
5134 gen_rtx_NOT (mode, source)));
5135 return 1;
5137 final_invert = 1;
5138 break;
5140 case MINUS:
5141 /* We treat MINUS as (val - source), since (source - val) is always
5142 passed as (source + (-val)). */
5143 if (remainder == 0)
5145 if (generate)
5146 emit_constant_insn (cond,
5147 gen_rtx_SET (target,
5148 gen_rtx_NEG (mode, source)));
5149 return 1;
5151 if (const_ok_for_arm (val))
5153 if (generate)
5154 emit_constant_insn (cond,
5155 gen_rtx_SET (target,
5156 gen_rtx_MINUS (mode, GEN_INT (val),
5157 source)));
5158 return 1;
5161 break;
5163 default:
5164 gcc_unreachable ();
5167 /* If we can do it in one insn get out quickly. */
5168 if (const_ok_for_op (val, code))
5170 if (generate)
5171 emit_constant_insn (cond,
5172 gen_rtx_SET (target,
5173 (source
5174 ? gen_rtx_fmt_ee (code, mode, source,
5175 GEN_INT (val))
5176 : GEN_INT (val))));
5177 return 1;
5180 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5181 insn. */
5182 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5183 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5185 if (generate)
5187 if (mode == SImode && i == 16)
5188 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5189 smaller insn. */
5190 emit_constant_insn (cond,
5191 gen_zero_extendhisi2
5192 (target, gen_lowpart (HImode, source)));
5193 else
5194 /* Extz only supports SImode, but we can coerce the operands
5195 into that mode. */
5196 emit_constant_insn (cond,
5197 gen_extzv_t2 (gen_lowpart (SImode, target),
5198 gen_lowpart (SImode, source),
5199 GEN_INT (i), const0_rtx));
5202 return 1;
5205 /* Calculate a few attributes that may be useful for specific
5206 optimizations. */
5207 /* Count number of leading zeros. */
5208 for (i = 31; i >= 0; i--)
5210 if ((remainder & (1 << i)) == 0)
5211 clear_sign_bit_copies++;
5212 else
5213 break;
5216 /* Count number of leading 1's. */
5217 for (i = 31; i >= 0; i--)
5219 if ((remainder & (1 << i)) != 0)
5220 set_sign_bit_copies++;
5221 else
5222 break;
5225 /* Count number of trailing zero's. */
5226 for (i = 0; i <= 31; i++)
5228 if ((remainder & (1 << i)) == 0)
5229 clear_zero_bit_copies++;
5230 else
5231 break;
5234 /* Count number of trailing 1's. */
5235 for (i = 0; i <= 31; i++)
5237 if ((remainder & (1 << i)) != 0)
5238 set_zero_bit_copies++;
5239 else
5240 break;
5243 switch (code)
5245 case SET:
5246 /* See if we can do this by sign_extending a constant that is known
5247 to be negative. This is a good way of doing it, since the shift
5248 may well merge into a subsequent insn. */
5249 if (set_sign_bit_copies > 1)
5251 if (const_ok_for_arm
5252 (temp1 = ARM_SIGN_EXTEND (remainder
5253 << (set_sign_bit_copies - 1))))
5255 if (generate)
5257 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5258 emit_constant_insn (cond,
5259 gen_rtx_SET (new_src, GEN_INT (temp1)));
5260 emit_constant_insn (cond,
5261 gen_ashrsi3 (target, new_src,
5262 GEN_INT (set_sign_bit_copies - 1)));
5264 return 2;
5266 /* For an inverted constant, we will need to set the low bits,
5267 these will be shifted out of harm's way. */
5268 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5269 if (const_ok_for_arm (~temp1))
5271 if (generate)
5273 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5274 emit_constant_insn (cond,
5275 gen_rtx_SET (new_src, GEN_INT (temp1)));
5276 emit_constant_insn (cond,
5277 gen_ashrsi3 (target, new_src,
5278 GEN_INT (set_sign_bit_copies - 1)));
5280 return 2;
5284 /* See if we can calculate the value as the difference between two
5285 valid immediates. */
5286 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5288 int topshift = clear_sign_bit_copies & ~1;
5290 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5291 & (0xff000000 >> topshift));
5293 /* If temp1 is zero, then that means the 9 most significant
5294 bits of remainder were 1 and we've caused it to overflow.
5295 When topshift is 0 we don't need to do anything since we
5296 can borrow from 'bit 32'. */
5297 if (temp1 == 0 && topshift != 0)
5298 temp1 = 0x80000000 >> (topshift - 1);
5300 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5302 if (const_ok_for_arm (temp2))
5304 if (generate)
5306 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5307 emit_constant_insn (cond,
5308 gen_rtx_SET (new_src, GEN_INT (temp1)));
5309 emit_constant_insn (cond,
5310 gen_addsi3 (target, new_src,
5311 GEN_INT (-temp2)));
5314 return 2;
5318 /* See if we can generate this by setting the bottom (or the top)
5319 16 bits, and then shifting these into the other half of the
5320 word. We only look for the simplest cases, to do more would cost
5321 too much. Be careful, however, not to generate this when the
5322 alternative would take fewer insns. */
5323 if (val & 0xffff0000)
5325 temp1 = remainder & 0xffff0000;
5326 temp2 = remainder & 0x0000ffff;
5328 /* Overlaps outside this range are best done using other methods. */
5329 for (i = 9; i < 24; i++)
5331 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5332 && !const_ok_for_arm (temp2))
5334 rtx new_src = (subtargets
5335 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5336 : target);
5337 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5338 source, subtargets, generate);
5339 source = new_src;
5340 if (generate)
5341 emit_constant_insn
5342 (cond,
5343 gen_rtx_SET
5344 (target,
5345 gen_rtx_IOR (mode,
5346 gen_rtx_ASHIFT (mode, source,
5347 GEN_INT (i)),
5348 source)));
5349 return insns + 1;
5353 /* Don't duplicate cases already considered. */
5354 for (i = 17; i < 24; i++)
5356 if (((temp1 | (temp1 >> i)) == remainder)
5357 && !const_ok_for_arm (temp1))
5359 rtx new_src = (subtargets
5360 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5361 : target);
5362 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5363 source, subtargets, generate);
5364 source = new_src;
5365 if (generate)
5366 emit_constant_insn
5367 (cond,
5368 gen_rtx_SET (target,
5369 gen_rtx_IOR
5370 (mode,
5371 gen_rtx_LSHIFTRT (mode, source,
5372 GEN_INT (i)),
5373 source)));
5374 return insns + 1;
5378 break;
5380 case IOR:
5381 case XOR:
5382 /* If we have IOR or XOR, and the constant can be loaded in a
5383 single instruction, and we can find a temporary to put it in,
5384 then this can be done in two instructions instead of 3-4. */
5385 if (subtargets
5386 /* TARGET can't be NULL if SUBTARGETS is 0 */
5387 || (reload_completed && !reg_mentioned_p (target, source)))
5389 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5391 if (generate)
5393 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5395 emit_constant_insn (cond,
5396 gen_rtx_SET (sub, GEN_INT (val)));
5397 emit_constant_insn (cond,
5398 gen_rtx_SET (target,
5399 gen_rtx_fmt_ee (code, mode,
5400 source, sub)));
5402 return 2;
5406 if (code == XOR)
5407 break;
5409 /* Convert.
5410 x = y | constant (which is composed of set_sign_bit_copies leading 1s
5411 followed by 0s, e.g. 0xfff00000)
5412 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5414 This can be done in 2 instructions by using shifts with mov or mvn.
5415 e.g. for
5416 x = x | 0xfff00000;
5417 we generate:
5418 mvn r0, r0, asl #12
5419 mvn r0, r0, lsr #12 */
5420 if (set_sign_bit_copies > 8
5421 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5423 if (generate)
5425 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5426 rtx shift = GEN_INT (set_sign_bit_copies);
5428 emit_constant_insn
5429 (cond,
5430 gen_rtx_SET (sub,
5431 gen_rtx_NOT (mode,
5432 gen_rtx_ASHIFT (mode,
5433 source,
5434 shift))));
5435 emit_constant_insn
5436 (cond,
5437 gen_rtx_SET (target,
5438 gen_rtx_NOT (mode,
5439 gen_rtx_LSHIFTRT (mode, sub,
5440 shift))));
5442 return 2;
5445 /* Convert
5446 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5448 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5450 E.g. for r0 = r0 | 0xfff
5451 mvn r0, r0, lsr #12
5452 mvn r0, r0, asl #12
5455 if (set_zero_bit_copies > 8
5456 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5458 if (generate)
5460 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5461 rtx shift = GEN_INT (set_zero_bit_copies);
5463 emit_constant_insn
5464 (cond,
5465 gen_rtx_SET (sub,
5466 gen_rtx_NOT (mode,
5467 gen_rtx_LSHIFTRT (mode,
5468 source,
5469 shift))));
5470 emit_constant_insn
5471 (cond,
5472 gen_rtx_SET (target,
5473 gen_rtx_NOT (mode,
5474 gen_rtx_ASHIFT (mode, sub,
5475 shift))));
5477 return 2;
5480 /* This will never be reached for Thumb2 because orn is a valid
5481 instruction. This is for Thumb1 and the ARM 32 bit cases.
5483 x = y | constant (such that ~constant is a valid constant)
5484 Transform this to
5485 x = ~(~y & ~constant).
5487 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5489 if (generate)
5491 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5492 emit_constant_insn (cond,
5493 gen_rtx_SET (sub,
5494 gen_rtx_NOT (mode, source)));
5495 source = sub;
5496 if (subtargets)
5497 sub = gen_reg_rtx (mode);
5498 emit_constant_insn (cond,
5499 gen_rtx_SET (sub,
5500 gen_rtx_AND (mode, source,
5501 GEN_INT (temp1))));
5502 emit_constant_insn (cond,
5503 gen_rtx_SET (target,
5504 gen_rtx_NOT (mode, sub)));
5506 return 3;
5508 break;
5510 case AND:
5511 /* See if two shifts will do 2 or more insn's worth of work. */
5512 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5514 HOST_WIDE_INT shift_mask = ((0xffffffff
5515 << (32 - clear_sign_bit_copies))
5516 & 0xffffffff);
5518 if ((remainder | shift_mask) != 0xffffffff)
5520 HOST_WIDE_INT new_val
5521 = ARM_SIGN_EXTEND (remainder | shift_mask);
5523 if (generate)
5525 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5526 insns = arm_gen_constant (AND, SImode, cond, new_val,
5527 new_src, source, subtargets, 1);
5528 source = new_src;
5530 else
5532 rtx targ = subtargets ? NULL_RTX : target;
5533 insns = arm_gen_constant (AND, mode, cond, new_val,
5534 targ, source, subtargets, 0);
5538 if (generate)
5540 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5541 rtx shift = GEN_INT (clear_sign_bit_copies);
5543 emit_insn (gen_ashlsi3 (new_src, source, shift));
5544 emit_insn (gen_lshrsi3 (target, new_src, shift));
5547 return insns + 2;
5550 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5552 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5554 if ((remainder | shift_mask) != 0xffffffff)
5556 HOST_WIDE_INT new_val
5557 = ARM_SIGN_EXTEND (remainder | shift_mask);
5558 if (generate)
5560 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5562 insns = arm_gen_constant (AND, mode, cond, new_val,
5563 new_src, source, subtargets, 1);
5564 source = new_src;
5566 else
5568 rtx targ = subtargets ? NULL_RTX : target;
5570 insns = arm_gen_constant (AND, mode, cond, new_val,
5571 targ, source, subtargets, 0);
5575 if (generate)
5577 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5578 rtx shift = GEN_INT (clear_zero_bit_copies);
5580 emit_insn (gen_lshrsi3 (new_src, source, shift));
5581 emit_insn (gen_ashlsi3 (target, new_src, shift));
5584 return insns + 2;
5587 break;
5589 default:
5590 break;
5593 /* Calculate what the instruction sequences would be if we generated it
5594 normally, negated, or inverted. */
5595 if (code == AND)
5596 /* AND cannot be split into multiple insns, so invert and use BIC. */
5597 insns = 99;
5598 else
5599 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5601 if (can_negate)
5602 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5603 &neg_immediates);
5604 else
5605 neg_insns = 99;
5607 if (can_invert || final_invert)
5608 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5609 &inv_immediates);
5610 else
5611 inv_insns = 99;
5613 immediates = &pos_immediates;
5615 /* Is the negated immediate sequence more efficient? */
5616 if (neg_insns < insns && neg_insns <= inv_insns)
5618 insns = neg_insns;
5619 immediates = &neg_immediates;
5621 else
5622 can_negate = 0;
5624 /* Is the inverted immediate sequence more efficient?
5625 We must allow for an extra NOT instruction for XOR operations, although
5626 there is some chance that the final 'mvn' will get optimized later. */
5627 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5629 insns = inv_insns;
5630 immediates = &inv_immediates;
5632 else
5634 can_invert = 0;
5635 final_invert = 0;
5638 /* Now output the chosen sequence as instructions. */
5639 if (generate)
5641 for (i = 0; i < insns; i++)
5643 rtx new_src, temp1_rtx;
5645 temp1 = immediates->i[i];
5647 if (code == SET || code == MINUS)
5648 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5649 else if ((final_invert || i < (insns - 1)) && subtargets)
5650 new_src = gen_reg_rtx (mode);
5651 else
5652 new_src = target;
5654 if (can_invert)
5655 temp1 = ~temp1;
5656 else if (can_negate)
5657 temp1 = -temp1;
5659 temp1 = trunc_int_for_mode (temp1, mode);
5660 temp1_rtx = GEN_INT (temp1);
5662 if (code == SET)
5664 else if (code == MINUS)
5665 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5666 else
5667 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5669 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5670 source = new_src;
5672 if (code == SET)
5674 can_negate = can_invert;
5675 can_invert = 0;
5676 code = PLUS;
5678 else if (code == MINUS)
5679 code = PLUS;
5683 if (final_invert)
5685 if (generate)
5686 emit_constant_insn (cond, gen_rtx_SET (target,
5687 gen_rtx_NOT (mode, source)));
5688 insns++;
5691 return insns;
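/* Illustrative sketch, not part of the original source: the splitting
   above is driven by the classic ARM immediate encoding of an 8-bit
   value rotated right by an even amount.  A self-contained version of
   that test, with a worked split, might look like this (hypothetical
   helper, for exposition only).  */
#if 0
static bool
arm_rotated_imm8_p (unsigned int val)
{
  for (int rot = 0; rot < 32; rot += 2)
    /* Rotate VAL left by ROT and see whether the result fits in 8 bits.  */
    if (((val << rot) | (val >> ((32 - rot) & 31))) < 256)
      return true;
  return false;
}

/* E.g. 0x12345678 fails this test, as does its inverse, so (absent
   MOVW/MOVT) a SET is split into four valid immediates
   0x12000000 + 0x00340000 + 0x00005600 + 0x00000078, i.e. one MOV
   followed by three ORRs.  */
#endif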
5694 /* Return TRUE if op is a constant where both the low and top words are
5695 suitable for RSB/RSC instructions. This is never true for Thumb, since
5696 we do not have RSC in that case. */
5697 static bool
5698 arm_const_double_prefer_rsbs_rsc (rtx op)
5700 /* Thumb lacks RSC, so we never prefer that sequence. */
5701 if (TARGET_THUMB || !CONST_INT_P (op))
5702 return false;
5703 HOST_WIDE_INT hi, lo;
5704 lo = UINTVAL (op) & 0xffffffffULL;
5705 hi = UINTVAL (op) >> 32;
5706 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
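/* Worked example (illustrative, not from the original source): for the
   64-bit constant 0x0000000100000001 both the low and high words are 1,
   which is a valid ARM immediate, so this returns true and an RSB/RSC
   sequence can subtract the register operand from the constant without
   first loading the constant into registers.  */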
5709 /* Canonicalize a comparison so that we are more likely to recognize it.
5710 This can be done for a few constant compares, where we can make the
5711 immediate value easier to load. */
5713 static void
5714 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5715 bool op0_preserve_value)
5717 machine_mode mode;
5718 unsigned HOST_WIDE_INT i, maxval;
5720 mode = GET_MODE (*op0);
5721 if (mode == VOIDmode)
5722 mode = GET_MODE (*op1);
5724 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5726 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5727 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5728 either reversed or (for constant OP1) adjusted to GE/LT.
5729 Similarly for GTU/LEU in Thumb mode. */
5730 if (mode == DImode)
5733 if (*code == GT || *code == LE
5734 || *code == GTU || *code == LEU)
5736 /* Missing comparison. First try to use an available
5737 comparison. */
5738 if (CONST_INT_P (*op1))
5740 i = INTVAL (*op1);
5741 switch (*code)
5743 case GT:
5744 case LE:
5745 if (i != maxval)
5747 /* Try to convert to GE/LT, unless that would be more
5748 expensive. */
5749 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5750 && arm_const_double_prefer_rsbs_rsc (*op1))
5751 return;
5752 *op1 = GEN_INT (i + 1);
5753 *code = *code == GT ? GE : LT;
5755 else
5757 /* GT maxval is always false, LE maxval is always true.
5758 We can't fold that away here as we must make a
5759 comparison, but we can fold them to comparisons
5760 with the same result that can be handled:
5761 op0 GT maxval -> op0 LT minval
5762 op0 LE maxval -> op0 GE minval
5763 where minval = (-maxval - 1). */
5764 *op1 = GEN_INT (-maxval - 1);
5765 *code = *code == GT ? LT : GE;
5767 return;
5769 case GTU:
5770 case LEU:
5771 if (i != ~((unsigned HOST_WIDE_INT) 0))
5773 /* Try to convert to GEU/LTU, unless that would
5774 be more expensive. */
5775 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5776 && arm_const_double_prefer_rsbs_rsc (*op1))
5777 return;
5778 *op1 = GEN_INT (i + 1);
5779 *code = *code == GTU ? GEU : LTU;
5781 else
5783 /* GTU ~0 is always false, LEU ~0 is always true.
5784 We can't fold that away here as we must make a
5785 comparison, but we can fold them to comparisons
5786 with the same result that can be handled:
5787 op0 GTU ~0 -> op0 LTU 0
5788 op0 LEU ~0 -> op0 GEU 0. */
5789 *op1 = const0_rtx;
5790 *code = *code == GTU ? LTU : GEU;
5792 return;
5794 default:
5795 gcc_unreachable ();
5799 if (!op0_preserve_value)
5801 std::swap (*op0, *op1);
5802 *code = (int)swap_condition ((enum rtx_code)*code);
5805 return;
5808 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5809 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5810 to facilitate possible combining with a cmp into 'ands'. */
5811 if (mode == SImode
5812 && GET_CODE (*op0) == ZERO_EXTEND
5813 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5814 && GET_MODE (XEXP (*op0, 0)) == QImode
5815 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5816 && subreg_lowpart_p (XEXP (*op0, 0))
5817 && *op1 == const0_rtx)
5818 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5819 GEN_INT (255));
5821 /* Comparisons smaller than DImode. Only adjust comparisons against
5822 an out-of-range constant. */
5823 if (!CONST_INT_P (*op1)
5824 || const_ok_for_arm (INTVAL (*op1))
5825 || const_ok_for_arm (- INTVAL (*op1)))
5826 return;
5828 i = INTVAL (*op1);
5830 switch (*code)
5832 case EQ:
5833 case NE:
5834 return;
5836 case GT:
5837 case LE:
5838 if (i != maxval
5839 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5841 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5842 *code = *code == GT ? GE : LT;
5843 return;
5845 break;
5847 case GE:
5848 case LT:
5849 if (i != ~maxval
5850 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5852 *op1 = GEN_INT (i - 1);
5853 *code = *code == GE ? GT : LE;
5854 return;
5856 break;
5858 case GTU:
5859 case LEU:
5860 if (i != ~((unsigned HOST_WIDE_INT) 0)
5861 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5863 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5864 *code = *code == GTU ? GEU : LTU;
5865 return;
5867 break;
5869 case GEU:
5870 case LTU:
5871 if (i != 0
5872 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5874 *op1 = GEN_INT (i - 1);
5875 *code = *code == GEU ? GTU : LEU;
5876 return;
5878 break;
5880 default:
5881 gcc_unreachable ();
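/* Worked example (illustrative, not from the original source): 0x00ffffff
   is not a valid ARM immediate but 0x01000000 is, so a 32-bit comparison
   (x > 0x00ffffff) is canonicalized above to (x >= 0x01000000), which a
   single CMP can implement.  */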
5886 /* Define how to find the value returned by a function. */
5888 static rtx
5889 arm_function_value(const_tree type, const_tree func,
5890 bool outgoing ATTRIBUTE_UNUSED)
5892 machine_mode mode;
5893 int unsignedp ATTRIBUTE_UNUSED;
5894 rtx r ATTRIBUTE_UNUSED;
5896 mode = TYPE_MODE (type);
5898 if (TARGET_AAPCS_BASED)
5899 return aapcs_allocate_return_reg (mode, type, func);
5901 /* Promote integer types. */
5902 if (INTEGRAL_TYPE_P (type))
5903 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5905 /* Promotes small structs returned in a register to full-word size
5906 for big-endian AAPCS. */
5907 if (arm_return_in_msb (type))
5909 HOST_WIDE_INT size = int_size_in_bytes (type);
5910 if (size % UNITS_PER_WORD != 0)
5912 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5913 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5917 return arm_libcall_value_1 (mode);
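/* Worked example (illustrative, not from the original source): on a
   big-endian AAPCS target a 3-byte struct that is returned in a register
   has its size rounded up to 4 bytes here, so the return value is given
   SImode and occupies the most significant end of r0, matching the
   struct's in-memory layout.  */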
5920 /* libcall hashtable helpers. */
5922 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5924 static inline hashval_t hash (const rtx_def *);
5925 static inline bool equal (const rtx_def *, const rtx_def *);
5926 static inline void remove (rtx_def *);
5929 inline bool
5930 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5932 return rtx_equal_p (p1, p2);
5935 inline hashval_t
5936 libcall_hasher::hash (const rtx_def *p1)
5938 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5941 typedef hash_table<libcall_hasher> libcall_table_type;
5943 static void
5944 add_libcall (libcall_table_type *htab, rtx libcall)
5946 *htab->find_slot (libcall, INSERT) = libcall;
5949 static bool
5950 arm_libcall_uses_aapcs_base (const_rtx libcall)
5952 static bool init_done = false;
5953 static libcall_table_type *libcall_htab = NULL;
5955 if (!init_done)
5957 init_done = true;
5959 libcall_htab = new libcall_table_type (31);
5960 add_libcall (libcall_htab,
5961 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5962 add_libcall (libcall_htab,
5963 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5964 add_libcall (libcall_htab,
5965 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5966 add_libcall (libcall_htab,
5967 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5969 add_libcall (libcall_htab,
5970 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5971 add_libcall (libcall_htab,
5972 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5973 add_libcall (libcall_htab,
5974 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5975 add_libcall (libcall_htab,
5976 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5978 add_libcall (libcall_htab,
5979 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5980 add_libcall (libcall_htab,
5981 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5982 add_libcall (libcall_htab,
5983 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5984 add_libcall (libcall_htab,
5985 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5986 add_libcall (libcall_htab,
5987 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5988 add_libcall (libcall_htab,
5989 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5990 add_libcall (libcall_htab,
5991 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5992 add_libcall (libcall_htab,
5993 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5994 add_libcall (libcall_htab,
5995 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5996 add_libcall (libcall_htab,
5997 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5999 /* Values from double-precision helper functions are returned in core
6000 registers if the selected core only supports single-precision
6001 arithmetic, even if we are using the hard-float ABI. The same is
6002 true for single-precision helpers except in case of MVE, because in
6003 MVE we will be using the hard-float ABI on a CPU which doesn't support
6004 single-precision operations in hardware. In MVE the following check
6005 enables use of emulation for the single-precision arithmetic
6006 operations. */
6007 if (TARGET_HAVE_MVE)
6009 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
6010 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
6011 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
6012 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
6013 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
6014 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
6015 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
6016 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
6017 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
6018 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
6019 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
6021 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
6022 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
6023 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
6024 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
6025 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
6026 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
6027 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
6028 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
6029 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
6030 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
6031 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
6032 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
6033 SFmode));
6034 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
6035 DFmode));
6036 add_libcall (libcall_htab,
6037 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
6040 return libcall && libcall_htab->find (libcall) != NULL;
6043 static rtx
6044 arm_libcall_value_1 (machine_mode mode)
6046 if (TARGET_AAPCS_BASED)
6047 return aapcs_libcall_value (mode);
6048 else if (TARGET_IWMMXT_ABI
6049 && arm_vector_mode_supported_p (mode))
6050 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
6051 else
6052 return gen_rtx_REG (mode, ARG_REGISTER (1));
6055 /* Define how to find the value returned by a library function
6056 assuming the value has mode MODE. */
6058 static rtx
6059 arm_libcall_value (machine_mode mode, const_rtx libcall)
6061 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
6062 && GET_MODE_CLASS (mode) == MODE_FLOAT)
6064 /* The following libcalls return their result in integer registers,
6065 even though they return a floating point value. */
6066 if (arm_libcall_uses_aapcs_base (libcall))
6067 return gen_rtx_REG (mode, ARG_REGISTER(1));
6071 return arm_libcall_value_1 (mode);
6074 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6076 static bool
6077 arm_function_value_regno_p (const unsigned int regno)
6079 if (regno == ARG_REGISTER (1)
6080 || (TARGET_32BIT
6081 && TARGET_AAPCS_BASED
6082 && TARGET_HARD_FLOAT
6083 && regno == FIRST_VFP_REGNUM)
6084 || (TARGET_IWMMXT_ABI
6085 && regno == FIRST_IWMMXT_REGNUM))
6086 return true;
6088 return false;
6091 /* Determine the amount of memory needed to store the possible return
6092 registers of an untyped call. */
6094 arm_apply_result_size (void)
6096 int size = 16;
6098 if (TARGET_32BIT)
6100 if (TARGET_HARD_FLOAT_ABI)
6101 size += 32;
6102 if (TARGET_IWMMXT_ABI)
6103 size += 8;
6106 return size;
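/* Worked example (illustrative, not from the original source): on a
   32-bit target using a hard-float ABI the untyped-call result area is
   16 bytes for r0-r3 plus 32 bytes for the VFP result registers, i.e. 48
   bytes; an iWMMXt ABI adds a further 8 bytes.  */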
6109 /* Decide whether TYPE should be returned in memory (true)
6110 or in a register (false). FNTYPE is the type of the function making
6111 the call. */
6112 static bool
6113 arm_return_in_memory (const_tree type, const_tree fntype)
6115 HOST_WIDE_INT size;
6117 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6119 if (TARGET_AAPCS_BASED)
6121 /* Simple, non-aggregate types (i.e. not including vectors and
6122 complex) are always returned in a register (or registers).
6123 We don't care about which register here, so we can short-cut
6124 some of the detail. */
6125 if (!AGGREGATE_TYPE_P (type)
6126 && TREE_CODE (type) != VECTOR_TYPE
6127 && TREE_CODE (type) != COMPLEX_TYPE)
6128 return false;
6130 /* Any return value that is no larger than one word can be
6131 returned in r0. */
6132 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6133 return false;
6135 /* Check any available co-processors to see if they accept the
6136 type as a register candidate (VFP, for example, can return
6137 some aggregates in consecutive registers). These aren't
6138 available if the call is variadic. */
6139 if (aapcs_select_return_coproc (type, fntype) >= 0)
6140 return false;
6142 /* Vector values should be returned using ARM registers, not
6143 memory (unless they're over 16 bytes, which will break since
6144 we only have four call-clobbered registers to play with). */
6145 if (TREE_CODE (type) == VECTOR_TYPE)
6146 return (size < 0 || size > (4 * UNITS_PER_WORD));
6148 /* The rest go in memory. */
6149 return true;
6152 if (TREE_CODE (type) == VECTOR_TYPE)
6153 return (size < 0 || size > (4 * UNITS_PER_WORD));
6155 if (!AGGREGATE_TYPE_P (type) &&
6156 (TREE_CODE (type) != VECTOR_TYPE))
6157 /* All simple types are returned in registers. */
6158 return false;
6160 if (arm_abi != ARM_ABI_APCS)
6162 /* ATPCS and later return aggregate types in memory only if they are
6163 larger than a word (or are variable size). */
6164 return (size < 0 || size > UNITS_PER_WORD);
6167 /* For the arm-wince targets we choose to be compatible with Microsoft's
6168 ARM and Thumb compilers, which always return aggregates in memory. */
6169 #ifndef ARM_WINCE
6170 /* All structures/unions bigger than one word are returned in memory.
6171 Also catch the case where int_size_in_bytes returns -1. In this case
6172 the aggregate is either huge or of variable size, and in either case
6173 we will want to return it via memory and not in a register. */
6174 if (size < 0 || size > UNITS_PER_WORD)
6175 return true;
6177 if (TREE_CODE (type) == RECORD_TYPE)
6179 tree field;
6181 /* For a struct the APCS says that we only return in a register
6182 if the type is 'integer like' and every addressable element
6183 has an offset of zero. For practical purposes this means
6184 that the structure can have at most one non bit-field element
6185 and that this element must be the first one in the structure. */
6187 /* Find the first field, ignoring non FIELD_DECL things which will
6188 have been created by C++. */
6189 /* NOTE: This code is deprecated and has not been updated to handle
6190 DECL_FIELD_ABI_IGNORED. */
6191 for (field = TYPE_FIELDS (type);
6192 field && TREE_CODE (field) != FIELD_DECL;
6193 field = DECL_CHAIN (field))
6194 continue;
6196 if (field == NULL)
6197 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6199 /* Check that the first field is valid for returning in a register. */
6201 /* ... Floats are not allowed */
6202 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6203 return true;
6205 /* ... Aggregates that are not themselves valid for returning in
6206 a register are not allowed. */
6207 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6208 return true;
6210 /* Now check the remaining fields, if any. Only bitfields are allowed,
6211 since they are not addressable. */
6212 for (field = DECL_CHAIN (field);
6213 field;
6214 field = DECL_CHAIN (field))
6216 if (TREE_CODE (field) != FIELD_DECL)
6217 continue;
6219 if (!DECL_BIT_FIELD_TYPE (field))
6220 return true;
6223 return false;
6226 if (TREE_CODE (type) == UNION_TYPE)
6228 tree field;
6230 /* Unions can be returned in registers if every element is
6231 integral, or can be returned in an integer register. */
6232 for (field = TYPE_FIELDS (type);
6233 field;
6234 field = DECL_CHAIN (field))
6236 if (TREE_CODE (field) != FIELD_DECL)
6237 continue;
6239 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6240 return true;
6242 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6243 return true;
6246 return false;
6248 #endif /* not ARM_WINCE */
6250 /* Return all other types in memory. */
6251 return true;
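/* Illustrative examples (not from the original source) of the legacy
   APCS rules above: struct { int i; } is "integer like" and comes back
   in r0, while struct { float f; } (first field is a float) and
   struct { int a, b; } (larger than a word) are returned in memory.
   Under AAPCS both of the one-word structs pass the size check earlier
   in this function and are returned in registers.  */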
6254 const struct pcs_attribute_arg
6256 const char *arg;
6257 enum arm_pcs value;
6258 } pcs_attribute_args[] =
6260 {"aapcs", ARM_PCS_AAPCS},
6261 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6262 #if 0
6263 /* We could recognize these, but changes would be needed elsewhere
6264 * to implement them. */
6265 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6266 {"atpcs", ARM_PCS_ATPCS},
6267 {"apcs", ARM_PCS_APCS},
6268 #endif
6269 {NULL, ARM_PCS_UNKNOWN}
6272 static enum arm_pcs
6273 arm_pcs_from_attribute (tree attr)
6275 const struct pcs_attribute_arg *ptr;
6276 const char *arg;
6278 /* Get the value of the argument. */
6279 if (TREE_VALUE (attr) == NULL_TREE
6280 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6281 return ARM_PCS_UNKNOWN;
6283 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6285 /* Check it against the list of known arguments. */
6286 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6287 if (streq (arg, ptr->arg))
6288 return ptr->value;
6290 /* An unrecognized PCS name. */
6291 return ARM_PCS_UNKNOWN;
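/* Usage example (illustrative, not from the original source): the "pcs"
   type attribute supplies one of the strings recognized above, e.g.

     double f (double x) __attribute__ ((pcs ("aapcs")));

   forces F to use the base variant, passing and returning the double in
   core registers even when the target default is "aapcs-vfp".  */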
6294 /* Get the PCS variant to use for this call. TYPE is the function's type
6295 specification, DECL is the specific declaration. DECL may be null if
6296 the call could be indirect or if this is a library call. */
6297 static enum arm_pcs
6298 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6300 bool user_convention = false;
6301 enum arm_pcs user_pcs = arm_pcs_default;
6302 tree attr;
6304 gcc_assert (type);
6306 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6307 if (attr)
6309 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6310 user_convention = true;
6313 if (TARGET_AAPCS_BASED)
6315 /* Detect varargs functions. These always use the base rules
6316 (no argument is ever a candidate for a co-processor
6317 register). */
6318 bool base_rules = stdarg_p (type);
6320 if (user_convention)
6322 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6323 sorry ("non-AAPCS derived PCS variant");
6324 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6325 error ("variadic functions must use the base AAPCS variant");
6328 if (base_rules)
6329 return ARM_PCS_AAPCS;
6330 else if (user_convention)
6331 return user_pcs;
6332 #if 0
6333 /* Unfortunately, this is not safe and can lead to wrong code
6334 being generated (PR96882). Not all calls into the back-end
6335 pass the DECL, so it is unsafe to make any PCS-changing
6336 decisions based on it. In particular the RETURN_IN_MEMORY
6337 hook is only ever passed a TYPE. This needs revisiting to
6338 see if there are any partial improvements that can be
6339 re-enabled. */
6340 else if (decl && flag_unit_at_a_time)
6342 /* Local functions never leak outside this compilation unit,
6343 so we are free to use whatever conventions are
6344 appropriate. */
6345 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6346 cgraph_node *local_info_node
6347 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6348 if (local_info_node && local_info_node->local)
6349 return ARM_PCS_AAPCS_LOCAL;
6351 #endif
6353 else if (user_convention && user_pcs != arm_pcs_default)
6354 sorry ("PCS variant");
6356 /* For everything else we use the target's default. */
6357 return arm_pcs_default;
6361 static void
6362 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6363 const_tree fntype ATTRIBUTE_UNUSED,
6364 rtx libcall ATTRIBUTE_UNUSED,
6365 const_tree fndecl ATTRIBUTE_UNUSED)
6367 /* Record the unallocated VFP registers. */
6368 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6369 pcum->aapcs_vfp_reg_alloc = 0;
6372 /* Bitmasks that indicate whether earlier versions of GCC would have
6373 taken a different path through the ABI logic. This should result in
6374 a -Wpsabi warning if the earlier path led to a different ABI decision.
6376 WARN_PSABI_EMPTY_CXX17_BASE
6377 Indicates that the type includes an artificial empty C++17 base field
6378 that, prior to GCC 10.1, would prevent the type from being treated as
6379 a HFA or HVA. See PR94711 for details.
6381 WARN_PSABI_NO_UNIQUE_ADDRESS
6382 Indicates that the type includes an empty [[no_unique_address]] field
6383 that, prior to GCC 10.1, would prevent the type from being treated as
6384 a HFA or HVA. */
6385 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6386 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6387 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6389 /* Walk down the type tree of TYPE counting consecutive base elements.
6390 If *MODEP is VOIDmode, then set it to the first valid floating point
6391 type. If a non-floating point type is found, or if a floating point
6392 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6393 otherwise return the count in the sub-tree.
6395 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6396 function has changed its behavior relative to earlier versions of GCC.
6397 Normally the argument should be nonnull and point to a zero-initialized
6398 variable. The function then records whether the ABI decision might
6399 be affected by a known fix to the ABI logic, setting the associated
6400 WARN_PSABI_* bits if so.
6402 When the argument is instead a null pointer, the function tries to
6403 simulate the behavior of GCC before all such ABI fixes were made.
6404 This is useful to check whether the function returns something
6405 different after the ABI fixes. */
6406 static int
6407 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6408 unsigned int *warn_psabi_flags)
6410 machine_mode mode;
6411 HOST_WIDE_INT size;
6413 switch (TREE_CODE (type))
6415 case REAL_TYPE:
6416 mode = TYPE_MODE (type);
6417 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6418 return -1;
6420 if (*modep == VOIDmode)
6421 *modep = mode;
6423 if (*modep == mode)
6424 return 1;
6426 break;
6428 case COMPLEX_TYPE:
6429 mode = TYPE_MODE (TREE_TYPE (type));
6430 if (mode != DFmode && mode != SFmode)
6431 return -1;
6433 if (*modep == VOIDmode)
6434 *modep = mode;
6436 if (*modep == mode)
6437 return 2;
6439 break;
6441 case VECTOR_TYPE:
6442 /* Use V2SImode and V4SImode as representatives of all 64-bit
6443 and 128-bit vector types, whether or not those modes are
6444 supported with the present options. */
6445 size = int_size_in_bytes (type);
6446 switch (size)
6448 case 8:
6449 mode = V2SImode;
6450 break;
6451 case 16:
6452 mode = V4SImode;
6453 break;
6454 default:
6455 return -1;
6458 if (*modep == VOIDmode)
6459 *modep = mode;
6461 /* Vector modes are considered to be opaque: two vectors are
6462 equivalent for the purposes of being homogeneous aggregates
6463 if they are the same size. */
6464 if (*modep == mode)
6465 return 1;
6467 break;
6469 case ARRAY_TYPE:
6471 int count;
6472 tree index = TYPE_DOMAIN (type);
6474 /* Can't handle incomplete types nor sizes that are not
6475 fixed. */
6476 if (!COMPLETE_TYPE_P (type)
6477 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6478 return -1;
6480 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6481 warn_psabi_flags);
6482 if (count == -1
6483 || !index
6484 || !TYPE_MAX_VALUE (index)
6485 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6486 || !TYPE_MIN_VALUE (index)
6487 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6488 || count < 0)
6489 return -1;
6491 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6492 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6494 /* There must be no padding. */
6495 if (wi::to_wide (TYPE_SIZE (type))
6496 != count * GET_MODE_BITSIZE (*modep))
6497 return -1;
6499 return count;
6502 case RECORD_TYPE:
6504 int count = 0;
6505 int sub_count;
6506 tree field;
6508 /* Can't handle incomplete types nor sizes that are not
6509 fixed. */
6510 if (!COMPLETE_TYPE_P (type)
6511 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6512 return -1;
6514 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6516 if (TREE_CODE (field) != FIELD_DECL)
6517 continue;
6519 if (DECL_FIELD_ABI_IGNORED (field))
6521 /* See whether this is something that earlier versions of
6522 GCC failed to ignore. */
6523 unsigned int flag;
6524 if (lookup_attribute ("no_unique_address",
6525 DECL_ATTRIBUTES (field)))
6526 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6527 else if (cxx17_empty_base_field_p (field))
6528 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6529 else
6530 /* No compatibility problem. */
6531 continue;
6533 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6534 if (warn_psabi_flags)
6536 *warn_psabi_flags |= flag;
6537 continue;
6540 /* A zero-width bitfield may affect layout in some
6541 circumstances, but adds no members. The determination
6542 of whether or not a type is an HFA is performed after
6543 layout is complete, so if the type still looks like an
6544 HFA afterwards, it is still classed as one. This is
6545 potentially an ABI break for the hard-float ABI. */
6546 else if (DECL_BIT_FIELD (field)
6547 && integer_zerop (DECL_SIZE (field)))
6549 /* Prior to GCC-12 these fields were stripped early,
6550 hiding them from the back-end entirely and
6551 resulting in the correct behaviour for argument
6552 passing. Simulate that old behaviour without
6553 generating a warning. */
6554 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6555 continue;
6556 if (warn_psabi_flags)
6558 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6559 continue;
6563 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6564 warn_psabi_flags);
6565 if (sub_count < 0)
6566 return -1;
6567 count += sub_count;
6570 /* There must be no padding. */
6571 if (wi::to_wide (TYPE_SIZE (type))
6572 != count * GET_MODE_BITSIZE (*modep))
6573 return -1;
6575 return count;
6578 case UNION_TYPE:
6579 case QUAL_UNION_TYPE:
6581 /* These aren't very interesting except in a degenerate case. */
6582 int count = 0;
6583 int sub_count;
6584 tree field;
6586 /* Can't handle incomplete types nor sizes that are not
6587 fixed. */
6588 if (!COMPLETE_TYPE_P (type)
6589 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6590 return -1;
6592 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6594 if (TREE_CODE (field) != FIELD_DECL)
6595 continue;
6597 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6598 warn_psabi_flags);
6599 if (sub_count < 0)
6600 return -1;
6601 count = count > sub_count ? count : sub_count;
6604 /* There must be no padding. */
6605 if (wi::to_wide (TYPE_SIZE (type))
6606 != count * GET_MODE_BITSIZE (*modep))
6607 return -1;
6609 return count;
6612 default:
6613 break;
6616 return -1;
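/* Worked example (illustrative, not from the original source): for
   struct { float x, y, z; } the walk above finds three SFmode REAL_TYPE
   fields and no padding, so it returns 3 with *MODEP == SFmode and the
   struct is a homogeneous aggregate eligible for s0-s2.  A struct that
   mixes float and double members returns -1 and falls back to the core
   register/stack rules.  */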
6619 /* Return true if PCS_VARIANT should use VFP registers. */
6620 static bool
6621 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6623 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6625 static bool seen_thumb1_vfp = false;
6627 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6629 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6630 /* sorry() is not immediately fatal, so only display this once. */
6631 seen_thumb1_vfp = true;
6634 return true;
6637 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6638 return false;
6640 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6641 (TARGET_VFP_DOUBLE || !is_double));
6644 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6645 suitable for passing or returning in VFP registers for the PCS
6646 variant selected. If it is, then *BASE_MODE is updated to contain
6647 a machine mode describing each element of the argument's type and
6648 *COUNT to hold the number of such elements. */
6649 static bool
6650 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6651 machine_mode mode, const_tree type,
6652 machine_mode *base_mode, int *count)
6654 machine_mode new_mode = VOIDmode;
6656 /* If we have the type information, prefer that to working things
6657 out from the mode. */
6658 if (type)
6660 unsigned int warn_psabi_flags = 0;
6661 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6662 &warn_psabi_flags);
6663 if (ag_count > 0 && ag_count <= 4)
6665 static unsigned last_reported_type_uid;
6666 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6667 int alt;
6668 if (warn_psabi
6669 && warn_psabi_flags
6670 && uid != last_reported_type_uid
6671 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6672 != ag_count))
6674 const char *url10
6675 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6676 const char *url12
6677 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6678 gcc_assert (alt == -1);
6679 last_reported_type_uid = uid;
6680 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6681 qualification. */
6682 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6683 inform (input_location, "parameter passing for argument of "
6684 "type %qT with %<[[no_unique_address]]%> members "
6685 "changed %{in GCC 10.1%}",
6686 TYPE_MAIN_VARIANT (type), url10);
6687 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6688 inform (input_location, "parameter passing for argument of "
6689 "type %qT when C++17 is enabled changed to match "
6690 "C++14 %{in GCC 10.1%}",
6691 TYPE_MAIN_VARIANT (type), url10);
6692 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6693 inform (input_location, "parameter passing for argument of "
6694 "type %qT changed %{in GCC 12.1%}",
6695 TYPE_MAIN_VARIANT (type), url12);
6697 *count = ag_count;
6699 else
6700 return false;
6702 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6703 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6704 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6706 *count = 1;
6707 new_mode = mode;
6709 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6711 *count = 2;
6712 new_mode = (mode == DCmode ? DFmode : SFmode);
6714 else
6715 return false;
6718 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6719 return false;
6721 *base_mode = new_mode;
6723 if (TARGET_GENERAL_REGS_ONLY)
6724 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6725 type);
6727 return true;
6730 static bool
6731 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6732 machine_mode mode, const_tree type)
6734 int count ATTRIBUTE_UNUSED;
6735 machine_mode ag_mode ATTRIBUTE_UNUSED;
6737 if (!use_vfp_abi (pcs_variant, false))
6738 return false;
6739 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6740 &ag_mode, &count);
6743 static bool
6744 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6745 const_tree type)
6747 if (!use_vfp_abi (pcum->pcs_variant, false))
6748 return false;
6750 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6751 &pcum->aapcs_vfp_rmode,
6752 &pcum->aapcs_vfp_rcount);
6755 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6756 for the behaviour of this function. */
6758 static bool
6759 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6760 const_tree type ATTRIBUTE_UNUSED)
6762 int rmode_size
6763 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6764 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6765 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6766 int regno;
6768 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6769 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6771 pcum->aapcs_vfp_reg_alloc = mask << regno;
6772 if (mode == BLKmode
6773 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6774 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6776 int i;
6777 int rcount = pcum->aapcs_vfp_rcount;
6778 int rshift = shift;
6779 machine_mode rmode = pcum->aapcs_vfp_rmode;
6780 rtx par;
6781 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6783 /* Avoid using unsupported vector modes. */
6784 if (rmode == V2SImode)
6785 rmode = DImode;
6786 else if (rmode == V4SImode)
6788 rmode = DImode;
6789 rcount *= 2;
6790 rshift /= 2;
6793 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6794 for (i = 0; i < rcount; i++)
6796 rtx tmp = gen_rtx_REG (rmode,
6797 FIRST_VFP_REGNUM + regno + i * rshift);
6798 tmp = gen_rtx_EXPR_LIST
6799 (VOIDmode, tmp,
6800 GEN_INT (i * GET_MODE_SIZE (rmode)));
6801 XVECEXP (par, 0, i) = tmp;
6804 pcum->aapcs_reg = par;
6806 else
6807 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6808 return true;
6810 return false;
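/* Worked example (illustrative, not from the original source): for a
   homogeneous aggregate of two doubles, aapcs_vfp_rmode is DFmode and
   aapcs_vfp_rcount is 2, so SHIFT is 2 and MASK is 0xf; the loop above
   then looks for four consecutive free single-precision registers
   starting on a double-register boundary, i.e. the argument lands in
   d0-d1 (s0-s3) when those registers are free.  */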
6813 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6814 comment there for the behaviour of this function. */
6816 static rtx
6817 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6818 machine_mode mode,
6819 const_tree type ATTRIBUTE_UNUSED)
6821 if (!use_vfp_abi (pcs_variant, false))
6822 return NULL;
6824 if (mode == BLKmode
6825 || (GET_MODE_CLASS (mode) == MODE_INT
6826 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6827 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6829 int count;
6830 machine_mode ag_mode;
6831 int i;
6832 rtx par;
6833 int shift;
6835 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6836 &ag_mode, &count);
6838 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6840 if (ag_mode == V2SImode)
6841 ag_mode = DImode;
6842 else if (ag_mode == V4SImode)
6844 ag_mode = DImode;
6845 count *= 2;
6848 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6849 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6850 for (i = 0; i < count; i++)
6852 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6853 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6854 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6855 XVECEXP (par, 0, i) = tmp;
6858 return par;
6861 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6864 static void
6865 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6866 machine_mode mode ATTRIBUTE_UNUSED,
6867 const_tree type ATTRIBUTE_UNUSED)
6869 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6870 pcum->aapcs_vfp_reg_alloc = 0;
6871 return;
6874 #define AAPCS_CP(X) \
6876 aapcs_ ## X ## _cum_init, \
6877 aapcs_ ## X ## _is_call_candidate, \
6878 aapcs_ ## X ## _allocate, \
6879 aapcs_ ## X ## _is_return_candidate, \
6880 aapcs_ ## X ## _allocate_return_reg, \
6881 aapcs_ ## X ## _advance \
6884 /* Table of co-processors that can be used to pass arguments in
6885 registers. Ideally no argument should be a candidate for more than
6886 one co-processor table entry, but the table is processed in order
6887 and stops after the first match. If that entry then fails to put
6888 the argument into a co-processor register, the argument will go on
6889 the stack. */
6890 static struct
6892 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6893 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6895 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6896 BLKmode) is a candidate for this co-processor's registers; this
6897 function should ignore any position-dependent state in
6898 CUMULATIVE_ARGS and only use call-type dependent information. */
6899 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6901 /* Return true if the argument does get a co-processor register; it
6902 should set aapcs_reg to an RTX of the register allocated as is
6903 required for a return from FUNCTION_ARG. */
6904 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6906 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6907 be returned in this co-processor's registers. */
6908 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6910 /* Allocate and return an RTX element to hold the return type of a call. This
6911 routine must not fail and will only be called if is_return_candidate
6912 returned true with the same parameters. */
6913 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6915 /* Finish processing this argument and prepare to start processing
6916 the next one. */
6917 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6918 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6920 AAPCS_CP(vfp)
6923 #undef AAPCS_CP
6925 static int
6926 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6927 const_tree type)
6929 int i;
6931 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6932 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6933 return i;
6935 return -1;
6938 static int
6939 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6941 /* We aren't passed a decl, so we can't check that a call is local.
6942 However, it isn't clear that that would be a win anyway, since it
6943 might limit some tail-calling opportunities. */
6944 enum arm_pcs pcs_variant;
6946 if (fntype)
6948 const_tree fndecl = NULL_TREE;
6950 if (TREE_CODE (fntype) == FUNCTION_DECL)
6952 fndecl = fntype;
6953 fntype = TREE_TYPE (fntype);
6956 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6958 else
6959 pcs_variant = arm_pcs_default;
6961 if (pcs_variant != ARM_PCS_AAPCS)
6963 int i;
6965 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6966 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6967 TYPE_MODE (type),
6968 type))
6969 return i;
6971 return -1;
6974 static rtx
6975 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6976 const_tree fntype)
6978 /* We aren't passed a decl, so we can't check that a call is local.
6979 However, it isn't clear that that would be a win anyway, since it
6980 might limit some tail-calling opportunities. */
6981 enum arm_pcs pcs_variant;
6982 int unsignedp ATTRIBUTE_UNUSED;
6984 if (fntype)
6986 const_tree fndecl = NULL_TREE;
6988 if (TREE_CODE (fntype) == FUNCTION_DECL)
6990 fndecl = fntype;
6991 fntype = TREE_TYPE (fntype);
6994 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6996 else
6997 pcs_variant = arm_pcs_default;
6999 /* Promote integer types. */
7000 if (type && INTEGRAL_TYPE_P (type))
7001 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
7003 if (pcs_variant != ARM_PCS_AAPCS)
7005 int i;
7007 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7008 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
7009 type))
7010 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
7011 mode, type);
7014 /* Promotes small structs returned in a register to full-word size
7015 for big-endian AAPCS. */
7016 if (type && arm_return_in_msb (type))
7018 HOST_WIDE_INT size = int_size_in_bytes (type);
7019 if (size % UNITS_PER_WORD != 0)
7021 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
7022 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
7026 return gen_rtx_REG (mode, R0_REGNUM);
7029 static rtx
7030 aapcs_libcall_value (machine_mode mode)
7032 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
7033 && GET_MODE_SIZE (mode) <= 4)
7034 mode = SImode;
7036 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
7039 /* Lay out a function argument using the AAPCS rules. The rule
7040 numbers referred to here are those in the AAPCS. */
7041 static void
7042 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
7043 const_tree type, bool named)
7045 int nregs, nregs2;
7046 int ncrn;
7048 /* We only need to do this once per argument. */
7049 if (pcum->aapcs_arg_processed)
7050 return;
7052 pcum->aapcs_arg_processed = true;
7054 /* Special case: if named is false then we are handling an incoming
7055 anonymous argument which is on the stack. */
7056 if (!named)
7057 return;
7059 /* Is this a potential co-processor register candidate? */
7060 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7062 int slot = aapcs_select_call_coproc (pcum, mode, type);
7063 pcum->aapcs_cprc_slot = slot;
7065 /* We don't have to apply any of the rules from part B of the
7066 preparation phase, these are handled elsewhere in the
7067 compiler. */
7069 if (slot >= 0)
7071 /* A Co-processor register candidate goes either in its own
7072 class of registers or on the stack. */
7073 if (!pcum->aapcs_cprc_failed[slot])
7075 /* C1.cp - Try to allocate the argument to co-processor
7076 registers. */
7077 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7078 return;
7080 /* C2.cp - Put the argument on the stack and note that we
7081 can't assign any more candidates in this slot. We also
7082 need to note that we have allocated stack space, so that
7083 we won't later try to split a non-cprc candidate between
7084 core registers and the stack. */
7085 pcum->aapcs_cprc_failed[slot] = true;
7086 pcum->can_split = false;
7089 /* We didn't get a register, so this argument goes on the
7090 stack. */
7091 gcc_assert (pcum->can_split == false);
7092 return;
7096 /* C3 - For double-word aligned arguments, round the NCRN up to the
7097 next even number. */
7098 ncrn = pcum->aapcs_ncrn;
7099 if (ncrn & 1)
7101 int res = arm_needs_doubleword_align (mode, type);
7102 /* Only warn during RTL expansion of call stmts, otherwise we would
7103 warn e.g. during gimplification even on functions that will be
7104 always inlined, and we'd warn multiple times. Don't warn when
7105 called in expand_function_start either, as we warn instead in
7106 arm_function_arg_boundary in that case. */
7107 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7108 inform (input_location, "parameter passing for argument of type "
7109 "%qT changed in GCC 7.1", type);
7110 else if (res > 0)
7111 ncrn++;
7114 nregs = ARM_NUM_REGS2(mode, type);
7116 /* Sigh, this test should really assert that nregs > 0, but a GCC
7117 extension allows empty structs and then gives them empty size; it
7118 then allows such a structure to be passed by value. For some of
7119 the code below we have to pretend that such an argument has
7120 non-zero size so that we 'locate' it correctly either in
7121 registers or on the stack. */
7122 gcc_assert (nregs >= 0);
7124 nregs2 = nregs ? nregs : 1;
7126 /* C4 - Argument fits entirely in core registers. */
7127 if (ncrn + nregs2 <= NUM_ARG_REGS)
7129 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7130 pcum->aapcs_next_ncrn = ncrn + nregs;
7131 return;
7134 /* C5 - Some core registers left and there are no arguments already
7135 on the stack: split this argument between the remaining core
7136 registers and the stack. */
7137 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7139 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7140 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7141 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7142 return;
7145 /* C6 - NCRN is set to 4. */
7146 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7148 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
7149 return;
7152 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7153 for a call to a function whose data type is FNTYPE.
7154 For a library call, FNTYPE is NULL. */
7155 void
7156 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7157 rtx libname,
7158 tree fndecl ATTRIBUTE_UNUSED)
7160 /* Long call handling. */
7161 if (fntype)
7162 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7163 else
7164 pcum->pcs_variant = arm_pcs_default;
7166 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7168 if (arm_libcall_uses_aapcs_base (libname))
7169 pcum->pcs_variant = ARM_PCS_AAPCS;
7171 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7172 pcum->aapcs_reg = NULL_RTX;
7173 pcum->aapcs_partial = 0;
7174 pcum->aapcs_arg_processed = false;
7175 pcum->aapcs_cprc_slot = -1;
7176 pcum->can_split = true;
7178 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7180 int i;
7182 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7184 pcum->aapcs_cprc_failed[i] = false;
7185 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7188 return;
7191 /* Legacy ABIs */
7193 /* On the ARM, the offset starts at 0. */
7194 pcum->nregs = 0;
7195 pcum->iwmmxt_nregs = 0;
7196 pcum->can_split = true;
7198 /* Varargs vectors are treated the same as long long.
7199 named_count avoids having to change the way arm handles 'named' */
7200 pcum->named_count = 0;
7201 pcum->nargs = 0;
7203 if (TARGET_REALLY_IWMMXT && fntype)
7205 tree fn_arg;
7207 for (fn_arg = TYPE_ARG_TYPES (fntype);
7208 fn_arg;
7209 fn_arg = TREE_CHAIN (fn_arg))
7210 pcum->named_count += 1;
7212 if (! pcum->named_count)
7213 pcum->named_count = INT_MAX;
7217 /* Return 2 if double word alignment is required for argument passing,
7218 but wasn't required before the fix for PR88469.
7219 Return 1 if double word alignment is required for argument passing.
7220 Return -1 if double word alignment used to be required for argument
7221 passing before PR77728 ABI fix, but is not required anymore.
7222 Return 0 if double word alignment is not required and wasn't required
7223 before either. */
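/* For example, on an AAPCS target a hypothetical

       struct s { long long x; };

   has 8-byte alignment and so needs doubleword alignment when passed
   (return 1), whereas a plain int argument does not (return 0).  */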
7224 static int
7225 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7227 if (!type)
7228 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7230 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7231 if (!AGGREGATE_TYPE_P (type))
7232 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7234 /* Array types: Use member alignment of element type. */
7235 if (TREE_CODE (type) == ARRAY_TYPE)
7236 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7238 int ret = 0;
7239 int ret2 = 0;
7240 /* Record/aggregate types: Use greatest member alignment of any member.
7242 Note that we explicitly consider zero-sized fields here, even though
7243 they don't map to AAPCS machine types. For example, in:
7245 struct __attribute__((aligned(8))) empty {};
7247 struct s {
7248 [[no_unique_address]] empty e;
7249 int x;
7252 "s" contains only one Fundamental Data Type (the int field)
7253 but gains 8-byte alignment and size thanks to "e". */
7254 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7255 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7257 if (TREE_CODE (field) == FIELD_DECL)
7258 return 1;
7259 else
7260 /* Before PR77728 fix, we were incorrectly considering also
7261 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7262 Make sure we can warn about that with -Wpsabi. */
7263 ret = -1;
7265 else if (TREE_CODE (field) == FIELD_DECL
7266 && DECL_BIT_FIELD_TYPE (field)
7267 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7268 ret2 = 1;
7270 if (ret2)
7271 return 2;
7273 return ret;
7277 /* Determine where to put an argument to a function.
7278 Value is zero to push the argument on the stack,
7279 or a hard register in which to store the argument.
7281 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7282 the preceding args and about the function being called.
7283 ARG is a description of the argument.
7285 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7286 other arguments are passed on the stack. If (NAMED == 0) (which happens
7287 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7288 defined), say it is passed on the stack (function_prologue will
7289 indeed make it be passed on the stack if necessary). */
7291 static rtx
7292 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7294 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7295 int nregs;
7297 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7298 a call insn (op3 of a call_value insn). */
7299 if (arg.end_marker_p ())
7300 return const0_rtx;
7302 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7304 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7305 return pcum->aapcs_reg;
7308 /* Varargs vectors are treated the same as long long.
7309 named_count avoids having to change the way arm handles 'named' */
7310 if (TARGET_IWMMXT_ABI
7311 && arm_vector_mode_supported_p (arg.mode)
7312 && pcum->named_count > pcum->nargs + 1)
7314 if (pcum->iwmmxt_nregs <= 9)
7315 return gen_rtx_REG (arg.mode,
7316 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7317 else
7319 pcum->can_split = false;
7320 return NULL_RTX;
7324 /* Put doubleword aligned quantities in even register pairs. */
7325 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7327 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7328 if (res < 0 && warn_psabi)
7329 inform (input_location, "parameter passing for argument of type "
7330 "%qT changed in GCC 7.1", arg.type);
7331 else if (res > 0)
7333 pcum->nregs++;
7334 if (res > 1 && warn_psabi)
7335 inform (input_location, "parameter passing for argument of type "
7336 "%qT changed in GCC 9.1", arg.type);
7340 /* Only allow splitting an arg between regs and memory if all preceding
7341 args were allocated to regs. For args passed by reference we only count
7342 the reference pointer. */
7343 if (pcum->can_split)
7344 nregs = 1;
7345 else
7346 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7348 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7349 return NULL_RTX;
7351 return gen_rtx_REG (arg.mode, pcum->nregs);
7354 static unsigned int
7355 arm_function_arg_boundary (machine_mode mode, const_tree type)
7357 if (!ARM_DOUBLEWORD_ALIGN)
7358 return PARM_BOUNDARY;
7360 int res = arm_needs_doubleword_align (mode, type);
7361 if (res < 0 && warn_psabi)
7362 inform (input_location, "parameter passing for argument of type %qT "
7363 "changed in GCC 7.1", type);
7364 if (res > 1 && warn_psabi)
7365 inform (input_location, "parameter passing for argument of type "
7366 "%qT changed in GCC 9.1", type);
7368 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
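/* Return the number of bytes of ARG that are passed in registers when the
   argument is split between core registers and the stack (AAPCS rule C5).
   As a sketch, for a hypothetical

       struct di { int x; int y; };
       void f (int a, int b, int c, struct di s);

   only r3 is free when S is laid out, so 4 bytes of S are passed in r3 and
   the remaining 4 bytes go on the stack.  */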
7371 static int
7372 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7374 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7375 int nregs = pcum->nregs;
7377 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7379 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7380 return pcum->aapcs_partial;
7383 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7384 return 0;
7386 if (NUM_ARG_REGS > nregs
7387 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7388 && pcum->can_split)
7389 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7391 return 0;
7394 /* Update the data in PCUM to advance over argument ARG. */
7396 static void
7397 arm_function_arg_advance (cumulative_args_t pcum_v,
7398 const function_arg_info &arg)
7400 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7402 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7404 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7406 if (pcum->aapcs_cprc_slot >= 0)
7408 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7409 arg.type);
7410 pcum->aapcs_cprc_slot = -1;
7413 /* Generic stuff. */
7414 pcum->aapcs_arg_processed = false;
7415 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7416 pcum->aapcs_reg = NULL_RTX;
7417 pcum->aapcs_partial = 0;
7419 else
7421 pcum->nargs += 1;
7422 if (arm_vector_mode_supported_p (arg.mode)
7423 && pcum->named_count > pcum->nargs
7424 && TARGET_IWMMXT_ABI)
7425 pcum->iwmmxt_nregs += 1;
7426 else
7427 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7431 /* Variable sized types are passed by reference. This is a GCC
7432 extension to the ARM ABI. */
7434 static bool
7435 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7437 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7440 /* Encode the current state of the #pragma [no_]long_calls. */
7441 typedef enum
7443 OFF, /* No #pragma [no_]long_calls is in effect. */
7444 LONG, /* #pragma long_calls is in effect. */
7445 SHORT /* #pragma no_long_calls is in effect. */
7446 } arm_pragma_enum;
7448 static arm_pragma_enum arm_pragma_long_calls = OFF;
7450 void
7451 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7453 arm_pragma_long_calls = LONG;
7456 void
7457 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7459 arm_pragma_long_calls = SHORT;
7462 void
7463 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7465 arm_pragma_long_calls = OFF;
7468 /* Handle an attribute requiring a FUNCTION_DECL;
7469 arguments as in struct attribute_spec.handler. */
7470 static tree
7471 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7472 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7474 if (TREE_CODE (*node) != FUNCTION_DECL)
7476 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7477 name);
7478 *no_add_attrs = true;
7481 return NULL_TREE;
7484 /* Handle an "interrupt" or "isr" attribute;
7485 arguments as in struct attribute_spec.handler. */
7486 static tree
7487 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7488 bool *no_add_attrs)
7490 if (DECL_P (*node))
7492 if (TREE_CODE (*node) != FUNCTION_DECL)
7494 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7495 name);
7496 *no_add_attrs = true;
7498 else if (TARGET_VFP_BASE)
7500 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7501 name);
7503 /* FIXME: the argument if any is checked for type attributes;
7504 should it be checked for decl ones? */
7506 else
7508 if (FUNC_OR_METHOD_TYPE_P (*node))
7510 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7512 warning (OPT_Wattributes, "%qE attribute ignored",
7513 name);
7514 *no_add_attrs = true;
7517 else if (TREE_CODE (*node) == POINTER_TYPE
7518 && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node))
7519 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7521 *node = build_variant_type_copy (*node);
7522 TREE_TYPE (*node) = build_type_attribute_variant
7523 (TREE_TYPE (*node),
7524 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7525 *no_add_attrs = true;
7527 else
7529 /* Possibly pass this attribute on from the type to a decl. */
7530 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7531 | (int) ATTR_FLAG_FUNCTION_NEXT
7532 | (int) ATTR_FLAG_ARRAY_NEXT))
7534 *no_add_attrs = true;
7535 return tree_cons (name, args, NULL_TREE);
7537 else
7539 warning (OPT_Wattributes, "%qE attribute ignored",
7540 name);
7545 return NULL_TREE;
7548 /* Handle a "pcs" attribute; arguments as in struct
7549 attribute_spec.handler. */
7550 static tree
7551 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7552 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7554 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7556 warning (OPT_Wattributes, "%qE attribute ignored", name);
7557 *no_add_attrs = true;
7559 return NULL_TREE;
7562 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7563 /* Handle the "notshared" attribute. This attribute is another way of
7564 requesting hidden visibility. ARM's compiler supports
7565 "__declspec(notshared)"; we support the same thing via an
7566 attribute. */
7568 static tree
7569 arm_handle_notshared_attribute (tree *node,
7570 tree name ATTRIBUTE_UNUSED,
7571 tree args ATTRIBUTE_UNUSED,
7572 int flags ATTRIBUTE_UNUSED,
7573 bool *no_add_attrs)
7575 tree decl = TYPE_NAME (*node);
7577 if (decl)
7579 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7580 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7581 *no_add_attrs = false;
7583 return NULL_TREE;
7585 #endif
7587 /* This function returns true if a function with declaration FNDECL and type
7588 FNTYPE uses the stack to pass arguments or to return a value, and false
7589 otherwise. This is used for functions with the attributes
7590 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7591 diagnostic messages if the stack is used. NAME is the name of the attribute
7592 used. */
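/* For example, a hypothetical

       int __attribute__ ((cmse_nonsecure_entry)) f (int, int, int, int, int);

   is rejected, because its fifth argument would be passed on the stack.  */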
7594 static bool
7595 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7597 function_args_iterator args_iter;
7598 CUMULATIVE_ARGS args_so_far_v;
7599 cumulative_args_t args_so_far;
7600 bool first_param = true;
7601 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7603 /* Error out if any argument is passed on the stack. */
7604 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7605 args_so_far = pack_cumulative_args (&args_so_far_v);
7606 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7608 rtx arg_rtx;
7610 prev_arg_type = arg_type;
7611 if (VOID_TYPE_P (arg_type))
7612 continue;
7614 function_arg_info arg (arg_type, /*named=*/true);
7615 if (!first_param)
7616 /* ??? We should advance after processing the argument and pass
7617 the argument we're advancing past. */
7618 arm_function_arg_advance (args_so_far, arg);
7619 arg_rtx = arm_function_arg (args_so_far, arg);
7620 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7622 error ("%qE attribute not available to functions with arguments "
7623 "passed on the stack", name);
7624 return true;
7626 first_param = false;
7629 /* Error out for variadic functions since we cannot control how many
7630 arguments will be passed and thus the stack could be used. stdarg_p () is not
7631 used for the checking to avoid browsing arguments twice. */
7632 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7634 error ("%qE attribute not available to functions with variable number "
7635 "of arguments", name);
7636 return true;
7639 /* Error out if return value is passed on the stack. */
7640 ret_type = TREE_TYPE (fntype);
7641 if (arm_return_in_memory (ret_type, fntype))
7643 error ("%qE attribute not available to functions that return value on "
7644 "the stack", name);
7645 return true;
7647 return false;
7650 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7651 function will check whether the attribute is allowed here and will add the
7652 attribute to the function declaration tree or otherwise issue a warning. */
7654 static tree
7655 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7656 tree /* args */,
7657 int /* flags */,
7658 bool *no_add_attrs)
7660 tree fndecl;
7662 if (!use_cmse)
7664 *no_add_attrs = true;
7665 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7666 "option", name);
7667 return NULL_TREE;
7670 /* Ignore attribute for function types. */
7671 if (TREE_CODE (*node) != FUNCTION_DECL)
7673 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7674 name);
7675 *no_add_attrs = true;
7676 return NULL_TREE;
7679 fndecl = *node;
7681 /* Warn for static linkage functions. */
7682 if (!TREE_PUBLIC (fndecl))
7684 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7685 "with static linkage", name);
7686 *no_add_attrs = true;
7687 return NULL_TREE;
7690 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7691 TREE_TYPE (fndecl));
7692 return NULL_TREE;
7696 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7697 function will check whether the attribute is allowed here and will add the
7698 attribute to the function type tree or otherwise issue a diagnostic. The
7699 reason we check this at declaration time is to only allow the use of the
7700 attribute with declarations of function pointers and not function
7701 declarations. This function checks NODE is of the expected type and issues
7702 diagnostics otherwise using NAME. If it is not of the expected type
7703 *NO_ADD_ATTRS will be set to true. */
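/* A typical use, assuming -mcmse, attaches the attribute to a function
   pointer type rather than to a function declaration, for example:

       typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc (void);
       nsfunc *ns_fn;  */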
7705 static tree
7706 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7707 tree /* args */,
7708 int /* flags */,
7709 bool *no_add_attrs)
7711 tree decl = NULL_TREE;
7712 tree fntype, type;
7714 if (!use_cmse)
7716 *no_add_attrs = true;
7717 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7718 "option", name);
7719 return NULL_TREE;
7722 if (DECL_P (*node))
7724 fntype = TREE_TYPE (*node);
7726 if (VAR_P (*node) || TREE_CODE (*node) == TYPE_DECL)
7727 decl = *node;
7729 else
7730 fntype = *node;
7732 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7733 fntype = TREE_TYPE (fntype);
7735 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7737 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7738 "function pointer", name);
7739 *no_add_attrs = true;
7740 return NULL_TREE;
7743 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7745 if (*no_add_attrs)
7746 return NULL_TREE;
7748 /* Prevent trees being shared among function types with and without
7749 cmse_nonsecure_call attribute. */
7750 if (decl)
7752 type = build_distinct_type_copy (TREE_TYPE (decl));
7753 TREE_TYPE (decl) = type;
7755 else
7757 type = build_distinct_type_copy (*node);
7758 *node = type;
7761 fntype = type;
7763 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7765 type = fntype;
7766 fntype = TREE_TYPE (fntype);
7767 fntype = build_distinct_type_copy (fntype);
7768 TREE_TYPE (type) = fntype;
7771 /* Construct a type attribute and add it to the function type. */
7772 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7773 TYPE_ATTRIBUTES (fntype));
7774 TYPE_ATTRIBUTES (fntype) = attrs;
7775 return NULL_TREE;
7778 /* Return 0 if the attributes for two types are incompatible, 1 if they
7779 are compatible, and 2 if they are nearly compatible (which causes a
7780 warning to be generated). */
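/* For instance, a function type carrying the "long_call" attribute and an
   otherwise identical type without it compare as incompatible here (return
   value 0), because their sets of call attributes differ.  */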
7781 static int
7782 arm_comp_type_attributes (const_tree type1, const_tree type2)
7784 int l1, l2, s1, s2;
7786 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7787 TYPE_ATTRIBUTES (type1));
7788 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7789 TYPE_ATTRIBUTES (type2));
7790 if (bool (attrs1) != bool (attrs2))
7791 return 0;
7792 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7793 return 0;
7795 /* Check for mismatch of non-default calling convention. */
7796 if (TREE_CODE (type1) != FUNCTION_TYPE)
7797 return 1;
7799 /* Check for mismatched call attributes. */
7800 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7801 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7802 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7803 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7805 /* Only bother to check if an attribute is defined. */
7806 if (l1 | l2 | s1 | s2)
7808 /* If one type has an attribute, the other must have the same attribute. */
7809 if ((l1 != l2) || (s1 != s2))
7810 return 0;
7812 /* Disallow mixed attributes. */
7813 if ((l1 & s2) || (l2 & s1))
7814 return 0;
7817 /* Check for mismatched ISR attribute. */
7818 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7819 if (! l1)
7820 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7821 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7822 if (! l2)
7823 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7824 if (l1 != l2)
7825 return 0;
7827 l1 = lookup_attribute ("cmse_nonsecure_call",
7828 TYPE_ATTRIBUTES (type1)) != NULL;
7829 l2 = lookup_attribute ("cmse_nonsecure_call",
7830 TYPE_ATTRIBUTES (type2)) != NULL;
7832 if (l1 != l2)
7833 return 0;
7835 return 1;
7838 /* Assigns default attributes to newly defined type. This is used to
7839 set short_call/long_call attributes for function types of
7840 functions defined inside corresponding #pragma scopes. */
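/* So, as a sketch, functions declared in a region such as

       #pragma long_calls
       void far_away (void);
       #pragma long_calls_off

   have their types given an implicit "long_call" attribute.  */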
7841 static void
7842 arm_set_default_type_attributes (tree type)
7844 /* Add __attribute__ ((long_call)) to all functions when inside
7845 #pragma long_calls, or __attribute__ ((short_call)) when inside
7846 #pragma no_long_calls. */
7847 if (FUNC_OR_METHOD_TYPE_P (type))
7849 tree type_attr_list, attr_name;
7850 type_attr_list = TYPE_ATTRIBUTES (type);
7852 if (arm_pragma_long_calls == LONG)
7853 attr_name = get_identifier ("long_call");
7854 else if (arm_pragma_long_calls == SHORT)
7855 attr_name = get_identifier ("short_call");
7856 else
7857 return;
7859 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7860 TYPE_ATTRIBUTES (type) = type_attr_list;
7864 /* Return true if DECL is known to be linked into section SECTION. */
7866 static bool
7867 arm_function_in_section_p (tree decl, section *section)
7869 /* We can only be certain about the prevailing symbol definition. */
7870 if (!decl_binds_to_current_def_p (decl))
7871 return false;
7873 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7874 if (!DECL_SECTION_NAME (decl))
7876 /* Make sure that we will not create a unique section for DECL. */
7877 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7878 return false;
7881 return function_section (decl) == section;
7884 /* Return nonzero if a 32-bit "long_call" should be generated for
7885 a call from the current function to DECL. We generate a long_call
7886 if the function:
7888 a. has an __attribute__((long_call))
7889 or b. is within the scope of a #pragma long_calls
7890 or c. the -mlong-calls command line switch has been specified
7892 However we do not generate a long call if the function:
7894 d. has an __attribute__ ((short_call))
7895 or e. is inside the scope of a #pragma no_long_calls
7896 or f. is defined in the same section as the current function. */
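/* When this returns true, the call is emitted by loading the callee's
   address into a register and calling through that register, rather than
   using a plain BL, so the callee can live anywhere in the address space.  */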
7898 bool
7899 arm_is_long_call_p (tree decl)
7901 tree attrs;
7903 if (!decl)
7904 return TARGET_LONG_CALLS;
7906 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7907 if (lookup_attribute ("short_call", attrs))
7908 return false;
7910 /* For "f", be conservative, and only cater for cases in which the
7911 whole of the current function is placed in the same section. */
7912 if (!flag_reorder_blocks_and_partition
7913 && TREE_CODE (decl) == FUNCTION_DECL
7914 && arm_function_in_section_p (decl, current_function_section ()))
7915 return false;
7917 if (lookup_attribute ("long_call", attrs))
7918 return true;
7920 return TARGET_LONG_CALLS;
7923 /* Return nonzero if it is ok to make a tail-call to DECL. */
7924 static bool
7925 arm_function_ok_for_sibcall (tree decl, tree exp)
7927 unsigned long func_type;
7929 if (cfun->machine->sibcall_blocked)
7930 return false;
7932 if (TARGET_FDPIC)
7934 /* In FDPIC, never tailcall something for which we have no decl:
7935 the target function could be in a different module, requiring
7936 a different FDPIC register value. */
7937 if (decl == NULL)
7938 return false;
7941 /* Never tailcall something if we are generating code for Thumb-1. */
7942 if (TARGET_THUMB1)
7943 return false;
7945 /* The PIC register is live on entry to VxWorks PLT entries, so we
7946 must make the call before restoring the PIC register. */
7947 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7948 return false;
7950 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7951 may be used both as target of the call and base register for restoring
7952 the VFP registers */
7953 if (TARGET_APCS_FRAME && TARGET_ARM
7954 && TARGET_HARD_FLOAT
7955 && decl && arm_is_long_call_p (decl))
7956 return false;
7958 /* If we are interworking and the function is not declared static
7959 then we can't tail-call it unless we know that it exists in this
7960 compilation unit (since it might be a Thumb routine). */
7961 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7962 && !TREE_ASM_WRITTEN (decl))
7963 return false;
7965 func_type = arm_current_func_type ();
7966 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7967 if (IS_INTERRUPT (func_type))
7968 return false;
7970 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7971 generated for entry functions themselves. */
7972 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7973 return false;
7975 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7976 this would complicate matters for later code generation. */
7977 if (TREE_CODE (exp) == CALL_EXPR)
7979 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7980 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7981 return false;
7984 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7986 /* Check that the return value locations are the same. For
7987 example that we aren't returning a value from the sibling in
7988 a VFP register but then need to transfer it to a core
7989 register. */
7990 rtx a, b;
7991 tree decl_or_type = decl;
7993 /* If it is an indirect function pointer, get the function type. */
7994 if (!decl)
7995 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7997 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7998 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7999 cfun->decl, false);
8000 if (!rtx_equal_p (a, b))
8001 return false;
8004 /* Never tailcall if function may be called with a misaligned SP. */
8005 if (IS_STACKALIGN (func_type))
8006 return false;
8008 /* The AAPCS says that, on bare-metal, calls to unresolved weak
8009 references should become a NOP. Don't convert such calls into
8010 sibling calls. */
8011 if (TARGET_AAPCS_BASED
8012 && arm_abi == ARM_ABI_AAPCS
8013 && decl
8014 && DECL_WEAK (decl))
8015 return false;
8017 /* Indirect tailcalls need a call-clobbered register to hold the function
8018 address. But we only have r0-r3 and ip in that class. If r0-r3 all hold
8019 function arguments, then we can only use IP. But IP may be needed in the
8020 epilogue (for PAC validation), or for passing the static chain. We have
8021 to disable the tail call if nothing is available. */
8022 if (!decl
8023 && ((CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
8024 || arm_current_function_pac_enabled_p()))
8026 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
8027 CUMULATIVE_ARGS cum;
8028 cumulative_args_t cum_v;
8030 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
8031 cum_v = pack_cumulative_args (&cum);
8033 tree arg;
8034 call_expr_arg_iterator iter;
8035 unsigned used_regs = 0;
8037 /* Layout each actual argument in turn. If it is allocated to
8038 core regs, note which regs have been allocated. */
8039 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
8041 tree type = TREE_TYPE (arg);
8042 function_arg_info arg_info (type, /*named=*/true);
8043 rtx reg = arm_function_arg (cum_v, arg_info);
8044 if (reg && REG_P (reg)
8045 && REGNO (reg) <= LAST_ARG_REGNUM)
8047 /* Avoid any chance of UB here. We don't care if TYPE
8048 is very large since it will use up all the argument regs. */
8049 unsigned nregs = MIN (ARM_NUM_REGS2 (GET_MODE (reg), type),
8050 LAST_ARG_REGNUM + 1);
8051 used_regs |= ((1 << nregs) - 1) << REGNO (reg);
8053 arm_function_arg_advance (cum_v, arg_info);
8056 /* We've used all the argument regs, and we know IP is live during the
8057 epilogue for some reason, so we can't tailcall. */
8058 if ((used_regs & ((1 << (LAST_ARG_REGNUM + 1)) - 1))
8059 == ((1 << (LAST_ARG_REGNUM + 1)) - 1))
8060 return false;
8063 /* Everything else is ok. */
8064 return true;
8068 /* Addressing mode support functions. */
8070 /* Return nonzero if X is a legitimate immediate operand when compiling
8071 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
8073 legitimate_pic_operand_p (rtx x)
8075 if (SYMBOL_REF_P (x)
8076 || (GET_CODE (x) == CONST
8077 && GET_CODE (XEXP (x, 0)) == PLUS
8078 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
8079 return 0;
8081 return 1;
8084 /* Record that the current function needs a PIC register. If PIC_REG is null,
8085 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8086 both cases cfun->machine->pic_reg is initialized if we have not already done
8087 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
8088 the PIC register is reloaded in the current position of the instruction stream
8089 regardless of whether it was loaded before. Otherwise, it is only loaded
8090 if that has not already been done (crtl->uses_pic_offset_table is null). Note that
8091 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8092 is only supported iff COMPUTE_NOW is false. */
8094 static void
8095 require_pic_register (rtx pic_reg, bool compute_now)
8097 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8099 /* A lot of the logic here is made obscure by the fact that this
8100 routine gets called as part of the rtx cost estimation process.
8101 We don't want those calls to affect any assumptions about the real
8102 function; and further, we can't call entry_of_function() until we
8103 start the real expansion process. */
8104 if (!crtl->uses_pic_offset_table || compute_now)
8106 gcc_assert (can_create_pseudo_p ()
8107 || (pic_reg != NULL_RTX
8108 && REG_P (pic_reg)
8109 && GET_MODE (pic_reg) == Pmode));
8110 if (arm_pic_register != INVALID_REGNUM
8111 && !compute_now
8112 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8114 if (!cfun->machine->pic_reg)
8115 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8117 /* Play games to avoid marking the function as needing pic
8118 if we are being called as part of the cost-estimation
8119 process. */
8120 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8121 crtl->uses_pic_offset_table = 1;
8123 else
8125 rtx_insn *seq, *insn;
8127 if (pic_reg == NULL_RTX)
8128 pic_reg = gen_reg_rtx (Pmode);
8129 if (!cfun->machine->pic_reg)
8130 cfun->machine->pic_reg = pic_reg;
8132 /* Play games to avoid marking the function as needing pic
8133 if we are being called as part of the cost-estimation
8134 process. */
8135 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8137 crtl->uses_pic_offset_table = 1;
8138 start_sequence ();
8140 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8141 && arm_pic_register > LAST_LO_REGNUM
8142 && !compute_now)
8143 emit_move_insn (cfun->machine->pic_reg,
8144 gen_rtx_REG (Pmode, arm_pic_register));
8145 else
8146 arm_load_pic_register (0UL, pic_reg);
8148 seq = get_insns ();
8149 end_sequence ();
8151 for (insn = seq; insn; insn = NEXT_INSN (insn))
8152 if (INSN_P (insn))
8153 INSN_LOCATION (insn) = prologue_location;
8155 /* We can be called during expansion of PHI nodes, where
8156 we can't yet emit instructions directly in the final
8157 insn stream. Queue the insns on the entry edge, they will
8158 be committed after everything else is expanded. */
8159 if (currently_expanding_to_rtl)
8160 insert_insn_on_edge (seq,
8161 single_succ_edge
8162 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8163 else
8164 emit_insn (seq);
8170 /* Generate insns to calculate the address of ORIG in pic mode. */
8171 static rtx_insn *
8172 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8174 rtx pat;
8175 rtx mem;
8177 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8179 /* Make the MEM as close to a constant as possible. */
8180 mem = SET_SRC (pat);
8181 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8182 MEM_READONLY_P (mem) = 1;
8183 MEM_NOTRAP_P (mem) = 1;
8185 return emit_insn (pat);
8188 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8189 created to hold the result of the load. If not NULL, PIC_REG indicates
8190 which register to use as the PIC register, otherwise it is decided by the
8191 register allocator. COMPUTE_NOW forces the PIC register to be loaded at the
8192 current location in the instruction stream, regardless of whether it was loaded
8193 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8194 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8196 Returns the register REG into which the PIC load is performed. */
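/* In rough terms, with -fPIC a reference to a preemptible global symbol is
   rewritten into a load from the symbol's GOT slot addressed relative to
   the PIC register, while local symbols and labels can instead use the
   cheaper PC-relative form generated by arm_pic_static_addr.  */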
8199 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8200 bool compute_now)
8202 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8204 if (SYMBOL_REF_P (orig)
8205 || LABEL_REF_P (orig))
8207 if (reg == 0)
8209 gcc_assert (can_create_pseudo_p ());
8210 reg = gen_reg_rtx (Pmode);
8213 /* VxWorks does not impose a fixed gap between segments; the run-time
8214 gap can be different from the object-file gap. We therefore can't
8215 use GOTOFF unless we are absolutely sure that the symbol is in the
8216 same segment as the GOT. Unfortunately, the flexibility of linker
8217 scripts means that we can't be sure of that in general, so assume
8218 that GOTOFF is never valid on VxWorks. */
8219 /* References to weak symbols cannot be resolved locally: they
8220 may be overridden by a non-weak definition at link time. */
8221 rtx_insn *insn;
8222 if ((LABEL_REF_P (orig)
8223 || (SYMBOL_REF_P (orig)
8224 && SYMBOL_REF_LOCAL_P (orig)
8225 && (SYMBOL_REF_DECL (orig)
8226 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8227 && (!SYMBOL_REF_FUNCTION_P (orig)
8228 || arm_fdpic_local_funcdesc_p (orig))))
8229 && NEED_GOT_RELOC
8230 && arm_pic_data_is_text_relative)
8231 insn = arm_pic_static_addr (orig, reg);
8232 else
8234 /* If this function doesn't have a pic register, create one now. */
8235 require_pic_register (pic_reg, compute_now);
8237 if (pic_reg == NULL_RTX)
8238 pic_reg = cfun->machine->pic_reg;
8240 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8243 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8244 by loop. */
8245 set_unique_reg_note (insn, REG_EQUAL, orig);
8247 return reg;
8249 else if (GET_CODE (orig) == CONST)
8251 rtx base, offset;
8253 if (GET_CODE (XEXP (orig, 0)) == PLUS
8254 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8255 return orig;
8257 /* Handle the case where we have: const (UNSPEC_TLS). */
8258 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8259 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8260 return orig;
8262 /* Handle the case where we have:
8263 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8264 CONST_INT. */
8265 if (GET_CODE (XEXP (orig, 0)) == PLUS
8266 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8267 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8269 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8270 return orig;
8273 if (reg == 0)
8275 gcc_assert (can_create_pseudo_p ());
8276 reg = gen_reg_rtx (Pmode);
8279 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8281 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8282 pic_reg, compute_now);
8283 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8284 base == reg ? 0 : reg, pic_reg,
8285 compute_now);
8287 if (CONST_INT_P (offset))
8289 /* The base register doesn't really matter, we only want to
8290 test the index for the appropriate mode. */
8291 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8293 gcc_assert (can_create_pseudo_p ());
8294 offset = force_reg (Pmode, offset);
8297 if (CONST_INT_P (offset))
8298 return plus_constant (Pmode, base, INTVAL (offset));
8301 if (GET_MODE_SIZE (mode) > 4
8302 && (GET_MODE_CLASS (mode) == MODE_INT
8303 || TARGET_SOFT_FLOAT))
8305 emit_insn (gen_addsi3 (reg, base, offset));
8306 return reg;
8309 return gen_rtx_PLUS (Pmode, base, offset);
8312 return orig;
8316 /* Generate insns that produce the address of the stack canary. */
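/* This supports -mstack-protector-guard=tls, where the canary lives at the
   fixed offset from the thread pointer given by
   -mstack-protector-guard-offset=, so the canary is fetched from
   [TP + offset] rather than from the global __stack_chk_guard.  */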
8318 arm_stack_protect_tls_canary_mem (bool reload)
8320 rtx tp = gen_reg_rtx (SImode);
8321 if (reload)
8322 emit_insn (gen_reload_tp_hard (tp));
8323 else
8324 emit_insn (gen_load_tp_hard (tp));
8326 rtx reg = gen_reg_rtx (SImode);
8327 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8328 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8329 return gen_rtx_MEM (SImode, reg);
8333 /* Whether a register is callee saved or not. This is necessary because high
8334 registers are marked as caller saved when optimizing for size on Thumb-1
8335 targets, despite being callee saved, in order to avoid using them. */
8336 #define callee_saved_reg_p(reg) \
8337 (!call_used_or_fixed_reg_p (reg) \
8338 || (TARGET_THUMB1 && optimize_size \
8339 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8341 /* Return a mask for the call-clobbered low registers that are unused
8342 at the end of the prologue. */
8343 static unsigned long
8344 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8346 unsigned long mask = 0;
8347 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8349 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8350 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8351 mask |= 1 << (reg - FIRST_LO_REGNUM);
8352 return mask;
8355 /* Similarly for the start of the epilogue. */
8356 static unsigned long
8357 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8359 unsigned long mask = 0;
8360 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8362 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8363 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8364 mask |= 1 << (reg - FIRST_LO_REGNUM);
8365 return mask;
8368 /* Find a spare register to use during the prolog of a function. */
8370 static int
8371 thumb_find_work_register (unsigned long pushed_regs_mask)
8373 int reg;
8375 unsigned long unused_regs
8376 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8378 /* Check the argument registers first as these are call-used. The
8379 register allocation order means that sometimes r3 might be used
8380 but earlier argument registers might not, so check them all. */
8381 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8382 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8383 return reg;
8385 /* Otherwise look for a call-saved register that is going to be pushed. */
8386 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8387 if (pushed_regs_mask & (1 << reg))
8388 return reg;
8390 if (TARGET_THUMB2)
8392 /* Thumb-2 can use high regs. */
8393 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8394 if (pushed_regs_mask & (1 << reg))
8395 return reg;
8397 /* Something went wrong - thumb_compute_save_reg_mask()
8398 should have arranged for a suitable register to be pushed. */
8399 gcc_unreachable ();
8402 static GTY(()) int pic_labelno;
8404 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8405 low register. */
8407 void
8408 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8410 rtx l1, labelno, pic_tmp, pic_rtx;
8412 if (crtl->uses_pic_offset_table == 0
8413 || TARGET_SINGLE_PIC_BASE
8414 || TARGET_FDPIC)
8415 return;
8417 gcc_assert (flag_pic);
8419 if (pic_reg == NULL_RTX)
8420 pic_reg = cfun->machine->pic_reg;
8421 if (TARGET_VXWORKS_RTP)
8423 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8424 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8425 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8427 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8429 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8430 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8432 else
8434 /* We use an UNSPEC rather than a LABEL_REF because this label
8435 never appears in the code stream. */
8437 labelno = GEN_INT (pic_labelno++);
8438 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8439 l1 = gen_rtx_CONST (VOIDmode, l1);
8441 /* On the ARM the PC register contains 'dot + 8' at the time of the
8442 addition, on the Thumb it is 'dot + 4'. */
8443 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8444 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8445 UNSPEC_GOTSYM_OFF);
8446 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8448 if (TARGET_32BIT)
8450 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8452 else /* TARGET_THUMB1 */
8454 if (arm_pic_register != INVALID_REGNUM
8455 && REGNO (pic_reg) > LAST_LO_REGNUM)
8457 /* We will have pushed the pic register, so we should always be
8458 able to find a work register. */
8459 pic_tmp = gen_rtx_REG (SImode,
8460 thumb_find_work_register (saved_regs));
8461 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8462 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8463 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8465 else if (arm_pic_register != INVALID_REGNUM
8466 && arm_pic_register > LAST_LO_REGNUM
8467 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8469 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8470 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8471 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8473 else
8474 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8478 /* Need to emit this whether or not we obey regdecls,
8479 since setjmp/longjmp can cause life info to screw up. */
8480 emit_use (pic_reg);
8483 /* Try to determine whether an object, referenced via ORIG, will be
8484 placed in the text or data segment. This is used in FDPIC mode, to
8485 decide which relocations to use when accessing ORIG. *IS_READONLY
8486 is set to true if ORIG is a read-only location, false otherwise.
8487 Return true if we could determine the location of ORIG, false
8488 otherwise. *IS_READONLY is valid only when we return true. */
8489 static bool
8490 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8492 *is_readonly = false;
8494 if (LABEL_REF_P (orig))
8496 *is_readonly = true;
8497 return true;
8500 if (SYMBOL_REF_P (orig))
8502 if (CONSTANT_POOL_ADDRESS_P (orig))
8504 *is_readonly = true;
8505 return true;
8507 if (SYMBOL_REF_LOCAL_P (orig)
8508 && !SYMBOL_REF_EXTERNAL_P (orig)
8509 && SYMBOL_REF_DECL (orig)
8510 && (!DECL_P (SYMBOL_REF_DECL (orig))
8511 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8513 tree decl = SYMBOL_REF_DECL (orig);
8514 tree init = VAR_P (decl)
8515 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8516 ? decl : 0;
8517 int reloc = 0;
8518 bool named_section, readonly;
8520 if (init && init != error_mark_node)
8521 reloc = compute_reloc_for_constant (init);
8523 named_section = VAR_P (decl)
8524 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8525 readonly = decl_readonly_section (decl, reloc);
8527 /* We don't know where the link script will put a named
8528 section, so return false in such a case. */
8529 if (named_section)
8530 return false;
8532 *is_readonly = readonly;
8533 return true;
8536 /* We don't know. */
8537 return false;
8540 gcc_unreachable ();
8543 /* Generate code to load the address of a static var when flag_pic is set. */
8544 static rtx_insn *
8545 arm_pic_static_addr (rtx orig, rtx reg)
8547 rtx l1, labelno, offset_rtx;
8548 rtx_insn *insn;
8550 gcc_assert (flag_pic);
8552 bool is_readonly = false;
8553 bool info_known = false;
8555 if (TARGET_FDPIC
8556 && SYMBOL_REF_P (orig)
8557 && !SYMBOL_REF_FUNCTION_P (orig))
8558 info_known = arm_is_segment_info_known (orig, &is_readonly);
8560 if (TARGET_FDPIC
8561 && SYMBOL_REF_P (orig)
8562 && !SYMBOL_REF_FUNCTION_P (orig)
8563 && !info_known)
8565 /* We don't know where orig is stored, so we have to be
8566 pessimistic and use a GOT relocation. */
8567 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8569 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8571 else if (TARGET_FDPIC
8572 && SYMBOL_REF_P (orig)
8573 && (SYMBOL_REF_FUNCTION_P (orig)
8574 || !is_readonly))
8576 /* We use the GOTOFF relocation. */
8577 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8579 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8580 emit_insn (gen_movsi (reg, l1));
8581 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8583 else
8585 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8586 PC-relative access. */
8587 /* We use an UNSPEC rather than a LABEL_REF because this label
8588 never appears in the code stream. */
8589 labelno = GEN_INT (pic_labelno++);
8590 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8591 l1 = gen_rtx_CONST (VOIDmode, l1);
8593 /* On the ARM the PC register contains 'dot + 8' at the time of the
8594 addition, on the Thumb it is 'dot + 4'. */
8595 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8596 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8597 UNSPEC_SYMBOL_OFFSET);
8598 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8600 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8601 labelno));
8604 return insn;
8607 /* Return nonzero if X is valid as an ARM state addressing register. */
8608 static int
8609 arm_address_register_rtx_p (rtx x, int strict_p)
8611 int regno;
8613 if (!REG_P (x))
8614 return 0;
8616 regno = REGNO (x);
8618 if (strict_p)
8619 return ARM_REGNO_OK_FOR_BASE_P (regno);
8621 return (regno <= LAST_ARM_REGNUM
8622 || regno >= FIRST_PSEUDO_REGISTER
8623 || regno == FRAME_POINTER_REGNUM
8624 || regno == ARG_POINTER_REGNUM);
8627 /* Return TRUE if this rtx is the difference of a symbol and a label,
8628 and will reduce to a PC-relative relocation in the object file.
8629 Expressions like this can be left alone when generating PIC, rather
8630 than forced through the GOT. */
8631 static int
8632 pcrel_constant_p (rtx x)
8634 if (GET_CODE (x) == MINUS)
8635 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8637 return FALSE;
8640 /* Return true if X will surely end up in an index register after the next
8641 splitting pass. */
8642 static bool
8643 will_be_in_index_register (const_rtx x)
8645 /* arm.md: calculate_pic_address will split this into a register. */
8646 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8649 /* Return nonzero if X is a valid ARM state address operand. */
8651 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8652 int strict_p)
8654 bool use_ldrd;
8655 enum rtx_code code = GET_CODE (x);
8657 if (arm_address_register_rtx_p (x, strict_p))
8658 return 1;
8660 use_ldrd = (TARGET_LDRD
8661 && (mode == DImode || mode == DFmode));
8663 if (code == POST_INC || code == PRE_DEC
8664 || ((code == PRE_INC || code == POST_DEC)
8665 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8666 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8668 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8669 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8670 && GET_CODE (XEXP (x, 1)) == PLUS
8671 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8673 rtx addend = XEXP (XEXP (x, 1), 1);
8675 /* Don't allow ldrd post increment by register because it's hard
8676 to fixup invalid register choices. */
8677 if (use_ldrd
8678 && GET_CODE (x) == POST_MODIFY
8679 && REG_P (addend))
8680 return 0;
8682 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8683 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8686 /* After reload constants split into minipools will have addresses
8687 from a LABEL_REF. */
8688 else if (reload_completed
8689 && (code == LABEL_REF
8690 || (code == CONST
8691 && GET_CODE (XEXP (x, 0)) == PLUS
8692 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8693 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8694 return 1;
8696 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8697 return 0;
8699 else if (code == PLUS)
8701 rtx xop0 = XEXP (x, 0);
8702 rtx xop1 = XEXP (x, 1);
8704 return ((arm_address_register_rtx_p (xop0, strict_p)
8705 && ((CONST_INT_P (xop1)
8706 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8707 || (!strict_p && will_be_in_index_register (xop1))))
8708 || (arm_address_register_rtx_p (xop1, strict_p)
8709 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8712 #if 0
8713 /* Reload currently can't handle MINUS, so disable this for now */
8714 else if (GET_CODE (x) == MINUS)
8716 rtx xop0 = XEXP (x, 0);
8717 rtx xop1 = XEXP (x, 1);
8719 return (arm_address_register_rtx_p (xop0, strict_p)
8720 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8722 #endif
8724 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8725 && code == SYMBOL_REF
8726 && CONSTANT_POOL_ADDRESS_P (x)
8727 && ! (flag_pic
8728 && symbol_mentioned_p (get_pool_constant (x))
8729 && ! pcrel_constant_p (get_pool_constant (x))))
8730 return 1;
8732 return 0;
8735 /* Return true if we can avoid creating a constant pool entry for x. */
8736 static bool
8737 can_avoid_literal_pool_for_label_p (rtx x)
8739 /* Normally we can assign constant values to target registers without
8740 the help of the constant pool. But there are cases where we have to use the
8741 constant pool, for example:
8742 1) assigning a label to a register.
8743 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8745 Constant pool access in the format:
8746 (set (reg r0) (mem (symbol_ref (".LC0"))))
8747 will cause the use of the literal pool (later, in function arm_reorg).
8748 So here we mark such a format as invalid; the compiler will then
8749 adjust it into:
8750 (set (reg r0) (symbol_ref (".LC0")))
8751 (set (reg r0) (mem (reg r0))).
8752 No extra register is required, and (mem (reg r0)) won't cause the use
8753 of the literal pool. */
8754 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8755 && CONSTANT_POOL_ADDRESS_P (x))
8756 return 1;
8757 return 0;
8761 /* Return nonzero if X is a valid Thumb-2 address operand. */
8762 static int
8763 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8765 bool use_ldrd;
8766 enum rtx_code code = GET_CODE (x);
8768 /* If we are dealing with an MVE predicate mode, then treat it as HImode, as
8769 we can store and load it like any other 16-bit value. */
8770 if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
8771 mode = HImode;
8773 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8774 return mve_vector_mem_operand (mode, x, strict_p);
8776 if (arm_address_register_rtx_p (x, strict_p))
8777 return 1;
8779 use_ldrd = (TARGET_LDRD
8780 && (mode == DImode || mode == DFmode));
8782 if (code == POST_INC || code == PRE_DEC
8783 || ((code == PRE_INC || code == POST_DEC)
8784 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8785 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8787 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8788 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8789 && GET_CODE (XEXP (x, 1)) == PLUS
8790 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8792 /* Thumb-2 only has autoincrement by constant. */
8793 rtx addend = XEXP (XEXP (x, 1), 1);
8794 HOST_WIDE_INT offset;
8796 if (!CONST_INT_P (addend))
8797 return 0;
8799 offset = INTVAL(addend);
8800 if (GET_MODE_SIZE (mode) <= 4)
8801 return (offset > -256 && offset < 256);
8803 return (use_ldrd && offset > -1024 && offset < 1024
8804 && (offset & 3) == 0);
8807 /* After reload constants split into minipools will have addresses
8808 from a LABEL_REF. */
8809 else if (reload_completed
8810 && (code == LABEL_REF
8811 || (code == CONST
8812 && GET_CODE (XEXP (x, 0)) == PLUS
8813 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8814 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8815 return 1;
8817 else if (mode == TImode
8818 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8819 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8820 return 0;
8822 else if (code == PLUS)
8824 rtx xop0 = XEXP (x, 0);
8825 rtx xop1 = XEXP (x, 1);
8827 return ((arm_address_register_rtx_p (xop0, strict_p)
8828 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8829 || (!strict_p && will_be_in_index_register (xop1))))
8830 || (arm_address_register_rtx_p (xop1, strict_p)
8831 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8834 else if (can_avoid_literal_pool_for_label_p (x))
8835 return 0;
8837 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8838 && code == SYMBOL_REF
8839 && CONSTANT_POOL_ADDRESS_P (x)
8840 && ! (flag_pic
8841 && symbol_mentioned_p (get_pool_constant (x))
8842 && ! pcrel_constant_p (get_pool_constant (x))))
8843 return 1;
8845 return 0;
8848 /* Return nonzero if INDEX is valid for an address index operand in
8849 ARM state. */
8850 static int
8851 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8852 int strict_p)
8854 HOST_WIDE_INT range;
8855 enum rtx_code code = GET_CODE (index);
8857 /* Standard coprocessor addressing modes. */
8858 if (TARGET_HARD_FLOAT
8859 && (mode == SFmode || mode == DFmode))
8860 return (code == CONST_INT && INTVAL (index) < 1024
8861 && INTVAL (index) > -1024
8862 && (INTVAL (index) & 3) == 0);
8864 if (arm_address_register_rtx_p (index, strict_p)
8865 && (GET_MODE_SIZE (mode) <= 4))
8866 return 1;
8868 /* This handles DFmode only if !TARGET_HARD_FLOAT. */
8869 if (mode == DImode || mode == DFmode)
8871 if (code == CONST_INT)
8873 HOST_WIDE_INT val = INTVAL (index);
8875 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8876 If vldr is selected it uses arm_coproc_mem_operand. */
8877 if (TARGET_LDRD)
8878 return val > -256 && val < 256;
8879 else
8880 return val > -4096 && val < 4092;
8883 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8886 /* For quad modes, we restrict the constant offset to be slightly less
8887 than what the instruction format permits. We do this because for
8888 quad mode moves, we will actually decompose them into two separate
8889 double-mode reads or writes. INDEX must therefore be a valid
8890 (double-mode) offset and so should INDEX+8. */
8891 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8892 return (code == CONST_INT
8893 && INTVAL (index) < 1016
8894 && INTVAL (index) > -1024
8895 && (INTVAL (index) & 3) == 0);
8897 /* We have no such constraint on double mode offsets, so we permit the
8898 full range of the instruction format. Note DImode is included here. */
8899 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8900 return (code == CONST_INT
8901 && INTVAL (index) < 1024
8902 && INTVAL (index) > -1024
8903 && (INTVAL (index) & 3) == 0);
8905 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8906 return (code == CONST_INT
8907 && INTVAL (index) < 1024
8908 && INTVAL (index) > -1024
8909 && (INTVAL (index) & 3) == 0);
8911 if (GET_MODE_SIZE (mode) <= 4
8912 && ! (arm_arch4
8913 && (mode == HImode
8914 || mode == HFmode
8915 || (mode == QImode && outer == SIGN_EXTEND))))
8917 if (code == MULT)
8919 rtx xiop0 = XEXP (index, 0);
8920 rtx xiop1 = XEXP (index, 1);
8922 return ((arm_address_register_rtx_p (xiop0, strict_p)
8923 && power_of_two_operand (xiop1, SImode))
8924 || (arm_address_register_rtx_p (xiop1, strict_p)
8925 && power_of_two_operand (xiop0, SImode)));
8927 else if (code == LSHIFTRT || code == ASHIFTRT
8928 || code == ASHIFT || code == ROTATERT)
8930 rtx op = XEXP (index, 1);
8932 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8933 && CONST_INT_P (op)
8934 && INTVAL (op) > 0
8935 && INTVAL (op) <= 31);
8939 /* For ARM v4 we may be doing a sign-extend operation during the
8940 load. */
8941 if (arm_arch4)
8943 if (mode == HImode
8944 || mode == HFmode
8945 || (outer == SIGN_EXTEND && mode == QImode))
8946 range = 256;
8947 else
8948 range = 4096;
8950 else
8951 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8953 return (code == CONST_INT
8954 && INTVAL (index) < range
8955 && INTVAL (index) > -range);
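/* For example, an SImode word load accepts immediate offsets up to 4095
   ([r1, #4095]), whereas an ARMv4 halfword load uses the narrower ldrh
   encoding and is limited to offsets below 256 ([r1, #255]).  */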
8958 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8959 index operand, i.e. 1, 2, 4 or 8. */
8960 static bool
8961 thumb2_index_mul_operand (rtx op)
8963 HOST_WIDE_INT val;
8965 if (!CONST_INT_P (op))
8966 return false;
8968 val = INTVAL(op);
8969 return (val == 1 || val == 2 || val == 4 || val == 8);
8972 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8973 static int
8974 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8976 enum rtx_code code = GET_CODE (index);
8978 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8979 /* Standard coprocessor addressing modes. */
8980 if (TARGET_VFP_BASE
8981 && (mode == SFmode || mode == DFmode))
8982 return (code == CONST_INT && INTVAL (index) < 1024
8983 /* Thumb-2 allows only > -256 index range for its core register
8984 load/stores. Since we allow SF/DF in core registers, we have
8985 to use the intersection between -256~4096 (core) and -1024~1024
8986 (coprocessor). */
8987 && INTVAL (index) > -256
8988 && (INTVAL (index) & 3) == 0);
8990 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8992 /* For DImode assume values will usually live in core regs
8993 and only allow LDRD addressing modes. */
8994 if (!TARGET_LDRD || mode != DImode)
8995 return (code == CONST_INT
8996 && INTVAL (index) < 1024
8997 && INTVAL (index) > -1024
8998 && (INTVAL (index) & 3) == 0);
9001 /* For quad modes, we restrict the constant offset to be slightly less
9002 than what the instruction format permits. We do this because for
9003 quad mode moves, we will actually decompose them into two separate
9004 double-mode reads or writes. INDEX must therefore be a valid
9005 (double-mode) offset and so should INDEX+8. */
9006 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
9007 return (code == CONST_INT
9008 && INTVAL (index) < 1016
9009 && INTVAL (index) > -1024
9010 && (INTVAL (index) & 3) == 0);
9012 /* We have no such constraint on double mode offsets, so we permit the
9013 full range of the instruction format. Note DImode is included here. */
9014 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
9015 return (code == CONST_INT
9016 && INTVAL (index) < 1024
9017 && INTVAL (index) > -1024
9018 && (INTVAL (index) & 3) == 0);
9020 if (arm_address_register_rtx_p (index, strict_p)
9021 && (GET_MODE_SIZE (mode) <= 4))
9022 return 1;
9024 /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE. */
9025 if (mode == DImode || mode == DFmode)
9027 if (code == CONST_INT)
9029 HOST_WIDE_INT val = INTVAL (index);
9030 /* Thumb-2 ldrd only has reg+const addressing modes.
9031 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
9032 If vldr is selected it uses arm_coproc_mem_operand. */
9033 if (TARGET_LDRD)
9034 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
9035 else
9036 return IN_RANGE (val, -255, 4095 - 4);
9038 else
9039 return 0;
9042 if (code == MULT)
9044 rtx xiop0 = XEXP (index, 0);
9045 rtx xiop1 = XEXP (index, 1);
9047 return ((arm_address_register_rtx_p (xiop0, strict_p)
9048 && thumb2_index_mul_operand (xiop1))
9049 || (arm_address_register_rtx_p (xiop1, strict_p)
9050 && thumb2_index_mul_operand (xiop0)));
9052 else if (code == ASHIFT)
9054 rtx op = XEXP (index, 1);
9056 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
9057 && CONST_INT_P (op)
9058 && INTVAL (op) > 0
9059 && INTVAL (op) <= 3);
9062 return (code == CONST_INT
9063 && INTVAL (index) < 4096
9064 && INTVAL (index) > -256);
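/* The asymmetric range above reflects the Thumb-2 encodings: a positive
   offset can use the 12-bit immediate form (up to 4095), whereas a
   negative offset must use the 8-bit form and so only reaches -255.  */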
9067 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
9068 static int
9069 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
9071 int regno;
9073 if (!REG_P (x))
9074 return 0;
9076 regno = REGNO (x);
9078 if (strict_p)
9079 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
9081 return (regno <= LAST_LO_REGNUM
9082 || regno > LAST_VIRTUAL_REGISTER
9083 || regno == FRAME_POINTER_REGNUM
9084 || (GET_MODE_SIZE (mode) >= 4
9085 && (regno == STACK_POINTER_REGNUM
9086 || regno >= FIRST_PSEUDO_REGISTER
9087 || x == hard_frame_pointer_rtx
9088 || x == arg_pointer_rtx)));
9091 /* Return nonzero if x is a legitimate index register. This is the case
9092 for any base register that can access a QImode object. */
9093 inline static int
9094 thumb1_index_register_rtx_p (rtx x, int strict_p)
9096 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9099 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9101 The AP may be eliminated to either the SP or the FP, so we use the
9102 least common denominator, e.g. SImode, and offsets from 0 to 64.
9104 ??? Verify whether the above is the right approach.
9106 ??? Also, the FP may be eliminated to the SP, so perhaps that
9107 needs special handling also.
9109 ??? Look at how the mips16 port solves this problem. It probably uses
9110 better ways to solve some of these problems.
9112 Although it is not incorrect, we don't accept QImode and HImode
9113 addresses based on the frame pointer or arg pointer until the
9114 reload pass starts. This is so that eliminating such addresses
9115 into stack-based ones won't produce impossible code. */
9117 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9119 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9120 return 0;
9122 /* ??? Not clear if this is right. Experiment. */
9123 if (GET_MODE_SIZE (mode) < 4
9124 && !(reload_in_progress || reload_completed)
9125 && (reg_mentioned_p (frame_pointer_rtx, x)
9126 || reg_mentioned_p (arg_pointer_rtx, x)
9127 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9128 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9129 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9130 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9131 return 0;
9133 /* Accept any base register. SP only in SImode or larger. */
9134 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9135 return 1;
9137 /* This is PC relative data before arm_reorg runs. */
9138 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9139 && SYMBOL_REF_P (x)
9140 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9141 && !arm_disable_literal_pool)
9142 return 1;
9144 /* This is PC relative data after arm_reorg runs. */
9145 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9146 && reload_completed
9147 && (LABEL_REF_P (x)
9148 || (GET_CODE (x) == CONST
9149 && GET_CODE (XEXP (x, 0)) == PLUS
9150 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9151 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9152 return 1;
9154 /* Post-inc indexing only supported for SImode and larger. */
9155 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9156 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9157 return 1;
9159 else if (GET_CODE (x) == PLUS)
9161 /* REG+REG address can be any two index registers. */
9162 /* We disallow FRAME+REG addressing since we know that FRAME
9163 will be replaced with STACK, and SP relative addressing only
9164 permits SP+OFFSET. */
9165 if (GET_MODE_SIZE (mode) <= 4
9166 && XEXP (x, 0) != frame_pointer_rtx
9167 && XEXP (x, 1) != frame_pointer_rtx
9168 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9169 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9170 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9171 return 1;
9173 /* REG+const has 5-7 bit offset for non-SP registers. */
9174 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9175 || XEXP (x, 0) == arg_pointer_rtx)
9176 && CONST_INT_P (XEXP (x, 1))
9177 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9178 return 1;
9180 /* REG+const has 10-bit offset for SP, but only SImode and
9181 larger are supported. */
9182 /* ??? Should probably check for DI/DFmode overflow here
9183 just like GO_IF_LEGITIMATE_OFFSET does. */
9184 else if (REG_P (XEXP (x, 0))
9185 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9186 && GET_MODE_SIZE (mode) >= 4
9187 && CONST_INT_P (XEXP (x, 1))
9188 && INTVAL (XEXP (x, 1)) >= 0
9189 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9190 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9191 return 1;
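/* Worked example: an SImode load at SP+1020 is accepted (1020 + 4 <= 1024
   and the offset is word-aligned, matching LDR's scaled 8-bit SP-relative
   immediate), whereas SP+1024 is rejected.  */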
9193 else if (REG_P (XEXP (x, 0))
9194 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9195 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9196 || VIRTUAL_REGISTER_P (XEXP (x, 0)))
9197 && GET_MODE_SIZE (mode) >= 4
9198 && CONST_INT_P (XEXP (x, 1))
9199 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9200 return 1;
9203 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9204 && GET_MODE_SIZE (mode) == 4
9205 && SYMBOL_REF_P (x)
9206 && CONSTANT_POOL_ADDRESS_P (x)
9207 && !arm_disable_literal_pool
9208 && ! (flag_pic
9209 && symbol_mentioned_p (get_pool_constant (x))
9210 && ! pcrel_constant_p (get_pool_constant (x))))
9211 return 1;
9213 return 0;
9216 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9217 instruction of mode MODE. */
9219 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9221 switch (GET_MODE_SIZE (mode))
9223 case 1:
9224 return val >= 0 && val < 32;
9226 case 2:
9227 return val >= 0 && val < 64 && (val & 1) == 0;
9229 default:
9230 return (val >= 0
9231 && (val + GET_MODE_SIZE (mode)) <= 128
9232 && (val & 3) == 0);
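/* These ranges mirror the scaled 5-bit immediates of the Thumb-1
   load/store encodings: byte accesses allow 0..31, halfword accesses
   0..62 (even), and word accesses 0..124 in multiples of 4; larger modes
   are constrained so that the whole access stays below 128.  */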
9236 bool
9237 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, code_helper)
9239 if (TARGET_ARM)
9240 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9241 else if (TARGET_THUMB2)
9242 return thumb2_legitimate_address_p (mode, x, strict_p);
9243 else /* if (TARGET_THUMB1) */
9244 return thumb1_legitimate_address_p (mode, x, strict_p);
9247 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9249 Given an rtx X being reloaded into a reg required to be
9250 in class CLASS, return the class of reg to actually use.
9251 In general this is just CLASS, but for the Thumb core registers and
9252 immediate constants we prefer a LO_REGS class or a subset. */
9254 static reg_class_t
9255 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9257 if (TARGET_32BIT)
9258 return rclass;
9259 else
9261 if (rclass == GENERAL_REGS)
9262 return LO_REGS;
9263 else
9264 return rclass;
9268 /* Build the SYMBOL_REF for __tls_get_addr. */
9270 static GTY(()) rtx tls_get_addr_libfunc;
9272 static rtx
9273 get_tls_get_addr (void)
9275 if (!tls_get_addr_libfunc)
9276 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9277 return tls_get_addr_libfunc;
9281 arm_load_tp (rtx target)
9283 if (!target)
9284 target = gen_reg_rtx (SImode);
9286 if (TARGET_HARD_TP)
9288 /* Can return in any reg. */
9289 emit_insn (gen_load_tp_hard (target));
9291 else
9293 /* Always returned in r0. Immediately copy the result into a pseudo,
9294 otherwise other uses of r0 (e.g. setting up function arguments) may
9295 clobber the value. */
9297 rtx tmp;
9299 if (TARGET_FDPIC)
9301 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9302 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9304 emit_insn (gen_load_tp_soft_fdpic ());
9306 /* Restore r9. */
9307 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9309 else
9310 emit_insn (gen_load_tp_soft ());
9312 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9313 emit_move_insn (target, tmp);
9315 return target;
9318 static rtx
9319 load_tls_operand (rtx x, rtx reg)
9321 rtx tmp;
9323 if (reg == NULL_RTX)
9324 reg = gen_reg_rtx (SImode);
9326 tmp = gen_rtx_CONST (SImode, x);
9328 emit_move_insn (reg, tmp);
9330 return reg;
9333 static rtx_insn *
9334 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9336 rtx label, labelno = NULL_RTX, sum;
9338 gcc_assert (reloc != TLS_DESCSEQ);
9339 start_sequence ();
9341 if (TARGET_FDPIC)
9343 sum = gen_rtx_UNSPEC (Pmode,
9344 gen_rtvec (2, x, GEN_INT (reloc)),
9345 UNSPEC_TLS);
9347 else
9349 labelno = GEN_INT (pic_labelno++);
9350 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9351 label = gen_rtx_CONST (VOIDmode, label);
9353 sum = gen_rtx_UNSPEC (Pmode,
9354 gen_rtvec (4, x, GEN_INT (reloc), label,
9355 GEN_INT (TARGET_ARM ? 8 : 4)),
9356 UNSPEC_TLS);
9358 reg = load_tls_operand (sum, reg);
9360 if (TARGET_FDPIC)
9361 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9362 else if (TARGET_ARM)
9363 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9364 else
9365 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9367 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9368 LCT_PURE, /* LCT_CONST? */
9369 Pmode, reg, Pmode);
9371 rtx_insn *insns = get_insns ();
9372 end_sequence ();
9374 return insns;
9377 static rtx
9378 arm_tls_descseq_addr (rtx x, rtx reg)
9380 rtx labelno = GEN_INT (pic_labelno++);
9381 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9382 rtx sum = gen_rtx_UNSPEC (Pmode,
9383 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9384 gen_rtx_CONST (VOIDmode, label),
9385 GEN_INT (!TARGET_ARM)),
9386 UNSPEC_TLS);
9387 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9389 emit_insn (gen_tlscall (x, labelno));
9390 if (!reg)
9391 reg = gen_reg_rtx (SImode);
9392 else
9393 gcc_assert (REGNO (reg) != R0_REGNUM);
9395 emit_move_insn (reg, reg0);
9397 return reg;
9402 legitimize_tls_address (rtx x, rtx reg)
9404 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9405 rtx_insn *insns;
9406 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9408 switch (model)
9410 case TLS_MODEL_GLOBAL_DYNAMIC:
9411 if (TARGET_GNU2_TLS)
9413 gcc_assert (!TARGET_FDPIC);
9415 reg = arm_tls_descseq_addr (x, reg);
9417 tp = arm_load_tp (NULL_RTX);
9419 dest = gen_rtx_PLUS (Pmode, tp, reg);
9421 else
9423 /* Original scheme */
9424 if (TARGET_FDPIC)
9425 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9426 else
9427 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9428 dest = gen_reg_rtx (Pmode);
9429 emit_libcall_block (insns, dest, ret, x);
9431 return dest;
9433 case TLS_MODEL_LOCAL_DYNAMIC:
9434 if (TARGET_GNU2_TLS)
9436 gcc_assert (!TARGET_FDPIC);
9438 reg = arm_tls_descseq_addr (x, reg);
9440 tp = arm_load_tp (NULL_RTX);
9442 dest = gen_rtx_PLUS (Pmode, tp, reg);
9444 else
9446 if (TARGET_FDPIC)
9447 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9448 else
9449 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9451 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9452 share the LDM result with other LD model accesses. */
9453 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9454 UNSPEC_TLS);
9455 dest = gen_reg_rtx (Pmode);
9456 emit_libcall_block (insns, dest, ret, eqv);
9458 /* Load the addend. */
9459 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9460 GEN_INT (TLS_LDO32)),
9461 UNSPEC_TLS);
9462 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9463 dest = gen_rtx_PLUS (Pmode, dest, addend);
9465 return dest;
9467 case TLS_MODEL_INITIAL_EXEC:
9468 if (TARGET_FDPIC)
9470 sum = gen_rtx_UNSPEC (Pmode,
9471 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9472 UNSPEC_TLS);
9473 reg = load_tls_operand (sum, reg);
9474 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9475 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9477 else
9479 labelno = GEN_INT (pic_labelno++);
9480 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9481 label = gen_rtx_CONST (VOIDmode, label);
9482 sum = gen_rtx_UNSPEC (Pmode,
9483 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9484 GEN_INT (TARGET_ARM ? 8 : 4)),
9485 UNSPEC_TLS);
9486 reg = load_tls_operand (sum, reg);
9488 if (TARGET_ARM)
9489 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9490 else if (TARGET_THUMB2)
9491 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9492 else
9494 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9495 emit_move_insn (reg, gen_const_mem (SImode, reg));
9499 tp = arm_load_tp (NULL_RTX);
9501 return gen_rtx_PLUS (Pmode, tp, reg);
9503 case TLS_MODEL_LOCAL_EXEC:
9504 tp = arm_load_tp (NULL_RTX);
9506 reg = gen_rtx_UNSPEC (Pmode,
9507 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9508 UNSPEC_TLS);
9509 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9511 return gen_rtx_PLUS (Pmode, tp, reg);
9513 default:
9514 abort ();
9518 /* Try machine-dependent ways of modifying an illegitimate address
9519 to be legitimate. If we find one, return the new, valid address. */
9521 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9523 if (arm_tls_referenced_p (x))
9525 rtx addend = NULL;
9527 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9529 addend = XEXP (XEXP (x, 0), 1);
9530 x = XEXP (XEXP (x, 0), 0);
9533 if (!SYMBOL_REF_P (x))
9534 return x;
9536 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9538 x = legitimize_tls_address (x, NULL_RTX);
9540 if (addend)
9542 x = gen_rtx_PLUS (SImode, x, addend);
9543 orig_x = x;
9545 else
9546 return x;
9549 if (TARGET_THUMB1)
9550 return thumb_legitimize_address (x, orig_x, mode);
9552 if (GET_CODE (x) == PLUS)
9554 rtx xop0 = XEXP (x, 0);
9555 rtx xop1 = XEXP (x, 1);
9557 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9558 xop0 = force_reg (SImode, xop0);
9560 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9561 && !symbol_mentioned_p (xop1))
9562 xop1 = force_reg (SImode, xop1);
9564 if (ARM_BASE_REGISTER_RTX_P (xop0)
9565 && CONST_INT_P (xop1))
9567 HOST_WIDE_INT n, low_n;
9568 rtx base_reg, val;
9569 n = INTVAL (xop1);
9571 /* VFP addressing modes actually allow greater offsets, but for
9572 now we just stick with the lowest common denominator. */
9573 if (mode == DImode || mode == DFmode)
9575 low_n = n & 0x0f;
9576 n &= ~0x0f;
9577 if (low_n > 4)
9579 n += 16;
9580 low_n -= 16;
9583 else
9585 low_n = ((mode) == TImode ? 0
9586 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9587 n -= low_n;
9590 base_reg = gen_reg_rtx (SImode);
9591 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9592 emit_move_insn (base_reg, val);
9593 x = plus_constant (Pmode, base_reg, low_n);
9595 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9596 x = gen_rtx_PLUS (SImode, xop0, xop1);
9599 /* XXX We don't allow MINUS any more -- see comment in
9600 arm_legitimate_address_outer_p (). */
9601 else if (GET_CODE (x) == MINUS)
9603 rtx xop0 = XEXP (x, 0);
9604 rtx xop1 = XEXP (x, 1);
9606 if (CONSTANT_P (xop0))
9607 xop0 = force_reg (SImode, xop0);
9609 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9610 xop1 = force_reg (SImode, xop1);
9612 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9613 x = gen_rtx_MINUS (SImode, xop0, xop1);
9616 /* Make sure to take full advantage of the pre-indexed addressing mode
9617 with absolute addresses, which often allows the base register to
9618 be factorized for multiple adjacent memory references, and it might
9619 even allow the minipool to be avoided entirely. */
9620 else if (CONST_INT_P (x) && optimize > 0)
9622 unsigned int bits;
9623 HOST_WIDE_INT mask, base, index;
9624 rtx base_reg;
9626 /* LDR and LDRB can use a 12-bit index; ldrsb and the rest can
9627 only use an 8-bit index. So let's use a 12-bit index for
9628 SImode only and hope that arm_gen_constant will enable LDRB
9629 to use more bits. */
9630 bits = (mode == SImode) ? 12 : 8;
9631 mask = (1 << bits) - 1;
9632 base = INTVAL (x) & ~mask;
9633 index = INTVAL (x) & mask;
9634 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9636 /* It'll most probably be more efficient to generate the
9637 base with more bits set and use a negative index instead.
9638 Don't do this for Thumb as negative offsets are much more
9639 limited. */
9640 base |= mask;
9641 index -= mask;
9643 base_reg = force_reg (SImode, GEN_INT (base));
9644 x = plus_constant (Pmode, base_reg, index);
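/* Worked example (SImode): for the absolute address 0x12345678 this
   yields base = 0x12345000 and index = 0x678, so the constant is built
   once into BASE_REG and the access becomes [BASE_REG, #0x678]; nearby
   absolute addresses can then share the same base register.  */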
9647 if (flag_pic)
9649 /* We need to find and carefully transform any SYMBOL and LABEL
9650 references; so go back to the original address expression. */
9651 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9652 false /*compute_now*/);
9654 if (new_x != orig_x)
9655 x = new_x;
9658 return x;
9662 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9663 to be legitimate. If we find one, return the new, valid address. */
9665 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9667 if (GET_CODE (x) == PLUS
9668 && CONST_INT_P (XEXP (x, 1))
9669 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9670 || INTVAL (XEXP (x, 1)) < 0))
9672 rtx xop0 = XEXP (x, 0);
9673 rtx xop1 = XEXP (x, 1);
9674 HOST_WIDE_INT offset = INTVAL (xop1);
9676 /* Try and fold the offset into a biasing of the base register and
9677 then offsetting that. Don't do this when optimizing for space
9678 since it can cause too many CSEs. */
9679 if (optimize_size && offset >= 0
9680 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9682 HOST_WIDE_INT delta;
9684 if (offset >= 256)
9685 delta = offset - (256 - GET_MODE_SIZE (mode));
9686 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9687 delta = 31 * GET_MODE_SIZE (mode);
9688 else
9689 delta = offset & (~31 * GET_MODE_SIZE (mode));
9691 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9692 NULL_RTX);
9693 x = plus_constant (Pmode, xop0, delta);
9695 else if (offset < 0 && offset > -256)
9696 /* Small negative offsets are best done with a subtract before the
9697 dereference; forcing these into a register normally takes two
9698 instructions. */
9699 x = force_operand (x, NULL_RTX);
9700 else
9702 /* For the remaining cases, force the constant into a register. */
9703 xop1 = force_reg (SImode, xop1);
9704 x = gen_rtx_PLUS (SImode, xop0, xop1);
9707 else if (GET_CODE (x) == PLUS
9708 && s_register_operand (XEXP (x, 1), SImode)
9709 && !s_register_operand (XEXP (x, 0), SImode))
9711 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9713 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9716 if (flag_pic)
9718 /* We need to find and carefully transform any SYMBOL and LABEL
9719 references; so go back to the original address expression. */
9720 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9721 false /*compute_now*/);
9723 if (new_x != orig_x)
9724 x = new_x;
9727 return x;
9730 /* Return TRUE if X contains any TLS symbol references. */
9732 bool
9733 arm_tls_referenced_p (rtx x)
9735 if (! TARGET_HAVE_TLS)
9736 return false;
9738 subrtx_iterator::array_type array;
9739 FOR_EACH_SUBRTX (iter, array, x, ALL)
9741 const_rtx x = *iter;
9742 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9744 /* ARM currently does not provide relocations to encode TLS variables
9745 into AArch32 instructions, only data, so there is currently no way
9746 to implement these if a literal pool is disabled. */
9747 if (arm_disable_literal_pool)
9748 sorry ("accessing thread-local storage is not currently supported "
9749 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9751 return true;
9754 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9755 TLS offsets, not real symbol references. */
9756 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9757 iter.skip_subrtxes ();
9759 return false;
9762 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9764 On the ARM, allow any integer (invalid ones are removed later by insn
9765 patterns), nice doubles and symbol_refs which refer to the function's
9766 constant pool XXX.
9768 When generating pic allow anything. */
9770 static bool
9771 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9773 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9774 return false;
9776 return flag_pic || !label_mentioned_p (x);
9779 static bool
9780 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9782 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9783 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9784 for ARMv8-M Baseline or later the result is valid. */
9785 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9786 x = XEXP (x, 0);
9788 return (CONST_INT_P (x)
9789 || CONST_DOUBLE_P (x)
9790 || CONSTANT_ADDRESS_P (x)
9791 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9792 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9793 we build the symbol address with upper/lower
9794 relocations. */
9795 || (TARGET_THUMB1
9796 && !label_mentioned_p (x)
9797 && arm_valid_symbolic_address_p (x)
9798 && arm_disable_literal_pool)
9799 || flag_pic);
9802 static bool
9803 arm_legitimate_constant_p (machine_mode mode, rtx x)
9805 return (!arm_cannot_force_const_mem (mode, x)
9806 && (TARGET_32BIT
9807 ? arm_legitimate_constant_p_1 (mode, x)
9808 : thumb_legitimate_constant_p (mode, x)));
9811 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9813 static bool
9814 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9816 rtx base, offset;
9817 split_const (x, &base, &offset);
9819 if (SYMBOL_REF_P (base))
9821 /* Function symbols cannot have an offset due to the Thumb bit. */
9822 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9823 && INTVAL (offset) != 0)
9824 return true;
9826 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9827 && !offset_within_block_p (base, INTVAL (offset)))
9828 return true;
9830 return arm_tls_referenced_p (x);
9833 #define REG_OR_SUBREG_REG(X) \
9834 (REG_P (X) \
9835 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9837 #define REG_OR_SUBREG_RTX(X) \
9838 (REG_P (X) ? (X) : SUBREG_REG (X))
9840 static inline int
9841 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9843 machine_mode mode = GET_MODE (x);
9844 int total, words;
9846 switch (code)
9848 case ASHIFT:
9849 case ASHIFTRT:
9850 case LSHIFTRT:
9851 case ROTATERT:
9852 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9854 case PLUS:
9855 case MINUS:
9856 case COMPARE:
9857 case NEG:
9858 case NOT:
9859 return COSTS_N_INSNS (1);
9861 case MULT:
9862 if (arm_arch6m && arm_m_profile_small_mul)
9863 return COSTS_N_INSNS (32);
9865 if (CONST_INT_P (XEXP (x, 1)))
9867 int cycles = 0;
9868 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9870 while (i)
9872 i >>= 2;
9873 cycles++;
9875 return COSTS_N_INSNS (2) + cycles;
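/* Worked example: for a multiply by 100 the loop above iterates four
   times (100 -> 25 -> 6 -> 1 -> 0), giving COSTS_N_INSNS (2) + 4.  */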
9877 return COSTS_N_INSNS (1) + 16;
9879 case SET:
9880 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9881 the mode. */
9882 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9883 return (COSTS_N_INSNS (words)
9884 + 4 * ((MEM_P (SET_SRC (x)))
9885 + MEM_P (SET_DEST (x))));
9887 case CONST_INT:
9888 if (outer == SET)
9890 if (UINTVAL (x) < 256
9891 /* 16-bit constant. */
9892 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9893 return 0;
9894 if (thumb_shiftable_const (INTVAL (x)))
9895 return COSTS_N_INSNS (2);
9896 return arm_disable_literal_pool
9897 ? COSTS_N_INSNS (8)
9898 : COSTS_N_INSNS (3);
9900 else if ((outer == PLUS || outer == COMPARE)
9901 && INTVAL (x) < 256 && INTVAL (x) > -256)
9902 return 0;
9903 else if ((outer == IOR || outer == XOR || outer == AND)
9904 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9905 return COSTS_N_INSNS (1);
9906 else if (outer == AND)
9908 int i;
9909 /* This duplicates the tests in the andsi3 expander. */
9910 for (i = 9; i <= 31; i++)
9911 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9912 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9913 return COSTS_N_INSNS (2);
9915 else if (outer == ASHIFT || outer == ASHIFTRT
9916 || outer == LSHIFTRT)
9917 return 0;
9918 return COSTS_N_INSNS (2);
9920 case CONST:
9921 case CONST_DOUBLE:
9922 case LABEL_REF:
9923 case SYMBOL_REF:
9924 return COSTS_N_INSNS (3);
9926 case UDIV:
9927 case UMOD:
9928 case DIV:
9929 case MOD:
9930 return 100;
9932 case TRUNCATE:
9933 return 99;
9935 case AND:
9936 case XOR:
9937 case IOR:
9938 /* XXX guess. */
9939 return 8;
9941 case MEM:
9942 /* XXX another guess. */
9943 /* Memory costs quite a lot for the first word, but subsequent words
9944 load at the equivalent of a single insn each. */
9945 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9946 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9947 ? 4 : 0));
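/* E.g. with UNITS_PER_WORD == 4, a DImode memory reference costs
   10 + 4 * 1 = 14, plus another 4 if it is a constant-pool load.  */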
9949 case IF_THEN_ELSE:
9950 /* XXX a guess. */
9951 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9952 return 14;
9953 return 2;
9955 case SIGN_EXTEND:
9956 case ZERO_EXTEND:
9957 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9958 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9960 if (mode == SImode)
9961 return total;
9963 if (arm_arch6)
9964 return total + COSTS_N_INSNS (1);
9966 /* Assume a two-shift sequence. Increase the cost slightly so
9967 we prefer actual shifts over an extend operation. */
9968 return total + 1 + COSTS_N_INSNS (2);
9970 default:
9971 return 99;
9975 /* Estimates the size cost of thumb1 instructions.
9976 For now, most of the code is copied from thumb1_rtx_costs. We need more
9977 fine-grained tuning when we have more related test cases. */
9978 static inline int
9979 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9981 machine_mode mode = GET_MODE (x);
9982 int words, cost;
9984 switch (code)
9986 case ASHIFT:
9987 case ASHIFTRT:
9988 case LSHIFTRT:
9989 case ROTATERT:
9990 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9992 case PLUS:
9993 case MINUS:
9994 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9995 defined by RTL expansion, especially for the expansion of
9996 multiplication. */
9997 if ((GET_CODE (XEXP (x, 0)) == MULT
9998 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9999 || (GET_CODE (XEXP (x, 1)) == MULT
10000 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
10001 return COSTS_N_INSNS (2);
10002 /* Fall through. */
10003 case COMPARE:
10004 case NEG:
10005 case NOT:
10006 return COSTS_N_INSNS (1);
10008 case MULT:
10009 if (CONST_INT_P (XEXP (x, 1)))
10011 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
10012 into a register first. */
10013 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
10014 /* For the targets which have a very small and high-latency multiply
10015 unit, we prefer to synthesize the mult with up to 5 instructions,
10016 giving a good balance between size and performance. */
10017 if (arm_arch6m && arm_m_profile_small_mul)
10018 return COSTS_N_INSNS (5);
10019 else
10020 return COSTS_N_INSNS (1) + const_size;
10022 return COSTS_N_INSNS (1);
10024 case SET:
10025 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
10026 the mode. */
10027 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
10028 cost = COSTS_N_INSNS (words);
10029 if (satisfies_constraint_J (SET_SRC (x))
10030 || satisfies_constraint_K (SET_SRC (x))
10031 /* Too big an immediate for a 2-byte mov, using MOVT. */
10032 || (CONST_INT_P (SET_SRC (x))
10033 && UINTVAL (SET_SRC (x)) >= 256
10034 && TARGET_HAVE_MOVT
10035 && satisfies_constraint_j (SET_SRC (x)))
10036 /* thumb1_movdi_insn. */
10037 || ((words > 1) && MEM_P (SET_SRC (x))))
10038 cost += COSTS_N_INSNS (1);
10039 return cost;
10041 case CONST_INT:
10042 if (outer == SET)
10044 if (UINTVAL (x) < 256)
10045 return COSTS_N_INSNS (1);
10046 /* movw is 4 bytes long. */
10047 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
10048 return COSTS_N_INSNS (2);
10049 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
10050 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
10051 return COSTS_N_INSNS (2);
10052 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
10053 if (thumb_shiftable_const (INTVAL (x)))
10054 return COSTS_N_INSNS (2);
10055 return arm_disable_literal_pool
10056 ? COSTS_N_INSNS (8)
10057 : COSTS_N_INSNS (3);
10059 else if ((outer == PLUS || outer == COMPARE)
10060 && INTVAL (x) < 256 && INTVAL (x) > -256)
10061 return 0;
10062 else if ((outer == IOR || outer == XOR || outer == AND)
10063 && INTVAL (x) < 256 && INTVAL (x) >= -256)
10064 return COSTS_N_INSNS (1);
10065 else if (outer == AND)
10067 int i;
10068 /* This duplicates the tests in the andsi3 expander. */
10069 for (i = 9; i <= 31; i++)
10070 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
10071 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
10072 return COSTS_N_INSNS (2);
10074 else if (outer == ASHIFT || outer == ASHIFTRT
10075 || outer == LSHIFTRT)
10076 return 0;
10077 return COSTS_N_INSNS (2);
10079 case CONST:
10080 case CONST_DOUBLE:
10081 case LABEL_REF:
10082 case SYMBOL_REF:
10083 return COSTS_N_INSNS (3);
10085 case UDIV:
10086 case UMOD:
10087 case DIV:
10088 case MOD:
10089 return 100;
10091 case TRUNCATE:
10092 return 99;
10094 case AND:
10095 case XOR:
10096 case IOR:
10097 return COSTS_N_INSNS (1);
10099 case MEM:
10100 return (COSTS_N_INSNS (1)
10101 + COSTS_N_INSNS (1)
10102 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10103 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10104 ? COSTS_N_INSNS (1) : 0));
10106 case IF_THEN_ELSE:
10107 /* XXX a guess. */
10108 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10109 return 14;
10110 return 2;
10112 case ZERO_EXTEND:
10113 /* XXX still guessing. */
10114 switch (GET_MODE (XEXP (x, 0)))
10116 case E_QImode:
10117 return (1 + (mode == DImode ? 4 : 0)
10118 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10120 case E_HImode:
10121 return (4 + (mode == DImode ? 4 : 0)
10122 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10124 case E_SImode:
10125 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10127 default:
10128 return 99;
10131 default:
10132 return 99;
10136 /* Helper function for arm_rtx_costs. If one operand of the OP, a
10137 PLUS, adds the carry flag, then return the other operand. If
10138 neither is a carry, return OP unchanged. */
10139 static rtx
10140 strip_carry_operation (rtx op)
10142 gcc_assert (GET_CODE (op) == PLUS);
10143 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10144 return XEXP (op, 1);
10145 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10146 return XEXP (op, 0);
10147 return op;
10150 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10151 operand, then return the operand that is being shifted. If the shift
10152 is not by a constant, then set SHIFT_REG to point to the operand.
10153 Return NULL if OP is not a shifter operand. */
10154 static rtx
10155 shifter_op_p (rtx op, rtx *shift_reg)
10157 enum rtx_code code = GET_CODE (op);
10159 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10160 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10161 return XEXP (op, 0);
10162 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10163 return XEXP (op, 0);
10164 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10165 || code == ASHIFTRT)
10167 if (!CONST_INT_P (XEXP (op, 1)))
10168 *shift_reg = XEXP (op, 1);
10169 return XEXP (op, 0);
10172 return NULL;
10175 static bool
10176 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10178 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10179 rtx_code code = GET_CODE (x);
10180 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10182 switch (XINT (x, 1))
10184 case UNSPEC_UNALIGNED_LOAD:
10185 /* We can only do unaligned loads into the integer unit, and we can't
10186 use LDM or LDRD. */
10187 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10188 if (speed_p)
10189 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10190 + extra_cost->ldst.load_unaligned);
10192 #ifdef NOT_YET
10193 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10194 ADDR_SPACE_GENERIC, speed_p);
10195 #endif
10196 return true;
10198 case UNSPEC_UNALIGNED_STORE:
10199 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10200 if (speed_p)
10201 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10202 + extra_cost->ldst.store_unaligned);
10204 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10205 #ifdef NOT_YET
10206 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10207 ADDR_SPACE_GENERIC, speed_p);
10208 #endif
10209 return true;
10211 case UNSPEC_VRINTZ:
10212 case UNSPEC_VRINTP:
10213 case UNSPEC_VRINTM:
10214 case UNSPEC_VRINTR:
10215 case UNSPEC_VRINTX:
10216 case UNSPEC_VRINTA:
10217 if (speed_p)
10218 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10220 return true;
10221 default:
10222 *cost = COSTS_N_INSNS (2);
10223 break;
10225 return true;
10228 /* Cost of a libcall. We assume one insn per argument, an amount for the
10229 call (one insn for -Os) and then one for processing the result. */
10230 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
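/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) at -Os.  */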
10232 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10233 do \
10235 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10236 if (shift_op != NULL \
10237 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10239 if (shift_reg) \
10241 if (speed_p) \
10242 *cost += extra_cost->alu.arith_shift_reg; \
10243 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10244 ASHIFT, 1, speed_p); \
10246 else if (speed_p) \
10247 *cost += extra_cost->alu.arith_shift; \
10249 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10250 ASHIFT, 0, speed_p) \
10251 + rtx_cost (XEXP (x, 1 - IDX), \
10252 GET_MODE (shift_op), \
10253 OP, 1, speed_p)); \
10254 return true; \
10257 while (0)
10259 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10260 considering the costs of the addressing mode and memory access
10261 separately. */
10262 static bool
10263 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10264 int *cost, bool speed_p)
10266 machine_mode mode = GET_MODE (x);
10268 *cost = COSTS_N_INSNS (1);
10270 if (flag_pic
10271 && GET_CODE (XEXP (x, 0)) == PLUS
10272 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10273 /* This will be split into two instructions. Add the cost of the
10274 additional instruction here. The cost of the memory access is computed
10275 below. See arm.md:calculate_pic_address. */
10276 *cost += COSTS_N_INSNS (1);
10278 /* Calculate cost of the addressing mode. */
10279 if (speed_p)
10281 arm_addr_mode_op op_type;
10282 switch (GET_CODE (XEXP (x, 0)))
10284 default:
10285 case REG:
10286 op_type = AMO_DEFAULT;
10287 break;
10288 case MINUS:
10289 /* MINUS does not appear in RTL, but the architecture supports it,
10290 so handle this case defensively. */
10291 /* fall through */
10292 case PLUS:
10293 op_type = AMO_NO_WB;
10294 break;
10295 case PRE_INC:
10296 case PRE_DEC:
10297 case POST_INC:
10298 case POST_DEC:
10299 case PRE_MODIFY:
10300 case POST_MODIFY:
10301 op_type = AMO_WB;
10302 break;
10305 if (VECTOR_MODE_P (mode))
10306 *cost += current_tune->addr_mode_costs->vector[op_type];
10307 else if (FLOAT_MODE_P (mode))
10308 *cost += current_tune->addr_mode_costs->fp[op_type];
10309 else
10310 *cost += current_tune->addr_mode_costs->integer[op_type];
10313 /* Calculate cost of memory access. */
10314 if (speed_p)
10316 if (FLOAT_MODE_P (mode))
10318 if (GET_MODE_SIZE (mode) == 8)
10319 *cost += extra_cost->ldst.loadd;
10320 else
10321 *cost += extra_cost->ldst.loadf;
10323 else if (VECTOR_MODE_P (mode))
10324 *cost += extra_cost->ldst.loadv;
10325 else
10327 /* Integer modes */
10328 if (GET_MODE_SIZE (mode) == 8)
10329 *cost += extra_cost->ldst.ldrd;
10330 else
10331 *cost += extra_cost->ldst.load;
10335 return true;
10338 /* Helper for arm_bfi_p. */
10339 static bool
10340 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10342 unsigned HOST_WIDE_INT const1;
10343 unsigned HOST_WIDE_INT const2 = 0;
10345 if (!CONST_INT_P (XEXP (op0, 1)))
10346 return false;
10348 const1 = UINTVAL (XEXP (op0, 1));
10349 if (!CONST_INT_P (XEXP (op1, 1))
10350 || ~UINTVAL (XEXP (op1, 1)) != const1)
10351 return false;
10353 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10354 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10356 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10357 *sub0 = XEXP (XEXP (op0, 0), 0);
10359 else
10360 *sub0 = XEXP (op0, 0);
10362 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10363 return false;
10365 *sub1 = XEXP (op1, 0);
10366 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10369 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10370 format looks something like:
10372 (IOR (AND (reg1) (~const1))
10373 (AND (ASHIFT (reg2) (const2))
10374 (const1)))
10376 where const1 is a consecutive sequence of 1-bits with the
10377 least-significant non-zero bit starting at bit position const2. If
10378 const2 is zero, then the shift will not appear at all, due to
10379 canonicalization. The two arms of the IOR expression may be
10380 flipped. */
10381 static bool
10382 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10384 if (GET_CODE (x) != IOR)
10385 return false;
10386 if (GET_CODE (XEXP (x, 0)) != AND
10387 || GET_CODE (XEXP (x, 1)) != AND)
10388 return false;
10389 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10390 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
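/* As a concrete illustration, a C-level field insert such as

     r1 = (r1 & ~0xf0u) | ((r2 << 4) & 0xf0u);

   reaches this code as
     (ior (and (reg r1) (const_int -241))
          (and (ashift (reg r2) (const_int 4)) (const_int 240)))
   and can be implemented as a single BFI r1, r2, #4, #4.  */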
10393 /* RTX costs. Make an estimate of the cost of executing the operation
10394 X, which is contained within an operation with code OUTER_CODE.
10395 SPEED_P indicates whether the cost desired is the performance cost,
10396 or the size cost. The estimate is stored in COST and the return
10397 value is TRUE if the cost calculation is final, or FALSE if the
10398 caller should recurse through the operands of X to add additional
10399 costs.
10401 We currently make no attempt to model the size savings of Thumb-2
10402 16-bit instructions. At the normal points in compilation where
10403 this code is called we have no measure of whether the condition
10404 flags are live or not, and thus no realistic way to determine what
10405 the size will eventually be. */
10406 static bool
10407 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10408 const struct cpu_cost_table *extra_cost,
10409 int *cost, bool speed_p)
10411 machine_mode mode = GET_MODE (x);
10413 *cost = COSTS_N_INSNS (1);
10415 if (TARGET_THUMB1)
10417 if (speed_p)
10418 *cost = thumb1_rtx_costs (x, code, outer_code);
10419 else
10420 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10421 return true;
10424 switch (code)
10426 case SET:
10427 *cost = 0;
10428 /* SET RTXs don't have a mode so we get it from the destination. */
10429 mode = GET_MODE (SET_DEST (x));
10431 if (REG_P (SET_SRC (x))
10432 && REG_P (SET_DEST (x)))
10434 /* Assume that most copies can be done with a single insn,
10435 unless we don't have HW FP, in which case everything
10436 larger than word mode will require two insns. */
10437 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10438 && GET_MODE_SIZE (mode) > 4)
10439 || mode == DImode)
10440 ? 2 : 1);
10441 /* Conditional register moves can be encoded
10442 in 16 bits in Thumb mode. */
10443 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10444 *cost >>= 1;
10446 return true;
10449 if (CONST_INT_P (SET_SRC (x)))
10451 /* Handle CONST_INT here, since the value doesn't have a mode
10452 and we would otherwise be unable to work out the true cost. */
10453 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10454 0, speed_p);
10455 outer_code = SET;
10456 /* Slightly lower the cost of setting a core reg to a constant.
10457 This helps break up chains and allows for better scheduling. */
10458 if (REG_P (SET_DEST (x))
10459 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10460 *cost -= 1;
10461 x = SET_SRC (x);
10462 /* Immediate moves with an immediate in the range [0, 255] can be
10463 encoded in 16 bits in Thumb mode. */
10464 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10465 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10466 *cost >>= 1;
10467 goto const_int_cost;
10470 return false;
10472 case MEM:
10473 return arm_mem_costs (x, extra_cost, cost, speed_p);
10475 case PARALLEL:
10477 /* Calculations of LDM costs are complex. We assume an initial cost
10478 (ldm_1st) which will load the number of registers mentioned in
10479 ldm_regs_per_insn_1st registers; then each additional
10480 ldm_regs_per_insn_subsequent registers cost one more insn. The
10481 formula for N regs is thus:
10483 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10484 + ldm_regs_per_insn_subsequent - 1)
10485 / ldm_regs_per_insn_subsequent).
10487 Additional costs may also be added for addressing. A similar
10488 formula is used for STM. */
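/* For instance, assuming purely for illustration that
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2, an
   8-register LDM costs ldm_1st + COSTS_N_INSNS ((6 + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (3).  */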
10490 bool is_ldm = load_multiple_operation (x, SImode);
10491 bool is_stm = store_multiple_operation (x, SImode);
10493 if (is_ldm || is_stm)
10495 if (speed_p)
10497 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10498 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10499 ? extra_cost->ldst.ldm_regs_per_insn_1st
10500 : extra_cost->ldst.stm_regs_per_insn_1st;
10501 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10502 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10503 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10505 *cost += regs_per_insn_1st
10506 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10507 + regs_per_insn_sub - 1)
10508 / regs_per_insn_sub);
10509 return true;
10513 return false;
10515 case DIV:
10516 case UDIV:
10517 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10518 && (mode == SFmode || !TARGET_VFP_SINGLE))
10519 *cost += COSTS_N_INSNS (speed_p
10520 ? extra_cost->fp[mode != SFmode].div : 0);
10521 else if (mode == SImode && TARGET_IDIV)
10522 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10523 else
10524 *cost = LIBCALL_COST (2);
10526 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10527 possible, udiv is preferred. */
10528 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10529 return false; /* All arguments must be in registers. */
10531 case MOD:
10532 /* MOD by a power of 2 can be expanded as:
10533 rsbs r1, r0, #0
10534 and r0, r0, #(n - 1)
10535 and r1, r1, #(n - 1)
10536 rsbpl r0, r1, #0. */
10537 if (CONST_INT_P (XEXP (x, 1))
10538 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10539 && mode == SImode)
10541 *cost += COSTS_N_INSNS (3);
10543 if (speed_p)
10544 *cost += 2 * extra_cost->alu.logical
10545 + extra_cost->alu.arith;
10546 return true;
10549 /* Fall-through. */
10550 case UMOD:
10551 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10552 possible, udiv is preferred. */
10553 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10554 return false; /* All arguments must be in registers. */
10556 case ROTATE:
10557 if (mode == SImode && REG_P (XEXP (x, 1)))
10559 *cost += (COSTS_N_INSNS (1)
10560 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10561 if (speed_p)
10562 *cost += extra_cost->alu.shift_reg;
10563 return true;
10565 /* Fall through */
10566 case ROTATERT:
10567 case ASHIFT:
10568 case LSHIFTRT:
10569 case ASHIFTRT:
10570 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10572 *cost += (COSTS_N_INSNS (2)
10573 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10574 if (speed_p)
10575 *cost += 2 * extra_cost->alu.shift;
10576 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10577 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10578 *cost += 1;
10579 return true;
10581 else if (mode == SImode)
10583 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10584 /* Slightly disparage register shifts at -Os, but not by much. */
10585 if (!CONST_INT_P (XEXP (x, 1)))
10586 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10587 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10588 return true;
10590 else if (GET_MODE_CLASS (mode) == MODE_INT
10591 && GET_MODE_SIZE (mode) < 4)
10593 if (code == ASHIFT)
10595 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10596 /* Slightly disparage register shifts at -Os, but not by
10597 much. */
10598 if (!CONST_INT_P (XEXP (x, 1)))
10599 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10600 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10602 else if (code == LSHIFTRT || code == ASHIFTRT)
10604 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10606 /* Can use SBFX/UBFX. */
10607 if (speed_p)
10608 *cost += extra_cost->alu.bfx;
10609 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10611 else
10613 *cost += COSTS_N_INSNS (1);
10614 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10615 if (speed_p)
10617 if (CONST_INT_P (XEXP (x, 1)))
10618 *cost += 2 * extra_cost->alu.shift;
10619 else
10620 *cost += (extra_cost->alu.shift
10621 + extra_cost->alu.shift_reg);
10623 else
10624 /* Slightly disparage register shifts. */
10625 *cost += !CONST_INT_P (XEXP (x, 1));
10628 else /* Rotates. */
10630 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10631 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10632 if (speed_p)
10634 if (CONST_INT_P (XEXP (x, 1)))
10635 *cost += (2 * extra_cost->alu.shift
10636 + extra_cost->alu.log_shift);
10637 else
10638 *cost += (extra_cost->alu.shift
10639 + extra_cost->alu.shift_reg
10640 + extra_cost->alu.log_shift_reg);
10643 return true;
10646 *cost = LIBCALL_COST (2);
10647 return false;
10649 case BSWAP:
10650 if (arm_arch6)
10652 if (mode == SImode)
10654 if (speed_p)
10655 *cost += extra_cost->alu.rev;
10657 return false;
10660 else
10662 /* No rev instruction available. Look at arm_legacy_rev
10663 and thumb_legacy_rev for the form of RTL used then. */
10664 if (TARGET_THUMB)
10666 *cost += COSTS_N_INSNS (9);
10668 if (speed_p)
10670 *cost += 6 * extra_cost->alu.shift;
10671 *cost += 3 * extra_cost->alu.logical;
10674 else
10676 *cost += COSTS_N_INSNS (4);
10678 if (speed_p)
10680 *cost += 2 * extra_cost->alu.shift;
10681 *cost += extra_cost->alu.arith_shift;
10682 *cost += 2 * extra_cost->alu.logical;
10685 return true;
10687 return false;
10689 case MINUS:
10690 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10691 && (mode == SFmode || !TARGET_VFP_SINGLE))
10693 if (GET_CODE (XEXP (x, 0)) == MULT
10694 || GET_CODE (XEXP (x, 1)) == MULT)
10696 rtx mul_op0, mul_op1, sub_op;
10698 if (speed_p)
10699 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10701 if (GET_CODE (XEXP (x, 0)) == MULT)
10703 mul_op0 = XEXP (XEXP (x, 0), 0);
10704 mul_op1 = XEXP (XEXP (x, 0), 1);
10705 sub_op = XEXP (x, 1);
10707 else
10709 mul_op0 = XEXP (XEXP (x, 1), 0);
10710 mul_op1 = XEXP (XEXP (x, 1), 1);
10711 sub_op = XEXP (x, 0);
10714 /* The first operand of the multiply may be optionally
10715 negated. */
10716 if (GET_CODE (mul_op0) == NEG)
10717 mul_op0 = XEXP (mul_op0, 0);
10719 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10720 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10721 + rtx_cost (sub_op, mode, code, 0, speed_p));
10723 return true;
10726 if (speed_p)
10727 *cost += extra_cost->fp[mode != SFmode].addsub;
10728 return false;
10731 if (mode == SImode)
10733 rtx shift_by_reg = NULL;
10734 rtx shift_op;
10735 rtx non_shift_op;
10736 rtx op0 = XEXP (x, 0);
10737 rtx op1 = XEXP (x, 1);
10739 /* Factor out any borrow operation. There's more than one way
10740 of expressing this; try to recognize them all. */
10741 if (GET_CODE (op0) == MINUS)
10743 if (arm_borrow_operation (op1, SImode))
10745 op1 = XEXP (op0, 1);
10746 op0 = XEXP (op0, 0);
10748 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10749 op0 = XEXP (op0, 0);
10751 else if (GET_CODE (op1) == PLUS
10752 && arm_borrow_operation (XEXP (op1, 0), SImode))
10753 op1 = XEXP (op1, 0);
10754 else if (GET_CODE (op0) == NEG
10755 && arm_borrow_operation (op1, SImode))
10757 /* Negate with carry-in. For Thumb2 this is done with
10758 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10759 RSC instruction that exists in Arm mode. */
10760 if (speed_p)
10761 *cost += (TARGET_THUMB2
10762 ? extra_cost->alu.arith_shift
10763 : extra_cost->alu.arith);
10764 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10765 return true;
10767 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10768 Note we do mean ~borrow here. */
10769 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10771 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10772 return true;
10775 shift_op = shifter_op_p (op0, &shift_by_reg);
10776 if (shift_op == NULL)
10778 shift_op = shifter_op_p (op1, &shift_by_reg);
10779 non_shift_op = op0;
10781 else
10782 non_shift_op = op1;
10784 if (shift_op != NULL)
10786 if (shift_by_reg != NULL)
10788 if (speed_p)
10789 *cost += extra_cost->alu.arith_shift_reg;
10790 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10792 else if (speed_p)
10793 *cost += extra_cost->alu.arith_shift;
10795 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10796 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10797 return true;
10800 if (arm_arch_thumb2
10801 && GET_CODE (XEXP (x, 1)) == MULT)
10803 /* MLS. */
10804 if (speed_p)
10805 *cost += extra_cost->mult[0].add;
10806 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10807 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10808 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10809 return true;
10812 if (CONST_INT_P (op0))
10814 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10815 INTVAL (op0), NULL_RTX,
10816 NULL_RTX, 1, 0);
10817 *cost = COSTS_N_INSNS (insns);
10818 if (speed_p)
10819 *cost += insns * extra_cost->alu.arith;
10820 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10821 return true;
10823 else if (speed_p)
10824 *cost += extra_cost->alu.arith;
10826 /* Don't recurse as we don't want to cost any borrow that
10827 we've stripped. */
10828 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10829 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10830 return true;
10833 if (GET_MODE_CLASS (mode) == MODE_INT
10834 && GET_MODE_SIZE (mode) < 4)
10836 rtx shift_op, shift_reg;
10837 shift_reg = NULL;
10839 /* We check both sides of the MINUS for shifter operands since,
10840 unlike PLUS, it's not commutative. */
10842 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10843 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10845 /* Slightly disparage, as we might need to widen the result. */
10846 *cost += 1;
10847 if (speed_p)
10848 *cost += extra_cost->alu.arith;
10850 if (CONST_INT_P (XEXP (x, 0)))
10852 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10853 return true;
10856 return false;
10859 if (mode == DImode)
10861 *cost += COSTS_N_INSNS (1);
10863 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10865 rtx op1 = XEXP (x, 1);
10867 if (speed_p)
10868 *cost += 2 * extra_cost->alu.arith;
10870 if (GET_CODE (op1) == ZERO_EXTEND)
10871 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10872 0, speed_p);
10873 else
10874 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10875 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10876 0, speed_p);
10877 return true;
10879 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10881 if (speed_p)
10882 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10883 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10884 0, speed_p)
10885 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10886 return true;
10888 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10889 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10891 if (speed_p)
10892 *cost += (extra_cost->alu.arith
10893 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10894 ? extra_cost->alu.arith
10895 : extra_cost->alu.arith_shift));
10896 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10897 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10898 GET_CODE (XEXP (x, 1)), 0, speed_p));
10899 return true;
10902 if (speed_p)
10903 *cost += 2 * extra_cost->alu.arith;
10904 return false;
10907 /* Vector mode? */
10909 *cost = LIBCALL_COST (2);
10910 return false;
10912 case PLUS:
10913 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10914 && (mode == SFmode || !TARGET_VFP_SINGLE))
10916 if (GET_CODE (XEXP (x, 0)) == MULT)
10918 rtx mul_op0, mul_op1, add_op;
10920 if (speed_p)
10921 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10923 mul_op0 = XEXP (XEXP (x, 0), 0);
10924 mul_op1 = XEXP (XEXP (x, 0), 1);
10925 add_op = XEXP (x, 1);
10927 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10928 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10929 + rtx_cost (add_op, mode, code, 0, speed_p));
10931 return true;
10934 if (speed_p)
10935 *cost += extra_cost->fp[mode != SFmode].addsub;
10936 return false;
10938 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10940 *cost = LIBCALL_COST (2);
10941 return false;
10944 /* Narrow modes can be synthesized in SImode, but the range
10945 of useful sub-operations is limited. Check for shift operations
10946 on one of the operands. Only left shifts can be used in the
10947 narrow modes. */
10948 if (GET_MODE_CLASS (mode) == MODE_INT
10949 && GET_MODE_SIZE (mode) < 4)
10951 rtx shift_op, shift_reg;
10952 shift_reg = NULL;
10954 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10956 if (CONST_INT_P (XEXP (x, 1)))
10958 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10959 INTVAL (XEXP (x, 1)), NULL_RTX,
10960 NULL_RTX, 1, 0);
10961 *cost = COSTS_N_INSNS (insns);
10962 if (speed_p)
10963 *cost += insns * extra_cost->alu.arith;
10964 /* Slightly penalize a narrow operation as the result may
10965 need widening. */
10966 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10967 return true;
10970 /* Slightly penalize a narrow operation as the result may
10971 need widening. */
10972 *cost += 1;
10973 if (speed_p)
10974 *cost += extra_cost->alu.arith;
10976 return false;
10979 if (mode == SImode)
10981 rtx shift_op, shift_reg;
10983 if (TARGET_INT_SIMD
10984 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10985 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10987 /* UXTA[BH] or SXTA[BH]. */
10988 if (speed_p)
10989 *cost += extra_cost->alu.extend_arith;
10990 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10991 0, speed_p)
10992 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10993 return true;
10996 rtx op0 = XEXP (x, 0);
10997 rtx op1 = XEXP (x, 1);
10999 /* Handle a side effect of adding in the carry to an addition. */
11000 if (GET_CODE (op0) == PLUS
11001 && arm_carry_operation (op1, mode))
11003 op1 = XEXP (op0, 1);
11004 op0 = XEXP (op0, 0);
11006 else if (GET_CODE (op1) == PLUS
11007 && arm_carry_operation (op0, mode))
11009 op0 = XEXP (op1, 0);
11010 op1 = XEXP (op1, 1);
11012 else if (GET_CODE (op0) == PLUS)
11014 op0 = strip_carry_operation (op0);
11015 if (swap_commutative_operands_p (op0, op1))
11016 std::swap (op0, op1);
11019 if (arm_carry_operation (op0, mode))
11021 /* Adding the carry to a register is a canonicalization of
11022 adding 0 to the register plus the carry. */
11023 if (speed_p)
11024 *cost += extra_cost->alu.arith;
11025 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11026 return true;
11029 shift_reg = NULL;
11030 shift_op = shifter_op_p (op0, &shift_reg);
11031 if (shift_op != NULL)
11033 if (shift_reg)
11035 if (speed_p)
11036 *cost += extra_cost->alu.arith_shift_reg;
11037 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11039 else if (speed_p)
11040 *cost += extra_cost->alu.arith_shift;
11042 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11043 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11044 return true;
11047 if (GET_CODE (op0) == MULT)
11049 rtx mul_op = op0;
11051 if (TARGET_DSP_MULTIPLY
11052 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
11053 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
11054 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11055 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11056 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
11057 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
11058 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
11059 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
11060 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
11061 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11062 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11063 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
11064 == 16))))))
11066 /* SMLA[BT][BT]. */
11067 if (speed_p)
11068 *cost += extra_cost->mult[0].extend_add;
11069 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
11070 SIGN_EXTEND, 0, speed_p)
11071 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
11072 SIGN_EXTEND, 0, speed_p)
11073 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11074 return true;
11077 if (speed_p)
11078 *cost += extra_cost->mult[0].add;
11079 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
11080 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
11081 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11082 return true;
11085 if (CONST_INT_P (op1))
11087 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11088 INTVAL (op1), NULL_RTX,
11089 NULL_RTX, 1, 0);
11090 *cost = COSTS_N_INSNS (insns);
11091 if (speed_p)
11092 *cost += insns * extra_cost->alu.arith;
11093 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11094 return true;
11097 if (speed_p)
11098 *cost += extra_cost->alu.arith;
11100 /* Don't recurse here because we want to test the operands
11101 without any carry operation. */
11102 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11103 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11104 return true;
11107 if (mode == DImode)
11109 if (GET_CODE (XEXP (x, 0)) == MULT
11110 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11111 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11112 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11113 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11115 if (speed_p)
11116 *cost += extra_cost->mult[1].extend_add;
11117 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11118 ZERO_EXTEND, 0, speed_p)
11119 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11120 ZERO_EXTEND, 0, speed_p)
11121 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11122 return true;
11125 *cost += COSTS_N_INSNS (1);
11127 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11128 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11130 if (speed_p)
11131 *cost += (extra_cost->alu.arith
11132 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11133 ? extra_cost->alu.arith
11134 : extra_cost->alu.arith_shift));
11136 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11137 0, speed_p)
11138 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11139 return true;
11142 if (speed_p)
11143 *cost += 2 * extra_cost->alu.arith;
11144 return false;
11147 /* Vector mode? */
11148 *cost = LIBCALL_COST (2);
11149 return false;
11150 case IOR:
11152 rtx sub0, sub1;
11153 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11155 if (speed_p)
11156 *cost += extra_cost->alu.rev;
11158 return true;
11160 else if (mode == SImode && arm_arch_thumb2
11161 && arm_bfi_p (x, &sub0, &sub1))
11163 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11164 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11165 if (speed_p)
11166 *cost += extra_cost->alu.bfi;
11168 return true;
11172 /* Fall through. */
11173 case AND: case XOR:
11174 if (mode == SImode)
11176 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11177 rtx op0 = XEXP (x, 0);
11178 rtx shift_op, shift_reg;
11180 if (subcode == NOT
11181 && (code == AND
11182 || (code == IOR && TARGET_THUMB2)))
11183 op0 = XEXP (op0, 0);
11185 shift_reg = NULL;
11186 shift_op = shifter_op_p (op0, &shift_reg);
11187 if (shift_op != NULL)
11189 if (shift_reg)
11191 if (speed_p)
11192 *cost += extra_cost->alu.log_shift_reg;
11193 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11195 else if (speed_p)
11196 *cost += extra_cost->alu.log_shift;
11198 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11199 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11200 return true;
11203 if (CONST_INT_P (XEXP (x, 1)))
11205 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11206 INTVAL (XEXP (x, 1)), NULL_RTX,
11207 NULL_RTX, 1, 0);
11209 *cost = COSTS_N_INSNS (insns);
11210 if (speed_p)
11211 *cost += insns * extra_cost->alu.logical;
11212 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11213 return true;
11216 if (speed_p)
11217 *cost += extra_cost->alu.logical;
11218 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11219 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11220 return true;
11223 if (mode == DImode)
11225 rtx op0 = XEXP (x, 0);
11226 enum rtx_code subcode = GET_CODE (op0);
11228 *cost += COSTS_N_INSNS (1);
11230 if (subcode == NOT
11231 && (code == AND
11232 || (code == IOR && TARGET_THUMB2)))
11233 op0 = XEXP (op0, 0);
11235 if (GET_CODE (op0) == ZERO_EXTEND)
11237 if (speed_p)
11238 *cost += 2 * extra_cost->alu.logical;
11240 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11241 0, speed_p)
11242 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11243 return true;
11245 else if (GET_CODE (op0) == SIGN_EXTEND)
11247 if (speed_p)
11248 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11250 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11251 0, speed_p)
11252 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11253 return true;
11256 if (speed_p)
11257 *cost += 2 * extra_cost->alu.logical;
11259 return true;
11261 /* Vector mode? */
11263 *cost = LIBCALL_COST (2);
11264 return false;
11266 case MULT:
11267 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11268 && (mode == SFmode || !TARGET_VFP_SINGLE))
11270 rtx op0 = XEXP (x, 0);
11272 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11273 op0 = XEXP (op0, 0);
11275 if (speed_p)
11276 *cost += extra_cost->fp[mode != SFmode].mult;
11278 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11279 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11280 return true;
11282 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11284 *cost = LIBCALL_COST (2);
11285 return false;
11288 if (mode == SImode)
11290 if (TARGET_DSP_MULTIPLY
11291 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11292 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11293 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11294 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11295 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11296 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11297 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11298 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11299 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11300 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11301 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11302 && (INTVAL (XEXP (XEXP (x, 1), 1))
11303 == 16))))))
11305 /* SMUL[TB][TB]. */
11306 if (speed_p)
11307 *cost += extra_cost->mult[0].extend;
11308 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11309 SIGN_EXTEND, 0, speed_p);
11310 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11311 SIGN_EXTEND, 1, speed_p);
11312 return true;
11314 if (speed_p)
11315 *cost += extra_cost->mult[0].simple;
11316 return false;
11319 if (mode == DImode)
11321 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11322 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11323 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11324 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11326 if (speed_p)
11327 *cost += extra_cost->mult[1].extend;
11328 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11329 ZERO_EXTEND, 0, speed_p)
11330 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11331 ZERO_EXTEND, 0, speed_p));
11332 return true;
11335 *cost = LIBCALL_COST (2);
11336 return false;
11339 /* Vector mode? */
11340 *cost = LIBCALL_COST (2);
11341 return false;
11343 case NEG:
11344 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11345 && (mode == SFmode || !TARGET_VFP_SINGLE))
11347 if (GET_CODE (XEXP (x, 0)) == MULT)
11349 /* VNMUL. */
11350 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11351 return true;
11354 if (speed_p)
11355 *cost += extra_cost->fp[mode != SFmode].neg;
11357 return false;
11359 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11361 *cost = LIBCALL_COST (1);
11362 return false;
11365 if (mode == SImode)
11367 if (GET_CODE (XEXP (x, 0)) == ABS)
11369 *cost += COSTS_N_INSNS (1);
11370 /* Assume the non-flag-changing variant. */
11371 if (speed_p)
11372 *cost += (extra_cost->alu.log_shift
11373 + extra_cost->alu.arith_shift);
11374 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11375 return true;
11378 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11379 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11381 *cost += COSTS_N_INSNS (1);
11382 /* No extra cost for MOV imm and MVN imm. */
11383 /* If the comparison op is using the flags, there's no further
11384 cost, otherwise we need to add the cost of the comparison. */
11385 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11386 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11387 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11389 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11390 *cost += (COSTS_N_INSNS (1)
11391 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11392 0, speed_p)
11393 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11394 1, speed_p));
11395 if (speed_p)
11396 *cost += extra_cost->alu.arith;
11398 return true;
11401 if (speed_p)
11402 *cost += extra_cost->alu.arith;
11403 return false;
11406 if (GET_MODE_CLASS (mode) == MODE_INT
11407 && GET_MODE_SIZE (mode) < 4)
11409 /* Slightly disparage, as we might need an extend operation. */
11410 *cost += 1;
11411 if (speed_p)
11412 *cost += extra_cost->alu.arith;
11413 return false;
11416 if (mode == DImode)
11418 *cost += COSTS_N_INSNS (1);
11419 if (speed_p)
11420 *cost += 2 * extra_cost->alu.arith;
11421 return false;
11424 /* Vector mode? */
11425 *cost = LIBCALL_COST (1);
11426 return false;
11428 case NOT:
11429 if (mode == SImode)
11431 rtx shift_op;
11432 rtx shift_reg = NULL;
11434 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11436 if (shift_op)
11438 if (shift_reg != NULL)
11440 if (speed_p)
11441 *cost += extra_cost->alu.log_shift_reg;
11442 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11444 else if (speed_p)
11445 *cost += extra_cost->alu.log_shift;
11446 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11447 return true;
11450 if (speed_p)
11451 *cost += extra_cost->alu.logical;
11452 return false;
11454 if (mode == DImode)
11456 *cost += COSTS_N_INSNS (1);
11457 return false;
11460 /* Vector mode? */
11462 *cost += LIBCALL_COST (1);
11463 return false;
11465 case IF_THEN_ELSE:
11467 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11469 *cost += COSTS_N_INSNS (3);
11470 return true;
11472 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11473 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11475 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11476 /* Assume that if one arm of the if_then_else is a register,
11477 that it will be tied with the result and eliminate the
11478 conditional insn. */
11479 if (REG_P (XEXP (x, 1)))
11480 *cost += op2cost;
11481 else if (REG_P (XEXP (x, 2)))
11482 *cost += op1cost;
11483 else
11485 if (speed_p)
11487 if (extra_cost->alu.non_exec_costs_exec)
11488 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11489 else
11490 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11492 else
11493 *cost += op1cost + op2cost;
11496 return true;
11498 case COMPARE:
11499 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11500 *cost = 0;
11501 else
11503 machine_mode op0mode;
11504 /* We'll mostly assume that the cost of a compare is the cost of the
11505 LHS. However, there are some notable exceptions. */
11507 /* Floating point compares are never done as side-effects. */
11508 op0mode = GET_MODE (XEXP (x, 0));
11509 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11510 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11512 if (speed_p)
11513 *cost += extra_cost->fp[op0mode != SFmode].compare;
11515 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11517 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11518 return true;
11521 return false;
11523 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11525 *cost = LIBCALL_COST (2);
11526 return false;
11529 /* DImode compares normally take two insns. */
11530 if (op0mode == DImode)
11532 *cost += COSTS_N_INSNS (1);
11533 if (speed_p)
11534 *cost += 2 * extra_cost->alu.arith;
11535 return false;
11538 if (op0mode == SImode)
11540 rtx shift_op;
11541 rtx shift_reg;
11543 if (XEXP (x, 1) == const0_rtx
11544 && !(REG_P (XEXP (x, 0))
11545 || (GET_CODE (XEXP (x, 0)) == SUBREG
11546 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11548 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11550 /* Multiply operations that set the flags are often
11551 significantly more expensive. */
11552 if (speed_p
11553 && GET_CODE (XEXP (x, 0)) == MULT
11554 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11555 *cost += extra_cost->mult[0].flag_setting;
11557 if (speed_p
11558 && GET_CODE (XEXP (x, 0)) == PLUS
11559 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11560 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11561 0), 1), mode))
11562 *cost += extra_cost->mult[0].flag_setting;
11563 return true;
11566 shift_reg = NULL;
11567 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11568 if (shift_op != NULL)
11570 if (shift_reg != NULL)
11572 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11573 1, speed_p);
11574 if (speed_p)
11575 *cost += extra_cost->alu.arith_shift_reg;
11577 else if (speed_p)
11578 *cost += extra_cost->alu.arith_shift;
11579 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11580 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11581 return true;
11584 if (speed_p)
11585 *cost += extra_cost->alu.arith;
11586 if (CONST_INT_P (XEXP (x, 1))
11587 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11589 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11590 return true;
11592 return false;
11595 /* Vector mode? */
11597 *cost = LIBCALL_COST (2);
11598 return false;
11600 return true;
11602 case EQ:
11603 case GE:
11604 case GT:
11605 case LE:
11606 case LT:
11607 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11608 vcle and vclt). */
11609 if (TARGET_NEON
11610 && TARGET_HARD_FLOAT
11611 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11612 && (XEXP (x, 1) == CONST0_RTX (mode)))
11614 *cost = 0;
11615 return true;
11618 /* Fall through. */
11619 case NE:
11620 case LTU:
11621 case LEU:
11622 case GEU:
11623 case GTU:
11624 case ORDERED:
11625 case UNORDERED:
11626 case UNEQ:
11627 case UNLE:
11628 case UNLT:
11629 case UNGE:
11630 case UNGT:
11631 case LTGT:
11632 if (outer_code == SET)
11634 /* Is it a store-flag operation? */
11635 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11636 && XEXP (x, 1) == const0_rtx)
11638 /* Thumb also needs an IT insn. */
11639 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11640 return true;
11642 if (XEXP (x, 1) == const0_rtx)
11644 switch (code)
11646 case LT:
11647 /* LSR Rd, Rn, #31. */
11648 if (speed_p)
11649 *cost += extra_cost->alu.shift;
11650 break;
11652 case EQ:
11653 /* RSBS T1, Rn, #0
11654 ADC Rd, Rn, T1. */
11656 case NE:
11657 /* SUBS T1, Rn, #1
11658 SBC Rd, Rn, T1. */
11659 *cost += COSTS_N_INSNS (1);
11660 break;
11662 case LE:
11663 /* RSBS T1, Rn, Rn, LSR #31
11664 ADC Rd, Rn, T1. */
11665 *cost += COSTS_N_INSNS (1);
11666 if (speed_p)
11667 *cost += extra_cost->alu.arith_shift;
11668 break;
11670 case GT:
11671 /* RSB Rd, Rn, Rn, ASR #1
11672 LSR Rd, Rd, #31. */
11673 *cost += COSTS_N_INSNS (1);
11674 if (speed_p)
11675 *cost += (extra_cost->alu.arith_shift
11676 + extra_cost->alu.shift);
11677 break;
11679 case GE:
11680 /* ASR Rd, Rn, #31
11681 ADD Rd, Rn, #1. */
11682 *cost += COSTS_N_INSNS (1);
11683 if (speed_p)
11684 *cost += extra_cost->alu.shift;
11685 break;
11687 default:
11688 /* Remaining cases are either meaningless or would take
11689 three insns anyway. */
11690 *cost = COSTS_N_INSNS (3);
11691 break;
11693 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11694 return true;
11696 else
11698 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11699 if (CONST_INT_P (XEXP (x, 1))
11700 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11702 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11703 return true;
11706 return false;
11709 /* Not directly inside a set. If it involves the condition code
11710 register it must be the condition for a branch, cond_exec or
11711 I_T_E operation. Since the comparison is performed elsewhere
11712 this is just the control part which has no additional
11713 cost. */
11714 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11715 && XEXP (x, 1) == const0_rtx)
11717 *cost = 0;
11718 return true;
11720 return false;
11722 case ABS:
11723 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11724 && (mode == SFmode || !TARGET_VFP_SINGLE))
11726 if (speed_p)
11727 *cost += extra_cost->fp[mode != SFmode].neg;
11729 return false;
11731 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11733 *cost = LIBCALL_COST (1);
11734 return false;
11737 if (mode == SImode)
11739 if (speed_p)
11740 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11741 return false;
11743 /* Vector mode? */
11744 *cost = LIBCALL_COST (1);
11745 return false;
11747 case SIGN_EXTEND:
11748 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11749 && MEM_P (XEXP (x, 0)))
11751 if (mode == DImode)
11752 *cost += COSTS_N_INSNS (1);
11754 if (!speed_p)
11755 return true;
11757 if (GET_MODE (XEXP (x, 0)) == SImode)
11758 *cost += extra_cost->ldst.load;
11759 else
11760 *cost += extra_cost->ldst.load_sign_extend;
11762 if (mode == DImode)
11763 *cost += extra_cost->alu.shift;
11765 return true;
11768 /* Widening from less than 32-bits requires an extend operation. */
11769 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11771 /* We have SXTB/SXTH. */
11772 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11773 if (speed_p)
11774 *cost += extra_cost->alu.extend;
11776 else if (GET_MODE (XEXP (x, 0)) != SImode)
11778 /* Needs two shifts. */
11779 *cost += COSTS_N_INSNS (1);
11780 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11781 if (speed_p)
11782 *cost += 2 * extra_cost->alu.shift;
11785 /* Widening beyond 32-bits requires one more insn. */
11786 if (mode == DImode)
11788 *cost += COSTS_N_INSNS (1);
11789 if (speed_p)
11790 *cost += extra_cost->alu.shift;
11793 return true;
11795 case ZERO_EXTEND:
11796 if ((arm_arch4
11797 || GET_MODE (XEXP (x, 0)) == SImode
11798 || GET_MODE (XEXP (x, 0)) == QImode)
11799 && MEM_P (XEXP (x, 0)))
11801 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11803 if (mode == DImode)
11804 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11806 return true;
11809 /* Widening from less than 32-bits requires an extend operation. */
11810 if (GET_MODE (XEXP (x, 0)) == QImode)
11812 /* UXTB can be a shorter instruction in Thumb2, but it might
11813 be slower than the AND Rd, Rn, #255 alternative. When
11814 optimizing for speed it should never be slower to use
11815 AND, and we don't really model 16-bit vs 32-bit insns
11816 here. */
11817 if (speed_p)
11818 *cost += extra_cost->alu.logical;
11820 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11822 /* We have UXTB/UXTH. */
11823 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11824 if (speed_p)
11825 *cost += extra_cost->alu.extend;
11827 else if (GET_MODE (XEXP (x, 0)) != SImode)
11829 /* Needs two shifts. It's marginally preferable to use
11830 shifts rather than two BIC instructions as the second
11831 shift may merge with a subsequent insn as a shifter
11832 op. */
11833 *cost = COSTS_N_INSNS (2);
11834 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11835 if (speed_p)
11836 *cost += 2 * extra_cost->alu.shift;
11839 /* Widening beyond 32-bits requires one more insn. */
11840 if (mode == DImode)
11842 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11845 return true;
11847 case CONST_INT:
11848 *cost = 0;
11849 /* CONST_INT has no mode, so we cannot tell for sure how many
11850 insns are really going to be needed. The best we can do is
11851 look at the value passed. If it fits in SImode, then assume
11852 that's the mode it will be used for. Otherwise assume it
11853 will be used in DImode. */
11854 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11855 mode = SImode;
11856 else
11857 mode = DImode;
11859 /* Avoid blowing up in arm_gen_constant (). */
11860 if (!(outer_code == PLUS
11861 || outer_code == AND
11862 || outer_code == IOR
11863 || outer_code == XOR
11864 || outer_code == MINUS))
11865 outer_code = SET;
11867 const_int_cost:
11868 if (mode == SImode)
11870 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11871 INTVAL (x), NULL, NULL,
11872 0, 0));
11873 /* Extra costs? */
11875 else
11877 *cost += COSTS_N_INSNS (arm_gen_constant
11878 (outer_code, SImode, NULL,
11879 trunc_int_for_mode (INTVAL (x), SImode),
11880 NULL, NULL, 0, 0)
11881 + arm_gen_constant (outer_code, SImode, NULL,
11882 INTVAL (x) >> 32, NULL,
11883 NULL, 0, 0));
11884 /* Extra costs? */
11887 return true;
11889 case CONST:
11890 case LABEL_REF:
11891 case SYMBOL_REF:
11892 if (speed_p)
11894 if (arm_arch_thumb2 && !flag_pic)
11895 *cost += COSTS_N_INSNS (1);
11896 else
11897 *cost += extra_cost->ldst.load;
11899 else
11900 *cost += COSTS_N_INSNS (1);
11902 if (flag_pic)
11904 *cost += COSTS_N_INSNS (1);
11905 if (speed_p)
11906 *cost += extra_cost->alu.arith;
11909 return true;
11911 case CONST_FIXED:
11912 *cost = COSTS_N_INSNS (4);
11913 /* Fixme. */
11914 return true;
11916 case CONST_DOUBLE:
11917 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11918 && (mode == SFmode || mode == HFmode || !TARGET_VFP_SINGLE))
11920 if (vfp3_const_double_rtx (x))
11922 if (speed_p)
11923 *cost += extra_cost->fp[mode == DFmode].fpconst;
11924 return true;
11927 if (speed_p)
11929 if (mode == DFmode)
11930 *cost += extra_cost->ldst.loadd;
11931 else
11932 *cost += extra_cost->ldst.loadf;
11934 else
11935 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11937 return true;
11939 *cost = COSTS_N_INSNS (4);
11940 return true;
11942 case CONST_VECTOR:
11943 if (((TARGET_NEON && TARGET_HARD_FLOAT
11944 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11945 || TARGET_HAVE_MVE)
11946 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11947 *cost = COSTS_N_INSNS (1);
11948 else if (TARGET_HAVE_MVE)
11950 /* 128-bit vector requires two vldr.64 on MVE. */
11951 *cost = COSTS_N_INSNS (2);
11952 if (speed_p)
11953 *cost += extra_cost->ldst.loadd * 2;
11955 else
11956 *cost = COSTS_N_INSNS (4);
11957 return true;
11959 case HIGH:
11960 case LO_SUM:
11961 /* When optimizing for size, we prefer constant pool entries to
11962 MOVW/MOVT pairs, so bump the cost of these slightly. */
11963 if (!speed_p)
11964 *cost += 1;
11965 return true;
11967 case CLZ:
11968 if (speed_p)
11969 *cost += extra_cost->alu.clz;
11970 return false;
11972 case SMIN:
11973 if (XEXP (x, 1) == const0_rtx)
11975 if (speed_p)
11976 *cost += extra_cost->alu.log_shift;
11977 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11978 return true;
11980 /* Fall through. */
11981 case SMAX:
11982 case UMIN:
11983 case UMAX:
11984 *cost += COSTS_N_INSNS (1);
11985 return false;
11987 case TRUNCATE:
11988 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11989 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11990 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11991 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11992 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11993 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11994 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11995 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11996 == ZERO_EXTEND))))
11998 if (speed_p)
11999 *cost += extra_cost->mult[1].extend;
12000 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
12001 ZERO_EXTEND, 0, speed_p)
12002 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
12003 ZERO_EXTEND, 0, speed_p));
12004 return true;
12006 *cost = LIBCALL_COST (1);
12007 return false;
12009 case UNSPEC_VOLATILE:
12010 case UNSPEC:
12011 return arm_unspec_cost (x, outer_code, speed_p, cost);
12013 case PC:
12014 /* Reading the PC is like reading any other register. Writing it
12015 is more expensive, but we take that into account elsewhere. */
12016 *cost = 0;
12017 return true;
12019 case ZERO_EXTRACT:
12020 /* TODO: Simple zero_extract of bottom bits using AND. */
12021 /* Fall through. */
12022 case SIGN_EXTRACT:
12023 if (arm_arch6
12024 && mode == SImode
12025 && CONST_INT_P (XEXP (x, 1))
12026 && CONST_INT_P (XEXP (x, 2)))
12028 if (speed_p)
12029 *cost += extra_cost->alu.bfx;
12030 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12031 return true;
12033 /* Without UBFX/SBFX, need to resort to shift operations. */
12034 *cost += COSTS_N_INSNS (1);
12035 if (speed_p)
12036 *cost += 2 * extra_cost->alu.shift;
12037 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
12038 return true;
12040 case FLOAT_EXTEND:
12041 if (TARGET_HARD_FLOAT)
12043 if (speed_p)
12044 *cost += extra_cost->fp[mode == DFmode].widen;
12045 if (!TARGET_VFP5
12046 && GET_MODE (XEXP (x, 0)) == HFmode)
12048 /* Pre v8, widening HF->DF is a two-step process, first
12049 widening to SFmode. */
12050 *cost += COSTS_N_INSNS (1);
12051 if (speed_p)
12052 *cost += extra_cost->fp[0].widen;
12054 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
12055 return true;
12058 *cost = LIBCALL_COST (1);
12059 return false;
12061 case FLOAT_TRUNCATE:
12062 if (TARGET_HARD_FLOAT)
12064 if (speed_p)
12065 *cost += extra_cost->fp[mode == DFmode].narrow;
12066 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
12067 return true;
12068 /* Vector modes? */
12070 *cost = LIBCALL_COST (1);
12071 return false;
12073 case FMA:
12074 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
12076 rtx op0 = XEXP (x, 0);
12077 rtx op1 = XEXP (x, 1);
12078 rtx op2 = XEXP (x, 2);
12081 /* vfms or vfnma. */
12082 if (GET_CODE (op0) == NEG)
12083 op0 = XEXP (op0, 0);
12085 /* vfnms or vfnma. */
12086 if (GET_CODE (op2) == NEG)
12087 op2 = XEXP (op2, 0);
12089 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
12090 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
12091 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
12093 if (speed_p)
12094 *cost += extra_cost->fp[mode == DFmode].fma;
12096 return true;
12099 *cost = LIBCALL_COST (3);
12100 return false;
12102 case FIX:
12103 case UNSIGNED_FIX:
12104 if (TARGET_HARD_FLOAT)
12106 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12107 a vcvt fixed-point conversion. */
12108 if (code == FIX && mode == SImode
12109 && GET_CODE (XEXP (x, 0)) == FIX
12110 && GET_MODE (XEXP (x, 0)) == SFmode
12111 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12112 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12113 > 0)
12115 if (speed_p)
12116 *cost += extra_cost->fp[0].toint;
12118 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12119 code, 0, speed_p);
12120 return true;
12123 if (GET_MODE_CLASS (mode) == MODE_INT)
12125 mode = GET_MODE (XEXP (x, 0));
12126 if (speed_p)
12127 *cost += extra_cost->fp[mode == DFmode].toint;
12128 /* Strip off the 'cost' of rounding towards zero. */
12129 if (GET_CODE (XEXP (x, 0)) == FIX)
12130 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12131 0, speed_p);
12132 else
12133 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12134 /* ??? Increase the cost to deal with transferring from
12135 FP -> CORE registers? */
12136 return true;
12138 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12139 && TARGET_VFP5)
12141 if (speed_p)
12142 *cost += extra_cost->fp[mode == DFmode].roundint;
12143 return false;
12145 /* Vector costs? */
12147 *cost = LIBCALL_COST (1);
12148 return false;
12150 case FLOAT:
12151 case UNSIGNED_FLOAT:
12152 if (TARGET_HARD_FLOAT)
12154 /* ??? Increase the cost to deal with transferring from CORE
12155 -> FP registers? */
12156 if (speed_p)
12157 *cost += extra_cost->fp[mode == DFmode].fromint;
12158 return false;
12160 *cost = LIBCALL_COST (1);
12161 return false;
12163 case CALL:
12164 return true;
12166 case ASM_OPERANDS:
12168 /* Just a guess. Guess number of instructions in the asm
12169 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12170 though (see PR60663). */
12171 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12172 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12174 *cost = COSTS_N_INSNS (asm_length + num_operands);
12175 return true;
12177 default:
12178 if (mode != VOIDmode)
12179 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12180 else
12181 *cost = COSTS_N_INSNS (4); /* Who knows? */
12182 return false;
12186 #undef HANDLE_NARROW_SHIFT_ARITH
12188 /* RTX costs entry point. */
12190 static bool
12191 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12192 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12194 bool result;
12195 int code = GET_CODE (x);
12196 gcc_assert (current_tune->insn_extra_cost);
12198 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12199 (enum rtx_code) outer_code,
12200 current_tune->insn_extra_cost,
12201 total, speed);
12203 if (dump_file && arm_verbose_cost)
12205 print_rtl_single (dump_file, x);
12206 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12207 *total, result ? "final" : "partial");
12209 return result;
12212 static int
12213 arm_insn_cost (rtx_insn *insn, bool speed)
12215 int cost;
12217 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12218 will likely disappear during register allocation. */
12219 if (!reload_completed
12220 && GET_CODE (PATTERN (insn)) == SET
12221 && REG_P (SET_DEST (PATTERN (insn)))
12222 && REG_P (SET_SRC (PATTERN (insn))))
12223 return 2;
12224 cost = pattern_cost (PATTERN (insn), speed);
12225 /* If the cost is zero, then it's likely a complex insn. We don't want the
12226 cost of these to be less than something we know about. */
12227 return cost ? cost : COSTS_N_INSNS (2);
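/* Illustrative note: the value 2 used above is half of COSTS_N_INSNS (1), so
   such a copy is rated cheaper than any real instruction, while patterns whose
   computed cost comes back as zero are bumped up to COSTS_N_INSNS (2).  */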
12230 /* All address computations that can be done are free, but rtx cost returns
12231 the same for practically all of them. So we weight the different types
12232 of address here in the order (most pref first):
12233 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
12234 static inline int
12235 arm_arm_address_cost (rtx x)
12237 enum rtx_code c = GET_CODE (x);
12239 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12240 return 0;
12241 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12242 return 10;
12244 if (c == PLUS)
12246 if (CONST_INT_P (XEXP (x, 1)))
12247 return 2;
12249 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12250 return 3;
12252 return 4;
12255 return 6;
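/* Illustrative readings of the weights above: a pre/post-incremented address
   costs 0; [rN, #imm] costs 2; [rN, rM, lsl #k] costs 3 (one operand of the
   sum is itself arithmetic); [rN, rM] costs 4; a plain register costs 6; and
   a symbolic or constant-pool address costs 10.  */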
12258 static inline int
12259 arm_thumb_address_cost (rtx x)
12261 enum rtx_code c = GET_CODE (x);
12263 if (c == REG)
12264 return 1;
12265 if (c == PLUS
12266 && REG_P (XEXP (x, 0))
12267 && CONST_INT_P (XEXP (x, 1)))
12268 return 1;
12270 return 2;
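/* E.g. a plain register or register-plus-constant address costs 1 on Thumb;
   anything else, such as [rN, rM], costs 2.  */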
12273 static int
12274 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12275 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12277 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12280 /* Adjust cost hook for XScale. */
12281 static bool
12282 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12283 int * cost)
12285 /* Some true dependencies can have a higher cost depending
12286 on precisely how certain input operands are used. */
12287 if (dep_type == 0
12288 && recog_memoized (insn) >= 0
12289 && recog_memoized (dep) >= 0)
12291 int shift_opnum = get_attr_shift (insn);
12292 enum attr_type attr_type = get_attr_type (dep);
12294 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12295 operand for INSN. If we have a shifted input operand and the
12296 instruction we depend on is another ALU instruction, then we may
12297 have to account for an additional stall. */
12298 if (shift_opnum != 0
12299 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12300 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12301 || attr_type == TYPE_ALUS_SHIFT_IMM
12302 || attr_type == TYPE_LOGIC_SHIFT_IMM
12303 || attr_type == TYPE_LOGICS_SHIFT_IMM
12304 || attr_type == TYPE_ALU_SHIFT_REG
12305 || attr_type == TYPE_ALUS_SHIFT_REG
12306 || attr_type == TYPE_LOGIC_SHIFT_REG
12307 || attr_type == TYPE_LOGICS_SHIFT_REG
12308 || attr_type == TYPE_MOV_SHIFT
12309 || attr_type == TYPE_MVN_SHIFT
12310 || attr_type == TYPE_MOV_SHIFT_REG
12311 || attr_type == TYPE_MVN_SHIFT_REG))
12313 rtx shifted_operand;
12314 int opno;
12316 /* Get the shifted operand. */
12317 extract_insn (insn);
12318 shifted_operand = recog_data.operand[shift_opnum];
12320 /* Iterate over all the operands in DEP. If we write an operand
12321 that overlaps with SHIFTED_OPERAND, then we have to increase the
12322 cost of this dependency. */
12323 extract_insn (dep);
12324 preprocess_constraints (dep);
12325 for (opno = 0; opno < recog_data.n_operands; opno++)
12327 /* We can ignore strict inputs. */
12328 if (recog_data.operand_type[opno] == OP_IN)
12329 continue;
12331 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12332 shifted_operand))
12334 *cost = 2;
12335 return false;
12340 return true;
12343 /* Adjust cost hook for Cortex A9. */
12344 static bool
12345 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12346 int * cost)
12348 switch (dep_type)
12350 case REG_DEP_ANTI:
12351 *cost = 0;
12352 return false;
12354 case REG_DEP_TRUE:
12355 case REG_DEP_OUTPUT:
12356 if (recog_memoized (insn) >= 0
12357 && recog_memoized (dep) >= 0)
12359 if (GET_CODE (PATTERN (insn)) == SET)
12361 if (GET_MODE_CLASS
12362 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12363 || GET_MODE_CLASS
12364 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12366 enum attr_type attr_type_insn = get_attr_type (insn);
12367 enum attr_type attr_type_dep = get_attr_type (dep);
12369 /* By default all dependencies of the form
12370 s0 = s0 <op> s1
12371 s0 = s0 <op> s2
12372 have an extra latency of 1 cycle because
12373 of the input and output dependency in this
12374 case. However, this gets modeled as a true
12375 dependency and hence all these checks. */
12376 if (REG_P (SET_DEST (PATTERN (insn)))
12377 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12379 /* FMACS is a special case where the dependent
12380 instruction can be issued 3 cycles before
12381 the normal latency in case of an output
12382 dependency. */
12383 if ((attr_type_insn == TYPE_FMACS
12384 || attr_type_insn == TYPE_FMACD)
12385 && (attr_type_dep == TYPE_FMACS
12386 || attr_type_dep == TYPE_FMACD))
12388 if (dep_type == REG_DEP_OUTPUT)
12389 *cost = insn_default_latency (dep) - 3;
12390 else
12391 *cost = insn_default_latency (dep);
12392 return false;
12394 else
12396 if (dep_type == REG_DEP_OUTPUT)
12397 *cost = insn_default_latency (dep) + 1;
12398 else
12399 *cost = insn_default_latency (dep);
12401 return false;
12406 break;
12408 default:
12409 gcc_unreachable ();
12412 return true;
12415 /* Adjust cost hook for FA726TE. */
12416 static bool
12417 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12418 int * cost)
12420 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
12421 by a predicated one) has a penalty of 3. */
12422 if (dep_type == REG_DEP_TRUE
12423 && recog_memoized (insn) >= 0
12424 && recog_memoized (dep) >= 0
12425 && get_attr_conds (dep) == CONDS_SET)
12427 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12428 if (get_attr_conds (insn) == CONDS_USE
12429 && get_attr_type (insn) != TYPE_BRANCH)
12431 *cost = 3;
12432 return false;
12435 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12436 || get_attr_conds (insn) == CONDS_USE)
12438 *cost = 0;
12439 return false;
12443 return true;
12446 /* Implement TARGET_REGISTER_MOVE_COST.
12448 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such a
12449 move is typically more expensive than a single memory access. We set
12450 the cost to less than two memory accesses so that floating
12451 point to integer conversion does not go through memory. */
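/* A quick sanity check against the numbers used below: with the 32-bit memory
   move cost of 10 (see arm_memory_move_cost), the value 15 used for VFP<->core
   moves lies between one memory access (10) and two (20).  */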
12453 int
12454 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12455 reg_class_t from, reg_class_t to)
12457 if (TARGET_32BIT)
12459 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12460 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12461 return 15;
12462 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12463 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12464 return 4;
12465 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12466 return 20;
12467 else
12468 return 2;
12470 else
12472 if (from == HI_REGS || to == HI_REGS)
12473 return 4;
12474 else
12475 return 2;
12479 /* Implement TARGET_MEMORY_MOVE_COST. */
12481 int
12482 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12483 bool in ATTRIBUTE_UNUSED)
12485 if (TARGET_32BIT)
12486 return 10;
12487 else
12489 if (GET_MODE_SIZE (mode) < 4)
12490 return 8;
12491 else
12492 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
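/* For example, on Thumb-1 moving an SImode value costs 8 via LO_REGS and 16
   via any other class, while sub-word modes cost a flat 8.  */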
12496 /* Vectorizer cost model implementation. */
12498 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12499 static int
12500 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12501 tree vectype,
12502 int misalign ATTRIBUTE_UNUSED)
12504 unsigned elements;
12506 switch (type_of_cost)
12508 case scalar_stmt:
12509 return current_tune->vec_costs->scalar_stmt_cost;
12511 case scalar_load:
12512 return current_tune->vec_costs->scalar_load_cost;
12514 case scalar_store:
12515 return current_tune->vec_costs->scalar_store_cost;
12517 case vector_stmt:
12518 return current_tune->vec_costs->vec_stmt_cost;
12520 case vector_load:
12521 return current_tune->vec_costs->vec_align_load_cost;
12523 case vector_store:
12524 return current_tune->vec_costs->vec_store_cost;
12526 case vec_to_scalar:
12527 return current_tune->vec_costs->vec_to_scalar_cost;
12529 case scalar_to_vec:
12530 return current_tune->vec_costs->scalar_to_vec_cost;
12532 case unaligned_load:
12533 case vector_gather_load:
12534 return current_tune->vec_costs->vec_unalign_load_cost;
12536 case unaligned_store:
12537 case vector_scatter_store:
12538 return current_tune->vec_costs->vec_unalign_store_cost;
12540 case cond_branch_taken:
12541 return current_tune->vec_costs->cond_taken_branch_cost;
12543 case cond_branch_not_taken:
12544 return current_tune->vec_costs->cond_not_taken_branch_cost;
12546 case vec_perm:
12547 case vec_promote_demote:
12548 return current_tune->vec_costs->vec_stmt_cost;
12550 case vec_construct:
12551 elements = TYPE_VECTOR_SUBPARTS (vectype);
12552 return elements / 2 + 1;
12554 default:
12555 gcc_unreachable ();
12559 /* Return true if and only if this insn can dual-issue only as older. */
12560 static bool
12561 cortexa7_older_only (rtx_insn *insn)
12563 if (recog_memoized (insn) < 0)
12564 return false;
12566 switch (get_attr_type (insn))
12568 case TYPE_ALU_DSP_REG:
12569 case TYPE_ALU_SREG:
12570 case TYPE_ALUS_SREG:
12571 case TYPE_LOGIC_REG:
12572 case TYPE_LOGICS_REG:
12573 case TYPE_ADC_REG:
12574 case TYPE_ADCS_REG:
12575 case TYPE_ADR:
12576 case TYPE_BFM:
12577 case TYPE_REV:
12578 case TYPE_MVN_REG:
12579 case TYPE_SHIFT_IMM:
12580 case TYPE_SHIFT_REG:
12581 case TYPE_LOAD_BYTE:
12582 case TYPE_LOAD_4:
12583 case TYPE_STORE_4:
12584 case TYPE_FFARITHS:
12585 case TYPE_FADDS:
12586 case TYPE_FFARITHD:
12587 case TYPE_FADDD:
12588 case TYPE_FMOV:
12589 case TYPE_F_CVT:
12590 case TYPE_FCMPS:
12591 case TYPE_FCMPD:
12592 case TYPE_FCONSTS:
12593 case TYPE_FCONSTD:
12594 case TYPE_FMULS:
12595 case TYPE_FMACS:
12596 case TYPE_FMULD:
12597 case TYPE_FMACD:
12598 case TYPE_FDIVS:
12599 case TYPE_FDIVD:
12600 case TYPE_F_MRC:
12601 case TYPE_F_MRRC:
12602 case TYPE_F_FLAG:
12603 case TYPE_F_LOADS:
12604 case TYPE_F_STORES:
12605 return true;
12606 default:
12607 return false;
12611 /* Return true if and only if this insn can dual-issue as younger. */
12612 static bool
12613 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12615 if (recog_memoized (insn) < 0)
12617 if (verbose > 5)
12618 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12619 return false;
12622 switch (get_attr_type (insn))
12624 case TYPE_ALU_IMM:
12625 case TYPE_ALUS_IMM:
12626 case TYPE_LOGIC_IMM:
12627 case TYPE_LOGICS_IMM:
12628 case TYPE_EXTEND:
12629 case TYPE_MVN_IMM:
12630 case TYPE_MOV_IMM:
12631 case TYPE_MOV_REG:
12632 case TYPE_MOV_SHIFT:
12633 case TYPE_MOV_SHIFT_REG:
12634 case TYPE_BRANCH:
12635 case TYPE_CALL:
12636 return true;
12637 default:
12638 return false;
12643 /* Look for an instruction that can dual issue only as an older
12644 instruction, and move it in front of any instructions that can
12645 dual-issue as younger, while preserving the relative order of all
12646 other instructions in the ready list. This is a heuristic to help
12647 dual-issue in later cycles, by postponing issue of more flexible
12648 instructions. This heuristic may affect dual issue opportunities
12649 in the current cycle. */
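/* For example, if a mov-immediate (which may issue as younger) is ahead of a
   load (older-only) in the ready list, the load is hoisted in front of it,
   keeping the mov available to pair as the younger insn of a later
   dual-issue cycle.  */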
12650 static void
12651 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12652 int *n_readyp, int clock)
12654 int i;
12655 int first_older_only = -1, first_younger = -1;
12657 if (verbose > 5)
12658 fprintf (file,
12659 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12660 clock,
12661 *n_readyp);
12663 /* Traverse the ready list from the head (the instruction to issue
12664 first), looking for the first instruction that can issue as
12665 younger and the first instruction that can dual-issue only as
12666 older. */
12667 for (i = *n_readyp - 1; i >= 0; i--)
12669 rtx_insn *insn = ready[i];
12670 if (cortexa7_older_only (insn))
12672 first_older_only = i;
12673 if (verbose > 5)
12674 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12675 break;
12677 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12678 first_younger = i;
12681 /* Nothing to reorder because either no younger insn found or insn
12682 that can dual-issue only as older appears before any insn that
12683 can dual-issue as younger. */
12684 if (first_younger == -1)
12686 if (verbose > 5)
12687 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12688 return;
12691 /* Nothing to reorder because no older-only insn in the ready list. */
12692 if (first_older_only == -1)
12694 if (verbose > 5)
12695 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12696 return;
12699 /* Move first_older_only insn before first_younger. */
12700 if (verbose > 5)
12701 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12702 INSN_UID (ready[first_older_only]),
12703 INSN_UID (ready[first_younger]));
12704 rtx_insn *first_older_only_insn = ready[first_older_only];
12705 for (i = first_older_only; i < first_younger; i++)
12707 ready[i] = ready[i+1];
12710 ready[i] = first_older_only_insn;
12711 return;
12714 /* Implement TARGET_SCHED_REORDER. */
12715 static int
12716 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12717 int clock)
12719 switch (arm_tune)
12721 case TARGET_CPU_cortexa7:
12722 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12723 break;
12724 default:
12725 /* Do nothing for other cores. */
12726 break;
12729 return arm_issue_rate ();
12732 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12733 It corrects the value of COST based on the relationship between
12734 INSN and DEP through the dependence LINK. It returns the new
12735 value. There is a per-core adjust_cost hook to adjust scheduler costs
12736 and the per-core hook can choose to completely override the generic
12737 adjust_cost function. Only put bits of code into arm_adjust_cost that
12738 are common across all cores. */
12739 static int
12740 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12741 unsigned int)
12743 rtx i_pat, d_pat;
12745 /* When generating Thumb-1 code, we want to place flag-setting operations
12746 close to a conditional branch which depends on them, so that we can
12747 omit the comparison. */
12748 if (TARGET_THUMB1
12749 && dep_type == 0
12750 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12751 && recog_memoized (dep) >= 0
12752 && get_attr_conds (dep) == CONDS_SET)
12753 return 0;
12755 if (current_tune->sched_adjust_cost != NULL)
12757 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12758 return cost;
12761 /* XXX Is this strictly true? */
12762 if (dep_type == REG_DEP_ANTI
12763 || dep_type == REG_DEP_OUTPUT)
12764 return 0;
12766 /* Call insns don't incur a stall, even if they follow a load. */
12767 if (dep_type == 0
12768 && CALL_P (insn))
12769 return 1;
12771 if ((i_pat = single_set (insn)) != NULL
12772 && MEM_P (SET_SRC (i_pat))
12773 && (d_pat = single_set (dep)) != NULL
12774 && MEM_P (SET_DEST (d_pat)))
12776 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12777 /* This is a load after a store; there is no conflict if the load reads
12778 from a cached area. Assume that loads from the stack and from the
12779 constant pool are cached, and that others will miss. This is a
12780 hack. */
12782 if ((SYMBOL_REF_P (src_mem)
12783 && CONSTANT_POOL_ADDRESS_P (src_mem))
12784 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12785 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12786 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12787 return 1;
12790 return cost;
12793 int
12794 arm_max_conditional_execute (void)
12796 return max_insns_skipped;
12799 static int
12800 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12802 if (TARGET_32BIT)
12803 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12804 else
12805 return (optimize > 0) ? 2 : 0;
12808 static int
12809 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12811 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12814 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12815 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12816 sequences of non-executed instructions in IT blocks probably take the same
12817 amount of time as executed instructions (and the IT instruction itself takes
12818 space in icache). This function was experimentally determined to give good
12819 results on a popular embedded benchmark. */
12821 static int
12822 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12824 return (TARGET_32BIT && speed_p) ? 1
12825 : arm_default_branch_cost (speed_p, predictable_p);
12828 static int
12829 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12831 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12834 /* Return TRUE if rtx X is a valid immediate FP constant. */
12835 int
12836 arm_const_double_rtx (rtx x)
12838 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT
12839 && x == CONST0_RTX (GET_MODE (x)));
12842 /* VFPv3 has a fairly wide range of representable immediates, formed from
12843 "quarter-precision" floating-point values. These can be evaluated using this
12844 formula (with ^ for exponentiation):
12846 -1^s * n * 2^-r
12848 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12849 16 <= n <= 31 and 0 <= r <= 7.
12851 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12853 - A (most-significant) is the sign bit.
12854 - BCD are the exponent (encoded as r XOR 3).
12855 - EFGH are the mantissa (encoded as n - 16).
12856 */
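/* A worked example of the encoding above: 1.0 is +16 * 2^-4, so s = 0, n = 16
   and r = 4, giving A = 0, BCD = (4 XOR 3) = 0b111 and EFGH = 0, i.e. index
   0x70; likewise 2.0 is 16 * 2^-3 and encodes as 0x00.  */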
12858 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12859 fconst[sd] instruction, or -1 if X isn't suitable. */
12860 static int
12861 vfp3_const_double_index (rtx x)
12863 REAL_VALUE_TYPE r, m;
12864 int sign, exponent;
12865 unsigned HOST_WIDE_INT mantissa, mant_hi;
12866 unsigned HOST_WIDE_INT mask;
12867 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12868 bool fail;
12870 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12871 return -1;
12873 r = *CONST_DOUBLE_REAL_VALUE (x);
12875 /* We can't represent these things, so detect them first. */
12876 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12877 return -1;
12879 /* Extract sign, exponent and mantissa. */
12880 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12881 r = real_value_abs (&r);
12882 exponent = REAL_EXP (&r);
12883 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12884 highest (sign) bit, with a fixed binary point at bit point_pos.
12885 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12886 bits for the mantissa, this may fail (low bits would be lost). */
12887 real_ldexp (&m, &r, point_pos - exponent);
12888 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12889 mantissa = w.elt (0);
12890 mant_hi = w.elt (1);
12892 /* If there are bits set in the low part of the mantissa, we can't
12893 represent this value. */
12894 if (mantissa != 0)
12895 return -1;
12897 /* Now make it so that mantissa contains the most-significant bits, and move
12898 the point_pos to indicate that the least-significant bits have been
12899 discarded. */
12900 point_pos -= HOST_BITS_PER_WIDE_INT;
12901 mantissa = mant_hi;
12903 /* We can permit four significant bits of mantissa only, plus a high bit
12904 which is always 1. */
12905 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12906 if ((mantissa & mask) != 0)
12907 return -1;
12909 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12910 mantissa >>= point_pos - 5;
12912 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12913 floating-point immediate zero with Neon using an integer-zero load, but
12914 that case is handled elsewhere.) */
12915 if (mantissa == 0)
12916 return -1;
12918 gcc_assert (mantissa >= 16 && mantissa <= 31);
12920 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12921 normalized significands are in the range [1, 2). (Our mantissa is shifted
12922 left 4 places at this point relative to normalized IEEE754 values). GCC
12923 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12924 REAL_EXP must be altered. */
12925 exponent = 5 - exponent;
12927 if (exponent < 0 || exponent > 7)
12928 return -1;
12930 /* Sign, mantissa and exponent are now in the correct form to plug into the
12931 formula described in the comment above. */
12932 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12935 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12936 int
12937 vfp3_const_double_rtx (rtx x)
12939 if (!TARGET_VFP3)
12940 return 0;
12942 return vfp3_const_double_index (x) != -1;
12945 /* Recognize immediates which can be used in various Neon and MVE instructions.
12946 Legal immediates are described by the following table (for VMVN variants, the
12947 bitwise inverse of the constant shown is recognized. In either case, VMOV
12948 is output and the correct instruction to use for a given constant is chosen
12949 by the assembler). The constant shown is replicated across all elements of
12950 the destination vector.
12952 insn elems variant constant (binary)
12953 ---- ----- ------- -----------------
12954 vmov i32 0 00000000 00000000 00000000 abcdefgh
12955 vmov i32 1 00000000 00000000 abcdefgh 00000000
12956 vmov i32 2 00000000 abcdefgh 00000000 00000000
12957 vmov i32 3 abcdefgh 00000000 00000000 00000000
12958 vmov i16 4 00000000 abcdefgh
12959 vmov i16 5 abcdefgh 00000000
12960 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12961 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12962 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12963 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12964 vmvn i16 10 00000000 abcdefgh
12965 vmvn i16 11 abcdefgh 00000000
12966 vmov i32 12 00000000 00000000 abcdefgh 11111111
12967 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12968 vmov i32 14 00000000 abcdefgh 11111111 11111111
12969 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12970 vmov i8 16 abcdefgh
12971 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12972 eeeeeeee ffffffff gggggggg hhhhhhhh
12973 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12974 vmov f32 19 00000000 00000000 00000000 00000000
12976 For case 18, B = !b. Representable values are exactly those accepted by
12977 vfp3_const_double_index, but are output as floating-point numbers rather
12978 than indices.
12980 For case 19, we will change it to vmov.i32 when assembling.
12982 Variants 0-5 (inclusive) may also be used as immediates for the second
12983 operand of VORR/VBIC instructions.
12985 The INVERSE argument causes the bitwise inverse of the given operand to be
12986 recognized instead (used for recognizing legal immediates for the VAND/VORN
12987 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12988 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12989 output, rather than the real insns vbic/vorr).
12991 INVERSE makes no difference to the recognition of float vectors.
12993 The return value is the variant of immediate as shown in the above table, or
12994 -1 if the given value doesn't match any of the listed patterns.
12995 */
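/* As an illustration of the table above: a V4SImode vector with every element
   equal to 0x000000ab matches variant 0 (vmov.i32), elements of 0x0000ab00
   match variant 1, and a V16QImode vector with every byte equal to, say,
   0x2a matches variant 16 (vmov.i8).  */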
12996 static int
12997 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12998 rtx *modconst, int *elementwidth)
13000 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
13001 matches = 1; \
13002 for (i = 0; i < idx; i += (STRIDE)) \
13003 if (!(TEST)) \
13004 matches = 0; \
13005 if (matches) \
13007 immtype = (CLASS); \
13008 elsize = (ELSIZE); \
13009 break; \
13012 unsigned int i, elsize = 0, idx = 0, n_elts;
13013 unsigned int innersize;
13014 unsigned char bytes[16] = {};
13015 int immtype = -1, matches;
13016 unsigned int invmask = inverse ? 0xff : 0;
13017 bool vector = GET_CODE (op) == CONST_VECTOR;
13019 if (vector)
13020 n_elts = CONST_VECTOR_NUNITS (op);
13021 else
13023 n_elts = 1;
13024 gcc_assert (mode != VOIDmode);
13027 innersize = GET_MODE_UNIT_SIZE (mode);
13029 /* Only support 128-bit vectors for MVE. */
13030 if (TARGET_HAVE_MVE
13031 && (!vector
13032 || VALID_MVE_PRED_MODE (mode)
13033 || n_elts * innersize != 16))
13034 return -1;
13036 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
13037 return -1;
13039 /* Vectors of float constants. */
13040 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13042 rtx el0 = CONST_VECTOR_ELT (op, 0);
13044 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
13045 return -1;
13047 /* FP16 vectors cannot be represented. */
13048 if (GET_MODE_INNER (mode) == HFmode)
13049 return -1;
13051 /* All elements in the vector must be the same. Note that 0.0 and -0.0
13052 are distinct in this context. */
13053 if (!const_vec_duplicate_p (op))
13054 return -1;
13056 if (modconst)
13057 *modconst = CONST_VECTOR_ELT (op, 0);
13059 if (elementwidth)
13060 *elementwidth = 0;
13062 if (el0 == CONST0_RTX (GET_MODE (el0)))
13063 return 19;
13064 else
13065 return 18;
13068 /* The tricks done in the code below apply for little-endian vector layout.
13069 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13070 FIXME: Implement logic for big-endian vectors. */
13071 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13072 return -1;
13074 /* Splat vector constant out into a byte vector. */
13075 for (i = 0; i < n_elts; i++)
13077 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13078 unsigned HOST_WIDE_INT elpart;
13080 gcc_assert (CONST_INT_P (el));
13081 elpart = INTVAL (el);
13083 for (unsigned int byte = 0; byte < innersize; byte++)
13085 bytes[idx++] = (elpart & 0xff) ^ invmask;
13086 elpart >>= BITS_PER_UNIT;
13090 /* Sanity check. */
13091 gcc_assert (idx == GET_MODE_SIZE (mode));
13095 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13096 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13098 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13099 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13101 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13102 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13104 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13105 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13107 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13109 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13111 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13112 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13114 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13115 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13117 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13118 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13120 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13121 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13123 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13125 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13127 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13128 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13130 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13131 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13133 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13134 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13136 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13137 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13139 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13141 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13142 && bytes[i] == bytes[(i + 8) % idx]);
13144 while (0);
13146 if (immtype == -1)
13147 return -1;
13149 if (elementwidth)
13150 *elementwidth = elsize;
13152 if (modconst)
13154 unsigned HOST_WIDE_INT imm = 0;
13156 /* Un-invert bytes of recognized vector, if necessary. */
13157 if (invmask != 0)
13158 for (i = 0; i < idx; i++)
13159 bytes[i] ^= invmask;
13161 if (immtype == 17)
13163 /* FIXME: Broken on 32-bit H_W_I hosts. */
13164 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13166 for (i = 0; i < 8; i++)
13167 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13168 << (i * BITS_PER_UNIT);
13170 *modconst = GEN_INT (imm);
13172 else
13174 unsigned HOST_WIDE_INT imm = 0;
13176 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13177 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13179 *modconst = GEN_INT (imm);
13183 return immtype;
13184 #undef CHECK
13187 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13188 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13189 (or zero for float elements), and a modified constant (whatever should be
13190 output for a VMOV) in *MODCONST. The function was renamed from
13191 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" as it is
13192 used by both Neon and MVE. */
13194 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13195 rtx *modconst, int *elementwidth)
13197 rtx tmpconst;
13198 int tmpwidth;
13199 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13201 if (retval == -1)
13202 return 0;
13204 if (modconst)
13205 *modconst = tmpconst;
13207 if (elementwidth)
13208 *elementwidth = tmpwidth;
13210 return 1;
13213 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13214 the immediate is valid, write a constant suitable for using as an operand
13215 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13216 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
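/* For illustration: the immediates accepted here are exactly variants 0-5 of
   the table above, i.e. a single non-zero byte within each 32-bit or 16-bit
   element, such as 0x00ab0000 replicated across the vector (variant 2). */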
13219 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13220 rtx *modconst, int *elementwidth)
13222 rtx tmpconst;
13223 int tmpwidth;
13224 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13226 if (retval < 0 || retval > 5)
13227 return 0;
13229 if (modconst)
13230 *modconst = tmpconst;
13232 if (elementwidth)
13233 *elementwidth = tmpwidth;
13235 return 1;
13238 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13239 the immediate is valid, write a constant suitable for using as an operand
13240 to VSHR/VSHL to *MODCONST and the corresponding element width to
13241 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
13242 shift, because the two have different limitations. */
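/* For example, with 16-bit elements the left-shift immediate must lie in
   [0, 15] whereas the right-shift immediate must lie in [1, 16], which is why
   ISLEFTSHIFT is needed. */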
13245 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13246 rtx *modconst, int *elementwidth,
13247 bool isleftshift)
13249 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13250 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13251 unsigned HOST_WIDE_INT last_elt = 0;
13252 unsigned HOST_WIDE_INT maxshift;
13254 /* Split vector constant out into a byte vector. */
13255 for (i = 0; i < n_elts; i++)
13257 rtx el = CONST_VECTOR_ELT (op, i);
13258 unsigned HOST_WIDE_INT elpart;
13260 if (CONST_INT_P (el))
13261 elpart = INTVAL (el);
13262 else if (CONST_DOUBLE_P (el))
13263 return 0;
13264 else
13265 gcc_unreachable ();
13267 if (i != 0 && elpart != last_elt)
13268 return 0;
13270 last_elt = elpart;
13273 /* The shift limit is the element size in bits. */
13274 maxshift = innersize * 8;
13276 if (isleftshift)
13278 /* Left shift immediate value can be from 0 to <size>-1. */
13279 if (last_elt >= maxshift)
13280 return 0;
13282 else
13284 /* Right shift immediate value can be from 1 to <size>. */
13285 if (last_elt == 0 || last_elt > maxshift)
13286 return 0;
13289 if (elementwidth)
13290 *elementwidth = innersize * 8;
13292 if (modconst)
13293 *modconst = CONST_VECTOR_ELT (op, 0);
13295 return 1;
13298 /* Return a string suitable for output of Neon immediate logic operation
13299 MNEM. */
13301 char *
13302 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13303 int inverse, int quad)
13305 int width, is_valid;
13306 static char templ[40];
13308 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13310 gcc_assert (is_valid != 0);
13312 if (quad)
13313 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13314 else
13315 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13317 return templ;
13320 /* Return a string suitable for output of Neon immediate shift operation
13321 (VSHR or VSHL) MNEM. */
13323 char *
13324 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13325 machine_mode mode, int quad,
13326 bool isleftshift)
13328 int width, is_valid;
13329 static char templ[40];
13331 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13332 gcc_assert (is_valid != 0);
13334 if (quad)
13335 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13336 else
13337 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13339 return templ;
13342 /* Output a sequence of pairwise operations to implement a reduction.
13343 NOTE: We do "too much work" here, because pairwise operations work on two
13344 registers-worth of operands in one go. Unfortunately we can't exploit those
13345 extra calculations to do the full operation in fewer steps, as far as we know.
13346 Although all vector elements of the result but the first are ignored, we
13347 actually calculate the same result in each of the elements. An alternative
13348 such as initially loading a vector with zero to use as each of the second
13349 operands would use up an additional register and take an extra instruction,
13350 for no particular gain. */
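/* For example, reducing a four-element vector takes two pairwise steps (four
   elements to two partial results, then two to one), i.e. log2(N) applications
   of REDUC for N elements, each folding the running value with itself. */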
13352 void
13353 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13354 rtx (*reduc) (rtx, rtx, rtx))
13356 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13357 rtx tmpsum = op1;
13359 for (i = parts / 2; i >= 1; i /= 2)
13361 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13362 emit_insn (reduc (dest, tmpsum, tmpsum));
13363 tmpsum = dest;
13367 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13368 loaded into a register using VDUP.
13370 If this is the case, and GENERATE is set, we also generate
13371 instructions to do this and return an RTX to assign to the register. */
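/* For instance, the V8QImode constant {5, 5, 5, 5, 5, 5, 5, 5} can be loaded
   by moving 5 into a core register and duplicating it with VDUP (e.g.
   "vdup.8 d0, r0"); the register names here are illustrative only. */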
13373 static rtx
13374 neon_vdup_constant (rtx vals, bool generate)
13376 machine_mode mode = GET_MODE (vals);
13377 machine_mode inner_mode = GET_MODE_INNER (mode);
13378 rtx x;
13380 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13381 return NULL_RTX;
13383 if (!const_vec_duplicate_p (vals, &x))
13384 /* The elements are not all the same. We could handle repeating
13385 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13386 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13387 vdup.i16). */
13388 return NULL_RTX;
13390 if (!generate)
13391 return x;
13393 /* We can load this constant by using VDUP and a constant in a
13394 single ARM register. This will be cheaper than a vector
13395 load. */
13397 x = copy_to_mode_reg (inner_mode, x);
13398 return gen_vec_duplicate (mode, x);
13401 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13403 mve_bool_vec_to_const (rtx const_vec)
13405 machine_mode mode = GET_MODE (const_vec);
13407 if (!VECTOR_MODE_P (mode))
13408 return const_vec;
13410 unsigned n_elts = GET_MODE_NUNITS (mode);
13411 unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
13412 unsigned shift_c = 16 / n_elts;
13413 unsigned i;
13414 int hi_val = 0;
13416 for (i = 0; i < n_elts; i++)
13418 rtx el = CONST_VECTOR_ELT (const_vec, i);
13419 unsigned HOST_WIDE_INT elpart;
13421 gcc_assert (CONST_INT_P (el));
13422 elpart = INTVAL (el) & ((1U << el_prec) - 1);
13424 unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
13426 hi_val |= elpart << (index * shift_c);
13428 /* We are using a mov immediate to encode this constant, which writes 32 bits,
13429 so we need to make sure the top 16 bits are all 0; otherwise we can't
13430 guarantee that we can actually write this immediate. */
13431 return gen_int_mode (hi_val, SImode);
13434 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13435 constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13436 into a register.
13438 If this is the case, and GENERATE is set, we also generate code to do
13439 this and return an RTX to copy into the register. */
13442 neon_make_constant (rtx vals, bool generate)
13444 machine_mode mode = GET_MODE (vals);
13445 rtx target;
13446 rtx const_vec = NULL_RTX;
13447 int n_elts = GET_MODE_NUNITS (mode);
13448 int n_const = 0;
13449 int i;
13451 if (GET_CODE (vals) == CONST_VECTOR)
13452 const_vec = vals;
13453 else if (GET_CODE (vals) == PARALLEL)
13455 /* A CONST_VECTOR must contain only CONST_INTs and
13456 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13457 Only store valid constants in a CONST_VECTOR. */
13458 for (i = 0; i < n_elts; ++i)
13460 rtx x = XVECEXP (vals, 0, i);
13461 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13462 n_const++;
13464 if (n_const == n_elts)
13465 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13467 else
13468 gcc_unreachable ();
13470 if (const_vec != NULL
13471 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13472 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13473 return const_vec;
13474 else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode))
13475 return mve_bool_vec_to_const (const_vec);
13476 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13477 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13478 pipeline cycle; creating the constant takes one or two ARM
13479 pipeline cycles. */
13480 return target;
13481 else if (const_vec != NULL_RTX)
13482 /* Load from constant pool. On Cortex-A8 this takes two cycles
13483 (for either double or quad vectors). We cannot take advantage
13484 of single-cycle VLD1 because we need a PC-relative addressing
13485 mode. */
13486 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13487 else
13488 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13489 We cannot construct an initializer. */
13490 return NULL_RTX;
13493 /* Initialize vector TARGET to VALS. */
13495 void
13496 neon_expand_vector_init (rtx target, rtx vals)
13498 machine_mode mode = GET_MODE (target);
13499 machine_mode inner_mode = GET_MODE_INNER (mode);
13500 int n_elts = GET_MODE_NUNITS (mode);
13501 int n_var = 0, one_var = -1;
13502 bool all_same = true;
13503 rtx x, mem;
13504 int i;
13506 for (i = 0; i < n_elts; ++i)
13508 x = XVECEXP (vals, 0, i);
13509 if (!CONSTANT_P (x))
13510 ++n_var, one_var = i;
13512 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13513 all_same = false;
13516 if (n_var == 0)
13518 rtx constant = neon_make_constant (vals);
13519 if (constant != NULL_RTX)
13521 emit_move_insn (target, constant);
13522 return;
13526 /* Splat a single non-constant element if we can. */
13527 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13529 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13530 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13531 return;
13534 /* One field is non-constant. Load constant then overwrite varying
13535 field. This is more efficient than using the stack. */
13536 if (n_var == 1)
13538 rtx copy = copy_rtx (vals);
13539 rtx merge_mask = GEN_INT (1 << one_var);
13541 /* Load constant part of vector, substitute neighboring value for
13542 varying element. */
13543 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13544 neon_expand_vector_init (target, copy);
13546 /* Insert variable. */
13547 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13548 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13549 return;
13552 /* Construct the vector in memory one field at a time
13553 and load the whole vector. */
13554 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13555 for (i = 0; i < n_elts; i++)
13556 emit_move_insn (adjust_address_nv (mem, inner_mode,
13557 i * GET_MODE_SIZE (inner_mode)),
13558 XVECEXP (vals, 0, i));
13559 emit_move_insn (target, mem);
13562 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13563 an error if it doesn't. EXP indicates the source location, which includes the
13564 inlining history for intrinsics. */
13566 static void
13567 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13568 const_tree exp, const char *desc)
13570 HOST_WIDE_INT lane;
13572 gcc_assert (CONST_INT_P (operand));
13574 lane = INTVAL (operand);
13576 if (lane < low || lane >= high)
13578 if (exp)
13579 error_at (EXPR_LOCATION (exp),
13580 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13581 else
13582 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13586 /* Bounds-check lanes. */
13588 void
13589 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13590 const_tree exp)
13592 bounds_check (operand, low, high, exp, "lane");
13595 /* Bounds-check constants. */
13597 void
13598 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13600 bounds_check (operand, low, high, NULL_TREE, "constant");
13603 HOST_WIDE_INT
13604 neon_element_bits (machine_mode mode)
13606 return GET_MODE_UNIT_BITSIZE (mode);
13610 /* Predicates for `match_operand' and `match_operator'. */
13612 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13613 WB level is 2 if full writeback address modes are allowed, 1
13614 if limited writeback address modes (POST_INC and PRE_DEC) are
13615 allowed and 0 if no writeback at all is supported. */
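/* Illustrative examples: (mem (reg)) and a suitably aligned, in-range
   (mem (plus (reg) (const_int))) are accepted at any WB level;
   (mem (post_inc (reg))) and (mem (pre_dec (reg))) need a WB level of at
   least 1; (mem (pre_inc (reg))) and (mem (post_dec (reg))) need level 2. */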
13618 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13620 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13621 rtx ind;
13623 /* Reject eliminable registers. */
13624 if (! (reload_in_progress || reload_completed || lra_in_progress)
13625 && ( reg_mentioned_p (frame_pointer_rtx, op)
13626 || reg_mentioned_p (arg_pointer_rtx, op)
13627 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13628 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13629 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13630 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13631 return FALSE;
13633 /* Constants are converted into offsets from labels. */
13634 if (!MEM_P (op))
13635 return FALSE;
13637 ind = XEXP (op, 0);
13639 if (reload_completed
13640 && (LABEL_REF_P (ind)
13641 || (GET_CODE (ind) == CONST
13642 && GET_CODE (XEXP (ind, 0)) == PLUS
13643 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13644 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13645 return TRUE;
13647 /* Match: (mem (reg)). */
13648 if (REG_P (ind))
13649 return arm_address_register_rtx_p (ind, 0);
13651 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13652 acceptable in any case (subject to verification by
13653 arm_address_register_rtx_p). We need full writeback to accept
13654 PRE_INC and POST_DEC, and at least restricted writeback for
13655 POST_INC and PRE_DEC. */
13656 if (wb_level > 0
13657 && (GET_CODE (ind) == POST_INC
13658 || GET_CODE (ind) == PRE_DEC
13659 || (wb_level > 1
13660 && (GET_CODE (ind) == PRE_INC
13661 || GET_CODE (ind) == POST_DEC))))
13662 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13664 if (wb_level > 1
13665 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13666 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13667 && GET_CODE (XEXP (ind, 1)) == PLUS
13668 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13669 ind = XEXP (ind, 1);
13671 /* Match:
13672 (plus (reg)
13673 (const))
13675 The encoded immediate for 16-bit modes is multiplied by 2,
13676 while the encoded immediate for 32-bit and 64-bit modes is
13677 multiplied by 4. */
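/* For instance (illustrative): for DFmode the factor is 4, so valid offsets
   are multiples of 4 in [-1020, 1020]; for HFmode the factor is 2, giving
   multiples of 2 in [-510, 510]. */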
13678 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13679 if (GET_CODE (ind) == PLUS
13680 && REG_P (XEXP (ind, 0))
13681 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13682 && CONST_INT_P (XEXP (ind, 1))
13683 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13684 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13685 return TRUE;
13687 return FALSE;
13690 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13691 WB is true if full writeback address modes are allowed and is false
13692 if limited writeback address modes (POST_INC and PRE_DEC) are
13693 allowed. */
13695 int arm_coproc_mem_operand (rtx op, bool wb)
13697 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13700 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13701 context in which no writeback address modes are allowed. */
13704 arm_coproc_mem_operand_no_writeback (rtx op)
13706 return arm_coproc_mem_operand_wb (op, 0);
13709 /* In non-STRICT mode, or for a hard register, return the register number
13710 as-is. In STRICT mode, return the hard register assigned to a pseudo, if
13711 any; otherwise return the original pseudo number. */
13712 static int
13713 arm_effective_regno (rtx op, bool strict)
13715 gcc_assert (REG_P (op));
13716 if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
13717 || !reg_renumber || reg_renumber[REGNO (op)] < 0)
13718 return REGNO (op);
13719 return reg_renumber[REGNO (op)];
13722 /* This function returns TRUE on matching mode and op.
13723 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13724 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13726 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13728 enum rtx_code code;
13729 int val, reg_no;
13731 /* Match: (mem (reg)). */
13732 if (REG_P (op))
13734 reg_no = arm_effective_regno (op, strict);
13735 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13736 ? reg_no <= LAST_LO_REGNUM
13737 : reg_no < LAST_ARM_REGNUM)
13738 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13740 code = GET_CODE (op);
13742 if ((code == POST_INC
13743 || code == PRE_DEC
13744 || code == PRE_INC
13745 || code == POST_DEC)
13746 && REG_P (XEXP (op, 0)))
13748 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13749 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13750 ? reg_no <= LAST_LO_REGNUM
13751 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13752 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13754 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13755 && GET_CODE (XEXP (op, 1)) == PLUS
13756 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13757 && REG_P (XEXP (op, 0))
13758 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13759 /* Make sure to only accept PLUS after reload_completed, otherwise
13760 this will interfere with auto_inc's pattern detection. */
13761 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13762 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13764 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13765 if (code == PLUS)
13766 val = INTVAL (XEXP (op, 1));
13767 else
13768 val = INTVAL (XEXP(XEXP (op, 1), 1));
13770 switch (mode)
13772 case E_V16QImode:
13773 case E_V8QImode:
13774 case E_V4QImode:
13775 if (abs (val) > 127)
13776 return FALSE;
13777 break;
13778 case E_V8HImode:
13779 case E_V8HFmode:
13780 case E_V4HImode:
13781 case E_V4HFmode:
13782 if (val % 2 != 0 || abs (val) > 254)
13783 return FALSE;
13784 break;
13785 case E_V4SImode:
13786 case E_V4SFmode:
13787 if (val % 4 != 0 || abs (val) > 508)
13788 return FALSE;
13789 break;
13790 default:
13791 return FALSE;
13793 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13794 || (MVE_STN_LDW_MODE (mode)
13795 ? reg_no <= LAST_LO_REGNUM
13796 : (reg_no < LAST_ARM_REGNUM
13797 && (code == PLUS || reg_no != SP_REGNUM))));
13799 return FALSE;
13802 /* Return TRUE if OP is a memory operand which we can load or store a vector
13803 to/from. TYPE is one of the following values:
13804 0 - Vector load/store (vldr)
13805 1 - Core registers (ldm)
13806 2 - Element/structure loads (vld1)
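/* For example (illustrative): with TYPE 0, an address of the form
   (mem (plus (reg) (const_int 8))) is accepted when the offset is a multiple
   of 4 and within the range checked below (up to 1016 for quad vectors,
   1024 for double); with TYPE 2, post-increment by a register, as used by
   VLD1, is also accepted. */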
13809 neon_vector_mem_operand (rtx op, int type, bool strict)
13811 rtx ind;
13813 /* Reject eliminable registers. */
13814 if (strict && ! (reload_in_progress || reload_completed)
13815 && (reg_mentioned_p (frame_pointer_rtx, op)
13816 || reg_mentioned_p (arg_pointer_rtx, op)
13817 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13818 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13819 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13820 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13821 return FALSE;
13823 /* Constants are converted into offsets from labels. */
13824 if (!MEM_P (op))
13825 return FALSE;
13827 ind = XEXP (op, 0);
13829 if (reload_completed
13830 && (LABEL_REF_P (ind)
13831 || (GET_CODE (ind) == CONST
13832 && GET_CODE (XEXP (ind, 0)) == PLUS
13833 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13834 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13835 return TRUE;
13837 /* Match: (mem (reg)). */
13838 if (REG_P (ind))
13839 return arm_address_register_rtx_p (ind, 0);
13841 /* Allow post-increment with Neon registers. */
13842 if ((type != 1 && GET_CODE (ind) == POST_INC)
13843 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13844 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13846 /* Allow post-increment by register for VLDn */
13847 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13848 && GET_CODE (XEXP (ind, 1)) == PLUS
13849 && REG_P (XEXP (XEXP (ind, 1), 1))
13850 && REG_P (XEXP (ind, 0))
13851 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13852 return true;
13854 /* Match:
13855 (plus (reg)
13856 (const)). */
13857 if (type == 0
13858 && GET_CODE (ind) == PLUS
13859 && REG_P (XEXP (ind, 0))
13860 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13861 && CONST_INT_P (XEXP (ind, 1))
13862 && INTVAL (XEXP (ind, 1)) > -1024
13863 /* For quad modes, we restrict the constant offset to be slightly less
13864 than what the instruction format permits. We have no such constraint
13865 on double mode offsets. (This must match arm_legitimate_index_p.) */
13866 && (INTVAL (XEXP (ind, 1))
13867 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13868 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13869 return TRUE;
13871 return FALSE;
13874 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13875 type. */
13877 mve_struct_mem_operand (rtx op)
13879 rtx ind = XEXP (op, 0);
13881 /* Match: (mem (reg)). */
13882 if (REG_P (ind))
13883 return arm_address_register_rtx_p (ind, 0);
13885 /* Allow only post-increment by the mode size. */
13886 if (GET_CODE (ind) == POST_INC)
13887 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13889 return FALSE;
13892 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13893 type. */
13895 neon_struct_mem_operand (rtx op)
13897 rtx ind;
13899 /* Reject eliminable registers. */
13900 if (! (reload_in_progress || reload_completed)
13901 && ( reg_mentioned_p (frame_pointer_rtx, op)
13902 || reg_mentioned_p (arg_pointer_rtx, op)
13903 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13904 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13905 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13906 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13907 return FALSE;
13909 /* Constants are converted into offsets from labels. */
13910 if (!MEM_P (op))
13911 return FALSE;
13913 ind = XEXP (op, 0);
13915 if (reload_completed
13916 && (LABEL_REF_P (ind)
13917 || (GET_CODE (ind) == CONST
13918 && GET_CODE (XEXP (ind, 0)) == PLUS
13919 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13920 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13921 return TRUE;
13923 /* Match: (mem (reg)). */
13924 if (REG_P (ind))
13925 return arm_address_register_rtx_p (ind, 0);
13927 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13928 if (GET_CODE (ind) == POST_INC
13929 || GET_CODE (ind) == PRE_DEC)
13930 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13932 return FALSE;
13935 /* Prepares the operands for the VCMLA by lane instruction such that the right
13936 register number is selected. This instruction is special in that it always
13937 requires a D register, however there is a choice to be made between Dn[0],
13938 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13940 The VCMLA by lane function always selects two values. For instance given D0
13941 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13942 used by the instruction. However given V4SF then index 0 and 1 are valid as
13943 D0[0] or D1[0] are both valid.
13945 This function centralizes that information based on OPERANDS: OPERANDS[3]
13946 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13947 updated to contain the right index. */
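/* As an illustration: for a V4SFmode operand held in d2/d3 (register numbers
   chosen only for the example), lane 0 selects d2[0] and lane 1 selects d3[0];
   OPERANDS[3] becomes the D register number and OPERANDS[4] the lane within
   it. */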
13949 rtx *
13950 neon_vcmla_lane_prepare_operands (rtx *operands)
13952 int lane = INTVAL (operands[4]);
13953 machine_mode constmode = SImode;
13954 machine_mode mode = GET_MODE (operands[3]);
13955 int regno = REGNO (operands[3]);
13956 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13957 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13959 operands[3] = gen_int_mode (regno + 1, constmode);
13960 operands[4]
13961 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13963 else
13965 operands[3] = gen_int_mode (regno, constmode);
13966 operands[4] = gen_int_mode (lane, constmode);
13968 return operands;
13972 /* Return true if X is a register that will be eliminated later on. */
13974 arm_eliminable_register (rtx x)
13976 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13977 || REGNO (x) == ARG_POINTER_REGNUM
13978 || VIRTUAL_REGISTER_P (x));
13981 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13982 coprocessor registers. Otherwise return NO_REGS. */
13984 enum reg_class
13985 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13987 if (mode == HFmode)
13989 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13990 return GENERAL_REGS;
13991 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13992 return NO_REGS;
13993 return GENERAL_REGS;
13996 /* The neon move patterns handle all legitimate vector and struct
13997 addresses. */
13998 if (TARGET_NEON
13999 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
14000 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
14001 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
14002 || VALID_NEON_STRUCT_MODE (mode)))
14003 return NO_REGS;
14005 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
14006 return NO_REGS;
14008 return GENERAL_REGS;
14011 /* Values which must be returned in the most-significant end of the return
14012 register. */
14014 static bool
14015 arm_return_in_msb (const_tree valtype)
14017 return (TARGET_AAPCS_BASED
14018 && BYTES_BIG_ENDIAN
14019 && (AGGREGATE_TYPE_P (valtype)
14020 || TREE_CODE (valtype) == COMPLEX_TYPE
14021 || FIXED_POINT_TYPE_P (valtype)));
14024 /* Return TRUE if X references a SYMBOL_REF. */
14026 symbol_mentioned_p (rtx x)
14028 const char * fmt;
14029 int i;
14031 if (SYMBOL_REF_P (x))
14032 return 1;
14034 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
14035 are constant offsets, not symbols. */
14036 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14037 return 0;
14039 fmt = GET_RTX_FORMAT (GET_CODE (x));
14041 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14043 if (fmt[i] == 'E')
14045 int j;
14047 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14048 if (symbol_mentioned_p (XVECEXP (x, i, j)))
14049 return 1;
14051 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
14052 return 1;
14055 return 0;
14058 /* Return TRUE if X references a LABEL_REF. */
14060 label_mentioned_p (rtx x)
14062 const char * fmt;
14063 int i;
14065 if (LABEL_REF_P (x))
14066 return 1;
14068 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14069 instruction, but they are constant offsets, not symbols. */
14070 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14071 return 0;
14073 fmt = GET_RTX_FORMAT (GET_CODE (x));
14074 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14076 if (fmt[i] == 'E')
14078 int j;
14080 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14081 if (label_mentioned_p (XVECEXP (x, i, j)))
14082 return 1;
14084 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
14085 return 1;
14088 return 0;
14092 tls_mentioned_p (rtx x)
14094 switch (GET_CODE (x))
14096 case CONST:
14097 return tls_mentioned_p (XEXP (x, 0));
14099 case UNSPEC:
14100 if (XINT (x, 1) == UNSPEC_TLS)
14101 return 1;
14103 /* Fall through. */
14104 default:
14105 return 0;
14109 /* Must not copy any rtx that uses a pc-relative address.
14110 Also, disallow copying of load-exclusive instructions that
14111 may appear after splitting of compare-and-swap-style operations
14112 so as to prevent those loops from being transformed away from their
14113 canonical forms (see PR 69904). */
14115 static bool
14116 arm_cannot_copy_insn_p (rtx_insn *insn)
14118 /* The tls call insn cannot be copied, as it is paired with a data
14119 word. */
14120 if (recog_memoized (insn) == CODE_FOR_tlscall)
14121 return true;
14123 subrtx_iterator::array_type array;
14124 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14126 const_rtx x = *iter;
14127 if (GET_CODE (x) == UNSPEC
14128 && (XINT (x, 1) == UNSPEC_PIC_BASE
14129 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14130 return true;
14133 rtx set = single_set (insn);
14134 if (set)
14136 rtx src = SET_SRC (set);
14137 if (GET_CODE (src) == ZERO_EXTEND)
14138 src = XEXP (src, 0);
14140 /* Catch the load-exclusive and load-acquire operations. */
14141 if (GET_CODE (src) == UNSPEC_VOLATILE
14142 && (XINT (src, 1) == VUNSPEC_LL
14143 || XINT (src, 1) == VUNSPEC_LAX))
14144 return true;
14146 return false;
14149 enum rtx_code
14150 minmax_code (rtx x)
14152 enum rtx_code code = GET_CODE (x);
14154 switch (code)
14156 case SMAX:
14157 return GE;
14158 case SMIN:
14159 return LE;
14160 case UMIN:
14161 return LEU;
14162 case UMAX:
14163 return GEU;
14164 default:
14165 gcc_unreachable ();
14169 /* Match pair of min/max operators that can be implemented via usat/ssat. */
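/* For example, clamping to [0, 255] gives *MASK == 8 and *SIGNED_SAT == false
   (a usat #8), while clamping to [-256, 255] gives *MASK == 9 and
   *SIGNED_SAT == true (an ssat #9). */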
14171 bool
14172 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14173 int *mask, bool *signed_sat)
14175 /* The high bound must be a power of two minus one. */
14176 int log = exact_log2 (INTVAL (hi_bound) + 1);
14177 if (log == -1)
14178 return false;
14180 /* The low bound is either zero (for usat) or one less than the
14181 negation of the high bound (for ssat). */
14182 if (INTVAL (lo_bound) == 0)
14184 if (mask)
14185 *mask = log;
14186 if (signed_sat)
14187 *signed_sat = false;
14189 return true;
14192 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14194 if (mask)
14195 *mask = log + 1;
14196 if (signed_sat)
14197 *signed_sat = true;
14199 return true;
14202 return false;
14205 /* Return 1 if memory locations are adjacent. */
14207 adjacent_mem_locations (rtx a, rtx b)
14209 /* We don't guarantee to preserve the order of these memory refs. */
14210 if (volatile_refs_p (a) || volatile_refs_p (b))
14211 return 0;
14213 if ((REG_P (XEXP (a, 0))
14214 || (GET_CODE (XEXP (a, 0)) == PLUS
14215 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14216 && (REG_P (XEXP (b, 0))
14217 || (GET_CODE (XEXP (b, 0)) == PLUS
14218 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14220 HOST_WIDE_INT val0 = 0, val1 = 0;
14221 rtx reg0, reg1;
14222 int val_diff;
14224 if (GET_CODE (XEXP (a, 0)) == PLUS)
14226 reg0 = XEXP (XEXP (a, 0), 0);
14227 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14229 else
14230 reg0 = XEXP (a, 0);
14232 if (GET_CODE (XEXP (b, 0)) == PLUS)
14234 reg1 = XEXP (XEXP (b, 0), 0);
14235 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14237 else
14238 reg1 = XEXP (b, 0);
14240 /* Don't accept any offset that will require multiple
14241 instructions to handle, since this would cause the
14242 arith_adjacentmem pattern to output an overlong sequence. */
14243 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14244 return 0;
14246 /* Don't allow an eliminable register: register elimination can make
14247 the offset too large. */
14248 if (arm_eliminable_register (reg0))
14249 return 0;
14251 val_diff = val1 - val0;
14253 if (arm_ld_sched)
14255 /* If the target has load delay slots, then there's no benefit
14256 to using an ldm instruction unless the offset is zero and
14257 we are optimizing for size. */
14258 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14259 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14260 && (val_diff == 4 || val_diff == -4));
14263 return ((REGNO (reg0) == REGNO (reg1))
14264 && (val_diff == 4 || val_diff == -4));
14267 return 0;
14270 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14271 for load operations, false for store operations. CONSECUTIVE is true
14272 if the register numbers in the operation must be consecutive in the register
14273 bank. RETURN_PC is true if the value is to be loaded into PC.
14274 The pattern we are trying to match for load is:
14275 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14276 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14279 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14281 where
14282 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14283 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14284 3. If consecutive is TRUE, then for kth register being loaded,
14285 REGNO (R_dk) = REGNO (R_d0) + k.
14286 The pattern for store is similar. */
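/* For example (illustrative RTL), "ldm r0, {r4, r5}" corresponds to
     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])
   which satisfies rules 1-3 above. */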
14287 bool
14288 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14289 bool consecutive, bool return_pc)
14291 HOST_WIDE_INT count = XVECLEN (op, 0);
14292 rtx reg, mem, addr;
14293 unsigned regno;
14294 unsigned first_regno;
14295 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14296 rtx elt;
14297 bool addr_reg_in_reglist = false;
14298 bool update = false;
14299 int reg_increment;
14300 int offset_adj;
14301 int regs_per_val;
14303 /* If not in SImode, then registers must be consecutive
14304 (e.g., VLDM instructions for DFmode). */
14305 gcc_assert ((mode == SImode) || consecutive);
14306 /* Setting return_pc for stores is illegal. */
14307 gcc_assert (!return_pc || load);
14309 /* Set up the increments and the regs per val based on the mode. */
14310 reg_increment = GET_MODE_SIZE (mode);
14311 regs_per_val = reg_increment / 4;
14312 offset_adj = return_pc ? 1 : 0;
14314 if (count <= 1
14315 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14316 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14317 return false;
14319 /* Check if this is a write-back. */
14320 elt = XVECEXP (op, 0, offset_adj);
14321 if (GET_CODE (SET_SRC (elt)) == PLUS)
14323 i++;
14324 base = 1;
14325 update = true;
14327 /* The offset adjustment must be the number of registers being
14328 popped times the size of a single register. */
14329 if (!REG_P (SET_DEST (elt))
14330 || !REG_P (XEXP (SET_SRC (elt), 0))
14331 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14332 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14333 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14334 ((count - 1 - offset_adj) * reg_increment))
14335 return false;
14338 i = i + offset_adj;
14339 base = base + offset_adj;
14340 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14341 success depends on the type: VLDM can do just one reg,
14342 LDM must do at least two. */
14343 if ((count <= i) && (mode == SImode))
14344 return false;
14346 elt = XVECEXP (op, 0, i - 1);
14347 if (GET_CODE (elt) != SET)
14348 return false;
14350 if (load)
14352 reg = SET_DEST (elt);
14353 mem = SET_SRC (elt);
14355 else
14357 reg = SET_SRC (elt);
14358 mem = SET_DEST (elt);
14361 if (!REG_P (reg) || !MEM_P (mem))
14362 return false;
14364 regno = REGNO (reg);
14365 first_regno = regno;
14366 addr = XEXP (mem, 0);
14367 if (GET_CODE (addr) == PLUS)
14369 if (!CONST_INT_P (XEXP (addr, 1)))
14370 return false;
14372 offset = INTVAL (XEXP (addr, 1));
14373 addr = XEXP (addr, 0);
14376 if (!REG_P (addr))
14377 return false;
14379 /* Don't allow SP to be loaded unless it is also the base register. It
14380 guarantees that SP is reset correctly when an LDM instruction
14381 is interrupted. Otherwise, we might end up with a corrupt stack. */
14382 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14383 return false;
14385 if (regno == REGNO (addr))
14386 addr_reg_in_reglist = true;
14388 for (; i < count; i++)
14390 elt = XVECEXP (op, 0, i);
14391 if (GET_CODE (elt) != SET)
14392 return false;
14394 if (load)
14396 reg = SET_DEST (elt);
14397 mem = SET_SRC (elt);
14399 else
14401 reg = SET_SRC (elt);
14402 mem = SET_DEST (elt);
14405 if (!REG_P (reg)
14406 || GET_MODE (reg) != mode
14407 || REGNO (reg) <= regno
14408 || (consecutive
14409 && (REGNO (reg) !=
14410 (unsigned int) (first_regno + regs_per_val * (i - base))))
14411 /* Don't allow SP to be loaded unless it is also the base register. It
14412 guarantees that SP is reset correctly when an LDM instruction
14413 is interrupted. Otherwise, we might end up with a corrupt stack. */
14414 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14415 || !MEM_P (mem)
14416 || GET_MODE (mem) != mode
14417 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14418 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14419 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14420 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14421 offset + (i - base) * reg_increment))
14422 && (!REG_P (XEXP (mem, 0))
14423 || offset + (i - base) * reg_increment != 0)))
14424 return false;
14426 regno = REGNO (reg);
14427 if (regno == REGNO (addr))
14428 addr_reg_in_reglist = true;
14431 if (load)
14433 if (update && addr_reg_in_reglist)
14434 return false;
14436 /* For Thumb-1, the address register is always modified, either by write-back
14437 or by an explicit load. If the pattern does not describe an update,
14438 then the address register must be in the list of loaded registers. */
14439 if (TARGET_THUMB1)
14440 return update || addr_reg_in_reglist;
14443 return true;
14446 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14447 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14448 following form:
14450 [(set (reg:SI <N>) (const_int 0))
14451 (set (reg:SI <M>) (const_int 0))
14453 (unspec_volatile [(const_int 0)]
14454 VUNSPEC_CLRM_APSR)
14455 (clobber (reg:CC CC_REGNUM))
14458 Any number (including 0) of set expressions is valid; the volatile unspec is
14459 optional. All registers but SP and PC are allowed, and registers must be in
14460 strictly increasing order.
14462 To be a valid VSCCLRM pattern, OP must have the following form:
14464 [(unspec_volatile [(const_int 0)]
14465 VUNSPEC_VSCCLRM_VPR)
14466 (set (reg:SF <N>) (const_int 0))
14467 (set (reg:SF <M>) (const_int 0))
14471 As with CLRM, any number (including 0) of set expressions is valid; however,
14472 the volatile unspec is mandatory here. Any VFP single-precision register is
14473 accepted, but all registers must be consecutive and in increasing order. */
14475 bool
14476 clear_operation_p (rtx op, bool vfp)
14478 unsigned regno;
14479 unsigned last_regno = INVALID_REGNUM;
14480 rtx elt, reg, zero;
14481 int count = XVECLEN (op, 0);
14482 int first_set = vfp ? 1 : 0;
14483 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14485 for (int i = first_set; i < count; i++)
14487 elt = XVECEXP (op, 0, i);
14489 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14491 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14492 || XVECLEN (elt, 0) != 1
14493 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14494 || i != count - 2)
14495 return false;
14497 continue;
14500 if (GET_CODE (elt) == CLOBBER)
14501 continue;
14503 if (GET_CODE (elt) != SET)
14504 return false;
14506 reg = SET_DEST (elt);
14507 zero = SET_SRC (elt);
14509 if (!REG_P (reg)
14510 || GET_MODE (reg) != expected_mode
14511 || zero != CONST0_RTX (SImode))
14512 return false;
14514 regno = REGNO (reg);
14516 if (vfp)
14518 if (i != first_set && regno != last_regno + 1)
14519 return false;
14521 else
14523 if (regno == SP_REGNUM || regno == PC_REGNUM)
14524 return false;
14525 if (i != first_set && regno <= last_regno)
14526 return false;
14529 last_regno = regno;
14532 return true;
14535 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14536 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14537 instruction. ADD_OFFSET is nonzero if the base address register needs
14538 to be modified with an add instruction before we can use it. */
14540 static bool
14541 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14542 int nops, HOST_WIDE_INT add_offset)
14544 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14545 if the offset isn't small enough. The reason 2 ldrs are faster
14546 is because these ARMs are able to do more than one cache access
14547 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14548 whilst the ARM8 has a double bandwidth cache. This means that
14549 these cores can do both an instruction fetch and a data fetch in
14550 a single cycle, so the trick of calculating the address into a
14551 scratch register (one of the result regs) and then doing a load
14552 multiple actually becomes slower (and no smaller in code size).
14553 That is the transformation
14555 ldr rd1, [rbase + offset]
14556 ldr rd2, [rbase + offset + 4]
14560 add rd1, rbase, offset
14561 ldmia rd1, {rd1, rd2}
14563 produces worse code -- '3 cycles + any stalls on rd2' instead of
14564 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14565 access per cycle, the first sequence could never complete in less
14566 than 6 cycles, whereas the ldm sequence would only take 5 and
14567 would make better use of sequential accesses if not hitting the
14568 cache.
14570 We cheat here and test 'arm_ld_sched' which we currently know to
14571 only be true for the ARM8, ARM9 and StrongARM. If this ever
14572 changes, then the test below needs to be reworked. */
14573 if (nops == 2 && arm_ld_sched && add_offset != 0)
14574 return false;
14576 /* XScale has load-store double instructions, but they have stricter
14577 alignment requirements than load-store multiple, so we cannot
14578 use them.
14580 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14581 the pipeline until completion.
14583 NREGS CYCLES
14584 1 3
14585 2 4
14586 3 5
14587 4 6
14589 An ldr instruction takes 1-3 cycles, but does not block the
14590 pipeline.
14592 NREGS CYCLES
14593 1 1-3
14594 2 2-6
14595 3 3-9
14596 4 4-12
14598 Best case ldr will always win. However, the more ldr instructions
14599 we issue, the less likely we are to be able to schedule them well.
14600 Using ldr instructions also increases code size.
14602 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14603 for counts of 3 or 4 regs. */
14604 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14605 return false;
14606 return true;
14609 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14610 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14611 an array ORDER which describes the sequence to use when accessing the
14612 offsets that produces an ascending order. In this sequence, each
14613 offset must be larger by exactly 4 than the previous one. ORDER[0]
14614 must have been filled in with the lowest offset by the caller.
14615 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14616 we use to verify that ORDER produces an ascending order of registers.
14617 Return true if it was possible to construct such an order, false if
14618 not. */
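/* For example, given UNSORTED_OFFSETS {8, 4, 12, 0} with ORDER[0] set to 3
   (the offset 0), the computed ORDER is {3, 1, 0, 2}; offsets {0, 4, 12, 16}
   are rejected because no offset follows 4 by exactly 4. */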
14620 static bool
14621 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14622 int *unsorted_regs)
14624 int i;
14625 for (i = 1; i < nops; i++)
14627 int j;
14629 order[i] = order[i - 1];
14630 for (j = 0; j < nops; j++)
14631 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14633 /* We must find exactly one offset that is higher than the
14634 previous one by 4. */
14635 if (order[i] != order[i - 1])
14636 return false;
14637 order[i] = j;
14639 if (order[i] == order[i - 1])
14640 return false;
14641 /* The register numbers must be ascending. */
14642 if (unsorted_regs != NULL
14643 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14644 return false;
14646 return true;
14649 /* Used to determine in a peephole whether a sequence of load
14650 instructions can be changed into a load-multiple instruction.
14651 NOPS is the number of separate load instructions we are examining. The
14652 first NOPS entries in OPERANDS are the destination registers, the
14653 next NOPS entries are memory operands. If this function is
14654 successful, *BASE is set to the common base register of the memory
14655 accesses; *LOAD_OFFSET is set to the first memory location's offset
14656 from that base register.
14657 REGS is an array filled in with the destination register numbers.
14658 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14659 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14660 the sequence of registers in REGS matches the loads from ascending memory
14661 locations, and the function verifies that the register numbers are
14662 themselves ascending. If CHECK_REGS is false, the register numbers
14663 are stored in the order they are found in the operands. */
14664 static int
14665 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14666 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14668 int unsorted_regs[MAX_LDM_STM_OPS];
14669 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14670 int order[MAX_LDM_STM_OPS];
14671 int base_reg = -1;
14672 int i, ldm_case;
14674 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14675 easily extended if required. */
14676 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14678 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14680 /* Loop over the operands and check that the memory references are
14681 suitable (i.e. immediate offsets from the same base register). At
14682 the same time, extract the target register, and the memory
14683 offsets. */
14684 for (i = 0; i < nops; i++)
14686 rtx reg;
14687 rtx offset;
14689 /* Convert a subreg of a mem into the mem itself. */
14690 if (GET_CODE (operands[nops + i]) == SUBREG)
14691 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14693 gcc_assert (MEM_P (operands[nops + i]));
14695 /* Don't reorder volatile memory references; it doesn't seem worth
14696 looking for the case where the order is ok anyway. */
14697 if (MEM_VOLATILE_P (operands[nops + i]))
14698 return 0;
14700 offset = const0_rtx;
14702 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14703 || (SUBREG_P (reg)
14704 && REG_P (reg = SUBREG_REG (reg))))
14705 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14706 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14707 || (SUBREG_P (reg)
14708 && REG_P (reg = SUBREG_REG (reg))))
14709 && (CONST_INT_P (offset
14710 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14712 if (i == 0)
14714 base_reg = REGNO (reg);
14715 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14716 return 0;
14718 else if (base_reg != (int) REGNO (reg))
14719 /* Not addressed from the same base register. */
14720 return 0;
14722 unsorted_regs[i] = (REG_P (operands[i])
14723 ? REGNO (operands[i])
14724 : REGNO (SUBREG_REG (operands[i])));
14726 /* If it isn't an integer register, or if it overwrites the
14727 base register but isn't the last insn in the list, then
14728 we can't do this. */
14729 if (unsorted_regs[i] < 0
14730 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14731 || unsorted_regs[i] > 14
14732 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14733 return 0;
14735 /* Don't allow SP to be loaded unless it is also the base
14736 register. It guarantees that SP is reset correctly when
14737 an LDM instruction is interrupted. Otherwise, we might
14738 end up with a corrupt stack. */
14739 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14740 return 0;
14742 unsorted_offsets[i] = INTVAL (offset);
14743 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14744 order[0] = i;
14746 else
14747 /* Not a suitable memory address. */
14748 return 0;
14751 /* All the useful information has now been extracted from the
14752 operands into unsorted_regs and unsorted_offsets; additionally,
14753 order[0] has been set to the lowest offset in the list. Sort
14754 the offsets into order, verifying that they are adjacent, and
14755 check that the register numbers are ascending. */
14756 if (!compute_offset_order (nops, unsorted_offsets, order,
14757 check_regs ? unsorted_regs : NULL))
14758 return 0;
14760 if (saved_order)
14761 memcpy (saved_order, order, sizeof order);
14763 if (base)
14765 *base = base_reg;
14767 for (i = 0; i < nops; i++)
14768 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14770 *load_offset = unsorted_offsets[order[0]];
14773 if (unsorted_offsets[order[0]] == 0)
14774 ldm_case = 1; /* ldmia */
14775 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14776 ldm_case = 2; /* ldmib */
14777 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14778 ldm_case = 3; /* ldmda */
14779 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14780 ldm_case = 4; /* ldmdb */
14781 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14782 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14783 ldm_case = 5;
14784 else
14785 return 0;
14787 if (!multiple_operation_profitable_p (false, nops,
14788 ldm_case == 5
14789 ? unsorted_offsets[order[0]] : 0))
14790 return 0;
14792 return ldm_case;
14795 /* Used to determine in a peephole whether a sequence of store instructions can
14796 be changed into a store-multiple instruction.
14797 NOPS is the number of separate store instructions we are examining.
14798 NOPS_TOTAL is the total number of instructions recognized by the peephole
14799 pattern.
14800 The first NOPS entries in OPERANDS are the source registers, the next
14801 NOPS entries are memory operands. If this function is successful, *BASE is
14802 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14803 to the first memory location's offset from that base register. REGS is an
14804 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14805 likewise filled with the corresponding rtx's.
14806 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14807 numbers to an ascending order of stores.
14808 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14809 from ascending memory locations, and the function verifies that the register
14810 numbers are themselves ascending. If CHECK_REGS is false, the register
14811 numbers are stored in the order they are found in the operands. */
14812 static int
14813 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14814 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14815 HOST_WIDE_INT *load_offset, bool check_regs)
14817 int unsorted_regs[MAX_LDM_STM_OPS];
14818 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14819 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14820 int order[MAX_LDM_STM_OPS];
14821 int base_reg = -1;
14822 rtx base_reg_rtx = NULL;
14823 int i, stm_case;
14825 /* Write back of base register is currently only supported for Thumb 1. */
14826 int base_writeback = TARGET_THUMB1;
14828 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14829 easily extended if required. */
14830 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14832 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14834 /* Loop over the operands and check that the memory references are
14835 suitable (i.e. immediate offsets from the same base register). At
14836 the same time, extract the target register, and the memory
14837 offsets. */
14838 for (i = 0; i < nops; i++)
14840 rtx reg;
14841 rtx offset;
14843 /* Convert a subreg of a mem into the mem itself. */
14844 if (GET_CODE (operands[nops + i]) == SUBREG)
14845 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14847 gcc_assert (MEM_P (operands[nops + i]));
14849 /* Don't reorder volatile memory references; it doesn't seem worth
14850 looking for the case where the order is ok anyway. */
14851 if (MEM_VOLATILE_P (operands[nops + i]))
14852 return 0;
14854 offset = const0_rtx;
14856 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14857 || (SUBREG_P (reg)
14858 && REG_P (reg = SUBREG_REG (reg))))
14859 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14860 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14861 || (SUBREG_P (reg)
14862 && REG_P (reg = SUBREG_REG (reg))))
14863 && (CONST_INT_P (offset
14864 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14866 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14867 ? operands[i] : SUBREG_REG (operands[i]));
14868 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14870 if (i == 0)
14872 base_reg = REGNO (reg);
14873 base_reg_rtx = reg;
14874 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14875 return 0;
14877 else if (base_reg != (int) REGNO (reg))
14878 /* Not addressed from the same base register. */
14879 return 0;
14881 /* If it isn't an integer register, then we can't do this. */
14882 if (unsorted_regs[i] < 0
14883 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14884 /* The effects are unpredictable if the base register is
14885 both updated and stored. */
14886 || (base_writeback && unsorted_regs[i] == base_reg)
14887 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14888 || unsorted_regs[i] > 14)
14889 return 0;
14891 unsorted_offsets[i] = INTVAL (offset);
14892 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14893 order[0] = i;
14895 else
14896 /* Not a suitable memory address. */
14897 return 0;
14900 /* All the useful information has now been extracted from the
14901 operands into unsorted_regs and unsorted_offsets; additionally,
14902 order[0] has been set to the lowest offset in the list. Sort
14903 the offsets into order, verifying that they are adjacent, and
14904 check that the register numbers are ascending. */
14905 if (!compute_offset_order (nops, unsorted_offsets, order,
14906 check_regs ? unsorted_regs : NULL))
14907 return 0;
14909 if (saved_order)
14910 memcpy (saved_order, order, sizeof order);
14912 if (base)
14914 *base = base_reg;
14916 for (i = 0; i < nops; i++)
14918 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14919 if (reg_rtxs)
14920 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14923 *load_offset = unsorted_offsets[order[0]];
14926 if (TARGET_THUMB1
14927 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14928 return 0;
14930 if (unsorted_offsets[order[0]] == 0)
14931 stm_case = 1; /* stmia */
14932 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14933 stm_case = 2; /* stmib */
14934 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14935 stm_case = 3; /* stmda */
14936 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14937 stm_case = 4; /* stmdb */
14938 else
14939 return 0;
14941 if (!multiple_operation_profitable_p (false, nops, 0))
14942 return 0;
14944 return stm_case;
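/* Illustrative example (not taken from this file): a peephole candidate such
   as

     str r1, [r0]        @ offset 0
     str r2, [r0, #4]    @ offset 4

   has a lowest offset of 0, so store_multiple_sequence returns 1 (stmia) and
   the pair can be rewritten as "stmia r0, {r1, r2}".  Return values 2, 3 and
   4 correspond to the stmib, stmda and stmdb addressing variants.  */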
14947 /* Routines for use in generating RTL. */
14949 /* Generate a load-multiple instruction. COUNT is the number of loads in
14950 the instruction; REGS and MEMS are arrays containing the operands.
14951 BASEREG is the base register to be used in addressing the memory operands.
14952 WBACK_OFFSET is nonzero if the instruction should update the base
14953 register. */
14955 static rtx
14956 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14957 HOST_WIDE_INT wback_offset)
14959 int i = 0, j;
14960 rtx result;
14962 if (!multiple_operation_profitable_p (false, count, 0))
14964 rtx seq;
14966 start_sequence ();
14968 for (i = 0; i < count; i++)
14969 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14971 if (wback_offset != 0)
14972 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14974 seq = get_insns ();
14975 end_sequence ();
14977 return seq;
14980 result = gen_rtx_PARALLEL (VOIDmode,
14981 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14982 if (wback_offset != 0)
14984 XVECEXP (result, 0, 0)
14985 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14986 i = 1;
14987 count++;
14990 for (j = 0; i < count; i++, j++)
14991 XVECEXP (result, 0, i)
14992 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14994 return result;
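/* Rough sketch of the RTL built above for COUNT == 2 with a writeback offset
   of 8 (register and MEM operands are placeholders):

     (parallel [(set (reg:SI base) (plus:SI (reg:SI base) (const_int 8)))
                (set (reg:SI Ra) (mem:SI ...))
                (set (reg:SI Rb) (mem:SI ...))])

   When the multiple operation is not considered profitable, a plain sequence
   of SImode moves is emitted instead, as in the early-return path above.  */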
14997 /* Generate a store-multiple instruction. COUNT is the number of stores in
14998 the instruction; REGS and MEMS are arrays containing the operands.
14999 BASEREG is the base register to be used in addressing the memory operands.
15000 WBACK_OFFSET is nonzero if the instruction should update the base
15001 register. */
15003 static rtx
15004 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
15005 HOST_WIDE_INT wback_offset)
15007 int i = 0, j;
15008 rtx result;
15010 if (GET_CODE (basereg) == PLUS)
15011 basereg = XEXP (basereg, 0);
15013 if (!multiple_operation_profitable_p (false, count, 0))
15015 rtx seq;
15017 start_sequence ();
15019 for (i = 0; i < count; i++)
15020 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
15022 if (wback_offset != 0)
15023 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
15025 seq = get_insns ();
15026 end_sequence ();
15028 return seq;
15031 result = gen_rtx_PARALLEL (VOIDmode,
15032 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
15033 if (wback_offset != 0)
15035 XVECEXP (result, 0, 0)
15036 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
15037 i = 1;
15038 count++;
15041 for (j = 0; i < count; i++, j++)
15042 XVECEXP (result, 0, i)
15043 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
15045 return result;
15048 /* Generate either a load-multiple or a store-multiple instruction. This
15049 function can be used in situations where we can start with a single MEM
15050 rtx and adjust its address upwards.
15051 COUNT is the number of operations in the instruction, not counting a
15052 possible update of the base register. REGS is an array containing the
15053 register operands.
15054 BASEREG is the base register to be used in addressing the memory operands,
15055 which are constructed from BASEMEM.
15056 WRITE_BACK specifies whether the generated instruction should include an
15057 update of the base register.
15058 OFFSETP is used to pass an offset to and from this function; this offset
15059 is not used when constructing the address (instead BASEMEM should have an
15060 appropriate offset in its address), it is used only for setting
15061 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
15063 static rtx
15064 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
15065 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
15067 rtx mems[MAX_LDM_STM_OPS];
15068 HOST_WIDE_INT offset = *offsetp;
15069 int i;
15071 gcc_assert (count <= MAX_LDM_STM_OPS);
15073 if (GET_CODE (basereg) == PLUS)
15074 basereg = XEXP (basereg, 0);
15076 for (i = 0; i < count; i++)
15078 rtx addr = plus_constant (Pmode, basereg, i * 4);
15079 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
15080 offset += 4;
15083 if (write_back)
15084 *offsetp = offset;
15086 if (is_load)
15087 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15088 write_back ? 4 * count : 0);
15089 else
15090 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15091 write_back ? 4 * count : 0);
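/* Sketch of the address layout assumed above: for COUNT == 3 the loop builds
   MEMs at BASEREG, BASEREG+4 and BASEREG+8, tagged with MEM_OFFSETs *OFFSETP,
   *OFFSETP+4 and *OFFSETP+8, before passing them to the helpers above.  */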
15095 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15096 rtx basemem, HOST_WIDE_INT *offsetp)
15098 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15099 offsetp);
15103 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15104 rtx basemem, HOST_WIDE_INT *offsetp)
15106 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15107 offsetp);
15110 /* Called from a peephole2 expander to turn a sequence of loads into an
15111 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15112 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15113 is true if we can reorder the registers because they are used commutatively
15114 subsequently.
15115 Returns true iff we could generate a new instruction. */
15117 bool
15118 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15120 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15121 rtx mems[MAX_LDM_STM_OPS];
15122 int i, j, base_reg;
15123 rtx base_reg_rtx;
15124 HOST_WIDE_INT offset;
15125 int write_back = FALSE;
15126 int ldm_case;
15127 rtx addr;
15129 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15130 &base_reg, &offset, !sort_regs);
15132 if (ldm_case == 0)
15133 return false;
15135 if (sort_regs)
15136 for (i = 0; i < nops - 1; i++)
15137 for (j = i + 1; j < nops; j++)
15138 if (regs[i] > regs[j])
15140 int t = regs[i];
15141 regs[i] = regs[j];
15142 regs[j] = t;
15144 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15146 if (TARGET_THUMB1)
15148 gcc_assert (ldm_case == 1 || ldm_case == 5);
15150 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15151 write_back = true;
15152 for (i = 0; i < nops; i++)
15153 if (base_reg == regs[i])
15154 write_back = false;
15156 /* Ensure the base is dead if it is updated. */
15157 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15158 return false;
15161 if (ldm_case == 5)
15163 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15164 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15165 offset = 0;
15166 base_reg_rtx = newbase;
15169 for (i = 0; i < nops; i++)
15171 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15172 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15173 SImode, addr, 0);
15175 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15176 write_back ? offset + i * 4 : 0));
15177 return true;
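/* Illustrative transformation enabled by this helper (register numbers are
   arbitrary):

     ldr r3, [r0]
     ldr r4, [r0, #4]     -->     ldmia r0, {r3, r4}

   On Thumb-1 the writeback form "ldmia r0!, {...}" is used unless the base
   register is itself one of the loaded registers, per the logic above.  */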
15180 /* Called from a peephole2 expander to turn a sequence of stores into an
15181 STM instruction. OPERANDS are the operands found by the peephole matcher;
15182 NOPS indicates how many separate stores we are trying to combine.
15183 Returns true iff we could generate a new instruction. */
15185 bool
15186 gen_stm_seq (rtx *operands, int nops)
15188 int i;
15189 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15190 rtx mems[MAX_LDM_STM_OPS];
15191 int base_reg;
15192 rtx base_reg_rtx;
15193 HOST_WIDE_INT offset;
15194 int write_back = FALSE;
15195 int stm_case;
15196 rtx addr;
15197 bool base_reg_dies;
15199 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15200 mem_order, &base_reg, &offset, true);
15202 if (stm_case == 0)
15203 return false;
15205 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15207 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15208 if (TARGET_THUMB1)
15210 gcc_assert (base_reg_dies);
15211 write_back = TRUE;
15214 if (stm_case == 5)
15216 gcc_assert (base_reg_dies);
15217 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15218 offset = 0;
15221 addr = plus_constant (Pmode, base_reg_rtx, offset);
15223 for (i = 0; i < nops; i++)
15225 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15226 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15227 SImode, addr, 0);
15229 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15230 write_back ? offset + i * 4 : 0));
15231 return true;
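/* Illustrative transformation for the store case (register numbers are
   arbitrary):

     str r3, [r0]
     str r4, [r0, #4]     -->     stmia r0, {r3, r4}

   On Thumb-1 the base register must be dead afterwards and the writeback
   form "stmia r0!, {r3, r4}" is emitted, matching the asserts above.  */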
15234 /* Called from a peephole2 expander to turn a sequence of stores that are
15235 preceded by constant loads into an STM instruction. OPERANDS are the
15236 operands found by the peephole matcher; NOPS indicates how many
15237 separate stores we are trying to combine; there are 2 * NOPS
15238 instructions in the peephole.
15239 Returns true iff we could generate a new instruction. */
15241 bool
15242 gen_const_stm_seq (rtx *operands, int nops)
15244 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15245 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15246 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15247 rtx mems[MAX_LDM_STM_OPS];
15248 int base_reg;
15249 rtx base_reg_rtx;
15250 HOST_WIDE_INT offset;
15251 int write_back = FALSE;
15252 int stm_case;
15253 rtx addr;
15254 bool base_reg_dies;
15255 int i, j;
15256 HARD_REG_SET allocated;
15258 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15259 mem_order, &base_reg, &offset, false);
15261 if (stm_case == 0)
15262 return false;
15264 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15266 /* If the same register is used more than once, try to find a free
15267 register. */
15268 CLEAR_HARD_REG_SET (allocated);
15269 for (i = 0; i < nops; i++)
15271 for (j = i + 1; j < nops; j++)
15272 if (regs[i] == regs[j])
15274 rtx t = peep2_find_free_register (0, nops * 2,
15275 TARGET_THUMB1 ? "l" : "r",
15276 SImode, &allocated);
15277 if (t == NULL_RTX)
15278 return false;
15279 reg_rtxs[i] = t;
15280 regs[i] = REGNO (t);
15284 /* Compute an ordering that maps the register numbers to an ascending
15285 sequence. */
15286 reg_order[0] = 0;
15287 for (i = 0; i < nops; i++)
15288 if (regs[i] < regs[reg_order[0]])
15289 reg_order[0] = i;
15291 for (i = 1; i < nops; i++)
15293 int this_order = reg_order[i - 1];
15294 for (j = 0; j < nops; j++)
15295 if (regs[j] > regs[reg_order[i - 1]]
15296 && (this_order == reg_order[i - 1]
15297 || regs[j] < regs[this_order]))
15298 this_order = j;
15299 reg_order[i] = this_order;
15302 /* Ensure that registers that must be live after the instruction end
15303 up with the correct value. */
15304 for (i = 0; i < nops; i++)
15306 int this_order = reg_order[i];
15307 if ((this_order != mem_order[i]
15308 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15309 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15310 return false;
15313 /* Load the constants. */
15314 for (i = 0; i < nops; i++)
15316 rtx op = operands[2 * nops + mem_order[i]];
15317 sorted_regs[i] = regs[reg_order[i]];
15318 emit_move_insn (reg_rtxs[reg_order[i]], op);
15321 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15323 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15324 if (TARGET_THUMB1)
15326 gcc_assert (base_reg_dies);
15327 write_back = TRUE;
15330 if (stm_case == 5)
15332 gcc_assert (base_reg_dies);
15333 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15334 offset = 0;
15337 addr = plus_constant (Pmode, base_reg_rtx, offset);
15339 for (i = 0; i < nops; i++)
15341 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15342 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15343 SImode, addr, 0);
15345 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15346 write_back ? offset + i * 4 : 0));
15347 return true;
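/* Illustrative example for the constant-store case above:

     mov r3, #1                  mov r3, #1
     str r3, [r0]        -->     mov r4, #2
     mov r3, #2                  stmia r0, {r3, r4}
     str r3, [r0, #4]

   where r4 is a free register found by peep2_find_free_register and r3 must
   be dead after the sequence, as checked above.  */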
15350 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15351 unaligned copies on processors which support unaligned semantics for those
15352 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15353 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15354 An interleave factor of 1 (the minimum) will perform no interleaving.
15355 Load/store multiple are used for aligned addresses where possible. */
15357 static void
15358 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15359 HOST_WIDE_INT length,
15360 unsigned int interleave_factor)
15362 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15363 int *regnos = XALLOCAVEC (int, interleave_factor);
15364 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15365 HOST_WIDE_INT i, j;
15366 HOST_WIDE_INT remaining = length, words;
15367 rtx halfword_tmp = NULL, byte_tmp = NULL;
15368 rtx dst, src;
15369 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15370 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15371 HOST_WIDE_INT srcoffset, dstoffset;
15372 HOST_WIDE_INT src_autoinc, dst_autoinc;
15373 rtx mem, addr;
15375 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15377 /* Use hard registers if we have aligned source or destination so we can use
15378 load/store multiple with contiguous registers. */
15379 if (dst_aligned || src_aligned)
15380 for (i = 0; i < interleave_factor; i++)
15381 regs[i] = gen_rtx_REG (SImode, i);
15382 else
15383 for (i = 0; i < interleave_factor; i++)
15384 regs[i] = gen_reg_rtx (SImode);
15386 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15387 src = copy_addr_to_reg (XEXP (srcbase, 0));
15389 srcoffset = dstoffset = 0;
15391 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15392 For copying the last bytes we want to subtract this offset again. */
15393 src_autoinc = dst_autoinc = 0;
15395 for (i = 0; i < interleave_factor; i++)
15396 regnos[i] = i;
15398 /* Copy BLOCK_SIZE_BYTES chunks. */
15400 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15402 /* Load words. */
15403 if (src_aligned && interleave_factor > 1)
15405 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15406 TRUE, srcbase, &srcoffset));
15407 src_autoinc += UNITS_PER_WORD * interleave_factor;
15409 else
15411 for (j = 0; j < interleave_factor; j++)
15413 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15414 - src_autoinc));
15415 mem = adjust_automodify_address (srcbase, SImode, addr,
15416 srcoffset + j * UNITS_PER_WORD);
15417 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15419 srcoffset += block_size_bytes;
15422 /* Store words. */
15423 if (dst_aligned && interleave_factor > 1)
15425 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15426 TRUE, dstbase, &dstoffset));
15427 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15429 else
15431 for (j = 0; j < interleave_factor; j++)
15433 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15434 - dst_autoinc));
15435 mem = adjust_automodify_address (dstbase, SImode, addr,
15436 dstoffset + j * UNITS_PER_WORD);
15437 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15439 dstoffset += block_size_bytes;
15442 remaining -= block_size_bytes;
15445 /* Copy any whole words left (note these aren't interleaved with any
15446 subsequent halfword/byte load/stores in the interests of simplicity). */
15448 words = remaining / UNITS_PER_WORD;
15450 gcc_assert (words < interleave_factor);
15452 if (src_aligned && words > 1)
15454 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15455 &srcoffset));
15456 src_autoinc += UNITS_PER_WORD * words;
15458 else
15460 for (j = 0; j < words; j++)
15462 addr = plus_constant (Pmode, src,
15463 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15464 mem = adjust_automodify_address (srcbase, SImode, addr,
15465 srcoffset + j * UNITS_PER_WORD);
15466 if (src_aligned)
15467 emit_move_insn (regs[j], mem);
15468 else
15469 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15471 srcoffset += words * UNITS_PER_WORD;
15474 if (dst_aligned && words > 1)
15476 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15477 &dstoffset));
15478 dst_autoinc += words * UNITS_PER_WORD;
15480 else
15482 for (j = 0; j < words; j++)
15484 addr = plus_constant (Pmode, dst,
15485 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15486 mem = adjust_automodify_address (dstbase, SImode, addr,
15487 dstoffset + j * UNITS_PER_WORD);
15488 if (dst_aligned)
15489 emit_move_insn (mem, regs[j]);
15490 else
15491 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15493 dstoffset += words * UNITS_PER_WORD;
15496 remaining -= words * UNITS_PER_WORD;
15498 gcc_assert (remaining < 4);
15500 /* Copy a halfword if necessary. */
15502 if (remaining >= 2)
15504 halfword_tmp = gen_reg_rtx (SImode);
15506 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15507 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15508 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15510 /* Either write out immediately, or delay until we've loaded the last
15511 byte, depending on interleave factor. */
15512 if (interleave_factor == 1)
15514 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15515 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15516 emit_insn (gen_unaligned_storehi (mem,
15517 gen_lowpart (HImode, halfword_tmp)));
15518 halfword_tmp = NULL;
15519 dstoffset += 2;
15522 remaining -= 2;
15523 srcoffset += 2;
15526 gcc_assert (remaining < 2);
15528 /* Copy last byte. */
15530 if ((remaining & 1) != 0)
15532 byte_tmp = gen_reg_rtx (SImode);
15534 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15535 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15536 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15538 if (interleave_factor == 1)
15540 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15541 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15542 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15543 byte_tmp = NULL;
15544 dstoffset++;
15547 remaining--;
15548 srcoffset++;
15551 /* Store last halfword if we haven't done so already. */
15553 if (halfword_tmp)
15555 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15556 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15557 emit_insn (gen_unaligned_storehi (mem,
15558 gen_lowpart (HImode, halfword_tmp)));
15559 dstoffset += 2;
15562 /* Likewise for last byte. */
15564 if (byte_tmp)
15566 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15567 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15568 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15569 dstoffset++;
15572 gcc_assert (remaining == 0 && srcoffset == dstoffset);
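/* Rough shape of the output for LENGTH == 8, INTERLEAVE_FACTOR == 2 and only
   the destination word-aligned (illustrative; the exact output depends on
   tuning):

     ldr r0, [src]           @ unaligned word loads
     ldr r1, [src, #4]
     stmia dst!, {r0, r1}    @ aligned side uses a store-multiple

   Any trailing halfword and byte are copied with ldrh/strh and ldrb/strb as
   in the tail code above.  */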
15575 /* From mips_adjust_block_mem:
15577 Helper function for doing a loop-based block operation on memory
15578 reference MEM. Each iteration of the loop will operate on LENGTH
15579 bytes of MEM.
15581 Create a new base register for use within the loop and point it to
15582 the start of MEM. Create a new memory reference that uses this
15583 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15585 static void
15586 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15587 rtx *loop_mem)
15589 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15591 /* Although the new mem does not refer to a known location,
15592 it does keep up to LENGTH bytes of alignment. */
15593 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15594 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15597 /* From mips_block_move_loop:
15599 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15600 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15601 the memory regions do not overlap. */
15603 static void
15604 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15605 unsigned int interleave_factor,
15606 HOST_WIDE_INT bytes_per_iter)
15608 rtx src_reg, dest_reg, final_src, test;
15609 HOST_WIDE_INT leftover;
15611 leftover = length % bytes_per_iter;
15612 length -= leftover;
15614 /* Create registers and memory references for use within the loop. */
15615 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15616 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15618 /* Calculate the value that SRC_REG should have after the last iteration of
15619 the loop. */
15620 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15621 0, 0, OPTAB_WIDEN);
15623 /* Emit the start of the loop. */
15624 rtx_code_label *label = gen_label_rtx ();
15625 emit_label (label);
15627 /* Emit the loop body. */
15628 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15629 interleave_factor);
15631 /* Move on to the next block. */
15632 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15633 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15635 /* Emit the loop condition. */
15636 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15637 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15639 /* Mop up any left-over bytes. */
15640 if (leftover)
15641 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
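/* Sketch of the loop emitted above (labels and register names are
   placeholders):

   .Lloop:
     <straight copy of BYTES_PER_ITER bytes>
     add  src_reg, src_reg, #BYTES_PER_ITER
     add  dst_reg, dst_reg, #BYTES_PER_ITER
     cmp  src_reg, final_src
     bne  .Lloop
     <straight copy of the LENGTH % BYTES_PER_ITER leftover bytes>  */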
15644 /* Emit a block move when either the source or destination is unaligned (not
15645 aligned to a four-byte boundary). This may need further tuning depending on
15646 core type, optimize_size setting, etc. */
15648 static int
15649 arm_cpymemqi_unaligned (rtx *operands)
15651 HOST_WIDE_INT length = INTVAL (operands[2]);
15653 if (optimize_size)
15655 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15656 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15657 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15658 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15659 or dst_aligned though: allow more interleaving in those cases since the
15660 resulting code can be smaller. */
15661 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15662 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15664 if (length > 12)
15665 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15666 interleave_factor, bytes_per_iter);
15667 else
15668 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15669 interleave_factor);
15671 else
15673 /* Note that the loop created by arm_block_move_unaligned_loop may be
15674 subject to loop unrolling, which makes tuning this condition a little
15675 redundant. */
15676 if (length > 32)
15677 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15678 else
15679 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15682 return 1;
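/* Recap of the heuristics above (descriptive only): when optimizing for size,
   copies larger than 12 bytes use the loop with an interleave factor of 1 or
   2 (4 or 8 bytes per iteration); otherwise copies larger than 32 bytes use
   the loop with interleave factor 4 and 16 bytes per iteration, and smaller
   copies use a straight sequence.  */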
15686 arm_gen_cpymemqi (rtx *operands)
15688 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15689 HOST_WIDE_INT srcoffset, dstoffset;
15690 rtx src, dst, srcbase, dstbase;
15691 rtx part_bytes_reg = NULL;
15692 rtx mem;
15694 if (!CONST_INT_P (operands[2])
15695 || !CONST_INT_P (operands[3])
15696 || INTVAL (operands[2]) > 64)
15697 return 0;
15699 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15700 return arm_cpymemqi_unaligned (operands);
15702 if (INTVAL (operands[3]) & 3)
15703 return 0;
15705 dstbase = operands[0];
15706 srcbase = operands[1];
15708 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15709 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15711 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15712 out_words_to_go = INTVAL (operands[2]) / 4;
15713 last_bytes = INTVAL (operands[2]) & 3;
15714 dstoffset = srcoffset = 0;
15716 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15717 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15719 while (in_words_to_go >= 2)
15721 if (in_words_to_go > 4)
15722 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15723 TRUE, srcbase, &srcoffset));
15724 else
15725 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15726 src, FALSE, srcbase,
15727 &srcoffset));
15729 if (out_words_to_go)
15731 if (out_words_to_go > 4)
15732 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15733 TRUE, dstbase, &dstoffset));
15734 else if (out_words_to_go != 1)
15735 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15736 out_words_to_go, dst,
15737 (last_bytes == 0
15738 ? FALSE : TRUE),
15739 dstbase, &dstoffset));
15740 else
15742 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15743 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15744 if (last_bytes != 0)
15746 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15747 dstoffset += 4;
15752 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15753 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15756 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15757 if (out_words_to_go)
15759 rtx sreg;
15761 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15762 sreg = copy_to_reg (mem);
15764 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15765 emit_move_insn (mem, sreg);
15766 in_words_to_go--;
15768 gcc_assert (!in_words_to_go); /* Sanity check */
15771 if (in_words_to_go)
15773 gcc_assert (in_words_to_go > 0);
15775 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15776 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15779 gcc_assert (!last_bytes || part_bytes_reg);
15781 if (BYTES_BIG_ENDIAN && last_bytes)
15783 rtx tmp = gen_reg_rtx (SImode);
15785 /* The bytes we want are in the top end of the word. */
15786 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15787 GEN_INT (8 * (4 - last_bytes))));
15788 part_bytes_reg = tmp;
15790 while (last_bytes)
15792 mem = adjust_automodify_address (dstbase, QImode,
15793 plus_constant (Pmode, dst,
15794 last_bytes - 1),
15795 dstoffset + last_bytes - 1);
15796 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15798 if (--last_bytes)
15800 tmp = gen_reg_rtx (SImode);
15801 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15802 part_bytes_reg = tmp;
15807 else
15809 if (last_bytes > 1)
15811 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15812 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15813 last_bytes -= 2;
15814 if (last_bytes)
15816 rtx tmp = gen_reg_rtx (SImode);
15817 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15818 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15819 part_bytes_reg = tmp;
15820 dstoffset += 2;
15824 if (last_bytes)
15826 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15827 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15831 return 1;
15834 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15835 by mode size. */
15836 inline static rtx
15837 next_consecutive_mem (rtx mem)
15839 machine_mode mode = GET_MODE (mem);
15840 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15841 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15843 return adjust_automodify_address (mem, mode, addr, offset);
15846 /* Copy using LDRD/STRD instructions whenever possible.
15847 Returns true upon success. */
15848 bool
15849 gen_cpymem_ldrd_strd (rtx *operands)
15851 unsigned HOST_WIDE_INT len;
15852 HOST_WIDE_INT align;
15853 rtx src, dst, base;
15854 rtx reg0;
15855 bool src_aligned, dst_aligned;
15856 bool src_volatile, dst_volatile;
15858 gcc_assert (CONST_INT_P (operands[2]));
15859 gcc_assert (CONST_INT_P (operands[3]));
15861 len = UINTVAL (operands[2]);
15862 if (len > 64)
15863 return false;
15865 /* Maximum alignment we can assume for both src and dst buffers. */
15866 align = INTVAL (operands[3]);
15868 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15869 return false;
15871 /* Place src and dst addresses in registers
15872 and update the corresponding mem rtx. */
15873 dst = operands[0];
15874 dst_volatile = MEM_VOLATILE_P (dst);
15875 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15876 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15877 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15879 src = operands[1];
15880 src_volatile = MEM_VOLATILE_P (src);
15881 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15882 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15883 src = adjust_automodify_address (src, VOIDmode, base, 0);
15885 if (!unaligned_access && !(src_aligned && dst_aligned))
15886 return false;
15888 if (src_volatile || dst_volatile)
15889 return false;
15891 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15892 if (!(dst_aligned || src_aligned))
15893 return arm_gen_cpymemqi (operands);
15895 /* If either src or dst is unaligned we'll be accessing it as pairs
15896 of unaligned SImode accesses. Otherwise we can generate DImode
15897 ldrd/strd instructions. */
15898 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15899 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15901 while (len >= 8)
15903 len -= 8;
15904 reg0 = gen_reg_rtx (DImode);
15905 rtx first_reg = NULL_RTX;
15906 rtx second_reg = NULL_RTX;
15908 if (!src_aligned || !dst_aligned)
15910 if (BYTES_BIG_ENDIAN)
15912 second_reg = gen_lowpart (SImode, reg0);
15913 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15915 else
15917 first_reg = gen_lowpart (SImode, reg0);
15918 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15921 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15922 emit_move_insn (reg0, src);
15923 else if (src_aligned)
15924 emit_insn (gen_unaligned_loaddi (reg0, src));
15925 else
15927 emit_insn (gen_unaligned_loadsi (first_reg, src));
15928 src = next_consecutive_mem (src);
15929 emit_insn (gen_unaligned_loadsi (second_reg, src));
15932 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15933 emit_move_insn (dst, reg0);
15934 else if (dst_aligned)
15935 emit_insn (gen_unaligned_storedi (dst, reg0));
15936 else
15938 emit_insn (gen_unaligned_storesi (dst, first_reg));
15939 dst = next_consecutive_mem (dst);
15940 emit_insn (gen_unaligned_storesi (dst, second_reg));
15943 src = next_consecutive_mem (src);
15944 dst = next_consecutive_mem (dst);
15947 gcc_assert (len < 8);
15948 if (len >= 4)
15950 /* More than a word but less than a double-word to copy. Copy a word. */
15951 reg0 = gen_reg_rtx (SImode);
15952 src = adjust_address (src, SImode, 0);
15953 dst = adjust_address (dst, SImode, 0);
15954 if (src_aligned)
15955 emit_move_insn (reg0, src);
15956 else
15957 emit_insn (gen_unaligned_loadsi (reg0, src));
15959 if (dst_aligned)
15960 emit_move_insn (dst, reg0);
15961 else
15962 emit_insn (gen_unaligned_storesi (dst, reg0));
15964 src = next_consecutive_mem (src);
15965 dst = next_consecutive_mem (dst);
15966 len -= 4;
15969 if (len == 0)
15970 return true;
15972 /* Copy the remaining bytes. */
15973 if (len >= 2)
15975 dst = adjust_address (dst, HImode, 0);
15976 src = adjust_address (src, HImode, 0);
15977 reg0 = gen_reg_rtx (SImode);
15978 if (src_aligned)
15979 emit_insn (gen_zero_extendhisi2 (reg0, src));
15980 else
15981 emit_insn (gen_unaligned_loadhiu (reg0, src));
15983 if (dst_aligned)
15984 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15985 else
15986 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15988 src = next_consecutive_mem (src);
15989 dst = next_consecutive_mem (dst);
15990 if (len == 2)
15991 return true;
15994 dst = adjust_address (dst, QImode, 0);
15995 src = adjust_address (src, QImode, 0);
15996 reg0 = gen_reg_rtx (QImode);
15997 emit_move_insn (reg0, src);
15998 emit_move_insn (dst, reg0);
15999 return true;
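/* Illustrative output for a 16-byte copy with both buffers doubleword
   aligned (register names arbitrary):

     ldrd r4, r5, [src]
     strd r4, r5, [dst]
     ldrd r4, r5, [src, #8]
     strd r4, r5, [dst, #8]

   When a buffer is not even word-aligned, the loop above falls back to pairs
   of unaligned ldr/str accesses for each doubleword instead.  */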
16002 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
16003 into its component 32-bit subregs. OP2 may be an immediate
16004 constant and we want to simplify it in that case. */
16005 void
16006 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
16007 rtx *lo_op2, rtx *hi_op2)
16009 *lo_op1 = gen_lowpart (SImode, op1);
16010 *hi_op1 = gen_highpart (SImode, op1);
16011 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
16012 subreg_lowpart_offset (SImode, DImode));
16013 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
16014 subreg_highpart_offset (SImode, DImode));
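/* Hypothetical example of the decomposition above: with OP2 a DImode
   constant such as (const_int 0x100000003), *LO_OP2 becomes (const_int 3)
   and *HI_OP2 becomes (const_int 1), while OP1 is simply split into its low
   and high SImode parts.  */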
16017 /* Select a dominance comparison mode if possible for a test of the general
16018 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
16019 COND_OR == DOM_CC_X_AND_Y => (X && Y)
16020 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
16021 COND_OR == DOM_CC_X_OR_Y => (X || Y)
16022 In all cases OP will be either EQ or NE, but we don't need to know which
16023 here. If we are unable to support a dominance comparison we return
16024 CC mode. This will then fail to match for the RTL expressions that
16025 generate this call. */
16026 machine_mode
16027 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
16029 enum rtx_code cond1, cond2;
16030 int swapped = 0;
16032 /* Currently we will probably get the wrong result if the individual
16033 comparisons are not simple. This also ensures that it is safe to
16034 reverse a comparison if necessary. */
16035 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
16036 != CCmode)
16037 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
16038 != CCmode))
16039 return CCmode;
16041 /* The if_then_else variant of this tests the second condition if the
16042 first passes, but is true if the first fails. Reverse the first
16043 condition to get a true "inclusive-or" expression. */
16044 if (cond_or == DOM_CC_NX_OR_Y)
16045 cond1 = reverse_condition (cond1);
16047 /* If the comparisons are not equal, and one doesn't dominate the other,
16048 then we can't do this. */
16049 if (cond1 != cond2
16050 && !comparison_dominates_p (cond1, cond2)
16051 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
16052 return CCmode;
16054 if (swapped)
16055 std::swap (cond1, cond2);
16057 switch (cond1)
16059 case EQ:
16060 if (cond_or == DOM_CC_X_AND_Y)
16061 return CC_DEQmode;
16063 switch (cond2)
16065 case EQ: return CC_DEQmode;
16066 case LE: return CC_DLEmode;
16067 case LEU: return CC_DLEUmode;
16068 case GE: return CC_DGEmode;
16069 case GEU: return CC_DGEUmode;
16070 default: gcc_unreachable ();
16073 case LT:
16074 if (cond_or == DOM_CC_X_AND_Y)
16075 return CC_DLTmode;
16077 switch (cond2)
16079 case LT:
16080 return CC_DLTmode;
16081 case LE:
16082 return CC_DLEmode;
16083 case NE:
16084 return CC_DNEmode;
16085 default:
16086 gcc_unreachable ();
16089 case GT:
16090 if (cond_or == DOM_CC_X_AND_Y)
16091 return CC_DGTmode;
16093 switch (cond2)
16095 case GT:
16096 return CC_DGTmode;
16097 case GE:
16098 return CC_DGEmode;
16099 case NE:
16100 return CC_DNEmode;
16101 default:
16102 gcc_unreachable ();
16105 case LTU:
16106 if (cond_or == DOM_CC_X_AND_Y)
16107 return CC_DLTUmode;
16109 switch (cond2)
16111 case LTU:
16112 return CC_DLTUmode;
16113 case LEU:
16114 return CC_DLEUmode;
16115 case NE:
16116 return CC_DNEmode;
16117 default:
16118 gcc_unreachable ();
16121 case GTU:
16122 if (cond_or == DOM_CC_X_AND_Y)
16123 return CC_DGTUmode;
16125 switch (cond2)
16127 case GTU:
16128 return CC_DGTUmode;
16129 case GEU:
16130 return CC_DGEUmode;
16131 case NE:
16132 return CC_DNEmode;
16133 default:
16134 gcc_unreachable ();
16137 /* The remaining cases only occur when both comparisons are the
16138 same. */
16139 case NE:
16140 gcc_assert (cond1 == cond2);
16141 return CC_DNEmode;
16143 case LE:
16144 gcc_assert (cond1 == cond2);
16145 return CC_DLEmode;
16147 case GE:
16148 gcc_assert (cond1 == cond2);
16149 return CC_DGEmode;
16151 case LEU:
16152 gcc_assert (cond1 == cond2);
16153 return CC_DLEUmode;
16155 case GEU:
16156 gcc_assert (cond1 == cond2);
16157 return CC_DGEUmode;
16159 default:
16160 gcc_unreachable ();
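/* Worked example (illustrative): for a test of the form
   (ne (ior (eq x1 x2) (eq y1 y2)) (const_int 0)), i.e. COND_OR ==
   DOM_CC_X_OR_Y with both sub-comparisons EQ, the switch above yields
   CC_DEQmode.  If either sub-comparison had required something other than
   CCmode, CCmode would have been returned and the caller's pattern would
   simply fail to match.  */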
16164 machine_mode
16165 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16167 /* All floating point compares return CCFP if it is an equality
16168 comparison, and CCFPE otherwise. */
16169 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16171 switch (op)
16173 case EQ:
16174 case NE:
16175 case UNORDERED:
16176 case ORDERED:
16177 case UNLT:
16178 case UNLE:
16179 case UNGT:
16180 case UNGE:
16181 case UNEQ:
16182 case LTGT:
16183 return CCFPmode;
16185 case LT:
16186 case LE:
16187 case GT:
16188 case GE:
16189 return CCFPEmode;
16191 default:
16192 gcc_unreachable ();
16196 /* A compare with a shifted operand. Because of canonicalization, the
16197 comparison will have to be swapped when we emit the assembler. */
16198 if (GET_MODE (y) == SImode
16199 && (REG_P (y) || (SUBREG_P (y)))
16200 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16201 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16202 || GET_CODE (x) == ROTATERT))
16203 return CC_SWPmode;
16205 /* A widened compare of the sum of a value plus a carry against a
16206 constant. This is a representation of RSC. We want to swap the
16207 result of the comparison at output. Not valid if the Z bit is
16208 needed. */
16209 if (GET_MODE (x) == DImode
16210 && GET_CODE (x) == PLUS
16211 && arm_borrow_operation (XEXP (x, 1), DImode)
16212 && CONST_INT_P (y)
16213 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16214 && (op == LE || op == GT))
16215 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16216 && (op == LEU || op == GTU))))
16217 return CC_SWPmode;
16219 /* If X is a constant we want to use CC_RSBmode. This is
16220 non-canonical, but arm_gen_compare_reg uses this to generate the
16221 correct canonical form. */
16222 if (GET_MODE (y) == SImode
16223 && (REG_P (y) || SUBREG_P (y))
16224 && CONST_INT_P (x))
16225 return CC_RSBmode;
16227 /* This operation is performed swapped, but since we only rely on the Z
16228 flag we don't need an additional mode. */
16229 if (GET_MODE (y) == SImode
16230 && (REG_P (y) || (SUBREG_P (y)))
16231 && GET_CODE (x) == NEG
16232 && (op == EQ || op == NE))
16233 return CC_Zmode;
16235 /* This is a special case that is used by combine to allow a
16236 comparison of a shifted byte load to be split into a zero-extend
16237 followed by a comparison of the shifted integer (only valid for
16238 equalities and unsigned inequalities). */
16239 if (GET_MODE (x) == SImode
16240 && GET_CODE (x) == ASHIFT
16241 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16242 && GET_CODE (XEXP (x, 0)) == SUBREG
16243 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16244 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16245 && (op == EQ || op == NE
16246 || op == GEU || op == GTU || op == LTU || op == LEU)
16247 && CONST_INT_P (y))
16248 return CC_Zmode;
16250 /* A construct for a conditional compare, if the false arm contains
16251 0, then both conditions must be true, otherwise either condition
16252 must be true. Not all conditions are possible, so CCmode is
16253 returned if it can't be done. */
16254 if (GET_CODE (x) == IF_THEN_ELSE
16255 && (XEXP (x, 2) == const0_rtx
16256 || XEXP (x, 2) == const1_rtx)
16257 && COMPARISON_P (XEXP (x, 0))
16258 && COMPARISON_P (XEXP (x, 1)))
16259 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16260 INTVAL (XEXP (x, 2)));
16262 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16263 if (GET_CODE (x) == AND
16264 && (op == EQ || op == NE)
16265 && COMPARISON_P (XEXP (x, 0))
16266 && COMPARISON_P (XEXP (x, 1)))
16267 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16268 DOM_CC_X_AND_Y);
16270 if (GET_CODE (x) == IOR
16271 && (op == EQ || op == NE)
16272 && COMPARISON_P (XEXP (x, 0))
16273 && COMPARISON_P (XEXP (x, 1)))
16274 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16275 DOM_CC_X_OR_Y);
16277 /* An operation (on Thumb) where we want to test for a single bit.
16278 This is done by shifting that bit up into the top bit of a
16279 scratch register; we can then branch on the sign bit. */
16280 if (TARGET_THUMB1
16281 && GET_MODE (x) == SImode
16282 && (op == EQ || op == NE)
16283 && GET_CODE (x) == ZERO_EXTRACT
16284 && XEXP (x, 1) == const1_rtx)
16285 return CC_Nmode;
16287 /* For an operation that sets the condition codes as a side-effect, the
16288 V flag is not set correctly, so we can only use comparisons where
16289 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16290 instead.) */
16291 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16292 if (GET_MODE (x) == SImode
16293 && y == const0_rtx
16294 && (op == EQ || op == NE || op == LT || op == GE)
16295 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16296 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16297 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16298 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16299 || GET_CODE (x) == LSHIFTRT
16300 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16301 || GET_CODE (x) == ROTATERT
16302 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16303 return CC_NZmode;
16305 /* A comparison of ~reg with a const is really a special
16306 canonicalization of compare (~const, reg), which is a reverse
16307 subtract operation. We may not get here if CONST is 0, but that
16308 doesn't matter because ~0 isn't a valid immediate for RSB. */
16309 if (GET_MODE (x) == SImode
16310 && GET_CODE (x) == NOT
16311 && CONST_INT_P (y))
16312 return CC_RSBmode;
16314 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16315 return CC_Zmode;
16317 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16318 && GET_CODE (x) == PLUS
16319 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16320 return CC_Cmode;
16322 if (GET_MODE (x) == DImode
16323 && GET_CODE (x) == PLUS
16324 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16325 && CONST_INT_P (y)
16326 && UINTVAL (y) == 0x800000000
16327 && (op == GEU || op == LTU))
16328 return CC_ADCmode;
16330 if (GET_MODE (x) == DImode
16331 && (op == GE || op == LT)
16332 && GET_CODE (x) == SIGN_EXTEND
16333 && ((GET_CODE (y) == PLUS
16334 && arm_borrow_operation (XEXP (y, 0), DImode))
16335 || arm_borrow_operation (y, DImode)))
16336 return CC_NVmode;
16338 if (GET_MODE (x) == DImode
16339 && (op == GEU || op == LTU)
16340 && GET_CODE (x) == ZERO_EXTEND
16341 && ((GET_CODE (y) == PLUS
16342 && arm_borrow_operation (XEXP (y, 0), DImode))
16343 || arm_borrow_operation (y, DImode)))
16344 return CC_Bmode;
16346 if (GET_MODE (x) == DImode
16347 && (op == EQ || op == NE)
16348 && (GET_CODE (x) == PLUS
16349 || GET_CODE (x) == MINUS)
16350 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16351 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16352 && GET_CODE (y) == SIGN_EXTEND
16353 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16354 return CC_Vmode;
16356 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16357 return GET_MODE (x);
16359 return CCmode;
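/* Example (illustrative): comparing (ashift (reg:SI Rn) (const_int 2))
   against a plain SImode register selects CC_SWPmode above, since, because
   of canonicalization, the comparison has to be swapped when the assembler
   is emitted.  */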
16362 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16363 the sequence of instructions needed to generate a suitable condition
16364 code register. Return the CC register result. */
16365 static rtx
16366 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16368 machine_mode mode;
16369 rtx cc_reg;
16371 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16372 gcc_assert (TARGET_32BIT);
16373 gcc_assert (!CONST_INT_P (x));
16375 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16376 subreg_lowpart_offset (SImode, DImode));
16377 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16378 subreg_highpart_offset (SImode, DImode));
16379 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16380 subreg_lowpart_offset (SImode, DImode));
16381 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16382 subreg_highpart_offset (SImode, DImode));
16383 switch (code)
16385 case EQ:
16386 case NE:
16388 if (y_lo == const0_rtx || y_hi == const0_rtx)
16390 if (y_lo != const0_rtx)
16392 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16394 gcc_assert (y_hi == const0_rtx);
16395 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16396 if (!arm_add_operand (y_lo, SImode))
16397 y_lo = force_reg (SImode, y_lo);
16398 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16399 x_lo = scratch2;
16401 else if (y_hi != const0_rtx)
16403 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16405 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16406 if (!arm_add_operand (y_hi, SImode))
16407 y_hi = force_reg (SImode, y_hi);
16408 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16409 x_hi = scratch2;
16412 if (!scratch)
16414 gcc_assert (!reload_completed);
16415 scratch = gen_rtx_SCRATCH (SImode);
16418 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16419 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16421 rtx set
16422 = gen_rtx_SET (cc_reg,
16423 gen_rtx_COMPARE (CC_NZmode,
16424 gen_rtx_IOR (SImode, x_lo, x_hi),
16425 const0_rtx));
16426 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16427 clobber)));
16428 return cc_reg;
16431 if (!arm_add_operand (y_lo, SImode))
16432 y_lo = force_reg (SImode, y_lo);
16434 if (!arm_add_operand (y_hi, SImode))
16435 y_hi = force_reg (SImode, y_hi);
16437 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16438 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16439 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16440 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16441 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16443 emit_insn (gen_rtx_SET (cc_reg,
16444 gen_rtx_COMPARE (mode, conjunction,
16445 const0_rtx)));
16446 return cc_reg;
16449 case LT:
16450 case GE:
16452 if (y_lo == const0_rtx)
16454 /* If the low word of y is 0, then this is simply a normal
16455 compare of the upper words. */
16456 if (!arm_add_operand (y_hi, SImode))
16457 y_hi = force_reg (SImode, y_hi);
16459 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16462 if (!arm_add_operand (y_lo, SImode))
16463 y_lo = force_reg (SImode, y_lo);
16465 rtx cmp1
16466 = gen_rtx_LTU (DImode,
16467 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16468 const0_rtx);
16470 if (!scratch)
16471 scratch = gen_rtx_SCRATCH (SImode);
16473 if (!arm_not_operand (y_hi, SImode))
16474 y_hi = force_reg (SImode, y_hi);
16476 rtx_insn *insn;
16477 if (y_hi == const0_rtx)
16478 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16479 cmp1));
16480 else if (CONST_INT_P (y_hi))
16481 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16482 y_hi, cmp1));
16483 else
16484 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16485 cmp1));
16486 return SET_DEST (single_set (insn));
16489 case LE:
16490 case GT:
16492 /* During expansion, we only expect to get here if y is a
16493 constant that we want to handle; otherwise we should have
16494 swapped the operands already. */
16495 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16497 if (!const_ok_for_arm (INTVAL (y_lo)))
16498 y_lo = force_reg (SImode, y_lo);
16500 /* Perform a reverse subtract and compare. */
16501 rtx cmp1
16502 = gen_rtx_LTU (DImode,
16503 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16504 const0_rtx);
16505 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16506 x_hi, cmp1));
16507 return SET_DEST (single_set (insn));
16510 case LTU:
16511 case GEU:
16513 if (y_lo == const0_rtx)
16515 /* If the low word of y is 0, then this is simply a normal
16516 compare of the upper words. */
16517 if (!arm_add_operand (y_hi, SImode))
16518 y_hi = force_reg (SImode, y_hi);
16520 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16523 if (!arm_add_operand (y_lo, SImode))
16524 y_lo = force_reg (SImode, y_lo);
16526 rtx cmp1
16527 = gen_rtx_LTU (DImode,
16528 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16529 const0_rtx);
16531 if (!scratch)
16532 scratch = gen_rtx_SCRATCH (SImode);
16533 if (!arm_not_operand (y_hi, SImode))
16534 y_hi = force_reg (SImode, y_hi);
16536 rtx_insn *insn;
16537 if (y_hi == const0_rtx)
16538 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16539 cmp1));
16540 else if (CONST_INT_P (y_hi))
16542 /* Constant is viewed as unsigned when zero-extended. */
16543 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16544 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16545 y_hi, cmp1));
16547 else
16548 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16549 cmp1));
16550 return SET_DEST (single_set (insn));
16553 case LEU:
16554 case GTU:
16556 /* During expansion, we only expect to get here if y is a
16557 constant that we want to handle; otherwise we should have
16558 swapped the operands already. */
16559 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16561 if (!const_ok_for_arm (INTVAL (y_lo)))
16562 y_lo = force_reg (SImode, y_lo);
16564 /* Perform a reverse subtract and compare. */
16565 rtx cmp1
16566 = gen_rtx_LTU (DImode,
16567 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16568 const0_rtx);
16569 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16570 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16571 x_hi, cmp1));
16572 return SET_DEST (single_set (insn));
16575 default:
16576 gcc_unreachable ();
16580 /* X and Y are two things to compare using CODE. Emit the compare insn and
16581 return the rtx for register 0 in the proper mode. */
16583 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16585 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16586 return arm_gen_dicompare_reg (code, x, y, scratch);
16588 machine_mode mode = SELECT_CC_MODE (code, x, y);
16589 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16590 if (mode == CC_RSBmode)
16592 if (!scratch)
16593 scratch = gen_rtx_SCRATCH (SImode);
16594 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16595 GEN_INT (~UINTVAL (x)), y));
16597 else
16598 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16600 return cc_reg;
16603 /* Generate a sequence of insns that will generate the correct return
16604 address mask depending on the physical architecture that the program
16605 is running on. */
16607 arm_gen_return_addr_mask (void)
16609 rtx reg = gen_reg_rtx (Pmode);
16611 emit_insn (gen_return_addr_mask (reg));
16612 return reg;
16615 void
16616 arm_reload_in_hi (rtx *operands)
16618 rtx ref = operands[1];
16619 rtx base, scratch;
16620 HOST_WIDE_INT offset = 0;
16622 if (SUBREG_P (ref))
16624 offset = SUBREG_BYTE (ref);
16625 ref = SUBREG_REG (ref);
16628 if (REG_P (ref))
16630 /* We have a pseudo which has been spilt onto the stack; there
16631 are two cases here: the first where there is a simple
16632 stack-slot replacement and a second where the stack-slot is
16633 out of range, or is used as a subreg. */
16634 if (reg_equiv_mem (REGNO (ref)))
16636 ref = reg_equiv_mem (REGNO (ref));
16637 base = find_replacement (&XEXP (ref, 0));
16639 else
16640 /* The slot is out of range, or was dressed up in a SUBREG. */
16641 base = reg_equiv_address (REGNO (ref));
16643 /* PR 62554: If there is no equivalent memory location then just move
16644 the value as an SImode register move. This happens when the target
16645 architecture variant does not have an HImode register move. */
16646 if (base == NULL)
16648 gcc_assert (REG_P (operands[0]));
16649 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16650 gen_rtx_SUBREG (SImode, ref, 0)));
16651 return;
16654 else
16655 base = find_replacement (&XEXP (ref, 0));
16657 /* Handle the case where the address is too complex to be offset by 1. */
16658 if (GET_CODE (base) == MINUS
16659 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16661 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16663 emit_set_insn (base_plus, base);
16664 base = base_plus;
16666 else if (GET_CODE (base) == PLUS)
16668 /* The addend must be CONST_INT, or we would have dealt with it above. */
16669 HOST_WIDE_INT hi, lo;
16671 offset += INTVAL (XEXP (base, 1));
16672 base = XEXP (base, 0);
16674 /* Rework the address into a legal sequence of insns. */
16675 /* Valid range for lo is -4095 -> 4095 */
16676 lo = (offset >= 0
16677 ? (offset & 0xfff)
16678 : -((-offset) & 0xfff));
16680 /* Corner case: if lo is the max offset then we would be out of range
16681 once we have added the additional 1 below, so bump the msb into the
16682 pre-loading insn(s). */
16683 if (lo == 4095)
16684 lo &= 0x7ff;
16686 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16687 ^ (HOST_WIDE_INT) 0x80000000)
16688 - (HOST_WIDE_INT) 0x80000000);
16690 gcc_assert (hi + lo == offset);
16692 if (hi != 0)
16694 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16696 /* Get the base address; addsi3 knows how to handle constants
16697 that require more than one insn. */
16698 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16699 base = base_plus;
16700 offset = lo;
16704 /* Operands[2] may overlap operands[0] (though it won't overlap
16705 operands[1]); that's why we asked for a DImode reg -- so we can
16706 use the half that does not overlap. */
16707 if (REGNO (operands[2]) == REGNO (operands[0]))
16708 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16709 else
16710 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16712 emit_insn (gen_zero_extendqisi2 (scratch,
16713 gen_rtx_MEM (QImode,
16714 plus_constant (Pmode, base,
16715 offset))));
16716 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16717 gen_rtx_MEM (QImode,
16718 plus_constant (Pmode, base,
16719 offset + 1))));
16720 if (!BYTES_BIG_ENDIAN)
16721 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16722 gen_rtx_IOR (SImode,
16723 gen_rtx_ASHIFT
16724 (SImode,
16725 gen_rtx_SUBREG (SImode, operands[0], 0),
16726 GEN_INT (8)),
16727 scratch));
16728 else
16729 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16730 gen_rtx_IOR (SImode,
16731 gen_rtx_ASHIFT (SImode, scratch,
16732 GEN_INT (8)),
16733 gen_rtx_SUBREG (SImode, operands[0], 0)));
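/* Illustrative little-endian expansion of the reload above (register names
   arbitrary, offsets already legitimized):

     ldrb r_scratch, [base, #off]
     ldrb r_out, [base, #off + 1]
     orr  r_out, r_scratch, r_out, lsl #8

   For big-endian targets the roles of the two bytes are swapped, as in the
   else branch above.  */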
16736 /* Handle storing a half-word to memory during reload by synthesizing as two
16737 byte stores. Take care not to clobber the input values until after we
16738 have moved them somewhere safe. This code assumes that if the DImode
16739 scratch in operands[2] overlaps either the input value or output address
16740 in some way, then that value must die in this insn (we absolutely need
16741 two scratch registers for some corner cases). */
16742 void
16743 arm_reload_out_hi (rtx *operands)
16745 rtx ref = operands[0];
16746 rtx outval = operands[1];
16747 rtx base, scratch;
16748 HOST_WIDE_INT offset = 0;
16750 if (SUBREG_P (ref))
16752 offset = SUBREG_BYTE (ref);
16753 ref = SUBREG_REG (ref);
16756 if (REG_P (ref))
16758 /* We have a pseudo which has been spilt onto the stack; there
16759 are two cases here: the first where there is a simple
16760 stack-slot replacement and a second where the stack-slot is
16761 out of range, or is used as a subreg. */
16762 if (reg_equiv_mem (REGNO (ref)))
16764 ref = reg_equiv_mem (REGNO (ref));
16765 base = find_replacement (&XEXP (ref, 0));
16767 else
16768 /* The slot is out of range, or was dressed up in a SUBREG. */
16769 base = reg_equiv_address (REGNO (ref));
16771 /* PR 62254: If there is no equivalent memory location then just move
16772 the value as an SImode register move. This happens when the target
16773 architecture variant does not have an HImode register move. */
16774 if (base == NULL)
16776 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16778 if (REG_P (outval))
16780 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16781 gen_rtx_SUBREG (SImode, outval, 0)));
16783 else /* SUBREG_P (outval) */
16785 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16786 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16787 SUBREG_REG (outval)));
16788 else
16789 /* FIXME: Handle other cases ? */
16790 gcc_unreachable ();
16792 return;
16795 else
16796 base = find_replacement (&XEXP (ref, 0));
16798 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16800 /* Handle the case where the address is too complex to be offset by 1. */
16801 if (GET_CODE (base) == MINUS
16802 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16804 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16806 /* Be careful not to destroy OUTVAL. */
16807 if (reg_overlap_mentioned_p (base_plus, outval))
16809 /* Updating base_plus might destroy outval, see if we can
16810 swap the scratch and base_plus. */
16811 if (!reg_overlap_mentioned_p (scratch, outval))
16812 std::swap (scratch, base_plus);
16813 else
16815 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16817 /* Be conservative and copy OUTVAL into the scratch now,
16818 this should only be necessary if outval is a subreg
16819 of something larger than a word. */
16820 /* XXX Might this clobber base? I can't see how it can,
16821 since scratch is known to overlap with OUTVAL, and
16822 must be wider than a word. */
16823 emit_insn (gen_movhi (scratch_hi, outval));
16824 outval = scratch_hi;
16828 emit_set_insn (base_plus, base);
16829 base = base_plus;
16831 else if (GET_CODE (base) == PLUS)
16833 /* The addend must be CONST_INT, or we would have dealt with it above. */
16834 HOST_WIDE_INT hi, lo;
16836 offset += INTVAL (XEXP (base, 1));
16837 base = XEXP (base, 0);
16839 /* Rework the address into a legal sequence of insns. */
16840 /* Valid range for lo is -4095 -> 4095 */
16841 lo = (offset >= 0
16842 ? (offset & 0xfff)
16843 : -((-offset) & 0xfff));
16845 /* Corner case, if lo is the max offset then we would be out of range
16846 once we have added the additional 1 below, so bump the msb into the
16847 pre-loading insn(s). */
16848 if (lo == 4095)
16849 lo &= 0x7ff;
16851 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16852 ^ (HOST_WIDE_INT) 0x80000000)
16853 - (HOST_WIDE_INT) 0x80000000);
16855 gcc_assert (hi + lo == offset);
16857 if (hi != 0)
16859 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16861 /* Be careful not to destroy OUTVAL. */
16862 if (reg_overlap_mentioned_p (base_plus, outval))
16864 /* Updating base_plus might destroy outval, see if we
16865 can swap the scratch and base_plus. */
16866 if (!reg_overlap_mentioned_p (scratch, outval))
16867 std::swap (scratch, base_plus);
16868 else
16870 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16872 /* Be conservative and copy outval into scratch now,
16873 this should only be necessary if outval is a
16874 subreg of something larger than a word. */
16875 /* XXX Might this clobber base? I can't see how it
16876 can, since scratch is known to overlap with
16877 outval. */
16878 emit_insn (gen_movhi (scratch_hi, outval));
16879 outval = scratch_hi;
16883 /* Get the base address; addsi3 knows how to handle constants
16884 that require more than one insn. */
16885 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16886 base = base_plus;
16887 offset = lo;
16891 if (BYTES_BIG_ENDIAN)
16893 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16894 plus_constant (Pmode, base,
16895 offset + 1)),
16896 gen_lowpart (QImode, outval)));
16897 emit_insn (gen_lshrsi3 (scratch,
16898 gen_rtx_SUBREG (SImode, outval, 0),
16899 GEN_INT (8)));
16900 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16901 offset)),
16902 gen_lowpart (QImode, scratch)));
16904 else
16906 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16907 offset)),
16908 gen_lowpart (QImode, outval)));
16909 emit_insn (gen_lshrsi3 (scratch,
16910 gen_rtx_SUBREG (SImode, outval, 0),
16911 GEN_INT (8)));
16912 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16913 plus_constant (Pmode, base,
16914 offset + 1)),
16915 gen_lowpart (QImode, scratch)));
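/* Illustration (added comment, not in the original sources): storing
   outval == 0x1234 with the little-endian sequence above writes byte 0x34
   at [base + offset] and byte 0x12 (outval >> 8) at [base + offset + 1];
   the big-endian sequence writes the same two bytes in the opposite
   order. */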
16919 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16920 (padded to the size of a word) should be passed in a register. */
16922 static bool
16923 arm_must_pass_in_stack (const function_arg_info &arg)
16925 if (TARGET_AAPCS_BASED)
16926 return must_pass_in_stack_var_size (arg);
16927 else
16928 return must_pass_in_stack_var_size_or_pad (arg);
16932 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16933 byte of a stack argument has useful data. For legacy APCS ABIs we use
16934 the default. For AAPCS based ABIs small aggregate types are placed
16935 in the lowest memory address. */
16937 static pad_direction
16938 arm_function_arg_padding (machine_mode mode, const_tree type)
16940 if (!TARGET_AAPCS_BASED)
16941 return default_function_arg_padding (mode, type);
16943 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16944 return PAD_DOWNWARD;
16946 return PAD_UPWARD;
16950 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16951 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16952 register has useful data, and return the opposite if the most
16953 significant byte does. */
16955 bool
16956 arm_pad_reg_upward (machine_mode mode,
16957 tree type, int first ATTRIBUTE_UNUSED)
16959 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16961 /* For AAPCS, small aggregates, small fixed-point types,
16962 and small complex types are always padded upwards. */
16963 if (type)
16965 if ((AGGREGATE_TYPE_P (type)
16966 || TREE_CODE (type) == COMPLEX_TYPE
16967 || FIXED_POINT_TYPE_P (type))
16968 && int_size_in_bytes (type) <= 4)
16969 return true;
16971 else
16973 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16974 && GET_MODE_SIZE (mode) <= 4)
16975 return true;
16979 /* Otherwise, use default padding. */
16980 return !BYTES_BIG_ENDIAN;
16983 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16984 assuming that the address in the base register is word aligned. */
16985 bool
16986 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16988 HOST_WIDE_INT max_offset;
16990 /* Offset must be a multiple of 4 in Thumb mode. */
16991 if (TARGET_THUMB2 && ((offset & 3) != 0))
16992 return false;
16994 if (TARGET_THUMB2)
16995 max_offset = 1020;
16996 else if (TARGET_ARM)
16997 max_offset = 255;
16998 else
16999 return false;
17001 return ((offset <= max_offset) && (offset >= -max_offset));
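/* Examples (added for exposition, not in the original sources): an offset
   of 252 is accepted in both ARM and Thumb-2 state; 1020 is accepted only
   in Thumb-2 state, since it exceeds the ARM-state limit of 255; 1022 is
   rejected in both, being out of range for ARM and not a multiple of 4 for
   Thumb-2. */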
17004 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
17005 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
17006 Assumes that the address in the base register RN is word aligned. Pattern
17007 guarantees that both memory accesses use the same base register,
17008 the offsets are constants within the range, and the gap between the offsets is 4.
17009 If reload is complete then check that registers are legal. WBACK indicates whether
17010 address is updated. LOAD indicates whether memory access is load or store. */
17011 bool
17012 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
17013 bool wback, bool load)
17015 unsigned int t, t2, n;
17017 if (!reload_completed)
17018 return true;
17020 if (!offset_ok_for_ldrd_strd (offset))
17021 return false;
17023 t = REGNO (rt);
17024 t2 = REGNO (rt2);
17025 n = REGNO (rn);
17027 if ((TARGET_THUMB2)
17028 && ((wback && (n == t || n == t2))
17029 || (t == SP_REGNUM)
17030 || (t == PC_REGNUM)
17031 || (t2 == SP_REGNUM)
17032 || (t2 == PC_REGNUM)
17033 || (!load && (n == PC_REGNUM))
17034 || (load && (t == t2))
17035 /* Triggers Cortex-M3 LDRD errata. */
17036 || (!wback && load && fix_cm3_ldrd && (n == t))))
17037 return false;
17039 if ((TARGET_ARM)
17040 && ((wback && (n == t || n == t2))
17041 || (t2 == PC_REGNUM)
17042 || (t % 2 != 0) /* First destination register is not even. */
17043 || (t2 != t + 1)
17044 /* PC can be used as base register (for offset addressing only),
17046 but it is deprecated. */
17046 || (n == PC_REGNUM)))
17047 return false;
17049 return true;
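/* Example (added comment, not part of the original sources): after reload,
   ARM state accepts ldrd r4, r5, [r6] (even first register, consecutive
   register pair, base distinct from PC) but rejects ldrd r5, r6, [r7]
   because the first destination register is odd; Thumb-2 state has no
   even/odd restriction but rejects SP or PC as a destination. */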
17052 /* Return true if a 64-bit access with alignment ALIGN and with a
17053 constant offset OFFSET from the base pointer is permitted on this
17054 architecture. */
17055 static bool
17056 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
17058 return (unaligned_access
17059 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
17060 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
17063 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
17064 operand MEM's address contains an immediate offset from the base
17065 register and has no side effects, in which case it sets BASE,
17066 OFFSET and ALIGN accordingly. */
17067 static bool
17068 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
17070 rtx addr;
17072 gcc_assert (base != NULL && offset != NULL);
17074 /* TODO: Handle more general memory operand patterns, such as
17075 PRE_DEC and PRE_INC. */
17077 if (side_effects_p (mem))
17078 return false;
17080 /* Can't deal with subregs. */
17081 if (SUBREG_P (mem))
17082 return false;
17084 gcc_assert (MEM_P (mem));
17086 *offset = const0_rtx;
17087 *align = MEM_ALIGN (mem);
17089 addr = XEXP (mem, 0);
17091 /* If addr isn't valid for DImode, then we can't handle it. */
17092 if (!arm_legitimate_address_p (DImode, addr,
17093 reload_in_progress || reload_completed))
17094 return false;
17096 if (REG_P (addr))
17098 *base = addr;
17099 return true;
17101 else if (GET_CODE (addr) == PLUS)
17103 *base = XEXP (addr, 0);
17104 *offset = XEXP (addr, 1);
17105 return (REG_P (*base) && CONST_INT_P (*offset));
17108 return false;
17111 /* Called from a peephole2 to replace two word-size accesses with a
17112 single LDRD/STRD instruction. Returns true iff we can generate a
17113 new instruction sequence. That is, both accesses use the same base
17114 register and the gap between constant offsets is 4. This function
17115 may reorder its operands to match ldrd/strd RTL templates.
17116 OPERANDS are the operands found by the peephole matcher;
17117 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17118 corresponding memory operands. LOAD indicates whether the access
17119 is load or store. CONST_STORE indicates a store of constant
17120 integer values held in OPERANDS[4,5] and assumes that the pattern
17121 is 4 insns long, for the purpose of checking dead registers.
17122 COMMUTE indicates that register operands may be reordered. */
17123 bool
17124 gen_operands_ldrd_strd (rtx *operands, bool load,
17125 bool const_store, bool commute)
17127 int nops = 2;
17128 HOST_WIDE_INT offsets[2], offset, align[2];
17129 rtx base = NULL_RTX;
17130 rtx cur_base, cur_offset, tmp;
17131 int i, gap;
17132 HARD_REG_SET regset;
17134 gcc_assert (!const_store || !load);
17135 /* Check that the memory references are immediate offsets from the
17136 same base register. Extract the base register, the destination
17137 registers, and the corresponding memory offsets. */
17138 for (i = 0; i < nops; i++)
17140 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17141 &align[i]))
17142 return false;
17144 if (i == 0)
17145 base = cur_base;
17146 else if (REGNO (base) != REGNO (cur_base))
17147 return false;
17149 offsets[i] = INTVAL (cur_offset);
17150 if (GET_CODE (operands[i]) == SUBREG)
17152 tmp = SUBREG_REG (operands[i]);
17153 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17154 operands[i] = tmp;
17158 /* Make sure there is no dependency between the individual loads. */
17159 if (load && REGNO (operands[0]) == REGNO (base))
17160 return false; /* RAW */
17162 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17163 return false; /* WAW */
17165 /* If the same input register is used in both stores
17166 when storing different constants, try to find a free register.
17167 For example, the code
17168 mov r0, 0
17169 str r0, [r2]
17170 mov r0, 1
17171 str r0, [r2, #4]
17172 can be transformed into
17173 mov r1, 0
17174 mov r0, 1
17175 strd r1, r0, [r2]
17176 in Thumb mode assuming that r1 is free.
17177 For ARM mode do the same but only if the starting register
17178 can be made to be even. */
17179 if (const_store
17180 && REGNO (operands[0]) == REGNO (operands[1])
17181 && INTVAL (operands[4]) != INTVAL (operands[5]))
17183 if (TARGET_THUMB2)
17185 CLEAR_HARD_REG_SET (regset);
17186 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17187 if (tmp == NULL_RTX)
17188 return false;
17190 /* Use the new register in the first load to ensure that
17191 if the original input register is not dead after peephole,
17192 then it will have the correct constant value. */
17193 operands[0] = tmp;
17195 else if (TARGET_ARM)
17197 int regno = REGNO (operands[0]);
17198 if (!peep2_reg_dead_p (4, operands[0]))
17200 /* When the input register is even and is not dead after the
17201 pattern, it has to hold the second constant but we cannot
17202 form a legal STRD in ARM mode with this register as the second
17203 register. */
17204 if (regno % 2 == 0)
17205 return false;
17207 /* Is regno-1 free? */
17208 SET_HARD_REG_SET (regset);
17209 CLEAR_HARD_REG_BIT(regset, regno - 1);
17210 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17211 if (tmp == NULL_RTX)
17212 return false;
17214 operands[0] = tmp;
17216 else
17218 /* Find a DImode register. */
17219 CLEAR_HARD_REG_SET (regset);
17220 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17221 if (tmp != NULL_RTX)
17223 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17224 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17226 else
17228 /* Can we use the input register to form a DI register? */
17229 SET_HARD_REG_SET (regset);
17230 CLEAR_HARD_REG_BIT(regset,
17231 regno % 2 == 0 ? regno + 1 : regno - 1);
17232 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17233 if (tmp == NULL_RTX)
17234 return false;
17235 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17239 gcc_assert (operands[0] != NULL_RTX);
17240 gcc_assert (operands[1] != NULL_RTX);
17241 gcc_assert (REGNO (operands[0]) % 2 == 0);
17242 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17246 /* Make sure the instructions are ordered with lower memory access first. */
17247 if (offsets[0] > offsets[1])
17249 gap = offsets[0] - offsets[1];
17250 offset = offsets[1];
17252 /* Swap the instructions such that lower memory is accessed first. */
17253 std::swap (operands[0], operands[1]);
17254 std::swap (operands[2], operands[3]);
17255 std::swap (align[0], align[1]);
17256 if (const_store)
17257 std::swap (operands[4], operands[5]);
17259 else
17261 gap = offsets[1] - offsets[0];
17262 offset = offsets[0];
17265 /* Make sure accesses are to consecutive memory locations. */
17266 if (gap != GET_MODE_SIZE (SImode))
17267 return false;
17269 if (!align_ok_ldrd_strd (align[0], offset))
17270 return false;
17272 /* Make sure we generate legal instructions. */
17273 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17274 false, load))
17275 return true;
17277 /* In Thumb state, where registers are almost unconstrained, there
17278 is little hope to fix it. */
17279 if (TARGET_THUMB2)
17280 return false;
17282 if (load && commute)
17284 /* Try reordering registers. */
17285 std::swap (operands[0], operands[1]);
17286 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17287 false, load))
17288 return true;
17291 if (const_store)
17293 /* If input registers are dead after this pattern, they can be
17294 reordered or replaced by other registers that are free in the
17295 current pattern. */
17296 if (!peep2_reg_dead_p (4, operands[0])
17297 || !peep2_reg_dead_p (4, operands[1]))
17298 return false;
17300 /* Try to reorder the input registers. */
17301 /* For example, the code
17302 mov r0, 0
17303 mov r1, 1
17304 str r1, [r2]
17305 str r0, [r2, #4]
17306 can be transformed into
17307 mov r1, 0
17308 mov r0, 1
17309 strd r0, [r2]
17311 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17312 false, false))
17314 std::swap (operands[0], operands[1]);
17315 return true;
17318 /* Try to find a free DI register. */
17319 CLEAR_HARD_REG_SET (regset);
17320 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17321 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17322 while (true)
17324 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17325 if (tmp == NULL_RTX)
17326 return false;
17328 /* DREG must be an even-numbered register in DImode.
17329 Split it into SI registers. */
17330 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17331 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17332 gcc_assert (operands[0] != NULL_RTX);
17333 gcc_assert (operands[1] != NULL_RTX);
17334 gcc_assert (REGNO (operands[0]) % 2 == 0);
17335 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17337 return (operands_ok_ldrd_strd (operands[0], operands[1],
17338 base, offset,
17339 false, load));
17343 return false;
17347 /* Return true if parallel execution of the two word-size accesses provided
17348 could be satisfied with a single LDRD/STRD instruction. Two word-size
17349 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17350 register operands and OPERANDS[2,3] are the corresponding memory operands.
17352 bool
17353 valid_operands_ldrd_strd (rtx *operands, bool load)
17355 int nops = 2;
17356 HOST_WIDE_INT offsets[2], offset, align[2];
17357 rtx base = NULL_RTX;
17358 rtx cur_base, cur_offset;
17359 int i, gap;
17361 /* Check that the memory references are immediate offsets from the
17362 same base register. Extract the base register, the destination
17363 registers, and the corresponding memory offsets. */
17364 for (i = 0; i < nops; i++)
17366 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17367 &align[i]))
17368 return false;
17370 if (i == 0)
17371 base = cur_base;
17372 else if (REGNO (base) != REGNO (cur_base))
17373 return false;
17375 offsets[i] = INTVAL (cur_offset);
17376 if (GET_CODE (operands[i]) == SUBREG)
17377 return false;
17380 if (offsets[0] > offsets[1])
17381 return false;
17383 gap = offsets[1] - offsets[0];
17384 offset = offsets[0];
17386 /* Make sure accesses are to consecutive memory locations. */
17387 if (gap != GET_MODE_SIZE (SImode))
17388 return false;
17390 if (!align_ok_ldrd_strd (align[0], offset))
17391 return false;
17393 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17394 false, load);
17398 /* Print a symbolic form of X to the debug file, F. */
17399 static void
17400 arm_print_value (FILE *f, rtx x)
17402 switch (GET_CODE (x))
17404 case CONST_INT:
17405 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17406 return;
17408 case CONST_DOUBLE:
17410 char fpstr[20];
17411 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17412 sizeof (fpstr), 0, 1);
17413 fputs (fpstr, f);
17415 return;
17417 case CONST_VECTOR:
17419 int i;
17421 fprintf (f, "<");
17422 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17424 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17425 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17426 fputc (',', f);
17428 fprintf (f, ">");
17430 return;
17432 case CONST_STRING:
17433 fprintf (f, "\"%s\"", XSTR (x, 0));
17434 return;
17436 case SYMBOL_REF:
17437 fprintf (f, "`%s'", XSTR (x, 0));
17438 return;
17440 case LABEL_REF:
17441 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17442 return;
17444 case CONST:
17445 arm_print_value (f, XEXP (x, 0));
17446 return;
17448 case PLUS:
17449 arm_print_value (f, XEXP (x, 0));
17450 fprintf (f, "+");
17451 arm_print_value (f, XEXP (x, 1));
17452 return;
17454 case PC:
17455 fprintf (f, "pc");
17456 return;
17458 default:
17459 fprintf (f, "????");
17460 return;
17464 /* Routines for manipulation of the constant pool. */
17466 /* Arm instructions cannot load a large constant directly into a
17467 register; they have to come from a pc relative load. The constant
17468 must therefore be placed in the addressable range of the pc
17469 relative load. Depending on the precise pc relative load
17470 instruction the range is somewhere between 256 bytes and 4k. This
17471 means that we often have to dump a constant inside a function, and
17472 generate code to branch around it.
17474 It is important to minimize this, since the branches will slow
17475 things down and make the code larger.
17477 Normally we can hide the table after an existing unconditional
17478 branch so that there is no interruption of the flow, but in the
17479 worst case the code looks like this:
17481 ldr rn, L1
17483 b L2
17484 align
17485 L1: .long value
17489 ldr rn, L3
17491 b L4
17492 align
17493 L3: .long value
17497 We fix this by performing a scan after scheduling, which notices
17498 which instructions need to have their operands fetched from the
17499 constant table and builds the table.
17501 The algorithm starts by building a table of all the constants that
17502 need fixing up and all the natural barriers in the function (places
17503 where a constant table can be dropped without breaking the flow).
17504 For each fixup we note how far the pc-relative replacement will be
17505 able to reach and the offset of the instruction into the function.
17507 Having built the table we then group the fixes together to form
17508 tables that are as large as possible (subject to addressing
17509 constraints) and emit each table of constants after the last
17510 barrier that is within range of all the instructions in the group.
17511 If a group does not contain a barrier, then we forcibly create one
17512 by inserting a jump instruction into the flow. Once the table has
17513 been inserted, the insns are then modified to reference the
17514 relevant entry in the pool.
17516 Possible enhancements to the algorithm (not implemented) are:
17518 1) For some processors and object formats, there may be benefit in
17519 aligning the pools to the start of cache lines; this alignment
17520 would need to be taken into account when calculating addressability
17521 of a pool. */
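/* Concrete illustration (added for exposition, not in the original
   sources): a fix at address A whose pc-relative load can reach F bytes
   forward constrains its pool entry to lie no later than roughly
   A + F - minipool_pad; add_minipool_forward_ref below computes exactly
   this bound before grouping fixes and choosing a barrier that every
   member of the group can still reach. */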
17523 /* These typedefs are located at the start of this file, so that
17524 they can be used in the prototypes there. This comment is to
17525 remind readers of that fact so that the following structures
17526 can be understood more easily.
17528 typedef struct minipool_node Mnode;
17529 typedef struct minipool_fixup Mfix; */
17531 struct minipool_node
17533 /* Doubly linked chain of entries. */
17534 Mnode * next;
17535 Mnode * prev;
17536 /* The maximum offset into the code at which this entry can be placed. While
17537 pushing fixes for forward references, all entries are sorted in order
17538 of increasing max_address. */
17539 HOST_WIDE_INT max_address;
17540 /* Similarly for an entry inserted for a backwards ref. */
17541 HOST_WIDE_INT min_address;
17542 /* The number of fixes referencing this entry. This can become zero
17543 if we "unpush" an entry. In this case we ignore the entry when we
17544 come to emit the code. */
17545 int refcount;
17546 /* The offset from the start of the minipool. */
17547 HOST_WIDE_INT offset;
17548 /* The value in table. */
17549 rtx value;
17550 /* The mode of value. */
17551 machine_mode mode;
17552 /* The size of the value. With iWMMXt enabled
17553 sizes > 4 also imply an alignment of 8 bytes. */
17554 int fix_size;
17557 struct minipool_fixup
17559 Mfix * next;
17560 rtx_insn * insn;
17561 HOST_WIDE_INT address;
17562 rtx * loc;
17563 machine_mode mode;
17564 int fix_size;
17565 rtx value;
17566 Mnode * minipool;
17567 HOST_WIDE_INT forwards;
17568 HOST_WIDE_INT backwards;
17571 /* Fixes less than a word need padding out to a word boundary. */
17572 #define MINIPOOL_FIX_SIZE(mode) \
17573 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17575 static Mnode * minipool_vector_head;
17576 static Mnode * minipool_vector_tail;
17577 static rtx_code_label *minipool_vector_label;
17578 static int minipool_pad;
17580 /* The linked list of all minipool fixes required for this function. */
17581 Mfix * minipool_fix_head;
17582 Mfix * minipool_fix_tail;
17583 /* The fix entry for the current minipool, once it has been placed. */
17584 Mfix * minipool_barrier;
17586 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17587 #define JUMP_TABLES_IN_TEXT_SECTION 0
17588 #endif
17590 static HOST_WIDE_INT
17591 get_jump_table_size (rtx_jump_table_data *insn)
17593 /* ADDR_VECs only take room if read-only data goes into the text
17594 section. */
17595 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17597 rtx body = PATTERN (insn);
17598 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17599 HOST_WIDE_INT size;
17600 HOST_WIDE_INT modesize;
17602 modesize = GET_MODE_SIZE (GET_MODE (body));
17603 size = modesize * XVECLEN (body, elt);
17604 switch (modesize)
17606 case 1:
17607 /* Round up size of TBB table to a halfword boundary. */
17608 size = (size + 1) & ~HOST_WIDE_INT_1;
17609 break;
17610 case 2:
17611 /* No padding necessary for TBH. */
17612 break;
17613 case 4:
17614 /* Add two bytes for alignment on Thumb. */
17615 if (TARGET_THUMB)
17616 size += 2;
17617 break;
17618 default:
17619 gcc_unreachable ();
17621 return size;
17624 return 0;
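/* Worked example (added comment, not in the original sources): a QImode
   ADDR_DIFF_VEC with 5 entries (a TBB table) occupies 5 bytes, rounded up
   to 6 for the halfword boundary; the same table in HImode (TBH) needs
   exactly 10 bytes, and an SImode table of 5 entries needs 20 bytes plus
   2 bytes of alignment padding on Thumb. */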
17627 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17628 function descriptor) into a register and the GOT address into the
17629 FDPIC register, returning an rtx for the register holding the
17630 function address. */
17633 arm_load_function_descriptor (rtx funcdesc)
17635 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17636 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17637 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17638 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17640 emit_move_insn (fnaddr_reg, fnaddr);
17642 /* The ABI requires the entry point address to be loaded first, but
17643 since we cannot support lazy binding for lack of atomic load of
17644 two 32-bit values, we do not need to bother to prevent the
17645 previous load from being moved after that of the GOT address. */
17646 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17648 return fnaddr_reg;
17651 /* Return the maximum amount of padding that will be inserted before
17652 label LABEL. */
17653 static HOST_WIDE_INT
17654 get_label_padding (rtx label)
17656 HOST_WIDE_INT align, min_insn_size;
17658 align = 1 << label_to_alignment (label).levels[0].log;
17659 min_insn_size = TARGET_THUMB ? 2 : 4;
17660 return align > min_insn_size ? align - min_insn_size : 0;
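/* Example (added comment, not in the original sources): a label aligned to
   an 8-byte boundary can be preceded by up to 8 - 2 = 6 bytes of padding in
   Thumb state, or 8 - 4 = 4 bytes in ARM state; labels whose alignment does
   not exceed the minimum insn size need no allowance at all. */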
17663 /* Move a minipool fix MP from its current location to before MAX_MP.
17664 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17665 constraints may need updating. */
17666 static Mnode *
17667 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17668 HOST_WIDE_INT max_address)
17670 /* The code below assumes these are different. */
17671 gcc_assert (mp != max_mp);
17673 if (max_mp == NULL)
17675 if (max_address < mp->max_address)
17676 mp->max_address = max_address;
17678 else
17680 if (max_address > max_mp->max_address - mp->fix_size)
17681 mp->max_address = max_mp->max_address - mp->fix_size;
17682 else
17683 mp->max_address = max_address;
17685 /* Unlink MP from its current position. Since max_mp is non-null,
17686 mp->prev must be non-null. */
17687 mp->prev->next = mp->next;
17688 if (mp->next != NULL)
17689 mp->next->prev = mp->prev;
17690 else
17691 minipool_vector_tail = mp->prev;
17693 /* Re-insert it before MAX_MP. */
17694 mp->next = max_mp;
17695 mp->prev = max_mp->prev;
17696 max_mp->prev = mp;
17698 if (mp->prev != NULL)
17699 mp->prev->next = mp;
17700 else
17701 minipool_vector_head = mp;
17704 /* Save the new entry. */
17705 max_mp = mp;
17707 /* Scan over the preceding entries and adjust their addresses as
17708 required. */
17709 while (mp->prev != NULL
17710 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17712 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17713 mp = mp->prev;
17716 return max_mp;
17719 /* Add a constant to the minipool for a forward reference. Returns the
17720 node added or NULL if the constant will not fit in this pool. */
17721 static Mnode *
17722 add_minipool_forward_ref (Mfix *fix)
17724 /* If set, max_mp is the first pool_entry that has a lower
17725 constraint than the one we are trying to add. */
17726 Mnode * max_mp = NULL;
17727 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17728 Mnode * mp;
17730 /* If the minipool starts before the end of FIX->INSN then this FIX
17731 cannot be placed into the current pool. Furthermore, adding the
17732 new constant pool entry may cause the pool to start FIX_SIZE bytes
17733 earlier. */
17734 if (minipool_vector_head &&
17735 (fix->address + get_attr_length (fix->insn)
17736 >= minipool_vector_head->max_address - fix->fix_size))
17737 return NULL;
17739 /* Scan the pool to see if a constant with the same value has
17740 already been added. While we are doing this, also note the
17741 location where we must insert the constant if it doesn't already
17742 exist. */
17743 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17745 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17746 && fix->mode == mp->mode
17747 && (!LABEL_P (fix->value)
17748 || (CODE_LABEL_NUMBER (fix->value)
17749 == CODE_LABEL_NUMBER (mp->value)))
17750 && rtx_equal_p (fix->value, mp->value))
17752 /* More than one fix references this entry. */
17753 mp->refcount++;
17754 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17757 /* Note the insertion point if necessary. */
17758 if (max_mp == NULL
17759 && mp->max_address > max_address)
17760 max_mp = mp;
17762 /* If we are inserting an 8-byte aligned quantity and
17763 we have not already found an insertion point, then
17764 make sure that all such 8-byte aligned quantities are
17765 placed at the start of the pool. */
17766 if (ARM_DOUBLEWORD_ALIGN
17767 && max_mp == NULL
17768 && fix->fix_size >= 8
17769 && mp->fix_size < 8)
17771 max_mp = mp;
17772 max_address = mp->max_address;
17776 /* The value is not currently in the minipool, so we need to create
17777 a new entry for it. If MAX_MP is NULL, the entry will be put on
17778 the end of the list since the placement is less constrained than
17779 any existing entry. Otherwise, we insert the new fix before
17780 MAX_MP and, if necessary, adjust the constraints on the other
17781 entries. */
17782 mp = XNEW (Mnode);
17783 mp->fix_size = fix->fix_size;
17784 mp->mode = fix->mode;
17785 mp->value = fix->value;
17786 mp->refcount = 1;
17787 /* Not yet required for a backwards ref. */
17788 mp->min_address = -65536;
17790 if (max_mp == NULL)
17792 mp->max_address = max_address;
17793 mp->next = NULL;
17794 mp->prev = minipool_vector_tail;
17796 if (mp->prev == NULL)
17798 minipool_vector_head = mp;
17799 minipool_vector_label = gen_label_rtx ();
17801 else
17802 mp->prev->next = mp;
17804 minipool_vector_tail = mp;
17806 else
17808 if (max_address > max_mp->max_address - mp->fix_size)
17809 mp->max_address = max_mp->max_address - mp->fix_size;
17810 else
17811 mp->max_address = max_address;
17813 mp->next = max_mp;
17814 mp->prev = max_mp->prev;
17815 max_mp->prev = mp;
17816 if (mp->prev != NULL)
17817 mp->prev->next = mp;
17818 else
17819 minipool_vector_head = mp;
17822 /* Save the new entry. */
17823 max_mp = mp;
17825 /* Scan over the preceding entries and adjust their addresses as
17826 required. */
17827 while (mp->prev != NULL
17828 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17830 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17831 mp = mp->prev;
17834 return max_mp;
17837 static Mnode *
17838 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17839 HOST_WIDE_INT min_address)
17841 HOST_WIDE_INT offset;
17843 /* The code below assumes these are different. */
17844 gcc_assert (mp != min_mp);
17846 if (min_mp == NULL)
17848 if (min_address > mp->min_address)
17849 mp->min_address = min_address;
17851 else
17853 /* We will adjust this below if it is too loose. */
17854 mp->min_address = min_address;
17856 /* Unlink MP from its current position. Since min_mp is non-null,
17857 mp->next must be non-null. */
17858 mp->next->prev = mp->prev;
17859 if (mp->prev != NULL)
17860 mp->prev->next = mp->next;
17861 else
17862 minipool_vector_head = mp->next;
17864 /* Reinsert it after MIN_MP. */
17865 mp->prev = min_mp;
17866 mp->next = min_mp->next;
17867 min_mp->next = mp;
17868 if (mp->next != NULL)
17869 mp->next->prev = mp;
17870 else
17871 minipool_vector_tail = mp;
17874 min_mp = mp;
17876 offset = 0;
17877 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17879 mp->offset = offset;
17880 if (mp->refcount > 0)
17881 offset += mp->fix_size;
17883 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17884 mp->next->min_address = mp->min_address + mp->fix_size;
17887 return min_mp;
17890 /* Add a constant to the minipool for a backward reference. Returns the
17891 node added or NULL if the constant will not fit in this pool.
17893 Note that the code for insertion for a backwards reference can be
17894 somewhat confusing because the calculated offsets for each fix do
17895 not take into account the size of the pool (which is still under
17896 construction). */
17897 static Mnode *
17898 add_minipool_backward_ref (Mfix *fix)
17900 /* If set, min_mp is the last pool_entry that has a lower constraint
17901 than the one we are trying to add. */
17902 Mnode *min_mp = NULL;
17903 /* This can be negative, since it is only a constraint. */
17904 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17905 Mnode *mp;
17907 /* If we can't reach the current pool from this insn, or if we can't
17908 insert this entry at the end of the pool without pushing other
17909 fixes out of range, then we don't try. This ensures that we
17910 can't fail later on. */
17911 if (min_address >= minipool_barrier->address
17912 || (minipool_vector_tail->min_address + fix->fix_size
17913 >= minipool_barrier->address))
17914 return NULL;
17916 /* Scan the pool to see if a constant with the same value has
17917 already been added. While we are doing this, also note the
17918 location where we must insert the constant if it doesn't already
17919 exist. */
17920 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17922 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17923 && fix->mode == mp->mode
17924 && (!LABEL_P (fix->value)
17925 || (CODE_LABEL_NUMBER (fix->value)
17926 == CODE_LABEL_NUMBER (mp->value)))
17927 && rtx_equal_p (fix->value, mp->value)
17928 /* Check that there is enough slack to move this entry to the
17929 end of the table (this is conservative). */
17930 && (mp->max_address
17931 > (minipool_barrier->address
17932 + minipool_vector_tail->offset
17933 + minipool_vector_tail->fix_size)))
17935 mp->refcount++;
17936 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17939 if (min_mp != NULL)
17940 mp->min_address += fix->fix_size;
17941 else
17943 /* Note the insertion point if necessary. */
17944 if (mp->min_address < min_address)
17946 /* For now, we do not allow the insertion of 8-byte alignment
17947 requiring nodes anywhere but at the start of the pool. */
17948 if (ARM_DOUBLEWORD_ALIGN
17949 && fix->fix_size >= 8 && mp->fix_size < 8)
17950 return NULL;
17951 else
17952 min_mp = mp;
17954 else if (mp->max_address
17955 < minipool_barrier->address + mp->offset + fix->fix_size)
17957 /* Inserting before this entry would push the fix beyond
17958 its maximum address (which can happen if we have
17959 re-located a forwards fix); force the new fix to come
17960 after it. */
17961 if (ARM_DOUBLEWORD_ALIGN
17962 && fix->fix_size >= 8 && mp->fix_size < 8)
17963 return NULL;
17964 else
17966 min_mp = mp;
17967 min_address = mp->min_address + fix->fix_size;
17970 /* Do not insert a non-8-byte aligned quantity before 8-byte
17971 aligned quantities. */
17972 else if (ARM_DOUBLEWORD_ALIGN
17973 && fix->fix_size < 8
17974 && mp->fix_size >= 8)
17976 min_mp = mp;
17977 min_address = mp->min_address + fix->fix_size;
17982 /* We need to create a new entry. */
17983 mp = XNEW (Mnode);
17984 mp->fix_size = fix->fix_size;
17985 mp->mode = fix->mode;
17986 mp->value = fix->value;
17987 mp->refcount = 1;
17988 mp->max_address = minipool_barrier->address + 65536;
17990 mp->min_address = min_address;
17992 if (min_mp == NULL)
17994 mp->prev = NULL;
17995 mp->next = minipool_vector_head;
17997 if (mp->next == NULL)
17999 minipool_vector_tail = mp;
18000 minipool_vector_label = gen_label_rtx ();
18002 else
18003 mp->next->prev = mp;
18005 minipool_vector_head = mp;
18007 else
18009 mp->next = min_mp->next;
18010 mp->prev = min_mp;
18011 min_mp->next = mp;
18013 if (mp->next != NULL)
18014 mp->next->prev = mp;
18015 else
18016 minipool_vector_tail = mp;
18019 /* Save the new entry. */
18020 min_mp = mp;
18022 if (mp->prev)
18023 mp = mp->prev;
18024 else
18025 mp->offset = 0;
18027 /* Scan over the following entries and adjust their offsets. */
18028 while (mp->next != NULL)
18030 if (mp->next->min_address < mp->min_address + mp->fix_size)
18031 mp->next->min_address = mp->min_address + mp->fix_size;
18033 if (mp->refcount)
18034 mp->next->offset = mp->offset + mp->fix_size;
18035 else
18036 mp->next->offset = mp->offset;
18038 mp = mp->next;
18041 return min_mp;
18044 static void
18045 assign_minipool_offsets (Mfix *barrier)
18047 HOST_WIDE_INT offset = 0;
18048 Mnode *mp;
18050 minipool_barrier = barrier;
18052 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18054 mp->offset = offset;
18056 if (mp->refcount > 0)
18057 offset += mp->fix_size;
18061 /* Output the literal table */
18062 static void
18063 dump_minipool (rtx_insn *scan)
18065 Mnode * mp;
18066 Mnode * nmp;
18067 int align64 = 0;
18069 if (ARM_DOUBLEWORD_ALIGN)
18070 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18071 if (mp->refcount > 0 && mp->fix_size >= 8)
18073 align64 = 1;
18074 break;
18077 if (dump_file)
18078 fprintf (dump_file,
18079 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18080 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
18082 scan = emit_label_after (gen_label_rtx (), scan);
18083 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
18084 scan = emit_label_after (minipool_vector_label, scan);
18086 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
18088 if (mp->refcount > 0)
18090 if (dump_file)
18092 fprintf (dump_file,
18093 ";; Offset %u, min %ld, max %ld ",
18094 (unsigned) mp->offset, (unsigned long) mp->min_address,
18095 (unsigned long) mp->max_address);
18096 arm_print_value (dump_file, mp->value);
18097 fputc ('\n', dump_file);
18100 rtx val = copy_rtx (mp->value);
18102 switch (GET_MODE_SIZE (mp->mode))
18104 #ifdef HAVE_consttable_1
18105 case 1:
18106 scan = emit_insn_after (gen_consttable_1 (val), scan);
18107 break;
18109 #endif
18110 #ifdef HAVE_consttable_2
18111 case 2:
18112 scan = emit_insn_after (gen_consttable_2 (val), scan);
18113 break;
18115 #endif
18116 #ifdef HAVE_consttable_4
18117 case 4:
18118 scan = emit_insn_after (gen_consttable_4 (val), scan);
18119 break;
18121 #endif
18122 #ifdef HAVE_consttable_8
18123 case 8:
18124 scan = emit_insn_after (gen_consttable_8 (val), scan);
18125 break;
18127 #endif
18128 #ifdef HAVE_consttable_16
18129 case 16:
18130 scan = emit_insn_after (gen_consttable_16 (val), scan);
18131 break;
18133 #endif
18134 default:
18135 gcc_unreachable ();
18139 nmp = mp->next;
18140 free (mp);
18143 minipool_vector_head = minipool_vector_tail = NULL;
18144 scan = emit_insn_after (gen_consttable_end (), scan);
18145 scan = emit_barrier_after (scan);
18148 /* Return the cost of forcibly inserting a barrier after INSN. */
18149 static int
18150 arm_barrier_cost (rtx_insn *insn)
18152 /* Basing the location of the pool on the loop depth is preferable,
18153 but at the moment, the basic block information seems to be
18154 corrupt by this stage of the compilation. */
18155 int base_cost = 50;
18156 rtx_insn *next = next_nonnote_insn (insn);
18158 if (next != NULL && LABEL_P (next))
18159 base_cost -= 20;
18161 switch (GET_CODE (insn))
18163 case CODE_LABEL:
18164 /* It will always be better to place the table before the label, rather
18165 than after it. */
18166 return 50;
18168 case INSN:
18169 case CALL_INSN:
18170 return base_cost;
18172 case JUMP_INSN:
18173 return base_cost - 10;
18175 default:
18176 return base_cost + 10;
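/* Illustration (added comment, not in the original sources): a JUMP_INSN
   that is immediately followed by a label scores 50 - 20 - 10 = 20, making
   it a preferred place to drop the pool, whereas an ordinary INSN with no
   following label keeps the base cost of 50. */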
18180 /* Find the best place in the insn stream in the range
18181 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18182 Create the barrier by inserting a jump and add a new fix entry for
18183 it. */
18184 static Mfix *
18185 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18187 HOST_WIDE_INT count = 0;
18188 rtx_barrier *barrier;
18189 rtx_insn *from = fix->insn;
18190 /* The instruction after which we will insert the jump. */
18191 rtx_insn *selected = NULL;
18192 int selected_cost;
18193 /* The address at which the jump instruction will be placed. */
18194 HOST_WIDE_INT selected_address;
18195 Mfix * new_fix;
18196 HOST_WIDE_INT max_count = max_address - fix->address;
18197 rtx_code_label *label = gen_label_rtx ();
18199 selected_cost = arm_barrier_cost (from);
18200 selected_address = fix->address;
18202 while (from && count < max_count)
18204 rtx_jump_table_data *tmp;
18205 int new_cost;
18207 /* This code shouldn't have been called if there was a natural barrier
18208 within range. */
18209 gcc_assert (!BARRIER_P (from));
18211 /* Count the length of this insn. This must stay in sync with the
18212 code that pushes minipool fixes. */
18213 if (LABEL_P (from))
18214 count += get_label_padding (from);
18215 else
18216 count += get_attr_length (from);
18218 /* If there is a jump table, add its length. */
18219 if (tablejump_p (from, NULL, &tmp))
18221 count += get_jump_table_size (tmp);
18223 /* Jump tables aren't in a basic block, so base the cost on
18224 the dispatch insn. If we select this location, we will
18225 still put the pool after the table. */
18226 new_cost = arm_barrier_cost (from);
18228 if (count < max_count
18229 && (!selected || new_cost <= selected_cost))
18231 selected = tmp;
18232 selected_cost = new_cost;
18233 selected_address = fix->address + count;
18236 /* Continue after the dispatch table. */
18237 from = NEXT_INSN (tmp);
18238 continue;
18241 new_cost = arm_barrier_cost (from);
18243 if (count < max_count
18244 && (!selected || new_cost <= selected_cost))
18246 selected = from;
18247 selected_cost = new_cost;
18248 selected_address = fix->address + count;
18251 from = NEXT_INSN (from);
18254 /* Make sure that we found a place to insert the jump. */
18255 gcc_assert (selected);
18257 /* Create a new JUMP_INSN that branches around a barrier. */
18258 from = emit_jump_insn_after (gen_jump (label), selected);
18259 JUMP_LABEL (from) = label;
18260 barrier = emit_barrier_after (from);
18261 emit_label_after (label, barrier);
18263 /* Create a minipool barrier entry for the new barrier. */
18264 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18265 new_fix->insn = barrier;
18266 new_fix->address = selected_address;
18267 new_fix->next = fix->next;
18268 fix->next = new_fix;
18270 return new_fix;
18273 /* Record that there is a natural barrier in the insn stream at
18274 ADDRESS. */
18275 static void
18276 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18278 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18280 fix->insn = insn;
18281 fix->address = address;
18283 fix->next = NULL;
18284 if (minipool_fix_head != NULL)
18285 minipool_fix_tail->next = fix;
18286 else
18287 minipool_fix_head = fix;
18289 minipool_fix_tail = fix;
18292 /* Record INSN, which will need fixing up to load a value from the
18293 minipool. ADDRESS is the offset of the insn since the start of the
18294 function; LOC is a pointer to the part of the insn which requires
18295 fixing; VALUE is the constant that must be loaded, which is of type
18296 MODE. */
18297 static void
18298 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18299 machine_mode mode, rtx value)
18301 gcc_assert (!arm_disable_literal_pool);
18302 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18304 fix->insn = insn;
18305 fix->address = address;
18306 fix->loc = loc;
18307 fix->mode = mode;
18308 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18309 fix->value = value;
18310 fix->forwards = get_attr_pool_range (insn);
18311 fix->backwards = get_attr_neg_pool_range (insn);
18312 fix->minipool = NULL;
18314 /* If an insn doesn't have a range defined for it, then it isn't
18315 expecting to be reworked by this code. Better to stop now than
18316 to generate duff assembly code. */
18317 gcc_assert (fix->forwards || fix->backwards);
18319 /* If an entry requires 8-byte alignment then assume all constant pools
18320 require 4 bytes of padding. Trying to do this later on a per-pool
18321 basis is awkward because existing pool entries have to be modified. */
18322 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18323 minipool_pad = 4;
18325 if (dump_file)
18327 fprintf (dump_file,
18328 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18329 GET_MODE_NAME (mode),
18330 INSN_UID (insn), (unsigned long) address,
18331 -1 * (long)fix->backwards, (long)fix->forwards);
18332 arm_print_value (dump_file, fix->value);
18333 fprintf (dump_file, "\n");
18336 /* Add it to the chain of fixes. */
18337 fix->next = NULL;
18339 if (minipool_fix_head != NULL)
18340 minipool_fix_tail->next = fix;
18341 else
18342 minipool_fix_head = fix;
18344 minipool_fix_tail = fix;
18347 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
18348 Returns the number of insns needed, or 99 if we always want to synthesize
18349 the value. */
18351 arm_max_const_double_inline_cost ()
18353 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18356 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18357 Returns the number of insns needed, or 99 if we don't know how to
18358 do it. */
18360 arm_const_double_inline_cost (rtx val)
18362 rtx lowpart, highpart;
18363 machine_mode mode;
18365 mode = GET_MODE (val);
18367 if (mode == VOIDmode)
18368 mode = DImode;
18370 gcc_assert (GET_MODE_SIZE (mode) == 8);
18372 lowpart = gen_lowpart (SImode, val);
18373 highpart = gen_highpart_mode (SImode, mode, val);
18375 gcc_assert (CONST_INT_P (lowpart));
18376 gcc_assert (CONST_INT_P (highpart));
18378 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18379 NULL_RTX, NULL_RTX, 0, 0)
18380 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18381 NULL_RTX, NULL_RTX, 0, 0));
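/* Worked example (added comment, not in the original sources): for the
   DImode constant 0x0000000100000001 each 32-bit half is the valid
   immediate 1, so the cost is 1 + 1 = 2 insns, which is within the limit
   of 3 (or 4) returned by arm_max_const_double_inline_cost above. */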
18384 /* Cost of loading a SImode constant. */
18385 static inline int
18386 arm_const_inline_cost (enum rtx_code code, rtx val)
18388 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18389 NULL_RTX, NULL_RTX, 1, 0);
18392 /* Return true if it is worthwhile to split a 64-bit constant into two
18393 32-bit operations. This is the case if optimizing for size, or
18394 if we have load delay slots, or if one 32-bit part can be done with
18395 a single data operation. */
18396 bool
18397 arm_const_double_by_parts (rtx val)
18399 machine_mode mode = GET_MODE (val);
18400 rtx part;
18402 if (optimize_size || arm_ld_sched)
18403 return true;
18405 if (mode == VOIDmode)
18406 mode = DImode;
18408 part = gen_highpart_mode (SImode, mode, val);
18410 gcc_assert (CONST_INT_P (part));
18412 if (const_ok_for_arm (INTVAL (part))
18413 || const_ok_for_arm (~INTVAL (part)))
18414 return true;
18416 part = gen_lowpart (SImode, val);
18418 gcc_assert (CONST_INT_P (part));
18420 if (const_ok_for_arm (INTVAL (part))
18421 || const_ok_for_arm (~INTVAL (part)))
18422 return true;
18424 return false;
18427 /* Return true if it is possible to inline both the high and low parts
18428 of a 64-bit constant into 32-bit data processing instructions. */
18429 bool
18430 arm_const_double_by_immediates (rtx val)
18432 machine_mode mode = GET_MODE (val);
18433 rtx part;
18435 if (mode == VOIDmode)
18436 mode = DImode;
18438 part = gen_highpart_mode (SImode, mode, val);
18440 gcc_assert (CONST_INT_P (part));
18442 if (!const_ok_for_arm (INTVAL (part)))
18443 return false;
18445 part = gen_lowpart (SImode, val);
18447 gcc_assert (CONST_INT_P (part));
18449 if (!const_ok_for_arm (INTVAL (part)))
18450 return false;
18452 return true;
18455 /* Scan INSN and note any of its operands that need fixing.
18456 If DO_PUSHES is false we do not actually push any of the fixups
18457 needed. */
18458 static void
18459 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18461 int opno;
18463 extract_constrain_insn (insn);
18465 if (recog_data.n_alternatives == 0)
18466 return;
18468 /* Fill in recog_op_alt with information about the constraints of
18469 this insn. */
18470 preprocess_constraints (insn);
18472 const operand_alternative *op_alt = which_op_alt ();
18473 for (opno = 0; opno < recog_data.n_operands; opno++)
18475 /* Things we need to fix can only occur in inputs. */
18476 if (recog_data.operand_type[opno] != OP_IN)
18477 continue;
18479 /* If this alternative is a memory reference, then any mention
18480 of constants in this alternative is really to fool reload
18481 into allowing us to accept one there. We need to fix them up
18482 now so that we output the right code. */
18483 if (op_alt[opno].memory_ok)
18485 rtx op = recog_data.operand[opno];
18487 if (CONSTANT_P (op))
18489 if (do_pushes)
18490 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18491 recog_data.operand_mode[opno], op);
18493 else if (MEM_P (op)
18494 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18495 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18497 if (do_pushes)
18499 rtx cop = avoid_constant_pool_reference (op);
18501 /* Casting the address of something to a mode narrower
18502 than a word can cause avoid_constant_pool_reference()
18503 to return the pool reference itself. That's no good to
18504 us here. Let's just hope that we can use the
18505 constant pool value directly. */
18506 if (op == cop)
18507 cop = get_pool_constant (XEXP (op, 0));
18509 push_minipool_fix (insn, address,
18510 recog_data.operand_loc[opno],
18511 recog_data.operand_mode[opno], cop);
18518 return;
18521 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18522 and unions in the context of ARMv8-M Security Extensions. It is used as a
18523 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18524 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
18525 or four masks, depending on whether it is being computed for a
18526 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18527 respectively. The tree for the type of the argument or a field within an
18528 argument is passed in ARG_TYPE, the current register this argument or field
18529 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18530 argument or field starts at is passed in STARTING_BIT and the last used bit
18531 is kept in LAST_USED_BIT which is also updated accordingly. */
18533 static unsigned HOST_WIDE_INT
18534 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18535 uint32_t * padding_bits_to_clear,
18536 unsigned starting_bit, int * last_used_bit)
18539 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18541 if (TREE_CODE (arg_type) == RECORD_TYPE)
18543 unsigned current_bit = starting_bit;
18544 tree field;
18545 long int offset, size;
18548 field = TYPE_FIELDS (arg_type);
18549 while (field)
18551 /* The offset within a structure is always an offset from
18552 the start of that structure. Make sure we take that into the
18553 calculation of the register based offset that we use here. */
18554 offset = starting_bit;
18555 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18556 offset %= 32;
18558 /* This is the actual size of the field, for bitfields this is the
18559 bitfield width and not the container size. */
18560 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18562 if (*last_used_bit != offset)
18564 if (offset < *last_used_bit)
18566 /* This field's offset is before the 'last_used_bit', that
18567 means this field goes on the next register. So we need to
18568 pad the rest of the current register and increase the
18569 register number. */
18570 uint32_t mask;
18571 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18572 mask++;
18574 padding_bits_to_clear[*regno] |= mask;
18575 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18576 (*regno)++;
18578 else
18580 /* Otherwise we pad the bits between the last field's end and
18581 the start of the new field. */
18582 uint32_t mask;
18584 mask = ((uint32_t)-1) >> (32 - offset);
18585 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18586 padding_bits_to_clear[*regno] |= mask;
18588 current_bit = offset;
18591 /* Calculate further padding bits for inner structs/unions too. */
18592 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18594 *last_used_bit = current_bit;
18595 not_to_clear_reg_mask
18596 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18597 padding_bits_to_clear, offset,
18598 last_used_bit);
18600 else
18602 /* Update 'current_bit' with this field's size. If the
18603 'current_bit' lies in a subsequent register, update 'regno' and
18604 reset 'current_bit' to point to the current bit in that new
18605 register. */
18606 current_bit += size;
18607 while (current_bit >= 32)
18609 current_bit-=32;
18610 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18611 (*regno)++;
18613 *last_used_bit = current_bit;
18616 field = TREE_CHAIN (field);
18618 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18620 else if (TREE_CODE (arg_type) == UNION_TYPE)
18622 tree field, field_t;
18623 int i, regno_t, field_size;
18624 int max_reg = -1;
18625 int max_bit = -1;
18626 uint32_t mask;
18627 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18628 = {-1, -1, -1, -1};
18630 /* To compute the padding bits in a union we only consider bits as
18631 padding bits if they are always either a padding bit or fall outside a
18632 field's size for all fields in the union. */
18633 field = TYPE_FIELDS (arg_type);
18634 while (field)
18636 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18637 = {0U, 0U, 0U, 0U};
18638 int last_used_bit_t = *last_used_bit;
18639 regno_t = *regno;
18640 field_t = TREE_TYPE (field);
18642 /* If the field's type is either a record or a union make sure to
18643 compute their padding bits too. */
18644 if (RECORD_OR_UNION_TYPE_P (field_t))
18645 not_to_clear_reg_mask
18646 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18647 &padding_bits_to_clear_t[0],
18648 starting_bit, &last_used_bit_t);
18649 else
18651 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18652 regno_t = (field_size / 32) + *regno;
18653 last_used_bit_t = (starting_bit + field_size) % 32;
18656 for (i = *regno; i < regno_t; i++)
18658 /* For all but the last register used by this field only keep the
18659 padding bits that were padding bits in this field. */
18660 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18663 /* For the last register, keep all padding bits that were padding
18664 bits in this field and any padding bits that are still valid
18665 as padding bits but fall outside of this field's size. */
18666 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18667 padding_bits_to_clear_res[regno_t]
18668 &= padding_bits_to_clear_t[regno_t] | mask;
18670 /* Update the maximum size of the fields in terms of registers used
18671 ('max_reg') and the 'last_used_bit' in said register. */
18672 if (max_reg < regno_t)
18674 max_reg = regno_t;
18675 max_bit = last_used_bit_t;
18677 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18678 max_bit = last_used_bit_t;
18680 field = TREE_CHAIN (field);
18683 /* Update the current padding_bits_to_clear using the intersection of the
18684 padding bits of all the fields. */
18685 for (i=*regno; i < max_reg; i++)
18686 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18688 /* Do not keep trailing padding bits, we do not know yet whether this
18689 is the end of the argument. */
18690 mask = ((uint32_t) 1 << max_bit) - 1;
18691 padding_bits_to_clear[max_reg]
18692 |= padding_bits_to_clear_res[max_reg] & mask;
18694 *regno = max_reg;
18695 *last_used_bit = max_bit;
18697 else
18698 /* This function should only be used for structs and unions. */
18699 gcc_unreachable ();
18701 return not_to_clear_reg_mask;
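/* As an illustration of the mask arithmetic above, consider an AAPCS
   argument of type struct { char c; int x; }: 'c' occupies bits [0,8) of
   r0 and 'x' starts in the next register, so bits [8,32) of r0 are padding
   that must be cleared.  A minimal standalone sketch of the computation
   follows; the helper name is invented for this example and is not used by
   the compiler.  */

static uint32_t
example_padding_mask (int last_used_bit, int offset)
{
  /* Select the bits in [last_used_bit, offset) of one 32-bit register.  */
  uint32_t mask = ((uint32_t) -1) >> (32 - offset);
  mask -= ((uint32_t) 1 << last_used_bit) - 1;
  return mask;
}

/* example_padding_mask (8, 32) == 0xffffff00, i.e. the three trailing
   padding bytes of r0 for the struct above.  */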
18704 /* In the context of ARMv8-M Security Extensions, this function is used for both
18705 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18706 registers are used when returning or passing arguments, which is then
18707 returned as a mask. It will also compute a mask to indicate padding/unused
18708 bits for each of these registers, and pass it back through the
18709 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18710 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18711 the starting register used to pass this argument or return value is passed
18712 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18713 for struct and union types. */
18715 static unsigned HOST_WIDE_INT
18716 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18717 uint32_t * padding_bits_to_clear)
18720 int last_used_bit = 0;
18721 unsigned HOST_WIDE_INT not_to_clear_mask;
18723 if (RECORD_OR_UNION_TYPE_P (arg_type))
18725 not_to_clear_mask
18726 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18727 padding_bits_to_clear, 0,
18728 &last_used_bit);
18731 /* If the 'last_used_bit' is not zero, that means we are still using a
18732 part of the last 'regno'. In such cases we must clear the trailing
18733 bits. Otherwise we are not using regno and we should mark it as
18734 to be cleared. */
18735 if (last_used_bit != 0)
18736 padding_bits_to_clear[regno]
18737 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18738 else
18739 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18741 else
18743 not_to_clear_mask = 0;
18744 /* We are not dealing with structs or unions, so these arguments may be
18745 passed in floating point registers too. In some cases a BLKmode is
18746 used when returning or passing arguments in multiple VFP registers. */
18747 if (GET_MODE (arg_rtx) == BLKmode)
18749 int i, arg_regs;
18750 rtx reg;
18752 /* This should really only occur when dealing with the hard-float
18753 ABI. */
18754 gcc_assert (TARGET_HARD_FLOAT_ABI);
18756 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18758 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18759 gcc_assert (REG_P (reg));
18761 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18763 /* If we are dealing with DF mode, make sure we don't
18764 clear either of the registers it addresses. */
18765 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18766 if (arg_regs > 1)
18768 unsigned HOST_WIDE_INT mask;
18769 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18770 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18771 not_to_clear_mask |= mask;
18775 else
18777 /* Otherwise we can rely on the MODE to determine how many registers
18778 are being used by this argument. */
18779 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18780 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18781 if (arg_regs > 1)
18783 unsigned HOST_WIDE_INT
18784 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18785 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18786 not_to_clear_mask |= mask;
18791 return not_to_clear_mask;
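/* As an illustration of the multi-register case above: an argument that
   occupies NREGS consecutive core registers starting at REGNO contributes a
   contiguous run of bits to the not-to-clear mask.  A minimal standalone
   sketch (the function name is invented for this example); e.g. a DFmode
   argument passed in r2 and r3 under the soft-float ABI gives
   example_arg_reg_mask (2, 2) == 0xc.  */

static unsigned HOST_WIDE_INT
example_arg_reg_mask (unsigned int regno, unsigned int nregs)
{
  unsigned HOST_WIDE_INT mask = HOST_WIDE_INT_1U << (regno + nregs);
  mask -= HOST_WIDE_INT_1U << regno;
  return mask;
}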
18794 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call or returning from
18795 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18796 are to be fully cleared, using the value in register CLEARING_REG if more
18797 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
18798 the bits that need to be cleared in caller-saved core registers, with
18799 SCRATCH_REG used as a scratch register for that clearing.
18801 NOTE: one of three following assertions must hold:
18802 - SCRATCH_REG is a low register
18803 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18804 in TO_CLEAR_BITMAP)
18805 - CLEARING_REG is a low register. */
18807 static void
18808 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18809 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18811 bool saved_clearing = false;
18812 rtx saved_clearing_reg = NULL_RTX;
18813 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18815 gcc_assert (arm_arch_cmse);
18817 if (!bitmap_empty_p (to_clear_bitmap))
18819 minregno = bitmap_first_set_bit (to_clear_bitmap);
18820 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18822 clearing_regno = REGNO (clearing_reg);
18824 /* Clear padding bits. */
18825 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18826 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18828 uint64_t mask;
18829 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18831 if (padding_bits_to_clear[i] == 0)
18832 continue;
18834 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18835 CLEARING_REG as scratch. */
18836 if (TARGET_THUMB1
18837 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18839 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18840 such that we can use clearing_reg to clear the unused bits in the
18841 arguments. */
18842 if ((clearing_regno > maxregno
18843 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18844 && !saved_clearing)
18846 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18847 emit_move_insn (scratch_reg, clearing_reg);
18848 saved_clearing = true;
18849 saved_clearing_reg = scratch_reg;
18851 scratch_reg = clearing_reg;
18854 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18855 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18856 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18858 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18859 mask = (~padding_bits_to_clear[i]) >> 16;
18860 rtx16 = gen_int_mode (16, SImode);
18861 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18862 if (mask)
18863 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18865 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18867 if (saved_clearing)
18868 emit_move_insn (clearing_reg, saved_clearing_reg);
18871 /* Clear full registers. */
18873 if (TARGET_HAVE_FPCXT_CMSE)
18875 rtvec vunspec_vec;
18876 int i, j, k, nb_regs;
18877 rtx use_seq, par, reg, set, vunspec;
18878 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18879 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18880 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18882 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18884 /* Find next register to clear and exit if none. */
18885 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18886 if (i > maxregno)
18887 break;
18889 /* Compute number of consecutive registers to clear. */
18890 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18891 j++);
18892 nb_regs = j - i;
18894 /* Create VSCCLRM RTX pattern. */
18895 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18896 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18897 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18898 VUNSPEC_VSCCLRM_VPR);
18899 XVECEXP (par, 0, 0) = vunspec;
18901 /* Insert VFP register clearing RTX in the pattern. */
18902 start_sequence ();
18903 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18905 if (!bitmap_bit_p (to_clear_bitmap, j))
18906 continue;
18908 reg = gen_rtx_REG (SFmode, j);
18909 set = gen_rtx_SET (reg, const0_rtx);
18910 XVECEXP (par, 0, k++) = set;
18911 emit_use (reg);
18913 use_seq = get_insns ();
18914 end_sequence ();
18916 emit_insn_after (use_seq, emit_insn (par));
18919 /* Get set of core registers to clear. */
18920 bitmap_clear (core_regs_bitmap);
18921 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18922 IP_REGNUM - R0_REGNUM + 1);
18923 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18924 core_regs_bitmap);
18925 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18927 if (bitmap_empty_p (to_clear_core_bitmap))
18928 return;
18930 /* Create clrm RTX pattern. */
18931 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18932 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18934 /* Insert core register clearing RTX in the pattern. */
18935 start_sequence ();
18936 for (j = 0, i = minregno; j < nb_regs; i++)
18938 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18939 continue;
18941 reg = gen_rtx_REG (SImode, i);
18942 set = gen_rtx_SET (reg, const0_rtx);
18943 XVECEXP (par, 0, j++) = set;
18944 emit_use (reg);
18947 /* Insert APSR register clearing RTX in the pattern
18948 along with clobbering CC. */
18949 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18950 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18951 VUNSPEC_CLRM_APSR);
18953 XVECEXP (par, 0, j++) = vunspec;
18955 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18956 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18957 XVECEXP (par, 0, j) = clobber;
18959 use_seq = get_insns ();
18960 end_sequence ();
18962 emit_insn_after (use_seq, emit_insn (par));
18964 else
18966 /* If not marked for clearing, clearing_reg already does not contain
18967 any secret. */
18968 if (clearing_regno <= maxregno
18969 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18971 emit_move_insn (clearing_reg, const0_rtx);
18972 emit_use (clearing_reg);
18973 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18976 for (regno = minregno; regno <= maxregno; regno++)
18978 if (!bitmap_bit_p (to_clear_bitmap, regno))
18979 continue;
18981 if (IS_VFP_REGNUM (regno))
18983 /* If regno is an even vfp register and its successor is also to
18984 be cleared, use vmov. */
18985 if (TARGET_VFP_DOUBLE
18986 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18987 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18989 emit_move_insn (gen_rtx_REG (DFmode, regno),
18990 CONST1_RTX (DFmode));
18991 emit_use (gen_rtx_REG (DFmode, regno));
18992 regno++;
18994 else
18996 emit_move_insn (gen_rtx_REG (SFmode, regno),
18997 CONST1_RTX (SFmode));
18998 emit_use (gen_rtx_REG (SFmode, regno));
19001 else
19003 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
19004 emit_use (gen_rtx_REG (SImode, regno));
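/* As an illustration, the padding-bit clearing emitted above amounts to the
   following plain-C computation: the scratch register is first loaded with
   the low 16 bits of the negated padding mask (a movw), its upper half is
   then overwritten with the high 16 bits when they are non-zero (the
   ZERO_EXTRACT models a movt), and the argument register is ANDed with the
   result.  The function name below is invented for this sketch and is not
   used by the compiler.  */

static uint32_t
example_clear_padding_bits (uint32_t arg_reg, uint32_t padding_bits)
{
  uint32_t scratch = ~padding_bits & 0xffff;		/* movw */
  scratch |= (~padding_bits >> 16) << 16;		/* movt (skipped when zero) */
  return arg_reg & scratch;				/* ands */
}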
19010 /* Clear core and caller-saved VFP registers not used to pass arguments before
19011 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
19012 registers is done in the __gnu_cmse_nonsecure_call libcall. See
19013 libgcc/config/arm/cmse_nonsecure_call.S. */
19015 static void
19016 cmse_nonsecure_call_inline_register_clear (void)
19018 basic_block bb;
19020 FOR_EACH_BB_FN (bb, cfun)
19022 rtx_insn *insn;
19024 FOR_BB_INSNS (bb, insn)
19026 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
19027 /* frame = VFP regs + FPSCR + VPR. */
19028 unsigned lazy_store_stack_frame_size
19029 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
19030 unsigned long callee_saved_mask
19031 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
19032 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
19033 unsigned address_regnum, regno;
19034 unsigned max_int_regno
19035 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
19036 unsigned max_fp_regno
19037 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
19038 unsigned maxregno
19039 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
19040 auto_sbitmap to_clear_bitmap (maxregno + 1);
19041 rtx_insn *seq;
19042 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
19043 rtx address;
19044 CUMULATIVE_ARGS args_so_far_v;
19045 cumulative_args_t args_so_far;
19046 tree arg_type, fntype;
19047 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
19048 function_args_iterator args_iter;
19049 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
19051 if (!NONDEBUG_INSN_P (insn))
19052 continue;
19054 if (!CALL_P (insn))
19055 continue;
19057 pat = PATTERN (insn);
19058 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
19059 call = XVECEXP (pat, 0, 0);
19061 /* Get the real call RTX if the insn sets a value, ie. returns. */
19062 if (GET_CODE (call) == SET)
19063 call = SET_SRC (call);
19065 /* Check if it is a cmse_nonsecure_call. */
19066 unspec = XEXP (call, 0);
19067 if (GET_CODE (unspec) != UNSPEC
19068 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
19069 continue;
19071 /* Mark registers that need to be cleared. Those that hold a
19072 parameter are removed from the set further below. */
19073 bitmap_clear (to_clear_bitmap);
19074 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
19075 max_int_regno - R0_REGNUM + 1);
19077 /* Only look at the caller-saved floating point registers in case of
19078 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19079 lazy store and loads which clear both caller- and callee-saved
19080 registers. */
19081 if (!lazy_fpclear)
19083 auto_sbitmap float_bitmap (maxregno + 1);
19085 bitmap_clear (float_bitmap);
19086 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
19087 max_fp_regno - FIRST_VFP_REGNUM + 1);
19088 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
19091 /* Make sure the register used to hold the function address is not
19092 cleared. */
19093 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19094 gcc_assert (MEM_P (address));
19095 gcc_assert (REG_P (XEXP (address, 0)));
19096 address_regnum = REGNO (XEXP (address, 0));
19097 if (address_regnum <= max_int_regno)
19098 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19100 /* Set basic block of call insn so that df rescan is performed on
19101 insns inserted here. */
19102 set_block_for_insn (insn, bb);
19103 df_set_flags (DF_DEFER_INSN_RESCAN);
19104 start_sequence ();
19106 /* Make sure the scheduler doesn't schedule other insns beyond
19107 here. */
19108 emit_insn (gen_blockage ());
19110 /* Walk through all arguments and clear registers appropriately. */
19112 fntype = TREE_TYPE (MEM_EXPR (address));
19113 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19114 NULL_TREE);
19115 args_so_far = pack_cumulative_args (&args_so_far_v);
19116 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19118 rtx arg_rtx;
19119 uint64_t to_clear_args_mask;
19121 if (VOID_TYPE_P (arg_type))
19122 continue;
19124 function_arg_info arg (arg_type, /*named=*/true);
19125 if (!first_param)
19126 /* ??? We should advance after processing the argument and pass
19127 the argument we're advancing past. */
19128 arm_function_arg_advance (args_so_far, arg);
19130 arg_rtx = arm_function_arg (args_so_far, arg);
19131 gcc_assert (REG_P (arg_rtx));
19132 to_clear_args_mask
19133 = compute_not_to_clear_mask (arg_type, arg_rtx,
19134 REGNO (arg_rtx),
19135 &padding_bits_to_clear[0]);
19136 if (to_clear_args_mask)
19138 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19140 if (to_clear_args_mask & (1ULL << regno))
19141 bitmap_clear_bit (to_clear_bitmap, regno);
19145 first_param = false;
19148 /* We use right shift and left shift to clear the LSB of the address
19149 we jump to instead of using bic, to avoid having to use an extra
19150 register on Thumb-1. */
19151 clearing_reg = XEXP (address, 0);
19152 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19153 emit_insn (gen_rtx_SET (clearing_reg, shift));
19154 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19155 emit_insn (gen_rtx_SET (clearing_reg, shift));
19157 if (clear_callee_saved)
19159 rtx push_insn =
19160 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19161 /* Disable frame debug info in push because it needs to be
19162 disabled for pop (see below). */
19163 RTX_FRAME_RELATED_P (push_insn) = 0;
19165 /* Lazy store multiple. */
19166 if (lazy_fpclear)
19168 rtx imm;
19169 rtx_insn *add_insn;
19171 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19172 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19173 stack_pointer_rtx, imm));
19174 /* If we have the frame pointer, then it will be the
19175 CFA reg. Otherwise, the stack pointer is the CFA
19176 reg, so we need to emit a CFA adjust. */
19177 if (!frame_pointer_needed)
19178 arm_add_cfa_adjust_cfa_note (add_insn,
19179 - lazy_store_stack_frame_size,
19180 stack_pointer_rtx,
19181 stack_pointer_rtx);
19182 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19184 /* Save VFP callee-saved registers. */
19185 else
19187 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19188 (max_fp_regno - D7_VFP_REGNUM) / 2);
19189 /* Disable frame debug info in push because it needs to be
19190 disabled for vpop (see below). */
19191 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19195 /* Clear caller-saved registers that leak before doing a non-secure
19196 call. */
19197 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19198 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19199 NUM_ARG_REGS, ip_reg, clearing_reg);
19201 seq = get_insns ();
19202 end_sequence ();
19203 emit_insn_before (seq, insn);
19205 /* The AAPCS requires the callee to widen integral types narrower
19206 than 32 bits to the full width of the register; but when handling
19207 calls to non-secure space, we cannot trust the callee to have
19208 correctly done so. So forcibly re-widen the result here. */
19209 tree ret_type = TREE_TYPE (fntype);
19210 if ((TREE_CODE (ret_type) == INTEGER_TYPE
19211 || TREE_CODE (ret_type) == ENUMERAL_TYPE
19212 || TREE_CODE (ret_type) == BOOLEAN_TYPE)
19213 && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 4))
19215 rtx ret_reg = gen_rtx_REG (TYPE_MODE (ret_type), R0_REGNUM);
19216 rtx si_reg = gen_rtx_REG (SImode, R0_REGNUM);
19217 rtx extend;
19218 if (TYPE_UNSIGNED (ret_type))
19219 extend = gen_rtx_SET (si_reg, gen_rtx_ZERO_EXTEND (SImode,
19220 ret_reg));
19221 else
19223 /* Signed-extension is a special case because of
19224 thumb1_extendhisi2. */
19225 if (TARGET_THUMB1
19226 && known_eq (GET_MODE_SIZE (TYPE_MODE (ret_type)), 2))
19227 extend = gen_thumb1_extendhisi2 (si_reg, ret_reg);
19228 else
19229 extend = gen_rtx_SET (si_reg,
19230 gen_rtx_SIGN_EXTEND (SImode,
19231 ret_reg));
19233 emit_insn_after (extend, insn);
19237 if (TARGET_HAVE_FPCXT_CMSE)
19239 rtx_insn *last, *pop_insn, *after = insn;
19241 start_sequence ();
19243 /* Lazy load multiple done as part of libcall in Armv8-M. */
19244 if (lazy_fpclear)
19246 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19247 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19248 rtx_insn *add_insn =
19249 emit_insn (gen_addsi3 (stack_pointer_rtx,
19250 stack_pointer_rtx, imm));
19251 if (!frame_pointer_needed)
19252 arm_add_cfa_adjust_cfa_note (add_insn,
19253 lazy_store_stack_frame_size,
19254 stack_pointer_rtx,
19255 stack_pointer_rtx);
19257 /* Restore VFP callee-saved registers. */
19258 else
19260 int nb_callee_saved_vfp_regs =
19261 (max_fp_regno - D7_VFP_REGNUM) / 2;
19262 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19263 nb_callee_saved_vfp_regs,
19264 stack_pointer_rtx);
19265 /* Disable frame debug info in vpop because the SP adjustment
19266 is made using a CFA adjustment note while the CFA register used is
19267 sometimes R7. This then causes an assert failure in the
19268 CFI note creation code. */
19269 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19272 arm_emit_multi_reg_pop (callee_saved_mask);
19273 pop_insn = get_last_insn ();
19275 /* Disable frame debug info in pop because the notes reset the state
19276 of popped registers to what it was at the beginning of the
19277 function, before the prologue. This leads to incorrect state
19278 when doing the pop after the nonsecure call for registers that
19279 are pushed both in prologue and before the nonsecure call.
19281 It also occasionally triggers an assert failure in CFI note
19282 creation code when there are two codepaths to the epilogue,
19283 one of which does not go through the nonsecure call.
19284 Obviously this means that debugging between the push and pop is
19285 not reliable. */
19286 RTX_FRAME_RELATED_P (pop_insn) = 0;
19288 seq = get_insns ();
19289 last = get_last_insn ();
19290 end_sequence ();
19292 emit_insn_after (seq, after);
19294 /* Skip pop we have just inserted after nonsecure call, we know
19295 it does not contain a nonsecure call. */
19296 insn = last;
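/* As an illustration of the address handling above: the lsrs/lsls pair
   emitted before the register clearing simply forces bit 0 (the Thumb
   interworking bit) of the call address to zero without needing an extra
   register for a BIC immediate.  In plain C (function name invented for
   this sketch):  */

static uint32_t
example_clear_address_lsb (uint32_t addr)
{
  addr >>= 1;		/* lsrs addr, addr, #1 */
  addr <<= 1;		/* lsls addr, addr, #1 */
  return addr;
}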
19302 /* Rewrite a move insn into a subtract of 0 if the condition codes will
19303 be useful in the next conditional jump insn. */
19305 static void
19306 thumb1_reorg (void)
19308 basic_block bb;
19310 FOR_EACH_BB_FN (bb, cfun)
19312 rtx dest, src;
19313 rtx cmp, op0, op1, set = NULL;
19314 rtx_insn *prev, *insn = BB_END (bb);
19315 bool insn_clobbered = false;
19317 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19318 insn = PREV_INSN (insn);
19320 /* Find the last cbranchsi4_insn in basic block BB. */
19321 if (insn == BB_HEAD (bb)
19322 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19323 continue;
19325 /* Get the register with which we are comparing. */
19326 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19327 op0 = XEXP (cmp, 0);
19328 op1 = XEXP (cmp, 1);
19330 /* Check that comparison is against ZERO. */
19331 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19332 continue;
19334 /* Find the first flag setting insn before INSN in basic block BB. */
19335 gcc_assert (insn != BB_HEAD (bb));
19336 for (prev = PREV_INSN (insn);
19337 (!insn_clobbered
19338 && prev != BB_HEAD (bb)
19339 && (NOTE_P (prev)
19340 || DEBUG_INSN_P (prev)
19341 || ((set = single_set (prev)) != NULL
19342 && get_attr_conds (prev) == CONDS_NOCOND)));
19343 prev = PREV_INSN (prev))
19345 if (reg_set_p (op0, prev))
19346 insn_clobbered = true;
19349 /* Skip if op0 is clobbered by insn other than prev. */
19350 if (insn_clobbered)
19351 continue;
19353 if (!set)
19354 continue;
19356 dest = SET_DEST (set);
19357 src = SET_SRC (set);
19358 if (!low_register_operand (dest, SImode)
19359 || !low_register_operand (src, SImode))
19360 continue;
19362 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19363 in INSN. Both src and dest of the move insn are checked. */
19364 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19366 dest = copy_rtx (dest);
19367 src = copy_rtx (src);
19368 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19369 PATTERN (prev) = gen_rtx_SET (dest, src);
19370 INSN_CODE (prev) = -1;
19371 /* Set test register in INSN to dest. */
19372 XEXP (cmp, 0) = copy_rtx (dest);
19373 INSN_CODE (insn) = -1;
19378 /* Convert instructions to their cc-clobbering variant if possible, since
19379 that allows us to use smaller encodings. */
19381 static void
19382 thumb2_reorg (void)
19384 basic_block bb;
19385 regset_head live;
19387 INIT_REG_SET (&live);
19389 /* We are freeing block_for_insn in the toplev to keep compatibility
19390 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19391 compute_bb_for_insn ();
19392 df_analyze ();
19394 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19396 FOR_EACH_BB_FN (bb, cfun)
19398 if ((current_tune->disparage_flag_setting_t16_encodings
19399 == tune_params::DISPARAGE_FLAGS_ALL)
19400 && optimize_bb_for_speed_p (bb))
19401 continue;
19403 rtx_insn *insn;
19404 Convert_Action action = SKIP;
19405 Convert_Action action_for_partial_flag_setting
19406 = ((current_tune->disparage_flag_setting_t16_encodings
19407 != tune_params::DISPARAGE_FLAGS_NEITHER)
19408 && optimize_bb_for_speed_p (bb))
19409 ? SKIP : CONV;
19411 COPY_REG_SET (&live, DF_LR_OUT (bb));
19412 df_simulate_initialize_backwards (bb, &live);
19413 FOR_BB_INSNS_REVERSE (bb, insn)
19415 if (NONJUMP_INSN_P (insn)
19416 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19417 && GET_CODE (PATTERN (insn)) == SET)
19419 action = SKIP;
19420 rtx pat = PATTERN (insn);
19421 rtx dst = XEXP (pat, 0);
19422 rtx src = XEXP (pat, 1);
19423 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19425 if (UNARY_P (src) || BINARY_P (src))
19426 op0 = XEXP (src, 0);
19428 if (BINARY_P (src))
19429 op1 = XEXP (src, 1);
19431 if (low_register_operand (dst, SImode))
19433 switch (GET_CODE (src))
19435 case PLUS:
19436 /* Adding two registers and storing the result
19437 in the first source is already a 16-bit
19438 operation. */
19439 if (rtx_equal_p (dst, op0)
19440 && register_operand (op1, SImode))
19441 break;
19443 if (low_register_operand (op0, SImode))
19445 /* ADDS <Rd>,<Rn>,<Rm> */
19446 if (low_register_operand (op1, SImode))
19447 action = CONV;
19448 /* ADDS <Rdn>,#<imm8> */
19449 /* SUBS <Rdn>,#<imm8> */
19450 else if (rtx_equal_p (dst, op0)
19451 && CONST_INT_P (op1)
19452 && IN_RANGE (INTVAL (op1), -255, 255))
19453 action = CONV;
19454 /* ADDS <Rd>,<Rn>,#<imm3> */
19455 /* SUBS <Rd>,<Rn>,#<imm3> */
19456 else if (CONST_INT_P (op1)
19457 && IN_RANGE (INTVAL (op1), -7, 7))
19458 action = CONV;
19460 /* ADCS <Rd>, <Rn> */
19461 else if (GET_CODE (XEXP (src, 0)) == PLUS
19462 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19463 && low_register_operand (XEXP (XEXP (src, 0), 1),
19464 SImode)
19465 && COMPARISON_P (op1)
19466 && cc_register (XEXP (op1, 0), VOIDmode)
19467 && maybe_get_arm_condition_code (op1) == ARM_CS
19468 && XEXP (op1, 1) == const0_rtx)
19469 action = CONV;
19470 break;
19472 case MINUS:
19473 /* RSBS <Rd>,<Rn>,#0
19474 Not handled here: see NEG below. */
19475 /* SUBS <Rd>,<Rn>,#<imm3>
19476 SUBS <Rdn>,#<imm8>
19477 Not handled here: see PLUS above. */
19478 /* SUBS <Rd>,<Rn>,<Rm> */
19479 if (low_register_operand (op0, SImode)
19480 && low_register_operand (op1, SImode))
19481 action = CONV;
19482 break;
19484 case MULT:
19485 /* MULS <Rdm>,<Rn>,<Rdm>
19486 As an exception to the rule, this is only used
19487 when optimizing for size since MULS is slow on all
19488 known implementations. We do not even want to use
19489 MULS in cold code, if optimizing for speed, so we
19490 test the global flag here. */
19491 if (!optimize_size)
19492 break;
19493 /* Fall through. */
19494 case AND:
19495 case IOR:
19496 case XOR:
19497 /* ANDS <Rdn>,<Rm> */
19498 if (rtx_equal_p (dst, op0)
19499 && low_register_operand (op1, SImode))
19500 action = action_for_partial_flag_setting;
19501 else if (rtx_equal_p (dst, op1)
19502 && low_register_operand (op0, SImode))
19503 action = action_for_partial_flag_setting == SKIP
19504 ? SKIP : SWAP_CONV;
19505 break;
19507 case ASHIFTRT:
19508 case ASHIFT:
19509 case LSHIFTRT:
19510 /* ASRS <Rdn>,<Rm> */
19511 /* LSRS <Rdn>,<Rm> */
19512 /* LSLS <Rdn>,<Rm> */
19513 if (rtx_equal_p (dst, op0)
19514 && low_register_operand (op1, SImode))
19515 action = action_for_partial_flag_setting;
19516 /* ASRS <Rd>,<Rm>,#<imm5> */
19517 /* LSRS <Rd>,<Rm>,#<imm5> */
19518 /* LSLS <Rd>,<Rm>,#<imm5> */
19519 else if (low_register_operand (op0, SImode)
19520 && CONST_INT_P (op1)
19521 && IN_RANGE (INTVAL (op1), 0, 31))
19522 action = action_for_partial_flag_setting;
19523 break;
19525 case ROTATERT:
19526 /* RORS <Rdn>,<Rm> */
19527 if (rtx_equal_p (dst, op0)
19528 && low_register_operand (op1, SImode))
19529 action = action_for_partial_flag_setting;
19530 break;
19532 case NOT:
19533 /* MVNS <Rd>,<Rm> */
19534 if (low_register_operand (op0, SImode))
19535 action = action_for_partial_flag_setting;
19536 break;
19538 case NEG:
19539 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19540 if (low_register_operand (op0, SImode))
19541 action = CONV;
19542 break;
19544 case CONST_INT:
19545 /* MOVS <Rd>,#<imm8> */
19546 if (CONST_INT_P (src)
19547 && IN_RANGE (INTVAL (src), 0, 255))
19548 action = action_for_partial_flag_setting;
19549 break;
19551 case REG:
19552 /* MOVS and MOV<c> with registers have different
19553 encodings, so are not relevant here. */
19554 break;
19556 default:
19557 break;
19561 if (action != SKIP)
19563 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19564 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19565 rtvec vec;
19567 if (action == SWAP_CONV)
19569 src = copy_rtx (src);
19570 XEXP (src, 0) = op1;
19571 XEXP (src, 1) = op0;
19572 pat = gen_rtx_SET (dst, src);
19573 vec = gen_rtvec (2, pat, clobber);
19575 else /* action == CONV */
19576 vec = gen_rtvec (2, pat, clobber);
19578 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19579 INSN_CODE (insn) = -1;
19583 if (NONDEBUG_INSN_P (insn))
19584 df_simulate_one_insn_backwards (bb, insn, &live);
19588 CLEAR_REG_SET (&live);
19591 /* Gcc puts the pool in the wrong place for ARM, since we can only
19592 load addresses a limited distance around the pc. We do some
19593 special munging to move the constant pool values to the correct
19594 point in the code. */
19595 static void
19596 arm_reorg (void)
19598 rtx_insn *insn;
19599 HOST_WIDE_INT address = 0;
19600 Mfix * fix;
19602 if (use_cmse)
19603 cmse_nonsecure_call_inline_register_clear ();
19605 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19606 if (cfun->is_thunk)
19608 else if (TARGET_THUMB1)
19609 thumb1_reorg ();
19610 else if (TARGET_THUMB2)
19611 thumb2_reorg ();
19613 /* Ensure all insns that must be split have been split at this point.
19614 Otherwise, the pool placement code below may compute incorrect
19615 insn lengths. Note that when optimizing, all insns have already
19616 been split at this point. */
19617 if (!optimize)
19618 split_all_insns_noflow ();
19620 /* Make sure we do not attempt to create a literal pool even though it should
19621 no longer be necessary to create any. */
19622 if (arm_disable_literal_pool)
19623 return;
19625 minipool_fix_head = minipool_fix_tail = NULL;
19627 /* The first insn must always be a note, or the code below won't
19628 scan it properly. */
19629 insn = get_insns ();
19630 gcc_assert (NOTE_P (insn));
19631 minipool_pad = 0;
19633 /* Scan all the insns and record the operands that will need fixing. */
19634 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19636 if (BARRIER_P (insn))
19637 push_minipool_barrier (insn, address);
19638 else if (INSN_P (insn))
19640 rtx_jump_table_data *table;
19642 note_invalid_constants (insn, address, true);
19643 address += get_attr_length (insn);
19645 /* If the insn is a vector jump, add the size of the table
19646 and skip the table. */
19647 if (tablejump_p (insn, NULL, &table))
19649 address += get_jump_table_size (table);
19650 insn = table;
19653 else if (LABEL_P (insn))
19654 /* Add the worst-case padding due to alignment. We don't add
19655 the _current_ padding because the minipool insertions
19656 themselves might change it. */
19657 address += get_label_padding (insn);
19660 fix = minipool_fix_head;
19662 /* Now scan the fixups and perform the required changes. */
19663 while (fix)
19665 Mfix * ftmp;
19666 Mfix * fdel;
19667 Mfix * last_added_fix;
19668 Mfix * last_barrier = NULL;
19669 Mfix * this_fix;
19671 /* Skip any further barriers before the next fix. */
19672 while (fix && BARRIER_P (fix->insn))
19673 fix = fix->next;
19675 /* No more fixes. */
19676 if (fix == NULL)
19677 break;
19679 last_added_fix = NULL;
19681 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19683 if (BARRIER_P (ftmp->insn))
19685 if (ftmp->address >= minipool_vector_head->max_address)
19686 break;
19688 last_barrier = ftmp;
19690 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19691 break;
19693 last_added_fix = ftmp; /* Keep track of the last fix added. */
19696 /* If we found a barrier, drop back to that; any fixes that we
19697 could have reached but come after the barrier will now go in
19698 the next mini-pool. */
19699 if (last_barrier != NULL)
19701 /* Reduce the refcount for those fixes that won't go into this
19702 pool after all. */
19703 for (fdel = last_barrier->next;
19704 fdel && fdel != ftmp;
19705 fdel = fdel->next)
19707 fdel->minipool->refcount--;
19708 fdel->minipool = NULL;
19711 ftmp = last_barrier;
19713 else
19715 /* ftmp is the first fix that we can't fit into this pool and
19716 there are no natural barriers that we could use. Insert a
19717 new barrier in the code somewhere between the previous
19718 fix and this one, and arrange to jump around it. */
19719 HOST_WIDE_INT max_address;
19721 /* The last item on the list of fixes must be a barrier, so
19722 we can never run off the end of the list of fixes without
19723 last_barrier being set. */
19724 gcc_assert (ftmp);
19726 max_address = minipool_vector_head->max_address;
19727 /* Check that there isn't another fix that is in range that
19728 we couldn't fit into this pool because the pool was
19729 already too large: we need to put the pool before such an
19730 instruction. The pool itself may come just after the
19731 fix because create_fix_barrier also allows space for a
19732 jump instruction. */
19733 if (ftmp->address < max_address)
19734 max_address = ftmp->address + 1;
19736 last_barrier = create_fix_barrier (last_added_fix, max_address);
19739 assign_minipool_offsets (last_barrier);
19741 while (ftmp)
19743 if (!BARRIER_P (ftmp->insn)
19744 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19745 == NULL))
19746 break;
19748 ftmp = ftmp->next;
19751 /* Scan over the fixes we have identified for this pool, fixing them
19752 up and adding the constants to the pool itself. */
19753 for (this_fix = fix; this_fix && ftmp != this_fix;
19754 this_fix = this_fix->next)
19755 if (!BARRIER_P (this_fix->insn))
19757 rtx addr
19758 = plus_constant (Pmode,
19759 gen_rtx_LABEL_REF (VOIDmode,
19760 minipool_vector_label),
19761 this_fix->minipool->offset);
19762 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19765 dump_minipool (last_barrier->insn);
19766 fix = ftmp;
19769 /* From now on we must synthesize any constants that we can't handle
19770 directly. This can happen if the RTL gets split during final
19771 instruction generation. */
19772 cfun->machine->after_arm_reorg = 1;
19774 /* Free the minipool memory. */
19775 obstack_free (&minipool_obstack, minipool_startobj);
19778 /* Routines to output assembly language. */
19780 /* OPERANDS[0] is the entire list of insns that constitute pop,
19781 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
19782 is in the list, UPDATE is true iff the list contains an explicit
19783 update of the base register. */
19784 void
19785 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19786 bool update)
19788 int i;
19789 char pattern[100];
19790 int offset;
19791 const char *conditional;
19792 int num_saves = XVECLEN (operands[0], 0);
19793 unsigned int regno;
19794 unsigned int regno_base = REGNO (operands[1]);
19795 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19797 offset = 0;
19798 offset += update ? 1 : 0;
19799 offset += return_pc ? 1 : 0;
19801 /* Is the base register in the list? */
19802 for (i = offset; i < num_saves; i++)
19804 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19805 /* If SP is in the list, then the base register must be SP. */
19806 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19807 /* If base register is in the list, there must be no explicit update. */
19808 if (regno == regno_base)
19809 gcc_assert (!update);
19812 conditional = reverse ? "%?%D0" : "%?%d0";
19813 /* Can't use POP if returning from an interrupt. */
19814 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19815 sprintf (pattern, "pop%s\t{", conditional);
19816 else
19818 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19819 It's just a convention, their semantics are identical. */
19820 if (regno_base == SP_REGNUM)
19821 sprintf (pattern, "ldmfd%s\t", conditional);
19822 else if (update)
19823 sprintf (pattern, "ldmia%s\t", conditional);
19824 else
19825 sprintf (pattern, "ldm%s\t", conditional);
19827 strcat (pattern, reg_names[regno_base]);
19828 if (update)
19829 strcat (pattern, "!, {");
19830 else
19831 strcat (pattern, ", {");
19834 /* Output the first destination register. */
19835 strcat (pattern,
19836 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19838 /* Output the rest of the destination registers. */
19839 for (i = offset + 1; i < num_saves; i++)
19841 strcat (pattern, ", ");
19842 strcat (pattern,
19843 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19846 strcat (pattern, "}");
19848 if (interrupt_p && return_pc)
19849 strcat (pattern, "^");
19851 output_asm_insn (pattern, &cond);
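/* As an illustration: for a pop of { r4, r5, pc } with the stack pointer as
   base, writeback, and no interrupt return, the routine above builds the
   template "pop%?%d0\t{r4, r5, pc}", which the final assembly output prints
   as "pop\t{r4, r5, pc}" when the instruction is unconditional.  A
   simplified standalone sketch of the string assembly (register choice and
   function name are invented for the example):  */

static void
example_build_pop_pattern (char pattern[100])
{
  const char *conditional = "%?%d0";	/* not reversed */
  sprintf (pattern, "pop%s\t{", conditional);
  strcat (pattern, "r4");
  strcat (pattern, ", r5");
  strcat (pattern, ", pc");
  strcat (pattern, "}");
  /* pattern now holds "pop%?%d0\t{r4, r5, pc}".  */
}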
19855 /* Output the assembly for a store multiple. */
19857 const char *
19858 vfp_output_vstmd (rtx * operands)
19860 char pattern[100];
19861 int p;
19862 int base;
19863 int i;
19864 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19865 ? XEXP (operands[0], 0)
19866 : XEXP (XEXP (operands[0], 0), 0);
19867 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19869 if (push_p)
19870 strcpy (pattern, "vpush%?.64\t{%P1");
19871 else
19872 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19874 p = strlen (pattern);
19876 gcc_assert (REG_P (operands[1]));
19878 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19879 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19881 p += sprintf (&pattern[p], ", d%d", base + i);
19883 strcpy (&pattern[p], "}");
19885 output_asm_insn (pattern, operands);
19886 return "";
19890 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19891 number of bytes pushed. */
19893 static int
19894 vfp_emit_fstmd (int base_reg, int count)
19896 rtx par;
19897 rtx dwarf;
19898 rtx tmp, reg;
19899 int i;
19901 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19902 register pairs are stored by a store multiple insn. We avoid this
19903 by pushing an extra pair. */
19904 if (count == 2 && !arm_arch6)
19906 if (base_reg == LAST_VFP_REGNUM - 3)
19907 base_reg -= 2;
19908 count++;
19911 /* FSTMD may not store more than 16 doubleword registers at once. Split
19912 larger stores into multiple parts (up to a maximum of two, in
19913 practice). */
19914 if (count > 16)
19916 int saved;
19917 /* NOTE: base_reg is an internal register number, so each D register
19918 counts as 2. */
19919 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19920 saved += vfp_emit_fstmd (base_reg, 16);
19921 return saved;
19924 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19925 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19927 reg = gen_rtx_REG (DFmode, base_reg);
19928 base_reg += 2;
19930 XVECEXP (par, 0, 0)
19931 = gen_rtx_SET (gen_frame_mem
19932 (BLKmode,
19933 gen_rtx_PRE_MODIFY (Pmode,
19934 stack_pointer_rtx,
19935 plus_constant
19936 (Pmode, stack_pointer_rtx,
19937 - (count * 8)))
19939 gen_rtx_UNSPEC (BLKmode,
19940 gen_rtvec (1, reg),
19941 UNSPEC_PUSH_MULT));
19943 tmp = gen_rtx_SET (stack_pointer_rtx,
19944 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19945 RTX_FRAME_RELATED_P (tmp) = 1;
19946 XVECEXP (dwarf, 0, 0) = tmp;
19948 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19949 RTX_FRAME_RELATED_P (tmp) = 1;
19950 XVECEXP (dwarf, 0, 1) = tmp;
19952 for (i = 1; i < count; i++)
19954 reg = gen_rtx_REG (DFmode, base_reg);
19955 base_reg += 2;
19956 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19958 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19959 plus_constant (Pmode,
19960 stack_pointer_rtx,
19961 i * 8)),
19962 reg);
19963 RTX_FRAME_RELATED_P (tmp) = 1;
19964 XVECEXP (dwarf, 0, i + 1) = tmp;
19967 par = emit_insn (par);
19968 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19969 RTX_FRAME_RELATED_P (par) = 1;
19971 return count * 8;
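/* As an illustration of the size bookkeeping above: the number of bytes
   pushed is 8 per D register actually stored, after applying the ARM10
   VFPr1 workaround (an extra pair is pushed when exactly two would be
   stored on pre-v6 cores) and the 16-register split.  A standalone sketch;
   the function name and the ARCH6 parameter are invented for the
   example.  */

static int
example_fstmd_bytes (int count, bool arch6)
{
  if (count == 2 && !arch6)
    count++;				/* avoid the two-pair erratum */
  if (count > 16)
    return example_fstmd_bytes (count - 16, arch6) + 16 * 8;
  return count * 8;
}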
19974 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
19975 has the cmse_nonsecure_call attribute, and false otherwise. */
19977 bool
19978 detect_cmse_nonsecure_call (tree addr)
19980 if (!addr)
19981 return FALSE;
19983 tree fntype = TREE_TYPE (addr);
19984 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19985 TYPE_ATTRIBUTES (fntype)))
19986 return TRUE;
19987 return FALSE;
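/* As an illustration of what this predicate matches: user code of the
   following shape (compiled for the target with -mcmse, so shown under
   #if 0 rather than built into the compiler) declares a non-secure
   function type; calls through it are cmse_nonsecure_calls.  The type and
   function names are invented for the example.  */
#if 0
typedef void __attribute__ ((cmse_nonsecure_call)) nonsecure_fn (int);

void
secure_caller (nonsecure_fn *ns_func, int x)
{
  ns_func (x);	/* A cmse_nonsecure_call: the register clearing above is
		   applied before the call.  */
}
#endif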
19991 /* Emit a call instruction with pattern PAT. ADDR is the address of
19992 the call target. */
19994 void
19995 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19997 rtx insn;
19999 insn = emit_call_insn (pat);
20001 /* The PIC register is live on entry to VxWorks PIC PLT entries.
20002 If the call might use such an entry, add a use of the PIC register
20003 to the instruction's CALL_INSN_FUNCTION_USAGE. */
20004 if (TARGET_VXWORKS_RTP
20005 && flag_pic
20006 && !sibcall
20007 && SYMBOL_REF_P (addr)
20008 && (SYMBOL_REF_DECL (addr)
20009 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
20010 : !SYMBOL_REF_LOCAL_P (addr)))
20012 require_pic_register (NULL_RTX, false /*compute_now*/);
20013 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
20016 if (TARGET_FDPIC)
20018 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
20019 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
20022 if (TARGET_AAPCS_BASED)
20024 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
20025 linker. We need to add an IP clobber to allow setting
20026 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
20027 is not needed since it's a fixed register. */
20028 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
20029 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
20033 /* Output a 'call' insn. */
20034 const char *
20035 output_call (rtx *operands)
20037 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
20039 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
20040 if (REGNO (operands[0]) == LR_REGNUM)
20042 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
20043 output_asm_insn ("mov%?\t%0, %|lr", operands);
20046 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
20048 if (TARGET_INTERWORK || arm_arch4t)
20049 output_asm_insn ("bx%?\t%0", operands);
20050 else
20051 output_asm_insn ("mov%?\t%|pc, %0", operands);
20053 return "";
20056 /* Output a move from arm registers to arm registers of a long double
20057 OPERANDS[0] is the destination.
20058 OPERANDS[1] is the source. */
20059 const char *
20060 output_mov_long_double_arm_from_arm (rtx *operands)
20062 /* We have to be careful here because the two might overlap. */
20063 int dest_start = REGNO (operands[0]);
20064 int src_start = REGNO (operands[1]);
20065 rtx ops[2];
20066 int i;
20068 if (dest_start < src_start)
20070 for (i = 0; i < 3; i++)
20072 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20073 ops[1] = gen_rtx_REG (SImode, src_start + i);
20074 output_asm_insn ("mov%?\t%0, %1", ops);
20077 else
20079 for (i = 2; i >= 0; i--)
20081 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20082 ops[1] = gen_rtx_REG (SImode, src_start + i);
20083 output_asm_insn ("mov%?\t%0, %1", ops);
20087 return "";
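/* As an illustration of the overlap rule above: copying an N-word value
   between possibly overlapping register ranges must proceed low-to-high
   when the destination starts below the source and high-to-low otherwise,
   just as memmove does for memory.  A standalone sketch over arrays
   (function name invented for the example):  */

static void
example_copy_overlapping_words (uint32_t *dest, const uint32_t *src, int n)
{
  int i;
  if (dest < src)
    for (i = 0; i < n; i++)	 /* forward copy: dest never overtakes src */
      dest[i] = src[i];
  else
    for (i = n - 1; i >= 0; i--) /* backward copy for the other direction */
      dest[i] = src[i];
}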
20090 void
20091 arm_emit_movpair (rtx dest, rtx src)
20093 /* If the src is an immediate, simplify it. */
20094 if (CONST_INT_P (src))
20096 HOST_WIDE_INT val = INTVAL (src);
20097 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
20098 if ((val >> 16) & 0x0000ffff)
20100 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
20101 GEN_INT (16)),
20102 GEN_INT ((val >> 16) & 0x0000ffff));
20103 rtx_insn *insn = get_last_insn ();
20104 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20106 return;
20108 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
20109 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
20110 rtx_insn *insn = get_last_insn ();
20111 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
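/* As an illustration of the split above: a 32-bit constant is materialised
   as its low half (the movw), followed, only when the high half is
   non-zero, by an insertion of that half into bits [16,32) (the movt
   modelled by the ZERO_EXTRACT).  A standalone sketch (function name
   invented for the example):  */

static uint32_t
example_movw_movt (uint32_t val)
{
  uint32_t reg = val & 0xffff;			/* movw: low 16 bits */
  if ((val >> 16) & 0xffff)
    reg |= ((val >> 16) & 0xffff) << 16;	/* movt: high 16 bits */
  return reg;					/* always equals VAL */
}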
20114 /* Output a move between double words. It must be REG<-MEM
20115 or MEM<-REG. */
20116 const char *
20117 output_move_double (rtx *operands, bool emit, int *count)
20119 enum rtx_code code0 = GET_CODE (operands[0]);
20120 enum rtx_code code1 = GET_CODE (operands[1]);
20121 rtx otherops[3];
20122 if (count)
20123 *count = 1;
20125 /* The only case when this might happen is when
20126 you are looking at the length of a DImode instruction
20127 that has an invalid constant in it. */
20128 if (code0 == REG && code1 != MEM)
20130 gcc_assert (!emit);
20131 *count = 2;
20132 return "";
20135 if (code0 == REG)
20137 unsigned int reg0 = REGNO (operands[0]);
20138 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20140 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20142 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20144 switch (GET_CODE (XEXP (operands[1], 0)))
20146 case REG:
20148 if (emit)
20150 if (can_ldrd
20151 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
20152 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20153 else
20154 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20156 break;
20158 case PRE_INC:
20159 gcc_assert (can_ldrd);
20160 if (emit)
20161 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20162 break;
20164 case PRE_DEC:
20165 if (emit)
20167 if (can_ldrd)
20168 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20169 else
20170 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20172 break;
20174 case POST_INC:
20175 if (emit)
20177 if (can_ldrd)
20178 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20179 else
20180 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20182 break;
20184 case POST_DEC:
20185 gcc_assert (can_ldrd);
20186 if (emit)
20187 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20188 break;
20190 case PRE_MODIFY:
20191 case POST_MODIFY:
20192 /* Autoincrement addressing modes should never have overlapping
20193 base and destination registers, and overlapping index registers
20194 are already prohibited, so this doesn't need to worry about
20195 fix_cm3_ldrd. */
20196 otherops[0] = operands[0];
20197 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20198 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20200 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20202 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20204 /* Registers overlap so split out the increment. */
20205 if (emit)
20207 gcc_assert (can_ldrd);
20208 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20209 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20211 if (count)
20212 *count = 2;
20214 else
20216 /* Use a single insn if we can.
20217 FIXME: IWMMXT allows offsets larger than ldrd can
20218 handle, fix these up with a pair of ldr. */
20219 if (can_ldrd
20220 && (TARGET_THUMB2
20221 || !CONST_INT_P (otherops[2])
20222 || (INTVAL (otherops[2]) > -256
20223 && INTVAL (otherops[2]) < 256)))
20225 if (emit)
20226 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20228 else
20230 if (emit)
20232 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20233 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20235 if (count)
20236 *count = 2;
20241 else
20243 /* Use a single insn if we can.
20244 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20245 fix these up with a pair of ldr. */
20246 if (can_ldrd
20247 && (TARGET_THUMB2
20248 || !CONST_INT_P (otherops[2])
20249 || (INTVAL (otherops[2]) > -256
20250 && INTVAL (otherops[2]) < 256)))
20252 if (emit)
20253 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20255 else
20257 if (emit)
20259 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20260 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20262 if (count)
20263 *count = 2;
20266 break;
20268 case LABEL_REF:
20269 case CONST:
20270 /* We might be able to use ldrd %0, %1 here. However the range is
20271 different to ldr/adr, and it is broken on some ARMv7-M
20272 implementations. */
20273 /* Use the second register of the pair to avoid problematic
20274 overlap. */
20275 otherops[1] = operands[1];
20276 if (emit)
20277 output_asm_insn ("adr%?\t%0, %1", otherops);
20278 operands[1] = otherops[0];
20279 if (emit)
20281 if (can_ldrd)
20282 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20283 else
20284 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20287 if (count)
20288 *count = 2;
20289 break;
20291 /* ??? This needs checking for thumb2. */
20292 default:
20293 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20294 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20296 otherops[0] = operands[0];
20297 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20298 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20300 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20302 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20304 switch ((int) INTVAL (otherops[2]))
20306 case -8:
20307 if (emit)
20308 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20309 return "";
20310 case -4:
20311 if (TARGET_THUMB2)
20312 break;
20313 if (emit)
20314 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20315 return "";
20316 case 4:
20317 if (TARGET_THUMB2)
20318 break;
20319 if (emit)
20320 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20321 return "";
20324 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20325 operands[1] = otherops[0];
20326 if (can_ldrd
20327 && (REG_P (otherops[2])
20328 || TARGET_THUMB2
20329 || (CONST_INT_P (otherops[2])
20330 && INTVAL (otherops[2]) > -256
20331 && INTVAL (otherops[2]) < 256)))
20333 if (reg_overlap_mentioned_p (operands[0],
20334 otherops[2]))
20336 /* Swap base and index registers over to
20337 avoid a conflict. */
20338 std::swap (otherops[1], otherops[2]);
20340 /* If both registers conflict, it will usually
20341 have been fixed by a splitter. */
20342 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20343 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20345 if (emit)
20347 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20348 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20350 if (count)
20351 *count = 2;
20353 else
20355 otherops[0] = operands[0];
20356 if (emit)
20357 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20359 return "";
20362 if (CONST_INT_P (otherops[2]))
20364 if (emit)
20366 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20367 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20368 else
20369 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20372 else
20374 if (emit)
20375 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20378 else
20380 if (emit)
20381 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20384 if (count)
20385 *count = 2;
20387 if (can_ldrd)
20388 return "ldrd%?\t%0, [%1]";
20390 return "ldmia%?\t%1, %M0";
20392 else
20394 otherops[1] = adjust_address (operands[1], SImode, 4);
20395 /* Take care of overlapping base/data reg. */
20396 if (reg_mentioned_p (operands[0], operands[1]))
20398 if (emit)
20400 output_asm_insn ("ldr%?\t%0, %1", otherops);
20401 output_asm_insn ("ldr%?\t%0, %1", operands);
20403 if (count)
20404 *count = 2;
20407 else
20409 if (emit)
20411 output_asm_insn ("ldr%?\t%0, %1", operands);
20412 output_asm_insn ("ldr%?\t%0, %1", otherops);
20414 if (count)
20415 *count = 2;
20420 else
20422 /* Constraints should ensure this. */
20423 gcc_assert (code0 == MEM && code1 == REG);
20424 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20425 || (TARGET_ARM && TARGET_LDRD));
20427 /* For TARGET_ARM the first source register of an STRD
20428 must be even. This is usually the case for double-word
20429 values but user assembly constraints can force an odd
20430 starting register. */
20431 bool allow_strd = TARGET_LDRD
20432 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20433 switch (GET_CODE (XEXP (operands[0], 0)))
20435 case REG:
20436 if (emit)
20438 if (allow_strd)
20439 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20440 else
20441 output_asm_insn ("stm%?\t%m0, %M1", operands);
20443 break;
20445 case PRE_INC:
20446 gcc_assert (allow_strd);
20447 if (emit)
20448 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20449 break;
20451 case PRE_DEC:
20452 if (emit)
20454 if (allow_strd)
20455 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20456 else
20457 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20459 break;
20461 case POST_INC:
20462 if (emit)
20464 if (allow_strd)
20465 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20466 else
20467 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20469 break;
20471 case POST_DEC:
20472 gcc_assert (allow_strd);
20473 if (emit)
20474 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20475 break;
20477 case PRE_MODIFY:
20478 case POST_MODIFY:
20479 otherops[0] = operands[1];
20480 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20481 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20483 /* IWMMXT allows offsets larger than strd can handle,
20484 fix these up with a pair of str. */
20485 if (!TARGET_THUMB2
20486 && CONST_INT_P (otherops[2])
20487 && (INTVAL(otherops[2]) <= -256
20488 || INTVAL(otherops[2]) >= 256))
20490 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20492 if (emit)
20494 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20495 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20497 if (count)
20498 *count = 2;
20500 else
20502 if (emit)
20504 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20505 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20507 if (count)
20508 *count = 2;
20511 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20513 if (emit)
20514 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20516 else
20518 if (emit)
20519 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20521 break;
20523 case PLUS:
20524 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20525 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20527 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20529 case -8:
20530 if (emit)
20531 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20532 return "";
20534 case -4:
20535 if (TARGET_THUMB2)
20536 break;
20537 if (emit)
20538 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20539 return "";
20541 case 4:
20542 if (TARGET_THUMB2)
20543 break;
20544 if (emit)
20545 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20546 return "";
20549 if (allow_strd
20550 && (REG_P (otherops[2])
20551 || TARGET_THUMB2
20552 || (CONST_INT_P (otherops[2])
20553 && INTVAL (otherops[2]) > -256
20554 && INTVAL (otherops[2]) < 256)))
20556 otherops[0] = operands[1];
20557 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20558 if (emit)
20559 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20560 return "";
20562 /* Fall through */
20564 default:
20565 otherops[0] = adjust_address (operands[0], SImode, 4);
20566 otherops[1] = operands[1];
20567 if (emit)
20569 output_asm_insn ("str%?\t%1, %0", operands);
20570 output_asm_insn ("str%?\t%H1, %0", otherops);
20572 if (count)
20573 *count = 2;
20577 return "";
20580 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20581 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20583 const char *
20584 output_move_quad (rtx *operands)
20586 if (REG_P (operands[0]))
20588 /* Load, or reg->reg move. */
20590 if (MEM_P (operands[1]))
20592 switch (GET_CODE (XEXP (operands[1], 0)))
20594 case REG:
20595 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20596 break;
20598 case LABEL_REF:
20599 case CONST:
20600 output_asm_insn ("adr%?\t%0, %1", operands);
20601 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20602 break;
20604 default:
20605 gcc_unreachable ();
20608 else
20610 rtx ops[2];
20611 int dest, src, i;
20613 gcc_assert (REG_P (operands[1]));
20615 dest = REGNO (operands[0]);
20616 src = REGNO (operands[1]);
20618 /* This seems pretty dumb, but hopefully GCC won't try to do it
20619 very often. */
20620 if (dest < src)
20621 for (i = 0; i < 4; i++)
20623 ops[0] = gen_rtx_REG (SImode, dest + i);
20624 ops[1] = gen_rtx_REG (SImode, src + i);
20625 output_asm_insn ("mov%?\t%0, %1", ops);
20627 else
20628 for (i = 3; i >= 0; i--)
20630 ops[0] = gen_rtx_REG (SImode, dest + i);
20631 ops[1] = gen_rtx_REG (SImode, src + i);
20632 output_asm_insn ("mov%?\t%0, %1", ops);
20636 else
20638 gcc_assert (MEM_P (operands[0]));
20639 gcc_assert (REG_P (operands[1]));
20640 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20642 switch (GET_CODE (XEXP (operands[0], 0)))
20644 case REG:
20645 output_asm_insn ("stm%?\t%m0, %M1", operands);
20646 break;
20648 default:
20649 gcc_unreachable ();
20653 return "";
20656 /* Output a VFP load or store instruction. */
20658 const char *
20659 output_move_vfp (rtx *operands)
20661 rtx reg, mem, addr, ops[2];
20662 int load = REG_P (operands[0]);
20663 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20664 int sp = (!TARGET_VFP_FP16INST
20665 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20666 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20667 const char *templ;
20668 char buff[50];
20669 machine_mode mode;
20671 reg = operands[!load];
20672 mem = operands[load];
20674 mode = GET_MODE (reg);
20676 gcc_assert (REG_P (reg));
20677 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20678 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20679 || mode == SFmode
20680 || mode == DFmode
20681 || mode == HImode
20682 || mode == SImode
20683 || mode == DImode
20684 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20685 gcc_assert (MEM_P (mem));
20687 addr = XEXP (mem, 0);
20689 switch (GET_CODE (addr))
20691 case PRE_DEC:
20692 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20693 ops[0] = XEXP (addr, 0);
20694 ops[1] = reg;
20695 break;
20697 case POST_INC:
20698 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20699 ops[0] = XEXP (addr, 0);
20700 ops[1] = reg;
20701 break;
20703 default:
20704 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20705 ops[0] = reg;
20706 ops[1] = mem;
20707 break;
20710 sprintf (buff, templ,
20711 load ? "ld" : "st",
20712 dp ? "64" : sp ? "32" : "16",
20713 dp ? "P" : "",
20714 integer_p ? "\t%@ int" : "");
20715 output_asm_insn (buff, ops);
20717 return "";
20720 /* Output a Neon double-word or quad-word load or store, or a load
20721 or store for larger structure modes.
20723 WARNING: The ordering of elements is weird in big-endian mode,
20724 because the EABI requires that vectors stored in memory appear
20725 as though they were stored by a VSTM.
20726 GCC RTL defines element ordering based on in-memory order.
20727 This can be different from the architectural ordering of elements
20728 within a NEON register. The intrinsics defined in arm_neon.h use the
20729 NEON register element ordering, not the GCC RTL element ordering.
20731 For example, the in-memory ordering of a big-endian quadword
20732 vector with 16-bit elements when stored from register pair {d0,d1}
20733 will be (lowest address first, d0[N] is NEON register element N):
20735 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20737 When necessary, quadword registers (dN, dN+1) are moved to ARM
20738 registers from rN in the order:
20740 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20742 So that STM/LDM can be used on vectors in ARM registers, and the
20743 same memory layout will result as if VSTM/VLDM were used.
20745 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20746 possible, which allows use of appropriate alignment tags.
20747 Note that the choice of "64" is independent of the actual vector
20748 element size; this size simply ensures that the behavior is
20749 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20751 Due to limitations of those instructions, use of VST1.64/VLD1.64
20752 is not possible if:
20753 - the address contains PRE_DEC, or
20754 - the mode refers to more than 4 double-word registers
20756 In those cases, it would be possible to replace VSTM/VLDM by a
20757 sequence of instructions; this is not currently implemented since
20758 this is not certain to actually improve performance. */
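/* As a hedged illustration of the cases below (register numbers are
   examples only): a two-D-register NEON store to a plain register
   address is emitted as roughly "vst1.64 {d16-d17}, [r0:64]", whereas a
   mode needing more than four D registers, or any multi-register MVE
   mode, falls back to "vstmia"/"vldmia", and a PRE_DEC address always
   uses the "vstmdb"/"vldmdb" forms.  */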
20760 const char *
20761 output_move_neon (rtx *operands)
20763 rtx reg, mem, addr, ops[2];
20764 int regno, nregs, load = REG_P (operands[0]);
20765 const char *templ;
20766 char buff[50];
20767 machine_mode mode;
20769 reg = operands[!load];
20770 mem = operands[load];
20772 mode = GET_MODE (reg);
20774 gcc_assert (REG_P (reg));
20775 regno = REGNO (reg);
20776 nregs = REG_NREGS (reg) / 2;
20777 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20778 || NEON_REGNO_OK_FOR_QUAD (regno));
20779 gcc_assert ((TARGET_NEON
20780 && (VALID_NEON_DREG_MODE (mode)
20781 || VALID_NEON_QREG_MODE (mode)
20782 || VALID_NEON_STRUCT_MODE (mode)))
20783 || (TARGET_HAVE_MVE
20784 && (VALID_MVE_MODE (mode)
20785 || VALID_MVE_STRUCT_MODE (mode))));
20786 gcc_assert (MEM_P (mem));
20788 addr = XEXP (mem, 0);
20790 /* Strip off const from addresses like (const (plus (...))). */
20791 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20792 addr = XEXP (addr, 0);
20794 switch (GET_CODE (addr))
20796 case POST_INC:
20797 /* We have to use vldm / vstm for too-large modes. */
20798 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20800 templ = "v%smia%%?\t%%0!, %%h1";
20801 ops[0] = XEXP (addr, 0);
20803 else
20805 templ = "v%s1.64\t%%h1, %%A0";
20806 ops[0] = mem;
20808 ops[1] = reg;
20809 break;
20811 case PRE_DEC:
20812 /* We have to use vldm / vstm in this case, since there is no
20813 pre-decrement form of the vld1 / vst1 instructions. */
20814 templ = "v%smdb%%?\t%%0!, %%h1";
20815 ops[0] = XEXP (addr, 0);
20816 ops[1] = reg;
20817 break;
20819 case POST_MODIFY:
20820 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20821 gcc_unreachable ();
20823 case REG:
20824 /* We have to use vldm / vstm for too-large modes. */
20825 if (nregs > 1)
20827 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20828 templ = "v%smia%%?\t%%m0, %%h1";
20829 else
20830 templ = "v%s1.64\t%%h1, %%A0";
20832 ops[0] = mem;
20833 ops[1] = reg;
20834 break;
20836 /* Fall through. */
20837 case PLUS:
20838 if (GET_CODE (addr) == PLUS)
20839 addr = XEXP (addr, 0);
20840 /* Fall through. */
20841 case LABEL_REF:
20843 int i;
20844 int overlap = -1;
20845 for (i = 0; i < nregs; i++)
20847 /* Use DFmode for vldr/vstr. */
20848 ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * i);
20849 ops[1] = adjust_address_nv (mem, DFmode, 8 * i);
20850 if (reg_overlap_mentioned_p (ops[0], mem))
20852 gcc_assert (overlap == -1);
20853 overlap = i;
20855 else
20857 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20858 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20859 else
20860 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20861 output_asm_insn (buff, ops);
20864 if (overlap != -1)
20866 ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * overlap);
20867 ops[1] = adjust_address_nv (mem, DFmode, 8 * overlap);
20868 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20869 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20870 else
20871 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20872 output_asm_insn (buff, ops);
20875 return "";
20878 default:
20879 gcc_unreachable ();
20882 sprintf (buff, templ, load ? "ld" : "st");
20883 output_asm_insn (buff, ops);
20885 return "";
20888 /* Compute and return the length of neon_mov<mode>, where <mode> is one of
20889 VSTRUCT modes: EI, OI, CI or XI for Neon, and V2x16QI, V2x8HI, V2x4SI,
20890 V2x8HF, V2x4SF, V4x16QI, V4x8HI, V4x4SI, V4x8HF, V4x4SF for MVE. */
20891 int
20892 arm_attr_length_move_neon (rtx_insn *insn)
20894 rtx reg, mem, addr;
20895 int load;
20896 machine_mode mode;
20898 extract_insn_cached (insn);
20900 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20902 mode = GET_MODE (recog_data.operand[0]);
20903 switch (mode)
20905 case E_EImode:
20906 case E_OImode:
20907 case E_V2x16QImode:
20908 case E_V2x8HImode:
20909 case E_V2x4SImode:
20910 case E_V2x8HFmode:
20911 case E_V2x4SFmode:
20912 return 8;
20913 case E_CImode:
20914 return 12;
20915 case E_XImode:
20916 case E_V4x16QImode:
20917 case E_V4x8HImode:
20918 case E_V4x4SImode:
20919 case E_V4x8HFmode:
20920 case E_V4x4SFmode:
20921 return 16;
20922 default:
20923 gcc_unreachable ();
20927 load = REG_P (recog_data.operand[0]);
20928 reg = recog_data.operand[!load];
20929 mem = recog_data.operand[load];
20931 gcc_assert (MEM_P (mem));
20933 addr = XEXP (mem, 0);
20935 /* Strip off const from addresses like (const (plus (...))). */
20936 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20937 addr = XEXP (addr, 0);
20939 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20941 int insns = REG_NREGS (reg) / 2;
20942 return insns * 4;
20944 else
20945 return 4;
20948 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20949 return zero. */
20951 int
20952 arm_address_offset_is_imm (rtx_insn *insn)
20954 rtx mem, addr;
20956 extract_insn_cached (insn);
20958 if (REG_P (recog_data.operand[0]))
20959 return 0;
20961 mem = recog_data.operand[0];
20963 gcc_assert (MEM_P (mem));
20965 addr = XEXP (mem, 0);
20967 if (REG_P (addr)
20968 || (GET_CODE (addr) == PLUS
20969 && REG_P (XEXP (addr, 0))
20970 && CONST_INT_P (XEXP (addr, 1))))
20971 return 1;
20972 else
20973 return 0;
20976 /* Output an ADD r, s, #n where n may be too big for one instruction.
20977 If adding zero to one register, output nothing. */
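/* Hedged example of the expansion (register names are arbitrary):
   0x10004 is not a valid single ARM immediate, so adding it to r1 emits
   "add r0, r1, #4" followed by "add r0, r0, #65536"; for -0x10004 the
   same two steps are emitted with "sub".  Adding zero when source and
   destination are the same register emits nothing.  */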
20978 const char *
20979 output_add_immediate (rtx *operands)
20981 HOST_WIDE_INT n = INTVAL (operands[2]);
20983 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20985 if (n < 0)
20986 output_multi_immediate (operands,
20987 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20988 -n);
20989 else
20990 output_multi_immediate (operands,
20991 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20995 return "";
20998 /* Output a multiple immediate operation.
20999 OPERANDS is the vector of operands referred to in the output patterns.
21000 INSTR1 is the output pattern to use for the first constant.
21001 INSTR2 is the output pattern to use for subsequent constants.
21002 IMMED_OP is the index of the constant slot in OPERANDS.
21003 N is the constant value. */
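/* Sketch of the splitting rule (the constant below is a worked example,
   not a special case): the loop scans the 32-bit constant two bits at a
   time and, whenever it finds a set bit, peels off the 8-bit field
   starting at that even position -- always a legal rotated immediate --
   then skips past it.  For instance 0x12345678 is emitted in four
   steps: #0x278, #0x5400, #0x2340000 and #0x10000000.  */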
21004 static const char *
21005 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
21006 int immed_op, HOST_WIDE_INT n)
21008 #if HOST_BITS_PER_WIDE_INT > 32
21009 n &= 0xffffffff;
21010 #endif
21012 if (n == 0)
21014 /* Quick and easy output. */
21015 operands[immed_op] = const0_rtx;
21016 output_asm_insn (instr1, operands);
21018 else
21020 int i;
21021 const char * instr = instr1;
21023 /* Note that n is never zero here (which would give no output). */
21024 for (i = 0; i < 32; i += 2)
21026 if (n & (3 << i))
21028 operands[immed_op] = GEN_INT (n & (255 << i));
21029 output_asm_insn (instr, operands);
21030 instr = instr2;
21031 i += 6;
21036 return "";
21039 /* Return the name of a shifter operation. */
21040 static const char *
21041 arm_shift_nmem(enum rtx_code code)
21043 switch (code)
21045 case ASHIFT:
21046 return ARM_LSL_NAME;
21048 case ASHIFTRT:
21049 return "asr";
21051 case LSHIFTRT:
21052 return "lsr";
21054 case ROTATERT:
21055 return "ror";
21057 default:
21058 abort();
21062 /* Return the appropriate ARM instruction for the operation code.
21063 The returned result should not be overwritten. OP is the rtx of the
21064 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
21065 was shifted. */
21066 const char *
21067 arithmetic_instr (rtx op, int shift_first_arg)
21069 switch (GET_CODE (op))
21071 case PLUS:
21072 return "add";
21074 case MINUS:
21075 return shift_first_arg ? "rsb" : "sub";
21077 case IOR:
21078 return "orr";
21080 case XOR:
21081 return "eor";
21083 case AND:
21084 return "and";
21086 case ASHIFT:
21087 case ASHIFTRT:
21088 case LSHIFTRT:
21089 case ROTATERT:
21090 return arm_shift_nmem(GET_CODE(op));
21092 default:
21093 gcc_unreachable ();
21097 /* Ensure valid constant shifts and return the appropriate shift mnemonic
21098 for the operation code. The returned result should not be overwritten.
21099 OP is the rtx code of the shift.
21100 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
21101 constant shift amount if the shift is by a constant. */
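/* Worked example (illustrative): (mult x 8) is printed as "lsl" with
   *AMOUNTP set to 3, since 8 is 2^3, while (rotate x 5) is rewritten as
   a rotate-right and printed as "ror" with *AMOUNTP set to 27.  */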
21102 static const char *
21103 shift_op (rtx op, HOST_WIDE_INT *amountp)
21105 const char * mnem;
21106 enum rtx_code code = GET_CODE (op);
21108 switch (code)
21110 case ROTATE:
21111 if (!CONST_INT_P (XEXP (op, 1)))
21113 output_operand_lossage ("invalid shift operand");
21114 return NULL;
21117 code = ROTATERT;
21118 *amountp = 32 - INTVAL (XEXP (op, 1));
21119 mnem = "ror";
21120 break;
21122 case ASHIFT:
21123 case ASHIFTRT:
21124 case LSHIFTRT:
21125 case ROTATERT:
21126 mnem = arm_shift_nmem(code);
21127 if (CONST_INT_P (XEXP (op, 1)))
21129 *amountp = INTVAL (XEXP (op, 1));
21131 else if (REG_P (XEXP (op, 1)))
21133 *amountp = -1;
21134 return mnem;
21136 else
21138 output_operand_lossage ("invalid shift operand");
21139 return NULL;
21141 break;
21143 case MULT:
21144 /* We never have to worry about the amount being other than a
21145 power of 2, since this case can never be reloaded from a reg. */
21146 if (!CONST_INT_P (XEXP (op, 1)))
21148 output_operand_lossage ("invalid shift operand");
21149 return NULL;
21152 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21154 /* Amount must be a power of two. */
21155 if (*amountp & (*amountp - 1))
21157 output_operand_lossage ("invalid shift operand");
21158 return NULL;
21161 *amountp = exact_log2 (*amountp);
21162 gcc_assert (IN_RANGE (*amountp, 0, 31));
21163 return ARM_LSL_NAME;
21165 default:
21166 output_operand_lossage ("invalid shift operand");
21167 return NULL;
21170 /* This is not 100% correct, but follows from the desire to merge
21171 multiplication by a power of 2 with the recognizer for a
21172 shift. >=32 is not a valid shift for "lsl", so we must try and
21173 output a shift that produces the correct arithmetical result.
21174 Using lsr #32 is identical except for the fact that the carry bit
21175 is not set correctly if we set the flags; but we never use the
21176 carry bit from such an operation, so we can ignore that. */
21177 if (code == ROTATERT)
21178 /* Rotate is just modulo 32. */
21179 *amountp &= 31;
21180 else if (*amountp != (*amountp & 31))
21182 if (code == ASHIFT)
21183 mnem = "lsr";
21184 *amountp = 32;
21187 /* Shifts of 0 are no-ops. */
21188 if (*amountp == 0)
21189 return NULL;
21191 return mnem;
21194 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21195 because /bin/as is horribly restrictive. The judgement about
21196 whether or not each character is 'printable' (and can be output as
21197 is) or not (and must be printed with an octal escape) must be made
21198 with reference to the *host* character set -- the situation is
21199 similar to that discussed in the comments above pp_c_char in
21200 c-pretty-print.cc. */
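/* For instance (hypothetical input): the bytes 'O', 'K', '\n' come out
   as   .ascii "OK\012"   -- printable characters are copied, with '\\'
   and '"' backslash-escaped, everything else becomes a three-digit
   octal escape, and a fresh .ascii directive is started once roughly
   MAX_ASCII_LEN characters have been emitted on the current one.  */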
21202 #define MAX_ASCII_LEN 51
21204 void
21205 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21207 int i;
21208 int len_so_far = 0;
21210 fputs ("\t.ascii\t\"", stream);
21212 for (i = 0; i < len; i++)
21214 int c = p[i];
21216 if (len_so_far >= MAX_ASCII_LEN)
21218 fputs ("\"\n\t.ascii\t\"", stream);
21219 len_so_far = 0;
21222 if (ISPRINT (c))
21224 if (c == '\\' || c == '\"')
21226 putc ('\\', stream);
21227 len_so_far++;
21229 putc (c, stream);
21230 len_so_far++;
21232 else
21234 fprintf (stream, "\\%03o", c);
21235 len_so_far += 4;
21239 fputs ("\"\n", stream);
21243 /* Compute the register save mask for registers 0 through 12
21244 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
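/* Illustrative example (simplified; assumes the ARM-state hard frame
   pointer r11): for an ordinary function that uses r4 and r7 and needs
   a frame pointer, the normal path below yields
   (1 << 4) | (1 << 7) | (1 << 11).  FIQ handlers only scan r0-r7,
   because r8-r12 are banked.  */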
21246 static unsigned long
21247 arm_compute_save_reg0_reg12_mask (void)
21249 unsigned long func_type = arm_current_func_type ();
21250 unsigned long save_reg_mask = 0;
21251 unsigned int reg;
21253 if (IS_INTERRUPT (func_type))
21255 unsigned int max_reg;
21256 /* Interrupt functions must not corrupt any registers,
21257 even call clobbered ones. If this is a leaf function
21258 we can just examine the registers used by the RTL, but
21259 otherwise we have to assume that whatever function is
21260 called might clobber anything, and so we have to save
21261 all the call-clobbered registers as well. */
21262 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21263 /* FIQ handlers have registers r8 - r12 banked, so
21264 we only need to check r0 - r7. Normal ISRs only
21265 bank r14 and r15, so we must check up to r12.
21266 r13 is the stack pointer which is always preserved,
21267 so we do not need to consider it here. */
21268 max_reg = 7;
21269 else
21270 max_reg = 12;
21272 for (reg = 0; reg <= max_reg; reg++)
21273 if (reg_needs_saving_p (reg))
21274 save_reg_mask |= (1 << reg);
21276 /* Also save the pic base register if necessary. */
21277 if (PIC_REGISTER_MAY_NEED_SAVING
21278 && crtl->uses_pic_offset_table)
21279 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21281 else if (IS_VOLATILE(func_type))
21283 /* For noreturn functions we historically omitted register saves
21284 altogether. However this really messes up debugging. As a
21285 compromise save just the frame pointers. Combined with the link
21286 register saved elsewhere this should be sufficient to get
21287 a backtrace. */
21288 if (frame_pointer_needed)
21289 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21290 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21291 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21292 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21293 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21295 else
21297 /* In the normal case we only need to save those registers
21298 which are call saved and which are used by this function. */
21299 for (reg = 0; reg <= 11; reg++)
21300 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21301 save_reg_mask |= (1 << reg);
21303 /* Handle the frame pointer as a special case. */
21304 if (frame_pointer_needed)
21305 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21307 /* If we aren't loading the PIC register,
21308 don't stack it even though it may be live. */
21309 if (PIC_REGISTER_MAY_NEED_SAVING
21310 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21311 || crtl->uses_pic_offset_table))
21312 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21314 /* The prologue will copy SP into R0, so save it. */
21315 if (IS_STACKALIGN (func_type))
21316 save_reg_mask |= 1;
21319 /* Save registers so the exception handler can modify them. */
21320 if (crtl->calls_eh_return)
21322 unsigned int i;
21324 for (i = 0; ; i++)
21326 reg = EH_RETURN_DATA_REGNO (i);
21327 if (reg == INVALID_REGNUM)
21328 break;
21329 save_reg_mask |= 1 << reg;
21333 return save_reg_mask;
21336 /* Return true if r3 is live at the start of the function. */
21338 static bool
21339 arm_r3_live_at_start_p (void)
21341 /* Just look at cfg info, which is still close enough to correct at this
21342 point. This gives false positives for broken functions that might use
21343 uninitialized data that happens to be allocated in r3, but who cares? */
21344 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21347 /* Compute the number of bytes used to store the static chain register on the
21348 stack, above the stack frame. We need to know this accurately to get the
21349 alignment of the rest of the stack frame correct. */
21351 static int
21352 arm_compute_static_chain_stack_bytes (void)
21354 /* Once the value is updated from the init value of -1, do not
21355 re-compute. */
21356 if (cfun->machine->static_chain_stack_bytes != -1)
21357 return cfun->machine->static_chain_stack_bytes;
21359 /* See the defining assertion in arm_expand_prologue. */
21360 if (IS_NESTED (arm_current_func_type ())
21361 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21362 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21363 || flag_stack_clash_protection)
21364 && !df_regs_ever_live_p (LR_REGNUM)))
21365 && arm_r3_live_at_start_p ()
21366 && crtl->args.pretend_args_size == 0)
21367 return 4;
21369 return 0;
21372 /* Compute a bit mask of which core registers need to be
21373 saved on the stack for the current function.
21374 This is used by arm_compute_frame_layout, which may add extra registers. */
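/* Sketch of the common cases (simplified, not exhaustive): with
   TARGET_APCS_FRAME and a frame pointer the mask always contains fp,
   ip, lr and pc; otherwise it is the r0-r12 mask computed above, plus
   LR whenever LR is live, or when pushing it lets a size-optimized
   epilogue simply pop straight into the PC.  */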
21376 static unsigned long
21377 arm_compute_save_core_reg_mask (void)
21379 unsigned int save_reg_mask = 0;
21380 unsigned long func_type = arm_current_func_type ();
21381 unsigned int reg;
21383 if (IS_NAKED (func_type))
21384 /* This should never really happen. */
21385 return 0;
21387 /* If we are creating a stack frame, then we must save the frame pointer,
21388 IP (which will hold the old stack pointer), LR and the PC. */
21389 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21390 save_reg_mask |=
21391 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21392 | (1 << IP_REGNUM)
21393 | (1 << LR_REGNUM)
21394 | (1 << PC_REGNUM);
21396 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21398 if (arm_current_function_pac_enabled_p ())
21399 save_reg_mask |= 1 << IP_REGNUM;
21401 /* Decide if we need to save the link register.
21402 Interrupt routines have their own banked link register,
21403 so they never need to save it.
21404 Otherwise if we do not use the link register we do not need to save
21405 it. If we are pushing other registers onto the stack however, we
21406 can save an instruction in the epilogue by pushing the link register
21407 now and then popping it back into the PC. This incurs extra memory
21408 accesses though, so we only do it when optimizing for size, and only
21409 if we know that we will not need a fancy return sequence. */
21410 if (df_regs_ever_live_p (LR_REGNUM)
21411 || (save_reg_mask
21412 && optimize_size
21413 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21414 && !crtl->tail_call_emit
21415 && !crtl->calls_eh_return))
21416 save_reg_mask |= 1 << LR_REGNUM;
21418 if (cfun->machine->lr_save_eliminated)
21419 save_reg_mask &= ~ (1 << LR_REGNUM);
21421 if (TARGET_REALLY_IWMMXT
21422 && ((bit_count (save_reg_mask)
21423 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21424 arm_compute_static_chain_stack_bytes())
21425 ) % 2) != 0)
21427 /* The total number of registers that are going to be pushed
21428 onto the stack is odd. We need to ensure that the stack
21429 is 64-bit aligned before we start to save iWMMXt registers,
21430 and also before we start to create locals. (A local variable
21431 might be a double or long long which we will load/store using
21432 an iWMMXt instruction). Therefore we need to push another
21433 ARM register, so that the stack will be 64-bit aligned. We
21434 try to avoid using the arg registers (r0 - r3) as they might be
21435 used to pass values in a tail call. */
21436 for (reg = 4; reg <= 12; reg++)
21437 if ((save_reg_mask & (1 << reg)) == 0)
21438 break;
21440 if (reg <= 12)
21441 save_reg_mask |= (1 << reg);
21442 else
21444 cfun->machine->sibcall_blocked = 1;
21445 save_reg_mask |= (1 << 3);
21449 /* We may need to push an additional register for use initializing the
21450 PIC base register. */
21451 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21452 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21454 reg = thumb_find_work_register (1 << 4);
21455 if (!call_used_or_fixed_reg_p (reg))
21456 save_reg_mask |= (1 << reg);
21459 return save_reg_mask;
21462 /* Compute a bit mask of which core registers need to be
21463 saved on the stack for the current function. */
21464 static unsigned long
21465 thumb1_compute_save_core_reg_mask (void)
21467 unsigned long mask;
21468 unsigned reg;
21470 mask = 0;
21471 for (reg = 0; reg < 12; reg ++)
21472 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21473 mask |= 1 << reg;
21475 /* Handle the frame pointer as a special case. */
21476 if (frame_pointer_needed)
21477 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21479 if (flag_pic
21480 && !TARGET_SINGLE_PIC_BASE
21481 && arm_pic_register != INVALID_REGNUM
21482 && crtl->uses_pic_offset_table)
21483 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21485 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21486 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21487 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21489 /* LR will also be pushed if any lo regs are pushed. */
21490 if (mask & 0xff || thumb_force_lr_save ())
21491 mask |= (1 << LR_REGNUM);
21493 bool call_clobbered_scratch
21494 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21495 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21497 /* Make sure we have a low work register if we need one. We will
21498 need one if we are going to push a high register, but we are not
21499 currently intending to push a low register. However if both the
21500 prologue and epilogue have a spare call-clobbered low register,
21501 then we won't need to find an additional work register. It does
21502 not need to be the same register in the prologue and
21503 epilogue. */
21504 if ((mask & 0xff) == 0
21505 && !call_clobbered_scratch
21506 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21508 /* Use thumb_find_work_register to choose which register
21509 we will use. If the register is live then we will
21510 have to push it. Use LAST_LO_REGNUM as our fallback
21511 choice for the register to select. */
21512 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21513 /* Make sure the register returned by thumb_find_work_register is
21514 not part of the return value. */
21515 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21516 reg = LAST_LO_REGNUM;
21518 if (callee_saved_reg_p (reg))
21519 mask |= 1 << reg;
21522 /* The 504 below is 8 bytes less than 512 because there are two possible
21523 alignment words. We can't tell here if they will be present or not so we
21524 have to play it safe and assume that they are. */
21525 if ((CALLER_INTERWORKING_SLOT_SIZE +
21526 ROUND_UP_WORD (get_frame_size ()) +
21527 crtl->outgoing_args_size) >= 504)
21529 /* This is the same as the code in thumb1_expand_prologue() which
21530 determines which register to use for stack decrement. */
21531 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21532 if (mask & (1 << reg))
21533 break;
21535 if (reg > LAST_LO_REGNUM)
21537 /* Make sure we have a register available for stack decrement. */
21538 mask |= 1 << LAST_LO_REGNUM;
21542 return mask;
21545 /* Return the number of bytes required to save VFP registers. */
21546 static int
21547 arm_get_vfp_saved_size (void)
21549 unsigned int regno;
21550 int count;
21551 int saved;
21553 saved = 0;
21554 /* Space for saved VFP registers. */
21555 if (TARGET_VFP_BASE)
21557 count = 0;
21558 for (regno = FIRST_VFP_REGNUM;
21559 regno < LAST_VFP_REGNUM;
21560 regno += 2)
21562 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21564 if (count > 0)
21566 /* Workaround ARM10 VFPr1 bug. */
21567 if (count == 2 && !arm_arch6)
21568 count++;
21569 saved += count * 8;
21571 count = 0;
21573 else
21574 count++;
21576 if (count > 0)
21578 if (count == 2 && !arm_arch6)
21579 count++;
21580 saved += count * 8;
21583 return saved;
21587 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21588 everything bar the final return instruction. If SIMPLE_RETURN is true,
21589 then do not output the epilogue, because it has already been emitted in RTL.
21591 Note: do not forget to update length attribute of corresponding insn pattern
21592 when changing assembly output (eg. length attribute of
21593 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21594 register clearing sequences). */
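/* Typical results, for illustration only (register lists are examples):
   a normal function whose saved registers include LR returns with
   something like "pop {r4, r5, pc}"; an interrupt handler instead uses
   "ldmfd sp!, {r0, r1, pc}^" so that the CPSR is restored from the
   SPSR, and an ISR/FIQ return with no registers to pop is
   "subs pc, lr, #4".  */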
21595 const char *
21596 output_return_instruction (rtx operand, bool really_return, bool reverse,
21597 bool simple_return)
21599 char conditional[10];
21600 char instr[100];
21601 unsigned reg;
21602 unsigned long live_regs_mask;
21603 unsigned long func_type;
21604 arm_stack_offsets *offsets;
21606 func_type = arm_current_func_type ();
21608 if (IS_NAKED (func_type))
21609 return "";
21611 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21613 /* If this function was declared non-returning, and we have
21614 found a tail call, then we have to trust that the called
21615 function won't return. */
21616 if (really_return)
21618 rtx ops[2];
21620 /* Otherwise, trap an attempted return by aborting. */
21621 ops[0] = operand;
21622 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21623 : "abort");
21624 assemble_external_libcall (ops[1]);
21625 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21628 return "";
21631 gcc_assert (!cfun->calls_alloca || really_return);
21633 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21635 cfun->machine->return_used_this_function = 1;
21637 offsets = arm_get_frame_offsets ();
21638 live_regs_mask = offsets->saved_regs_mask;
21640 if (!simple_return && live_regs_mask)
21642 const char * return_reg;
21644 /* If we do not have any special requirements for function exit
21645 (e.g. interworking) then we can load the return address
21646 directly into the PC. Otherwise we must load it into LR. */
21647 if (really_return
21648 && !IS_CMSE_ENTRY (func_type)
21649 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21650 return_reg = reg_names[PC_REGNUM];
21651 else
21652 return_reg = reg_names[LR_REGNUM];
21654 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21656 /* There are three possible reasons for the IP register
21657 being saved: 1) a stack frame was created, in which case
21658 IP contains the old stack pointer, or 2) an ISR routine
21659 corrupted it, or 3) it was saved to align the stack on
21660 iWMMXt. In case 1, restore IP into SP, otherwise just
21661 restore IP. */
21662 if (frame_pointer_needed)
21664 live_regs_mask &= ~ (1 << IP_REGNUM);
21665 live_regs_mask |= (1 << SP_REGNUM);
21667 else
21668 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21671 /* On some ARM architectures it is faster to use LDR rather than
21672 LDM to load a single register. On other architectures, the
21673 cost is the same. In 26 bit mode, or for exception handlers,
21674 we have to use LDM to load the PC so that the CPSR is also
21675 restored. */
21676 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21677 if (live_regs_mask == (1U << reg))
21678 break;
21680 if (reg <= LAST_ARM_REGNUM
21681 && (reg != LR_REGNUM
21682 || ! really_return
21683 || ! IS_INTERRUPT (func_type)))
21685 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21686 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21688 else
21690 char *p;
21691 int first = 1;
21693 /* Generate the load multiple instruction to restore the
21694 registers. Note we can get here, even if
21695 frame_pointer_needed is true, but only if sp already
21696 points to the base of the saved core registers. */
21697 if (live_regs_mask & (1 << SP_REGNUM))
21699 unsigned HOST_WIDE_INT stack_adjust;
21701 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21702 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21704 if (stack_adjust && arm_arch5t && TARGET_ARM)
21705 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21706 else
21708 /* If we can't use ldmib (SA110 bug),
21709 then try to pop r3 instead. */
21710 if (stack_adjust)
21711 live_regs_mask |= 1 << 3;
21713 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21716 /* For interrupt returns we have to use an LDM rather than
21717 a POP so that we can use the exception return variant. */
21718 else if (IS_INTERRUPT (func_type))
21719 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21720 else
21721 sprintf (instr, "pop%s\t{", conditional);
21723 p = instr + strlen (instr);
21725 for (reg = 0; reg <= SP_REGNUM; reg++)
21726 if (live_regs_mask & (1 << reg))
21728 int l = strlen (reg_names[reg]);
21730 if (first)
21731 first = 0;
21732 else
21734 memcpy (p, ", ", 2);
21735 p += 2;
21738 memcpy (p, "%|", 2);
21739 memcpy (p + 2, reg_names[reg], l);
21740 p += l + 2;
21743 if (live_regs_mask & (1 << LR_REGNUM))
21745 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21746 /* If returning from an interrupt, restore the CPSR. */
21747 if (IS_INTERRUPT (func_type))
21748 strcat (p, "^");
21750 else
21751 strcpy (p, "}");
21754 output_asm_insn (instr, & operand);
21756 /* See if we need to generate an extra instruction to
21757 perform the actual function return. */
21758 if (really_return
21759 && func_type != ARM_FT_INTERWORKED
21760 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21762 /* The return has already been handled
21763 by loading the LR into the PC. */
21764 return "";
21768 if (really_return)
21770 switch ((int) ARM_FUNC_TYPE (func_type))
21772 case ARM_FT_ISR:
21773 case ARM_FT_FIQ:
21774 /* ??? This is wrong for unified assembly syntax. */
21775 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21776 break;
21778 case ARM_FT_INTERWORKED:
21779 gcc_assert (arm_arch5t || arm_arch4t);
21780 sprintf (instr, "bx%s\t%%|lr", conditional);
21781 break;
21783 case ARM_FT_EXCEPTION:
21784 /* ??? This is wrong for unified assembly syntax. */
21785 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21786 break;
21788 default:
21789 if (IS_CMSE_ENTRY (func_type))
21791 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21792 emitted by cmse_nonsecure_entry_clear_before_return () and the
21793 VSTR/VLDR instructions in the prologue and epilogue. */
21794 if (!TARGET_HAVE_FPCXT_CMSE)
21796 /* Check if we have to clear the 'GE bits' which is only used if
21797 parallel add and subtraction instructions are available. */
21798 if (TARGET_INT_SIMD)
21799 snprintf (instr, sizeof (instr),
21800 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21801 else
21802 snprintf (instr, sizeof (instr),
21803 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21805 output_asm_insn (instr, & operand);
21806 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21807 care of it. */
21808 if (TARGET_HARD_FLOAT)
21810 /* Clear the cumulative exception-status bits (0-4,7) and
21811 the condition code bits (28-31) of the FPSCR. We need
21812 to remember to clear the first scratch register used
21813 (IP) and save and restore the second (r4).
21815 Important note: the length of the
21816 thumb2_cmse_entry_return insn pattern must account for
21817 the size of the below instructions. */
21818 output_asm_insn ("push\t{%|r4}", & operand);
21819 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21820 output_asm_insn ("movw\t%|r4, #65376", & operand);
21821 output_asm_insn ("movt\t%|r4, #4095", & operand);
21822 output_asm_insn ("and\t%|ip, %|r4", & operand);
21823 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21824 output_asm_insn ("pop\t{%|r4}", & operand);
21825 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21828 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21830 /* Use bx if it's available. */
21831 else if (arm_arch5t || arm_arch4t)
21832 sprintf (instr, "bx%s\t%%|lr", conditional);
21833 else
21834 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21835 break;
21838 output_asm_insn (instr, & operand);
21841 return "";
21844 /* Output in FILE asm statements needed to declare the NAME of the function
21845 defined by its DECL node. */
21847 void
21848 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21850 size_t cmse_name_len;
21851 char *cmse_name = 0;
21852 char cmse_prefix[] = "__acle_se_";
21854 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21855 extra function label for each function with the 'cmse_nonsecure_entry'
21856 attribute. This extra function label should be prepended with
21857 '__acle_se_', telling the linker that it needs to create secure gateway
21858 veneers for this function. */
21859 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21860 DECL_ATTRIBUTES (decl)))
21862 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21863 cmse_name = XALLOCAVEC (char, cmse_name_len);
21864 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21865 targetm.asm_out.globalize_label (file, cmse_name);
21867 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21868 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21871 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21872 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21873 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21874 ASM_OUTPUT_FUNCTION_LABEL (file, name, decl);
21876 if (cmse_name)
21877 ASM_OUTPUT_LABEL (file, cmse_name);
21879 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21882 /* Write the function name into the code section, directly preceding
21883 the function prologue.
21885 Code will be output similar to this:
21887 .ascii "arm_poke_function_name", 0
21888 .align
21890 .word 0xff000000 + (t1 - t0)
21891 arm_poke_function_name
21892 mov ip, sp
21893 stmfd sp!, {fp, ip, lr, pc}
21894 sub fp, ip, #4
21896 When performing a stack backtrace, code can inspect the value
21897 of 'pc' stored at 'fp' + 0. If the trace function then looks
21898 at location pc - 12 and the top 8 bits are set, then we know
21899 that there is a function name embedded immediately preceding this
21900 location, whose length is ((pc[-3]) & ~0xff000000).
21902 We assume that pc is declared as a pointer to an unsigned long.
21904 It is of no benefit to output the function name if we are assembling
21905 a leaf function. These function types will not contain a stack
21906 backtrace structure, therefore it is not possible to determine the
21907 function name. */
21908 void
21909 arm_poke_function_name (FILE *stream, const char *name)
21911 unsigned long alignlength;
21912 unsigned long length;
21913 rtx x;
21915 length = strlen (name) + 1;
21916 alignlength = ROUND_UP_WORD (length);
21918 ASM_OUTPUT_ASCII (stream, name, length);
21919 ASM_OUTPUT_ALIGN (stream, 2);
21920 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21921 assemble_aligned_integer (UNITS_PER_WORD, x);
21924 /* Place some comments into the assembler stream
21925 describing the current function. */
21926 static void
21927 arm_output_function_prologue (FILE *f)
21929 unsigned long func_type;
21931 /* Sanity check. */
21932 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21934 func_type = arm_current_func_type ();
21936 switch ((int) ARM_FUNC_TYPE (func_type))
21938 default:
21939 case ARM_FT_NORMAL:
21940 break;
21941 case ARM_FT_INTERWORKED:
21942 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21943 break;
21944 case ARM_FT_ISR:
21945 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21946 break;
21947 case ARM_FT_FIQ:
21948 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21949 break;
21950 case ARM_FT_EXCEPTION:
21951 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21952 break;
21955 if (IS_NAKED (func_type))
21956 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21958 if (IS_VOLATILE (func_type))
21959 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21961 if (IS_NESTED (func_type))
21962 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21963 if (IS_STACKALIGN (func_type))
21964 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21965 if (IS_CMSE_ENTRY (func_type))
21966 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21968 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21969 (HOST_WIDE_INT) crtl->args.size,
21970 crtl->args.pretend_args_size,
21971 (HOST_WIDE_INT) get_frame_size ());
21973 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21974 frame_pointer_needed,
21975 cfun->machine->uses_anonymous_args);
21977 if (cfun->machine->lr_save_eliminated)
21978 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21980 if (crtl->calls_eh_return)
21981 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21985 static void
21986 arm_output_function_epilogue (FILE *)
21988 arm_stack_offsets *offsets;
21990 if (TARGET_THUMB1)
21992 int regno;
21994 /* Emit any call-via-reg trampolines that are needed for v4t support
21995 of call_reg and call_value_reg type insns. */
21996 for (regno = 0; regno < LR_REGNUM; regno++)
21998 rtx label = cfun->machine->call_via[regno];
22000 if (label != NULL)
22002 switch_to_section (function_section (current_function_decl));
22003 targetm.asm_out.internal_label (asm_out_file, "L",
22004 CODE_LABEL_NUMBER (label));
22005 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22009 /* ??? Probably not safe to set this here, since it assumes that a
22010 function will be emitted as assembly immediately after we generate
22011 RTL for it. This does not happen for inline functions. */
22012 cfun->machine->return_used_this_function = 0;
22014 else /* TARGET_32BIT */
22016 /* We need to take into account any stack-frame rounding. */
22017 offsets = arm_get_frame_offsets ();
22019 gcc_assert (!use_return_insn (FALSE, NULL)
22020 || (cfun->machine->return_used_this_function != 0)
22021 || offsets->saved_regs == offsets->outgoing_args
22022 || frame_pointer_needed);
22026 /* Generate and emit a sequence of insns equivalent to PUSH, but using
22027 STR and STRD. If an even number of registers is being pushed, an STRD
22028 pattern is created for each register pair. If an odd number of
22029 registers is pushed, an initial STR is emitted, followed by
22030 as many STRD instructions as are needed. This works best when the
22031 stack is initially 64-bit aligned (the normal case), since it
22032 ensures that each STRD is also 64-bit aligned. */
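/* Example of the intended shape (register numbers are arbitrary):
   pushing {r4, r5, r6} first emits "str r4, [sp, #-12]!", which keeps
   the following store dword-aligned and allocates all 12 bytes at once,
   and then "strd r5, r6, [sp, #4]" for the remaining pair.  */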
22033 static void
22034 thumb2_emit_strd_push (unsigned long saved_regs_mask)
22036 int num_regs = 0;
22037 int i;
22038 int regno;
22039 rtx par = NULL_RTX;
22040 rtx dwarf = NULL_RTX;
22041 rtx tmp;
22042 bool first = true;
22044 num_regs = bit_count (saved_regs_mask);
22046 /* Must be at least one register to save, and can't save SP or PC. */
22047 gcc_assert (num_regs > 0 && num_regs <= 14);
22048 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22049 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22051 /* Create sequence for DWARF info. All the frame-related data for
22052 debugging is held in this wrapper. */
22053 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22055 /* Describe the stack adjustment. */
22056 tmp = gen_rtx_SET (stack_pointer_rtx,
22057 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22058 RTX_FRAME_RELATED_P (tmp) = 1;
22059 XVECEXP (dwarf, 0, 0) = tmp;
22061 /* Find the first register. */
22062 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
22065 i = 0;
22067 /* If there's an odd number of registers to push, start off by
22068 pushing a single register. This ensures that subsequent strd
22069 operations are dword aligned (assuming that SP was originally
22070 64-bit aligned). */
22071 if ((num_regs & 1) != 0)
22073 rtx reg, mem, insn;
22075 reg = gen_rtx_REG (SImode, regno);
22076 if (num_regs == 1)
22077 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
22078 stack_pointer_rtx));
22079 else
22080 mem = gen_frame_mem (Pmode,
22081 gen_rtx_PRE_MODIFY
22082 (Pmode, stack_pointer_rtx,
22083 plus_constant (Pmode, stack_pointer_rtx,
22084 -4 * num_regs)));
22086 tmp = gen_rtx_SET (mem, reg);
22087 RTX_FRAME_RELATED_P (tmp) = 1;
22088 insn = emit_insn (tmp);
22089 RTX_FRAME_RELATED_P (insn) = 1;
22090 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22091 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
22092 RTX_FRAME_RELATED_P (tmp) = 1;
22093 i++;
22094 regno++;
22095 XVECEXP (dwarf, 0, i) = tmp;
22096 first = false;
22099 while (i < num_regs)
22100 if (saved_regs_mask & (1 << regno))
22102 rtx reg1, reg2, mem1, mem2;
22103 rtx tmp0, tmp1, tmp2;
22104 int regno2;
22106 /* Find the register to pair with this one. */
22107 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
22108 regno2++)
22111 reg1 = gen_rtx_REG (SImode, regno);
22112 reg2 = gen_rtx_REG (SImode, regno2);
22114 if (first)
22116 rtx insn;
22118 first = false;
22119 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22120 stack_pointer_rtx,
22121 -4 * num_regs));
22122 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22123 stack_pointer_rtx,
22124 -4 * (num_regs - 1)));
22125 tmp0 = gen_rtx_SET (stack_pointer_rtx,
22126 plus_constant (Pmode, stack_pointer_rtx,
22127 -4 * (num_regs)));
22128 tmp1 = gen_rtx_SET (mem1, reg1);
22129 tmp2 = gen_rtx_SET (mem2, reg2);
22130 RTX_FRAME_RELATED_P (tmp0) = 1;
22131 RTX_FRAME_RELATED_P (tmp1) = 1;
22132 RTX_FRAME_RELATED_P (tmp2) = 1;
22133 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22134 XVECEXP (par, 0, 0) = tmp0;
22135 XVECEXP (par, 0, 1) = tmp1;
22136 XVECEXP (par, 0, 2) = tmp2;
22137 insn = emit_insn (par);
22138 RTX_FRAME_RELATED_P (insn) = 1;
22139 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22141 else
22143 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22144 stack_pointer_rtx,
22145 4 * i));
22146 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22147 stack_pointer_rtx,
22148 4 * (i + 1)));
22149 tmp1 = gen_rtx_SET (mem1, reg1);
22150 tmp2 = gen_rtx_SET (mem2, reg2);
22151 RTX_FRAME_RELATED_P (tmp1) = 1;
22152 RTX_FRAME_RELATED_P (tmp2) = 1;
22153 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22154 XVECEXP (par, 0, 0) = tmp1;
22155 XVECEXP (par, 0, 1) = tmp2;
22156 emit_insn (par);
22159 /* Create unwind information. This is an approximation. */
22160 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22161 plus_constant (Pmode,
22162 stack_pointer_rtx,
22163 4 * i)),
22164 reg1);
22165 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22166 plus_constant (Pmode,
22167 stack_pointer_rtx,
22168 4 * (i + 1))),
22169 reg2);
22171 RTX_FRAME_RELATED_P (tmp1) = 1;
22172 RTX_FRAME_RELATED_P (tmp2) = 1;
22173 XVECEXP (dwarf, 0, i + 1) = tmp1;
22174 XVECEXP (dwarf, 0, i + 2) = tmp2;
22175 i += 2;
22176 regno = regno2 + 1;
22178 else
22179 regno++;
22181 return;
22184 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22185 whenever possible, otherwise it emits single-word stores. The first store
22186 also allocates stack space for all saved registers, using pre-indexed
22187 addressing with writeback. All other stores use offset addressing. If no STRD
22188 can be emitted, this function emits a sequence of single-word stores rather
22189 than an STM, because single-word stores give more freedom for
22190 scheduling and can be turned into an STM by peephole optimizations. */
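/* Illustrative expansion (hypothetical register set): for {r4, r5, r7}
   the first pair is stored with writeback, "strd r4, r5, [sp, #-12]!",
   which also reserves the whole 12-byte area, and the leftover register
   is then stored at its offset, "str r7, [sp, #8]".  */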
22191 static void
22192 arm_emit_strd_push (unsigned long saved_regs_mask)
22194 int num_regs = 0;
22195 int i, j, dwarf_index = 0;
22196 int offset = 0;
22197 rtx dwarf = NULL_RTX;
22198 rtx insn = NULL_RTX;
22199 rtx tmp, mem;
22201 /* TODO: More efficient code could be emitted by changing the
22202 layout, e.g., first push all pairs that can use STRD to keep the
22203 stack aligned, and then push all other registers. */
22204 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22205 if (saved_regs_mask & (1 << i))
22206 num_regs++;
22208 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22209 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22210 gcc_assert (num_regs > 0);
22212 /* Create sequence for DWARF info. */
22213 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22215 /* For dwarf info, we generate explicit stack update. */
22216 tmp = gen_rtx_SET (stack_pointer_rtx,
22217 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22218 RTX_FRAME_RELATED_P (tmp) = 1;
22219 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22221 /* Save registers. */
22222 offset = - 4 * num_regs;
22223 j = 0;
22224 while (j <= LAST_ARM_REGNUM)
22225 if (saved_regs_mask & (1 << j))
22227 if ((j % 2 == 0)
22228 && (saved_regs_mask & (1 << (j + 1))))
22230 /* Current register and previous register form register pair for
22231 which STRD can be generated. */
22232 if (offset < 0)
22234 /* Allocate stack space for all saved registers. */
22235 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22236 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22237 mem = gen_frame_mem (DImode, tmp);
22238 offset = 0;
22240 else if (offset > 0)
22241 mem = gen_frame_mem (DImode,
22242 plus_constant (Pmode,
22243 stack_pointer_rtx,
22244 offset));
22245 else
22246 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22248 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22249 RTX_FRAME_RELATED_P (tmp) = 1;
22250 tmp = emit_insn (tmp);
22252 /* Record the first store insn. */
22253 if (dwarf_index == 1)
22254 insn = tmp;
22256 /* Generate dwarf info. */
22257 mem = gen_frame_mem (SImode,
22258 plus_constant (Pmode,
22259 stack_pointer_rtx,
22260 offset));
22261 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22262 RTX_FRAME_RELATED_P (tmp) = 1;
22263 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22265 mem = gen_frame_mem (SImode,
22266 plus_constant (Pmode,
22267 stack_pointer_rtx,
22268 offset + 4));
22269 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22270 RTX_FRAME_RELATED_P (tmp) = 1;
22271 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22273 offset += 8;
22274 j += 2;
22276 else
22278 /* Emit a single word store. */
22279 if (offset < 0)
22281 /* Allocate stack space for all saved registers. */
22282 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22283 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22284 mem = gen_frame_mem (SImode, tmp);
22285 offset = 0;
22287 else if (offset > 0)
22288 mem = gen_frame_mem (SImode,
22289 plus_constant (Pmode,
22290 stack_pointer_rtx,
22291 offset));
22292 else
22293 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22295 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22296 RTX_FRAME_RELATED_P (tmp) = 1;
22297 tmp = emit_insn (tmp);
22299 /* Record the first store insn. */
22300 if (dwarf_index == 1)
22301 insn = tmp;
22303 /* Generate dwarf info. */
22304 mem = gen_frame_mem (SImode,
22305 plus_constant(Pmode,
22306 stack_pointer_rtx,
22307 offset));
22308 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22309 RTX_FRAME_RELATED_P (tmp) = 1;
22310 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22312 offset += 4;
22313 j += 1;
22316 else
22317 j++;
22319 /* Attach dwarf info to the first insn we generate. */
22320 gcc_assert (insn != NULL_RTX);
22321 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22322 RTX_FRAME_RELATED_P (insn) = 1;
22325 /* Generate and emit an insn that we will recognize as a push_multi.
22326 Unfortunately, since this insn does not reflect very well the actual
22327 semantics of the operation, we need to annotate the insn for the benefit
22328 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22329 MASK for registers that should be annotated for DWARF2 frame unwind
22330 information. */
22331 static rtx
22332 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22334 int num_regs = 0;
22335 int num_dwarf_regs = 0;
22336 int i, j;
22337 rtx par;
22338 rtx dwarf;
22339 int dwarf_par_index;
22340 rtx tmp, reg;
22342 /* We don't record the PC in the dwarf frame information. */
22343 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22345 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22347 if (mask & (1 << i))
22348 num_regs++;
22349 if (dwarf_regs_mask & (1 << i))
22350 num_dwarf_regs++;
22353 gcc_assert (num_regs && num_regs <= 16);
22354 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22356 /* For the body of the insn we are going to generate an UNSPEC in
22357 parallel with several USEs. This allows the insn to be recognized
22358 by the push_multi pattern in the arm.md file.
22360 The body of the insn looks something like this:
22362 (parallel [
22363 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22364 (const_int:SI <num>)))
22365 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22366 (use (reg:SI XX))
22367 (use (reg:SI YY))
22371 For the frame note however, we try to be more explicit and actually
22372 show each register being stored into the stack frame, plus a (single)
22373 decrement of the stack pointer. We do it this way in order to be
22374 friendly to the stack unwinding code, which only wants to see a single
22375 stack decrement per instruction. The RTL we generate for the note looks
22376 something like this:
22378 (sequence [
22379 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22380 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22381 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22382 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22386 FIXME:: In an ideal world the PRE_MODIFY would not exist and
22387 instead we'd have a parallel expression detailing all
22388 the stores to the various memory addresses so that debug
22389 information is more up-to-date. Remember however while writing
22390 this to take care of the constraints with the push instruction.
22392 Note also that this has to be taken care of for the VFP registers.
22394 For more see PR43399. */
22396 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22397 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22398 dwarf_par_index = 1;
22400 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22402 if (mask & (1 << i))
22404 /* NOTE: The Dwarf code emitter handles reg-reg copies correctly; in the
22405 following example the reg-reg copy of SP to the IP register is handled
22406 through the .cfi_def_cfa_register directive, and the .cfi_offset
22407 directive for the IP register is skipped by the dwarf code emitter.
22408 Example:
22409 mov ip, sp
22410 .cfi_def_cfa_register 12
22411 push {fp, ip, lr, pc}
22412 .cfi_offset 11, -16
22413 .cfi_offset 13, -12
22414 .cfi_offset 14, -8
22416 The Arm-specific .save directive handling, by contrast, differs from
22417 that of the dwarf code emitter and does not consider reg-reg copies
22418 while updating the register list. When PACBTI is enabled we manually
22419 update the .save directive register list to use "ra_auth_code"
22420 (pseudo register 143) instead of the IP register, as shown in the
22421 following pseudo code.
22422 Example:
22423 pacbti ip, lr, sp
22424 .cfi_register 143, 12
22425 push {r3, r7, ip, lr}
22426 .save {r3, r7, ra_auth_code, lr}
22428 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22429 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22430 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22432 XVECEXP (par, 0, 0)
22433 = gen_rtx_SET (gen_frame_mem
22434 (BLKmode,
22435 gen_rtx_PRE_MODIFY (Pmode,
22436 stack_pointer_rtx,
22437 plus_constant
22438 (Pmode, stack_pointer_rtx,
22439 -4 * num_regs))
22441 gen_rtx_UNSPEC (BLKmode,
22442 gen_rtvec (1, reg),
22443 UNSPEC_PUSH_MULT));
22445 if (dwarf_regs_mask & (1 << i))
22447 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22448 dwarf_reg);
22449 RTX_FRAME_RELATED_P (tmp) = 1;
22450 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22453 break;
22457 for (j = 1, i++; j < num_regs; i++)
22459 if (mask & (1 << i))
22461 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22462 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22463 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22465 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22467 if (dwarf_regs_mask & (1 << i))
22470 = gen_rtx_SET (gen_frame_mem
22471 (SImode,
22472 plus_constant (Pmode, stack_pointer_rtx,
22473 4 * j)),
22474 dwarf_reg);
22475 RTX_FRAME_RELATED_P (tmp) = 1;
22476 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22479 j++;
22483 par = emit_insn (par);
22485 tmp = gen_rtx_SET (stack_pointer_rtx,
22486 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22487 RTX_FRAME_RELATED_P (tmp) = 1;
22488 XVECEXP (dwarf, 0, 0) = tmp;
22490 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22492 return par;
22495 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22496 SIZE is the offset to be adjusted.
22497 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22498 static void
22499 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22501 rtx dwarf;
22503 RTX_FRAME_RELATED_P (insn) = 1;
22504 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22505 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22508 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22509 SAVED_REGS_MASK shows which registers need to be restored.
22511 Unfortunately, since this insn does not reflect very well the actual
22512 semantics of the operation, we need to annotate the insn for the benefit
22513 of DWARF2 frame unwind information. */
22514 static void
22515 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22517 int num_regs = 0;
22518 int i, j;
22519 rtx par;
22520 rtx dwarf = NULL_RTX;
22521 rtx tmp, reg;
22522 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22523 int offset_adj;
22524 int emit_update;
22526 offset_adj = return_in_pc ? 1 : 0;
22527 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22528 if (saved_regs_mask & (1 << i))
22529 num_regs++;
22531 gcc_assert (num_regs && num_regs <= 16);
22533 /* If SP is in reglist, then we don't emit SP update insn. */
22534 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22536 /* The parallel needs to hold num_regs SETs
22537 and one SET for the stack update. */
22538 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22540 if (return_in_pc)
22541 XVECEXP (par, 0, 0) = ret_rtx;
22543 if (emit_update)
22545 /* Increment the stack pointer, based on there being
22546 num_regs 4-byte registers to restore. */
22547 tmp = gen_rtx_SET (stack_pointer_rtx,
22548 plus_constant (Pmode,
22549 stack_pointer_rtx,
22550 4 * num_regs));
22551 RTX_FRAME_RELATED_P (tmp) = 1;
22552 XVECEXP (par, 0, offset_adj) = tmp;
22555 /* Now restore every reg, which may include PC. */
22556 for (j = 0, i = 0; j < num_regs; i++)
22557 if (saved_regs_mask & (1 << i))
22559 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22560 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22561 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22562 if ((num_regs == 1) && emit_update && !return_in_pc)
22564 /* Emit single load with writeback. */
22565 tmp = gen_frame_mem (SImode,
22566 gen_rtx_POST_INC (Pmode,
22567 stack_pointer_rtx));
22568 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22569 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
22570 dwarf);
22571 return;
22574 tmp = gen_rtx_SET (reg,
22575 gen_frame_mem
22576 (SImode,
22577 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22578 RTX_FRAME_RELATED_P (tmp) = 1;
22579 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22581 /* We need to maintain a sequence for DWARF info too. As dwarf info
22582 should not have PC, skip PC. */
22583 if (i != PC_REGNUM)
22584 dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
22586 j++;
22589 if (return_in_pc)
22590 par = emit_jump_insn (par);
22591 else
22592 par = emit_insn (par);
22594 REG_NOTES (par) = dwarf;
22595 if (!return_in_pc)
22596 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22597 stack_pointer_rtx, stack_pointer_rtx);
22600 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22601 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22603 Unfortunately, since this insn does not reflect the actual semantics of
22604 the operation very well, we need to annotate the insn for the benefit
22605 of DWARF2 frame unwind information. */
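   /* Illustrative sketch only (not lifted from the sources): restoring
      d8-d11 from BASE_REG generates a PARALLEL of roughly the form

	 (parallel [(set (reg base) (plus (reg base) (const_int 32)))
		    (set (reg:DF d8)  (mem (reg base)))
		    (set (reg:DF d9)  (mem (plus (reg base) (const_int 8))))
		    (set (reg:DF d10) (mem (plus (reg base) (const_int 16))))
		    (set (reg:DF d11) (mem (plus (reg base) (const_int 24))))])

      with a REG_CFA_RESTORE note for each D register, matching the
      VLDM-style pop_multi insn.  */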
22606 static void
22607 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22609 int i, j;
22610 rtx par;
22611 rtx dwarf = NULL_RTX;
22612 rtx tmp, reg;
22614 gcc_assert (num_regs && num_regs <= 32);
22616 /* Workaround ARM10 VFPr1 bug. */
22617 if (num_regs == 2 && !arm_arch6)
22619 if (first_reg == 15)
22620 first_reg--;
22622 num_regs++;
22625 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22626 there could be up to 32 D-registers to restore.
22627 If there are more than 16 D-registers, make two recursive calls,
22628 each of which emits one pop_multi instruction. */
22629 if (num_regs > 16)
22631 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22632 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22633 return;
22636 /* The parallel needs to hold num_regs SETs
22637 and one SET for the stack update. */
22638 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22640 /* Increment the stack pointer, based on there being
22641 num_regs 8-byte registers to restore. */
22642 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22643 RTX_FRAME_RELATED_P (tmp) = 1;
22644 XVECEXP (par, 0, 0) = tmp;
22646 /* Now show every reg that will be restored, using a SET for each. */
22647 for (j = 0, i=first_reg; j < num_regs; i += 2)
22649 reg = gen_rtx_REG (DFmode, i);
22651 tmp = gen_rtx_SET (reg,
22652 gen_frame_mem
22653 (DFmode,
22654 plus_constant (Pmode, base_reg, 8 * j)));
22655 RTX_FRAME_RELATED_P (tmp) = 1;
22656 XVECEXP (par, 0, j + 1) = tmp;
22658 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22660 j++;
22663 par = emit_insn (par);
22664 REG_NOTES (par) = dwarf;
22666 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
22667 if (REGNO (base_reg) == IP_REGNUM)
22669 RTX_FRAME_RELATED_P (par) = 1;
22670 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22672 else
22673 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22674 base_reg, base_reg);
22677 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
22678 an even number of registers is being popped, multiple LDRD patterns are
22679 created for all register pairs. If an odd number of registers is popped,
22680 the last register is loaded using an LDR pattern. */
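   /* A rough sketch (assumed, not taken from a dump): for SAVED_REGS_MASK
      covering {r4, r5, r6, r7, pc} this emits two LDRD parallels,

	 ldrd	r4, r5, [sp]
	 ldrd	r6, r7, [sp, #8]

      followed by a 16-byte SP adjustment and finally an LDR of PC with
      post-increment that also returns.  */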
22681 static void
22682 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22684 int num_regs = 0;
22685 int i, j;
22686 rtx par = NULL_RTX;
22687 rtx dwarf = NULL_RTX;
22688 rtx tmp, reg, tmp1;
22689 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22691 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22692 if (saved_regs_mask & (1 << i))
22693 num_regs++;
22695 gcc_assert (num_regs && num_regs <= 16);
22697 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
22698 popped. If num_regs was even it is now odd, and a pop including PC can
22699 be generated; if it was odd it is now even, and an LDR with return can
22700 be generated for PC. */
22701 if (return_in_pc)
22702 num_regs--;
22704 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22706 /* Var j iterates over the register numbers to find the registers in
22707 saved_regs_mask, while var i indexes the saved registers in the stack
22708 frame. A PARALLEL RTX is created here for each register pair so that
22709 the LDRD pattern can be matched. As PC is always the last register to
22710 be popped, and we have already decremented num_regs if PC is present,
22711 we don't have to worry about PC in this loop. */
22712 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22713 if (saved_regs_mask & (1 << j))
22715 /* Create RTX for memory load. */
22716 reg = gen_rtx_REG (SImode, j);
22717 tmp = gen_rtx_SET (reg,
22718 gen_frame_mem (SImode,
22719 plus_constant (Pmode,
22720 stack_pointer_rtx, 4 * i)));
22721 RTX_FRAME_RELATED_P (tmp) = 1;
22723 if (i % 2 == 0)
22725 /* When saved-register index (i) is even, the RTX to be emitted is
22726 yet to be created. Hence create it first. The LDRD pattern we
22727 are generating is :
22728 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22729 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22730 where target registers need not be consecutive. */
22731 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22732 dwarf = NULL_RTX;
22735 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
22736 added as the 0th element; if i is odd, reg_i is added as the 1st element
22737 of the LDRD pattern shown above. */
22738 XVECEXP (par, 0, (i % 2)) = tmp;
22739 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22741 if ((i % 2) == 1)
22743 /* When the saved-register index (i) is odd, the SETs for both registers
22744 of the LDRD pattern above have been generated, so the pattern can be
22745 emitted now. */
22746 par = emit_insn (par);
22747 REG_NOTES (par) = dwarf;
22748 RTX_FRAME_RELATED_P (par) = 1;
22751 i++;
22754 /* If the number of registers pushed is odd and return_in_pc is false, or
22755 the number of registers is even and return_in_pc is true, the last
22756 register is popped using LDR (it may be PC). Hence, adjust the stack
22757 first and then use an LDR with post-increment. */
22759 /* Increment the stack pointer, based on there being
22760 num_regs 4-byte registers to restore. */
22761 tmp = gen_rtx_SET (stack_pointer_rtx,
22762 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22763 RTX_FRAME_RELATED_P (tmp) = 1;
22764 tmp = emit_insn (tmp);
22765 if (!return_in_pc)
22767 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22768 stack_pointer_rtx, stack_pointer_rtx);
22771 dwarf = NULL_RTX;
22773 if (((num_regs % 2) == 1 && !return_in_pc)
22774 || ((num_regs % 2) == 0 && return_in_pc))
22776 /* Scan for the single register to be popped. Skip until the saved
22777 register is found. */
22778 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22780 /* Gen LDR with post increment here. */
22781 tmp1 = gen_rtx_MEM (SImode,
22782 gen_rtx_POST_INC (SImode,
22783 stack_pointer_rtx));
22784 set_mem_alias_set (tmp1, get_frame_alias_set ());
22786 reg = gen_rtx_REG (SImode, j);
22787 tmp = gen_rtx_SET (reg, tmp1);
22788 RTX_FRAME_RELATED_P (tmp) = 1;
22789 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22791 if (return_in_pc)
22793 /* If return_in_pc, j must be PC_REGNUM. */
22794 gcc_assert (j == PC_REGNUM);
22795 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22796 XVECEXP (par, 0, 0) = ret_rtx;
22797 XVECEXP (par, 0, 1) = tmp;
22798 par = emit_jump_insn (par);
22800 else
22802 par = emit_insn (tmp);
22803 REG_NOTES (par) = dwarf;
22804 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22805 stack_pointer_rtx, stack_pointer_rtx);
22809 else if ((num_regs % 2) == 1 && return_in_pc)
22811 /* There are two registers left to be popped, so generate the pattern
22812 pop_multiple_with_stack_update_and_return to pop into PC. */
22813 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22816 return;
22819 /* LDRD in ARM mode needs consecutive registers as operands. This function
22820 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22821 offset addressing and then generates one separate stack update. This provides
22822 more scheduling freedom, compared to writeback on every load. However,
22823 if the function returns using load into PC directly
22824 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22825 before the last load. TODO: Add a peephole optimization to recognize
22826 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22827 peephole optimization to merge the load at stack-offset zero
22828 with the stack update instruction using load with writeback
22829 in post-index addressing mode. */
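   /* Sketch of the intended output (assumed, not taken from a dump): for
      SAVED_REGS_MASK covering {r4, r5, r6} this emits

	 ldrd	r4, r5, [sp]
	 ldr	r6, [sp, #8]
	 add	sp, sp, #12

      i.e. consecutive even/odd register pairs become LDRD, leftover
      registers use LDR, and a single SP update follows at the end.  */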
22830 static void
22831 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22833 int j = 0;
22834 int offset = 0;
22835 rtx par = NULL_RTX;
22836 rtx dwarf = NULL_RTX;
22837 rtx tmp, mem;
22839 /* Restore saved registers. */
22840 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22841 j = 0;
22842 while (j <= LAST_ARM_REGNUM)
22843 if (saved_regs_mask & (1 << j))
22845 if ((j % 2) == 0
22846 && (saved_regs_mask & (1 << (j + 1)))
22847 && (j + 1) != PC_REGNUM)
22849 /* Current register and next register form register pair for which
22850 LDRD can be generated. PC is always the last register popped, and
22851 we handle it separately. */
22852 if (offset > 0)
22853 mem = gen_frame_mem (DImode,
22854 plus_constant (Pmode,
22855 stack_pointer_rtx,
22856 offset));
22857 else
22858 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22860 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22861 tmp = emit_insn (tmp);
22862 RTX_FRAME_RELATED_P (tmp) = 1;
22864 /* Generate dwarf info. */
22866 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22867 gen_rtx_REG (SImode, j),
22868 NULL_RTX);
22869 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22870 gen_rtx_REG (SImode, j + 1),
22871 dwarf);
22873 REG_NOTES (tmp) = dwarf;
22875 offset += 8;
22876 j += 2;
22878 else if (j != PC_REGNUM)
22880 /* Emit a single word load. */
22881 if (offset > 0)
22882 mem = gen_frame_mem (SImode,
22883 plus_constant (Pmode,
22884 stack_pointer_rtx,
22885 offset));
22886 else
22887 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22889 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22890 tmp = emit_insn (tmp);
22891 RTX_FRAME_RELATED_P (tmp) = 1;
22893 /* Generate dwarf info. */
22894 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22895 gen_rtx_REG (SImode, j),
22896 NULL_RTX);
22898 offset += 4;
22899 j += 1;
22901 else /* j == PC_REGNUM */
22902 j++;
22904 else
22905 j++;
22907 /* Update the stack. */
22908 if (offset > 0)
22910 tmp = gen_rtx_SET (stack_pointer_rtx,
22911 plus_constant (Pmode,
22912 stack_pointer_rtx,
22913 offset));
22914 tmp = emit_insn (tmp);
22915 arm_add_cfa_adjust_cfa_note (tmp, offset,
22916 stack_pointer_rtx, stack_pointer_rtx);
22917 offset = 0;
22920 if (saved_regs_mask & (1 << PC_REGNUM))
22922 /* Only PC is to be popped. */
22923 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22924 XVECEXP (par, 0, 0) = ret_rtx;
22925 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22926 gen_frame_mem (SImode,
22927 gen_rtx_POST_INC (SImode,
22928 stack_pointer_rtx)));
22929 RTX_FRAME_RELATED_P (tmp) = 1;
22930 XVECEXP (par, 0, 1) = tmp;
22931 par = emit_jump_insn (par);
22933 /* Generate dwarf info. */
22934 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22935 gen_rtx_REG (SImode, PC_REGNUM),
22936 NULL_RTX);
22937 REG_NOTES (par) = dwarf;
22938 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22939 stack_pointer_rtx, stack_pointer_rtx);
22943 /* Calculate the size of the return value that is passed in registers. */
22944 static unsigned
22945 arm_size_return_regs (void)
22947 machine_mode mode;
22949 if (crtl->return_rtx != 0)
22950 mode = GET_MODE (crtl->return_rtx);
22951 else
22952 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22954 return GET_MODE_SIZE (mode);
22957 /* Return true if the current function needs to save/restore LR. */
22958 static bool
22959 thumb_force_lr_save (void)
22961 return !cfun->machine->lr_save_eliminated
22962 && (!crtl->is_leaf
22963 || thumb_far_jump_used_p ()
22964 || df_regs_ever_live_p (LR_REGNUM));
22967 /* We do not know whether r3 will be available, because an
22968 indirect tail call is happening in this
22969 particular case. */
22970 static bool
22971 is_indirect_tailcall_p (rtx call)
22973 rtx pat = PATTERN (call);
22975 /* Indirect tail call. */
22976 pat = XVECEXP (pat, 0, 0);
22977 if (GET_CODE (pat) == SET)
22978 pat = SET_SRC (pat);
22980 pat = XEXP (XEXP (pat, 0), 0);
22981 return REG_P (pat);
22984 /* Return true if r3 is used by any of the tail call insns in the
22985 current function. */
22986 static bool
22987 any_sibcall_could_use_r3 (void)
22989 edge_iterator ei;
22990 edge e;
22992 if (!crtl->tail_call_emit)
22993 return false;
22994 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22995 if (e->flags & EDGE_SIBCALL)
22997 rtx_insn *call = BB_END (e->src);
22998 if (!CALL_P (call))
22999 call = prev_nonnote_nondebug_insn (call);
23000 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
23001 if (find_regno_fusage (call, USE, 3)
23002 || is_indirect_tailcall_p (call))
23003 return true;
23005 return false;
23009 /* Compute the distance from register FROM to register TO.
23010 These can be the arg pointer (26), the soft frame pointer (25),
23011 the stack pointer (13) or the hard frame pointer (11).
23012 In thumb mode r7 is used as the soft frame pointer, if needed.
23013 Typical stack layout looks like this:
23015 old stack pointer -> | |
23016 ----
23017 | | \
23018 | | saved arguments for
23019 | | vararg functions
23020 | | /
23022 hard FP & arg pointer -> | | \
23023 | | stack
23024 | | frame
23025 | | /
23027 | | \
23028 | | call saved
23029 | | registers
23030 soft frame pointer -> | | /
23032 | | \
23033 | | local
23034 | | variables
23035 locals base pointer -> | | /
23037 | | \
23038 | | outgoing
23039 | | arguments
23040 current stack pointer -> | | /
23043 For a given function some or all of these stack components
23044 may not be needed, giving rise to the possibility of
23045 eliminating some of the registers.
23047 The values returned by this function must reflect the behavior
23048 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
23050 The sign of the number returned reflects the direction of stack
23051 growth, so the values are positive for all eliminations except
23052 from the soft frame pointer to the hard frame pointer.
23054 SFP may point just inside the local variables block to ensure correct
23055 alignment. */
23058 /* Return cached stack offsets. */
23060 static arm_stack_offsets *
23061 arm_get_frame_offsets (void)
23063 struct arm_stack_offsets *offsets;
23065 offsets = &cfun->machine->stack_offsets;
23067 return offsets;
23071 /* Calculate stack offsets. These are used to calculate register elimination
23072 offsets and in prologue/epilogue code. Also calculates which registers
23073 should be saved. */
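/* As a purely illustrative example (assuming an ARM-state function with no
   pretend args, no static chain, no interworking slot, no frame pointer and
   no VFP saves): 16 bytes of core registers saved (say r4-r6 and lr),
   8 bytes of locals and no outgoing arguments give roughly

     saved_args = 0, frame = 0, saved_regs = 16,
     soft_frame = 16, locals_base = 24, outgoing_args = 24

   with no extra doubleword-alignment padding, since soft_frame and
   outgoing_args are already 8-byte aligned.  */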
23075 static void
23076 arm_compute_frame_layout (void)
23078 struct arm_stack_offsets *offsets;
23079 unsigned long func_type;
23080 int saved;
23081 int core_saved;
23082 HOST_WIDE_INT frame_size;
23083 int i;
23085 offsets = &cfun->machine->stack_offsets;
23087 /* Initially this is the size of the local variables. It will be translated
23088 into an offset once we have determined the size of preceding data. */
23089 frame_size = ROUND_UP_WORD (get_frame_size ());
23091 /* Space for variadic functions. */
23092 offsets->saved_args = crtl->args.pretend_args_size;
23094 /* In Thumb mode this is incorrect, but never used. */
23095 offsets->frame
23096 = (offsets->saved_args
23097 + arm_compute_static_chain_stack_bytes ()
23098 + (frame_pointer_needed ? 4 : 0));
23100 if (TARGET_32BIT)
23102 unsigned int regno;
23104 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
23105 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23106 saved = core_saved;
23108 /* We know that SP will be doubleword aligned on entry, and we must
23109 preserve that condition at any subroutine call. We also require the
23110 soft frame pointer to be doubleword aligned. */
23112 if (TARGET_REALLY_IWMMXT)
23114 /* Check for the call-saved iWMMXt registers. */
23115 for (regno = FIRST_IWMMXT_REGNUM;
23116 regno <= LAST_IWMMXT_REGNUM;
23117 regno++)
23118 if (reg_needs_saving_p (regno))
23119 saved += 8;
23122 func_type = arm_current_func_type ();
23123 /* Space for saved VFP registers. */
23124 if (! IS_VOLATILE (func_type)
23125 && TARGET_VFP_BASE)
23126 saved += arm_get_vfp_saved_size ();
23128 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
23129 nonsecure entry functions with VSTR/VLDR. */
23130 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23131 saved += 4;
23133 else /* TARGET_THUMB1 */
23135 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
23136 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23137 saved = core_saved;
23138 if (TARGET_BACKTRACE)
23139 saved += 16;
23142 /* Saved registers include the stack frame. */
23143 offsets->saved_regs
23144 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
23145 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
23147 /* A leaf function does not need any stack alignment if it has nothing
23148 on the stack. */
23149 if (crtl->is_leaf && frame_size == 0
23150 /* However if it calls alloca(), we have a dynamically allocated
23151 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23152 && ! cfun->calls_alloca)
23154 offsets->outgoing_args = offsets->soft_frame;
23155 offsets->locals_base = offsets->soft_frame;
23156 return;
23159 /* Ensure SFP has the correct alignment. */
23160 if (ARM_DOUBLEWORD_ALIGN
23161 && (offsets->soft_frame & 7))
23163 offsets->soft_frame += 4;
23164 /* Try to align stack by pushing an extra reg. Don't bother doing this
23165 when there is a stack frame as the alignment will be rolled into
23166 the normal stack adjustment. */
23167 if (frame_size + crtl->outgoing_args_size == 0)
23169 int reg = -1;
23171 /* Register r3 is caller-saved. Normally it does not need to be
23172 saved on entry by the prologue. However if we choose to save
23173 it for padding then we may confuse the compiler into thinking
23174 a prologue sequence is required when in fact it is not. This
23175 will occur when shrink-wrapping if r3 is used as a scratch
23176 register and there are no other callee-saved writes.
23178 This situation can be avoided by choosing a callee-saved register
23179 for the padding when one is available, since using r3 is not
23180 mandatory. */
23181 bool prefer_callee_reg_p = false;
23183 /* If it is safe to use r3, then do so. This sometimes
23184 generates better code on Thumb-2 by avoiding the need to
23185 use 32-bit push/pop instructions. */
23186 if (! any_sibcall_could_use_r3 ()
23187 && arm_size_return_regs () <= 12
23188 && (offsets->saved_regs_mask & (1 << 3)) == 0
23189 && (TARGET_THUMB2
23190 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23192 reg = 3;
23193 if (!TARGET_THUMB2)
23194 prefer_callee_reg_p = true;
23196 if (reg == -1
23197 || prefer_callee_reg_p)
23199 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23201 /* Avoid fixed registers; they may be changed at
23202 arbitrary times so it's unsafe to restore them
23203 during the epilogue. */
23204 if (!fixed_regs[i]
23205 && (offsets->saved_regs_mask & (1 << i)) == 0)
23207 reg = i;
23208 break;
23213 if (reg != -1)
23215 offsets->saved_regs += 4;
23216 offsets->saved_regs_mask |= (1 << reg);
23221 offsets->locals_base = offsets->soft_frame + frame_size;
23222 offsets->outgoing_args = (offsets->locals_base
23223 + crtl->outgoing_args_size);
23225 if (ARM_DOUBLEWORD_ALIGN)
23227 /* Ensure SP remains doubleword aligned. */
23228 if (offsets->outgoing_args & 7)
23229 offsets->outgoing_args += 4;
23230 gcc_assert (!(offsets->outgoing_args & 7));
23235 /* Calculate the relative offsets for the different stack pointers. Positive
23236 offsets are in the direction of stack growth. */
23238 HOST_WIDE_INT
23239 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23241 arm_stack_offsets *offsets;
23243 offsets = arm_get_frame_offsets ();
23245 /* OK, now we have enough information to compute the distances.
23246 There must be an entry in these switch tables for each pair
23247 of registers in ELIMINABLE_REGS, even if some of the entries
23248 seem to be redundant or useless. */
23249 switch (from)
23251 case ARG_POINTER_REGNUM:
23252 switch (to)
23254 case THUMB_HARD_FRAME_POINTER_REGNUM:
23255 return 0;
23257 case FRAME_POINTER_REGNUM:
23258 /* This is the reverse of the soft frame pointer
23259 to hard frame pointer elimination below. */
23260 return offsets->soft_frame - offsets->saved_args;
23262 case ARM_HARD_FRAME_POINTER_REGNUM:
23263 /* This is only non-zero in the case where the static chain register
23264 is stored above the frame. */
23265 return offsets->frame - offsets->saved_args - 4;
23267 case STACK_POINTER_REGNUM:
23268 /* If nothing has been pushed on the stack at all
23269 then this will return -4. This *is* correct! */
23270 return offsets->outgoing_args - (offsets->saved_args + 4);
23272 default:
23273 gcc_unreachable ();
23275 gcc_unreachable ();
23277 case FRAME_POINTER_REGNUM:
23278 switch (to)
23280 case THUMB_HARD_FRAME_POINTER_REGNUM:
23281 return 0;
23283 case ARM_HARD_FRAME_POINTER_REGNUM:
23284 /* The hard frame pointer points to the top entry in the
23285 stack frame. The soft frame pointer to the bottom entry
23286 in the stack frame. If there is no stack frame at all,
23287 then they are identical. */
23289 return offsets->frame - offsets->soft_frame;
23291 case STACK_POINTER_REGNUM:
23292 return offsets->outgoing_args - offsets->soft_frame;
23294 default:
23295 gcc_unreachable ();
23297 gcc_unreachable ();
23299 default:
23300 /* You cannot eliminate from the stack pointer.
23301 In theory you could eliminate from the hard frame
23302 pointer to the stack pointer, but this will never
23303 happen, since if a stack frame is not needed the
23304 hard frame pointer will never be used. */
23305 gcc_unreachable ();
23309 /* Given FROM and TO register numbers, say whether this elimination is
23310 allowed. Frame pointer elimination is automatically handled.
23312 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23313 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23314 pointer, we must eliminate FRAME_POINTER_REGNUM into
23315 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23316 ARG_POINTER_REGNUM. */
23318 bool
23319 arm_can_eliminate (const int from, const int to)
23321 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23322 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23323 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23324 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23325 true);
23328 /* Emit RTL to save coprocessor registers on function entry. Returns the
23329 number of bytes pushed. */
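/* Roughly speaking (an illustration, not a dump): each maximal run of
   consecutive D registers that needs saving is stored with one call to
   vfp_emit_fstmd, so if d8-d11 and d14 are live across the function the
   prologue gets one store-multiple for d8-d11 and a separate one for d14,
   while iWMMXt registers are pushed one at a time with pre-decrement
   stores.  */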
23331 static int
23332 arm_save_coproc_regs(void)
23334 int saved_size = 0;
23335 unsigned reg;
23336 unsigned start_reg;
23337 rtx insn;
23339 if (TARGET_REALLY_IWMMXT)
23340 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23341 if (reg_needs_saving_p (reg))
23343 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23344 insn = gen_rtx_MEM (V2SImode, insn);
23345 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23346 RTX_FRAME_RELATED_P (insn) = 1;
23347 saved_size += 8;
23350 if (TARGET_VFP_BASE)
23352 start_reg = FIRST_VFP_REGNUM;
23354 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23356 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23358 if (start_reg != reg)
23359 saved_size += vfp_emit_fstmd (start_reg,
23360 (reg - start_reg) / 2);
23361 start_reg = reg + 2;
23364 if (start_reg != reg)
23365 saved_size += vfp_emit_fstmd (start_reg,
23366 (reg - start_reg) / 2);
23368 return saved_size;
23372 /* Set the Thumb frame pointer from the stack pointer. */
23374 static void
23375 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23377 HOST_WIDE_INT amount;
23378 rtx insn, dwarf;
23380 amount = offsets->outgoing_args - offsets->locals_base;
23381 if (amount < 1024)
23382 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23383 stack_pointer_rtx, GEN_INT (amount)));
23384 else
23386 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23387 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23388 expects the first two operands to be the same. */
23389 if (TARGET_THUMB2)
23391 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23392 stack_pointer_rtx,
23393 hard_frame_pointer_rtx));
23395 else
23397 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23398 hard_frame_pointer_rtx,
23399 stack_pointer_rtx));
23401 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23402 plus_constant (Pmode, stack_pointer_rtx, amount));
23403 RTX_FRAME_RELATED_P (dwarf) = 1;
23404 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23407 RTX_FRAME_RELATED_P (insn) = 1;
23410 struct scratch_reg {
23411 rtx reg;
23412 bool saved;
23415 /* Return a short-lived scratch register for use as a 2nd scratch register on
23416 function entry after the registers are saved in the prologue. This register
23417 must be released by means of release_scratch_register_on_entry. IP is not
23418 considered since it is always used as the 1st scratch register if available.
23420 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23421 mask of live registers. */
23423 static void
23424 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23425 unsigned long live_regs)
23427 int regno = -1;
23429 sr->saved = false;
23431 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23432 regno = LR_REGNUM;
23433 else
23435 unsigned int i;
23437 for (i = 4; i < 11; i++)
23438 if (regno1 != i && (live_regs & (1 << i)) != 0)
23440 regno = i;
23441 break;
23444 if (regno < 0)
23446 /* If IP is used as the 1st scratch register for a nested function,
23447 then either r3 wasn't available or is used to preserve IP. */
23448 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23449 regno1 = 3;
23450 regno = (regno1 == 3 ? 2 : 3);
23451 sr->saved
23452 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23453 regno);
23457 sr->reg = gen_rtx_REG (SImode, regno);
23458 if (sr->saved)
23460 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23461 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23462 rtx x = gen_rtx_SET (stack_pointer_rtx,
23463 plus_constant (Pmode, stack_pointer_rtx, -4));
23464 RTX_FRAME_RELATED_P (insn) = 1;
23465 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23469 /* Release a scratch register obtained from the preceding function. */
23471 static void
23472 release_scratch_register_on_entry (struct scratch_reg *sr)
23474 if (sr->saved)
23476 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23477 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23478 rtx x = gen_rtx_SET (stack_pointer_rtx,
23479 plus_constant (Pmode, stack_pointer_rtx, 4));
23480 RTX_FRAME_RELATED_P (insn) = 1;
23481 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23485 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23487 #if PROBE_INTERVAL > 4096
23488 #error Cannot use indexed addressing mode for stack probing
23489 #endif
23491 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23492 inclusive. These are offsets from the current stack pointer. REGNO1
23493 is the index number of the 1st scratch register and LIVE_REGS is the
23494 mask of live registers. */
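/* A worked example under the default 4 KiB PROBE_INTERVAL (a sketch of the
   code below, not a dump): FIRST = 4096, SIZE = 12288 falls into the
   "<= 5 * PROBE_INTERVAL" case and probes the words at SP - 8192,
   SP - 12288 and SP - 16384, i.e. at FIRST + N * PROBE_INTERVAL for
   N = 1, 2, 3, which covers the whole [FIRST, FIRST + SIZE] range.  */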
23496 static void
23497 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23498 unsigned int regno1, unsigned long live_regs)
23500 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23502 /* See if we have a constant small number of probes to generate. If so,
23503 that's the easy case. */
23504 if (size <= PROBE_INTERVAL)
23506 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23507 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23508 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23511 /* The run-time loop is made up of 10 insns in the generic case while the
23512 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
23513 else if (size <= 5 * PROBE_INTERVAL)
23515 HOST_WIDE_INT i, rem;
23517 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23518 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23519 emit_stack_probe (reg1);
23521 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23522 it exceeds SIZE. If only two probes are needed, this will not
23523 generate any code. Then probe at FIRST + SIZE. */
23524 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23526 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23527 emit_stack_probe (reg1);
23530 rem = size - (i - PROBE_INTERVAL);
23531 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23533 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23534 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23536 else
23537 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23540 /* Otherwise, do the same as above, but in a loop. Note that we must be
23541 extra careful with variables wrapping around because we might be at
23542 the very top (or the very bottom) of the address space and we have
23543 to be able to handle this case properly; in particular, we use an
23544 equality test for the loop condition. */
23545 else
23547 HOST_WIDE_INT rounded_size;
23548 struct scratch_reg sr;
23550 get_scratch_register_on_entry (&sr, regno1, live_regs);
23552 emit_move_insn (reg1, GEN_INT (first));
23555 /* Step 1: round SIZE to the previous multiple of the interval. */
23557 rounded_size = size & -PROBE_INTERVAL;
23558 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23561 /* Step 2: compute initial and final value of the loop counter. */
23563 /* TEST_ADDR = SP + FIRST. */
23564 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23566 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23567 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23570 /* Step 3: the loop
23574 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23575 probe at TEST_ADDR
23577 while (TEST_ADDR != LAST_ADDR)
23579 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23580 until it is equal to ROUNDED_SIZE. */
23582 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23585 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23586 that SIZE is equal to ROUNDED_SIZE. */
23588 if (size != rounded_size)
23590 HOST_WIDE_INT rem = size - rounded_size;
23592 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23594 emit_set_insn (sr.reg,
23595 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23596 emit_stack_probe (plus_constant (Pmode, sr.reg,
23597 PROBE_INTERVAL - rem));
23599 else
23600 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23603 release_scratch_register_on_entry (&sr);
23606 /* Make sure nothing is scheduled before we are done. */
23607 emit_insn (gen_blockage ());
23610 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23611 absolute addresses. */
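/* With the default 4096-byte interval the emitted loop looks roughly like
   this (register names are placeholders for REG1 and REG2):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
 */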
23613 const char *
23614 output_probe_stack_range (rtx reg1, rtx reg2)
23616 static int labelno = 0;
23617 char loop_lab[32];
23618 rtx xops[2];
23620 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23622 /* Loop. */
23623 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23625 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23626 xops[0] = reg1;
23627 xops[1] = GEN_INT (PROBE_INTERVAL);
23628 output_asm_insn ("sub\t%0, %0, %1", xops);
23630 /* Probe at TEST_ADDR. */
23631 output_asm_insn ("str\tr0, [%0, #0]", xops);
23633 /* Test if TEST_ADDR == LAST_ADDR. */
23634 xops[1] = reg2;
23635 output_asm_insn ("cmp\t%0, %1", xops);
23637 /* Branch. */
23638 fputs ("\tbne\t", asm_out_file);
23639 assemble_name_raw (asm_out_file, loop_lab);
23640 fputc ('\n', asm_out_file);
23642 return "";
23645 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23646 function. */
23647 void
23648 arm_expand_prologue (void)
23650 rtx amount;
23651 rtx insn;
23652 rtx ip_rtx;
23653 unsigned long live_regs_mask;
23654 unsigned long func_type;
23655 int fp_offset = 0;
23656 int saved_pretend_args = 0;
23657 int saved_regs = 0;
23658 unsigned HOST_WIDE_INT args_to_push;
23659 HOST_WIDE_INT size;
23660 arm_stack_offsets *offsets;
23661 bool clobber_ip;
23663 func_type = arm_current_func_type ();
23665 /* Naked functions don't have prologues. */
23666 if (IS_NAKED (func_type))
23668 if (flag_stack_usage_info)
23669 current_function_static_stack_size = 0;
23670 return;
23673 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23674 args_to_push = crtl->args.pretend_args_size;
23676 /* Compute which register we will have to save onto the stack. */
23677 offsets = arm_get_frame_offsets ();
23678 live_regs_mask = offsets->saved_regs_mask;
23680 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23682 /* The AAPCS requires the callee to widen integral types narrower
23683 than 32 bits to the full width of the register; but when handling
23684 calls to non-secure space, we cannot trust the callee to have
23685 correctly done so. So forcibly re-widen the result here. */
23686 if (IS_CMSE_ENTRY (func_type))
23688 function_args_iterator args_iter;
23689 CUMULATIVE_ARGS args_so_far_v;
23690 cumulative_args_t args_so_far;
23691 bool first_param = true;
23692 tree arg_type;
23693 tree fndecl = current_function_decl;
23694 tree fntype = TREE_TYPE (fndecl);
23695 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
23696 args_so_far = pack_cumulative_args (&args_so_far_v);
23697 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
23699 rtx arg_rtx;
23701 if (VOID_TYPE_P (arg_type))
23702 break;
23704 function_arg_info arg (arg_type, /*named=*/true);
23705 if (!first_param)
23706 /* We should advance after processing the argument and pass
23707 the argument we're advancing past. */
23708 arm_function_arg_advance (args_so_far, arg);
23709 first_param = false;
23710 arg_rtx = arm_function_arg (args_so_far, arg);
23711 gcc_assert (REG_P (arg_rtx));
23712 if ((TREE_CODE (arg_type) == INTEGER_TYPE
23713 || TREE_CODE (arg_type) == ENUMERAL_TYPE
23714 || TREE_CODE (arg_type) == BOOLEAN_TYPE)
23715 && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4))
23717 if (TYPE_UNSIGNED (arg_type))
23718 emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)),
23719 gen_rtx_ZERO_EXTEND (SImode, arg_rtx));
23720 else
23721 emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)),
23722 gen_rtx_SIGN_EXTEND (SImode, arg_rtx));
23727 if (IS_STACKALIGN (func_type))
23729 rtx r0, r1;
23731 /* Handle a word-aligned stack pointer. We generate the following:
23733 mov r0, sp
23734 bic r1, r0, #7
23735 mov sp, r1
23736 <save and restore r0 in normal prologue/epilogue>
23737 mov sp, r0
23738 bx lr
23740 The unwinder doesn't need to know about the stack realignment.
23741 Just tell it we saved SP in r0. */
23742 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23744 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23745 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23747 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23748 RTX_FRAME_RELATED_P (insn) = 1;
23749 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23751 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23753 /* ??? The CFA changes here, which may cause GDB to conclude that it
23754 has entered a different function. That said, the unwind info is
23755 correct, individually, before and after this instruction because
23756 we've described the save of SP, which will override the default
23757 handling of SP as restoring from the CFA. */
23758 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23761 /* Let's compute the static_chain_stack_bytes required and store it. Right
23762 now the value must be -1 as stored by arm_init_machine_status (). */
23763 cfun->machine->static_chain_stack_bytes
23764 = arm_compute_static_chain_stack_bytes ();
23766 /* The static chain register is the same as the IP register. If it is
23767 clobbered when creating the frame, we need to save and restore it. */
23768 clobber_ip = (IS_NESTED (func_type)
23769 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23770 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23771 || flag_stack_clash_protection)
23772 && !df_regs_ever_live_p (LR_REGNUM)
23773 && arm_r3_live_at_start_p ()))
23774 || arm_current_function_pac_enabled_p ()));
23776 /* Find somewhere to store IP whilst the frame is being created.
23777 We try the following places in order:
23779 1. The last argument register r3 if it is available.
23780 2. A slot on the stack above the frame if there are no
23781 arguments to push onto the stack.
23782 3. Register r3 again, after pushing the argument registers
23783 onto the stack, if this is a varargs function.
23784 4. The last slot on the stack created for the arguments to
23785 push, if this isn't a varargs function.
23787 Note - we only need to tell the dwarf2 backend about the SP
23788 adjustment in the second variant; the static chain register
23789 doesn't need to be unwound, as it doesn't contain a value
23790 inherited from the caller. */
23791 if (clobber_ip)
23793 if (!arm_r3_live_at_start_p ())
23794 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23795 else if (args_to_push == 0)
23797 rtx addr, dwarf;
23799 saved_regs += 4;
23801 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23802 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23803 fp_offset = 4;
23805 /* Just tell the dwarf backend that we adjusted SP. */
23806 dwarf = gen_rtx_SET (stack_pointer_rtx,
23807 plus_constant (Pmode, stack_pointer_rtx,
23808 -fp_offset));
23809 RTX_FRAME_RELATED_P (insn) = 1;
23810 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23811 if (arm_current_function_pac_enabled_p ())
23812 cfun->machine->pacspval_needed = 1;
23814 else
23816 /* Store the args on the stack. */
23817 if (cfun->machine->uses_anonymous_args)
23819 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23820 (0xf0 >> (args_to_push / 4)) & 0xf);
23821 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23822 saved_pretend_args = 1;
23824 else
23826 rtx addr, dwarf;
23828 if (args_to_push == 4)
23829 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23830 else
23831 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23832 plus_constant (Pmode,
23833 stack_pointer_rtx,
23834 -args_to_push));
23836 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23838 /* Just tell the dwarf backend that we adjusted SP. */
23839 dwarf = gen_rtx_SET (stack_pointer_rtx,
23840 plus_constant (Pmode, stack_pointer_rtx,
23841 -args_to_push));
23842 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23845 RTX_FRAME_RELATED_P (insn) = 1;
23846 fp_offset = args_to_push;
23847 args_to_push = 0;
23848 if (arm_current_function_pac_enabled_p ())
23849 cfun->machine->pacspval_needed = 1;
23853 if (arm_current_function_pac_enabled_p ())
23855 /* If IP was clobbered we only emit a PAC instruction as the BTI
23856 one will be added before the push of the clobbered IP (if
23857 necessary) by the bti pass. */
23858 if (aarch_bti_enabled () && !clobber_ip)
23859 insn = emit_insn (gen_pacbti_nop ());
23860 else
23861 insn = emit_insn (gen_pac_nop ());
23863 rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
23864 RTX_FRAME_RELATED_P (insn) = 1;
23865 add_reg_note (insn, REG_CFA_REGISTER, dwarf);
23868 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23870 if (IS_INTERRUPT (func_type))
23872 /* Interrupt functions must not corrupt any registers.
23873 Creating a frame pointer however, corrupts the IP
23874 register, so we must push it first. */
23875 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23877 /* Do not set RTX_FRAME_RELATED_P on this insn.
23878 The dwarf stack unwinding code only wants to see one
23879 stack decrement per function, and this is not it. If
23880 this instruction is labeled as being part of the frame
23881 creation sequence then dwarf2out_frame_debug_expr will
23882 die when it encounters the assignment of IP to FP
23883 later on, since the use of SP here establishes SP as
23884 the CFA register and not IP.
23886 Anyway this instruction is not really part of the stack
23887 frame creation although it is part of the prologue. */
23890 insn = emit_set_insn (ip_rtx,
23891 plus_constant (Pmode, stack_pointer_rtx,
23892 fp_offset));
23893 RTX_FRAME_RELATED_P (insn) = 1;
23896 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23897 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23899 saved_regs += 4;
23900 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23901 GEN_INT (FPCXTNS_ENUM)));
23902 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23903 plus_constant (Pmode, stack_pointer_rtx, -4));
23904 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23905 RTX_FRAME_RELATED_P (insn) = 1;
23908 if (args_to_push)
23910 /* Push the argument registers, or reserve space for them. */
23911 if (cfun->machine->uses_anonymous_args)
23912 insn = emit_multi_reg_push
23913 ((0xf0 >> (args_to_push / 4)) & 0xf,
23914 (0xf0 >> (args_to_push / 4)) & 0xf);
23915 else
23916 insn = emit_insn
23917 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23918 GEN_INT (- args_to_push)));
23919 RTX_FRAME_RELATED_P (insn) = 1;
23922 /* If this is an interrupt service routine, and the link register
23923 is going to be pushed, and we're not generating an extra
23924 push of IP (needed when a frame is required and the frame layout is APCS),
23925 then subtracting four from LR now will mean that the function return
23926 can be done with a single instruction. */
23927 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23928 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23929 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23930 && TARGET_ARM)
23932 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23934 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23937 if (live_regs_mask)
23939 unsigned long dwarf_regs_mask = live_regs_mask;
23941 saved_regs += bit_count (live_regs_mask) * 4;
23942 if (optimize_size && !frame_pointer_needed
23943 && saved_regs == offsets->saved_regs - offsets->saved_args)
23945 /* If no coprocessor registers are being pushed and we don't have
23946 to worry about a frame pointer then push extra registers to
23947 create the stack frame. This is done in a way that does not
23948 alter the frame layout, so is independent of the epilogue. */
23949 int n;
23950 int frame;
23951 n = 0;
23952 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23953 n++;
23954 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23955 if (frame && n * 4 >= frame)
23957 n = frame / 4;
23958 live_regs_mask |= (1 << n) - 1;
23959 saved_regs += frame;
23963 if (TARGET_LDRD
23964 && current_tune->prefer_ldrd_strd
23965 && !optimize_function_for_size_p (cfun))
23967 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23968 if (TARGET_THUMB2)
23969 thumb2_emit_strd_push (live_regs_mask);
23970 else if (TARGET_ARM
23971 && !TARGET_APCS_FRAME
23972 && !IS_INTERRUPT (func_type))
23973 arm_emit_strd_push (live_regs_mask);
23974 else
23976 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23977 RTX_FRAME_RELATED_P (insn) = 1;
23980 else
23982 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23983 RTX_FRAME_RELATED_P (insn) = 1;
23987 if (! IS_VOLATILE (func_type))
23988 saved_regs += arm_save_coproc_regs ();
23990 if (frame_pointer_needed && TARGET_ARM)
23992 /* Create the new frame pointer. */
23993 if (TARGET_APCS_FRAME)
23995 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23996 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23997 RTX_FRAME_RELATED_P (insn) = 1;
23999 else
24001 insn = GEN_INT (saved_regs - (4 + fp_offset));
24002 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24003 stack_pointer_rtx, insn));
24004 RTX_FRAME_RELATED_P (insn) = 1;
24008 size = offsets->outgoing_args - offsets->saved_args;
24009 if (flag_stack_usage_info)
24010 current_function_static_stack_size = size;
24012 /* If this isn't an interrupt service routine and we have a frame, then do
24013 stack checking. We use IP as the first scratch register, except for the
24014 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
24015 if (!IS_INTERRUPT (func_type)
24016 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
24017 || flag_stack_clash_protection))
24019 unsigned int regno;
24021 if (!IS_NESTED (func_type) || clobber_ip)
24022 regno = IP_REGNUM;
24023 else if (df_regs_ever_live_p (LR_REGNUM))
24024 regno = LR_REGNUM;
24025 else
24026 regno = 3;
24028 if (crtl->is_leaf && !cfun->calls_alloca)
24030 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
24031 arm_emit_probe_stack_range (get_stack_check_protect (),
24032 size - get_stack_check_protect (),
24033 regno, live_regs_mask);
24035 else if (size > 0)
24036 arm_emit_probe_stack_range (get_stack_check_protect (), size,
24037 regno, live_regs_mask);
24040 /* Recover the static chain register. */
24041 if (clobber_ip)
24043 if (!arm_r3_live_at_start_p () || saved_pretend_args)
24044 insn = gen_rtx_REG (SImode, 3);
24045 else
24047 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
24048 insn = gen_frame_mem (SImode, insn);
24050 emit_set_insn (ip_rtx, insn);
24051 emit_insn (gen_force_register_use (ip_rtx));
24054 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
24056 /* This add can produce multiple insns for a large constant, so we
24057 need to get tricky. */
24058 rtx_insn *last = get_last_insn ();
24060 amount = GEN_INT (offsets->saved_args + saved_regs
24061 - offsets->outgoing_args);
24063 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24064 amount));
24067 last = last ? NEXT_INSN (last) : get_insns ();
24068 RTX_FRAME_RELATED_P (last) = 1;
24070 while (last != insn);
24072 /* If the frame pointer is needed, emit a special barrier that
24073 will prevent the scheduler from moving stores to the frame
24074 before the stack adjustment. */
24075 if (frame_pointer_needed)
24076 emit_insn (gen_stack_tie (stack_pointer_rtx,
24077 hard_frame_pointer_rtx));
24081 if (frame_pointer_needed && TARGET_THUMB2)
24082 thumb_set_frame_pointer (offsets);
24084 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24086 unsigned long mask;
24088 mask = live_regs_mask;
24089 mask &= THUMB2_WORK_REGS;
24090 if (!IS_NESTED (func_type))
24091 mask |= (1 << IP_REGNUM);
24092 arm_load_pic_register (mask, NULL_RTX);
24095 /* If we are profiling, make sure no instructions are scheduled before
24096 the call to mcount. Similarly if the user has requested no
24097 scheduling in the prologue. Similarly if we want non-call exceptions
24098 using the EABI unwinder, to prevent faulting instructions from being
24099 swapped with a stack adjustment. */
24100 if (crtl->profile || !TARGET_SCHED_PROLOG
24101 || (arm_except_unwind_info (&global_options) == UI_TARGET
24102 && cfun->can_throw_non_call_exceptions))
24103 emit_insn (gen_blockage ());
24105 /* If the link register is being kept alive, with the return address in it,
24106 then make sure that it does not get reused by the ce2 pass. */
24107 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
24108 cfun->machine->lr_save_eliminated = 1;
24111 /* Print condition code to STREAM. Helper function for arm_print_operand. */
24112 static void
24113 arm_print_condition (FILE *stream)
24115 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
24117 /* Branch conversion is not implemented for Thumb-2. */
24118 if (TARGET_THUMB)
24120 output_operand_lossage ("predicated Thumb instruction");
24121 return;
24123 if (current_insn_predicate != NULL)
24125 output_operand_lossage
24126 ("predicated instruction in conditional sequence");
24127 return;
24130 fputs (arm_condition_codes[arm_current_cc], stream);
24132 else if (current_insn_predicate)
24134 enum arm_cond_code code;
24136 if (TARGET_THUMB1)
24138 output_operand_lossage ("predicated Thumb instruction");
24139 return;
24142 code = get_arm_condition_code (current_insn_predicate);
24143 fputs (arm_condition_codes[code], stream);
24148 /* Globally reserved letters: acln
24149 Punctuation letters currently used: @_|?().!#
24150 Lower case letters currently used: bcdefhimpqtvwxyz
24151 Upper case letters currently used: ABCDEFGHIJKLMOPQRSTUV
24152 Letters previously used, but now deprecated/obsolete: sNWXYZ.
24154 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
24156 If CODE is 'd', then X is a condition operand and the instruction
24157 should only be executed if the condition is true.
24158 If CODE is 'D', then X is a condition operand and the instruction
24159 should only be executed if the condition is false: however, if the mode
24160 of the comparison is CCFPEmode, then always execute the instruction -- we
24161 do this because in these circumstances !GE does not necessarily imply LT;
24162 in these cases the instruction pattern will take care to make sure that
24163 an instruction containing %d will follow, thereby undoing the effects of
24164 doing this instruction unconditionally.
24165 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24166 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24167 If CODE is 'V', then the operand must be a CONST_INT representing
24168 the bits to preserve in the modified register (Rd) of a BFI or BFC
24169 instruction: print out both the width and lsb (shift) fields. */
24170 static void
24171 arm_print_operand (FILE *stream, rtx x, int code)
24173 switch (code)
24175 case '@':
24176 fputs (ASM_COMMENT_START, stream);
24177 return;
24179 case '_':
24180 fputs (user_label_prefix, stream);
24181 return;
24183 case '|':
24184 fputs (REGISTER_PREFIX, stream);
24185 return;
24187 case '?':
24188 arm_print_condition (stream);
24189 return;
24191 case '.':
24192 /* The current condition code for a condition code setting instruction.
24193 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24194 fputc('s', stream);
24195 arm_print_condition (stream);
24196 return;
24198 case '!':
24199 /* If the instruction is conditionally executed then print
24200 the current condition code, otherwise print 's'. */
24201 gcc_assert (TARGET_THUMB2);
24202 if (current_insn_predicate)
24203 arm_print_condition (stream);
24204 else
24205 fputc('s', stream);
24206 break;
24208 /* %# is a "break" sequence. It doesn't output anything, but is used to
24209 separate e.g. operand numbers from following text, if that text consists
24210 of further digits which we don't want to be part of the operand
24211 number. */
24212 case '#':
24213 return;
24215 /* An integer or symbol address without a preceding # sign. */
24216 case 'c':
24217 switch (GET_CODE (x))
24219 case CONST_INT:
24220 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24221 break;
24223 case SYMBOL_REF:
24224 output_addr_const (stream, x);
24225 break;
24227 case CONST:
24228 if (GET_CODE (XEXP (x, 0)) == PLUS
24229 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24231 output_addr_const (stream, x);
24232 break;
24234 /* Fall through. */
24236 default:
24237 output_operand_lossage ("Unsupported operand for code '%c'", code);
24239 return;
24241 /* An integer that we want to print in HEX. */
24242 case 'x':
24243 switch (GET_CODE (x))
24245 case CONST_INT:
24246 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24247 break;
24249 default:
24250 output_operand_lossage ("Unsupported operand for code '%c'", code);
24252 return;
24254 case 'B':
24255 if (CONST_INT_P (x))
24257 HOST_WIDE_INT val;
24258 val = ARM_SIGN_EXTEND (~INTVAL (x));
24259 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24261 else
24263 putc ('~', stream);
24264 output_addr_const (stream, x);
24266 return;
24268 case 'b':
24269 /* Print the log2 of a CONST_INT. */
24271 HOST_WIDE_INT val;
24273 if (!CONST_INT_P (x)
24274 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24275 output_operand_lossage ("Unsupported operand for code '%c'", code);
24276 else
24277 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24279 return;
24281 case 'L':
24282 /* The low 16 bits of an immediate constant. */
24283 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24284 return;
24286 case 'i':
24287 fprintf (stream, "%s", arithmetic_instr (x, 1));
24288 return;
24290 case 'I':
24291 fprintf (stream, "%s", arithmetic_instr (x, 0));
24292 return;
24294 case 'S':
24296 HOST_WIDE_INT val;
24297 const char *shift;
24299 shift = shift_op (x, &val);
24301 if (shift)
24303 fprintf (stream, ", %s ", shift);
24304 if (val == -1)
24305 arm_print_operand (stream, XEXP (x, 1), 0);
24306 else
24307 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24310 return;
24312 /* An explanation of the 'Q', 'R' and 'H' register operands:
24314 In a pair of registers containing a DI or DF value the 'Q'
24315 operand returns the register number of the register containing
24316 the least significant part of the value. The 'R' operand returns
24317 the register number of the register containing the most
24318 significant part of the value.
24320 The 'H' operand returns the higher of the two register numbers.
24321 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24322 same as the 'Q' operand, since the most significant part of the
24323 value is held in the lower number register. The reverse is true
24324 on systems where WORDS_BIG_ENDIAN is false.
24326 The purpose of these operands is to distinguish between cases
24327 where the endian-ness of the values is important (for example
24328 when they are added together), and cases where the endian-ness
24329 is irrelevant, but the order of register operations is important.
24330 For example when loading a value from memory into a register
24331 pair, the endian-ness does not matter. Provided that the value
24332 from the lower memory address is put into the lower numbered
24333 register, and the value from the higher address is put into the
24334 higher numbered register, the load will work regardless of whether
24335 the value being loaded is big-wordian or little-wordian. The
24336 order of the two register loads can matter however, if the address
24337 of the memory location is actually held in one of the registers
24338 being overwritten by the load.
24340 The 'Q' and 'R' constraints are also available for 64-bit
24341 constants. */
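    /* For instance (little-endian, a DImode value living in {r0, r1}):
       %Q prints r0 (the least significant half), %R prints r1 (the most
       significant half) and %H prints r1, the higher-numbered register
       of the pair.  */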
24342 case 'Q':
24343 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24345 rtx part = gen_lowpart (SImode, x);
24346 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24347 return;
24350 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24352 output_operand_lossage ("invalid operand for code '%c'", code);
24353 return;
24356 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24357 return;
24359 case 'R':
24360 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24362 machine_mode mode = GET_MODE (x);
24363 rtx part;
24365 if (mode == VOIDmode)
24366 mode = DImode;
24367 part = gen_highpart_mode (SImode, mode, x);
24368 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24369 return;
24372 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24374 output_operand_lossage ("invalid operand for code '%c'", code);
24375 return;
24378 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24379 return;
24381 case 'H':
24382 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24384 output_operand_lossage ("invalid operand for code '%c'", code);
24385 return;
24388 asm_fprintf (stream, "%r", REGNO (x) + 1);
24389 return;
24391 case 'J':
24392 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24394 output_operand_lossage ("invalid operand for code '%c'", code);
24395 return;
24398 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24399 return;
24401 case 'K':
24402 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24404 output_operand_lossage ("invalid operand for code '%c'", code);
24405 return;
24408 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24409 return;
24411 case 'm':
24412 asm_fprintf (stream, "%r",
24413 REG_P (XEXP (x, 0))
24414 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24415 return;
24417 case 'M':
24418 asm_fprintf (stream, "{%r-%r}",
24419 REGNO (x),
24420 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24421 return;
24423 /* Like 'M', but writing doubleword vector registers, for use by Neon
24424 insns. */
24425 case 'h':
24427 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24428 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24429 if (numregs == 1)
24430 asm_fprintf (stream, "{d%d}", regno);
24431 else
24432 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24434 return;
24436 case 'd':
24437 /* CONST_TRUE_RTX means always -- that's the default. */
24438 if (x == const_true_rtx)
24439 return;
24441 if (!COMPARISON_P (x))
24443 output_operand_lossage ("invalid operand for code '%c'", code);
24444 return;
24447 fputs (arm_condition_codes[get_arm_condition_code (x)],
24448 stream);
24449 return;
24451 case 'D':
24452 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24453 want to do that. */
24454 if (x == const_true_rtx)
24456 output_operand_lossage ("instruction never executed");
24457 return;
24459 if (!COMPARISON_P (x))
24461 output_operand_lossage ("invalid operand for code '%c'", code);
24462 return;
24465 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24466 (get_arm_condition_code (x))],
24467 stream);
24468 return;
24470 case 'V':
24472 /* Output the LSB (shift) and width for a bitmask instruction
24473 based on a literal mask. The LSB is printed first,
24474 followed by the width.
24476 Eg. For 0b1...1110001, the result is #1, #3. */
24477 if (!CONST_INT_P (x))
24479 output_operand_lossage ("invalid operand for code '%c'", code);
24480 return;
24483 unsigned HOST_WIDE_INT val
24484 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24485 int lsb = exact_log2 (val & -val);
24486 asm_fprintf (stream, "#%d, #%d", lsb,
24487 (exact_log2 (val + (val & -val)) - lsb));
24489 return;
24491 case 'N':
24492 /* Former FPA support, effectively unused after GCC-4.7, but not
24493 removed until GCC-15. */
24494 output_operand_lossage ("obsolete FPA format code '%c'", code);
24495 return;
24497 case 's':
24498 case 'W':
24499 case 'X':
24500 case 'Y':
24501 case 'Z':
24502 /* Former Maverick support, removed after GCC-4.7. */
24503 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24504 return;
24506 case 'U':
24507 if (!REG_P (x)
24508 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24509 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24510 /* Bad value for wCG register number. */
24512 output_operand_lossage ("invalid operand for code '%c'", code);
24513 return;
24516 else
24517 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24518 return;
24520 /* Print an iWMMXt control register name. */
24521 case 'w':
24522 if (!CONST_INT_P (x)
24523 || INTVAL (x) < 0
24524 || INTVAL (x) >= 16)
24525 /* Bad value for wC register number. */
24527 output_operand_lossage ("invalid operand for code '%c'", code);
24528 return;
24531 else
24533 static const char * wc_reg_names [16] =
24535 "wCID", "wCon", "wCSSF", "wCASF",
24536 "wC4", "wC5", "wC6", "wC7",
24537 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24538 "wC12", "wC13", "wC14", "wC15"
24541 fputs (wc_reg_names [INTVAL (x)], stream);
24543 return;
24545 /* Print the high single-precision register of a VFP double-precision
24546 register. */
24547 case 'p':
24549 machine_mode mode = GET_MODE (x);
24550 int regno;
24552 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24554 output_operand_lossage ("invalid operand for code '%c'", code);
24555 return;
24558 regno = REGNO (x);
24559 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24561 output_operand_lossage ("invalid operand for code '%c'", code);
24562 return;
24565 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24567 return;
24569 /* Print a VFP/Neon double precision or quad precision register name. */
24570 case 'P':
24571 case 'q':
24573 machine_mode mode = GET_MODE (x);
24574 int is_quad = (code == 'q');
24575 int regno;
24577 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24579 output_operand_lossage ("invalid operand for code '%c'", code);
24580 return;
24583 if (!REG_P (x)
24584 || !IS_VFP_REGNUM (REGNO (x)))
24586 output_operand_lossage ("invalid operand for code '%c'", code);
24587 return;
24590 regno = REGNO (x);
24591 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24592 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24594 output_operand_lossage ("invalid operand for code '%c'", code);
24595 return;
24598 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24599 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24601 return;
24603 /* These two codes print the low/high doubleword register of a Neon quad
24604 register, respectively. For pair-structure types, can also print
24605 low/high quadword registers. */
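/* For example (illustrative): for a quad-register value in q1 (d2/d3),
   'e' prints d2 and 'f' prints d3; for a 32-byte pair-structure value
   starting at q2, 'e' prints q2 and 'f' prints q3. */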
24606 case 'e':
24607 case 'f':
24609 machine_mode mode = GET_MODE (x);
24610 int regno;
24612 if ((GET_MODE_SIZE (mode) != 16
24613 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24615 output_operand_lossage ("invalid operand for code '%c'", code);
24616 return;
24619 regno = REGNO (x);
24620 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24622 output_operand_lossage ("invalid operand for code '%c'", code);
24623 return;
24626 if (GET_MODE_SIZE (mode) == 16)
24627 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24628 + (code == 'f' ? 1 : 0));
24629 else
24630 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24631 + (code == 'f' ? 1 : 0));
24633 return;
24635 /* Print a VFPv3 floating-point constant, represented as an integer
24636 index. */
24637 case 'G':
24639 int index = vfp3_const_double_index (x);
24640 gcc_assert (index != -1);
24641 fprintf (stream, "%d", index);
24643 return;
24645 /* Print bits representing opcode features for Neon.
24647 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24648 and polynomials as unsigned.
24650 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24652 Bit 2 is 1 for rounding functions, 0 otherwise. */
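/* For example (illustrative): an operand value of 5 (signed, rounding)
   makes 'T' and 't' print 's', 'F' print 'i' and 'O' print 'r'; a value
   of 3 (float) makes 'T', 't' and 'F' all print 'f' while 'O' prints
   nothing. */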
24654 /* Identify the type as 's', 'u', 'p' or 'f'. */
24655 case 'T':
24657 HOST_WIDE_INT bits = INTVAL (x);
24658 fputc ("uspf"[bits & 3], stream);
24660 return;
24662 /* Likewise, but signed and unsigned integers are both 'i'. */
24663 case 'F':
24665 HOST_WIDE_INT bits = INTVAL (x);
24666 fputc ("iipf"[bits & 3], stream);
24668 return;
24670 /* As for 'T', but emit 'u' instead of 'p'. */
24671 case 't':
24673 HOST_WIDE_INT bits = INTVAL (x);
24674 fputc ("usuf"[bits & 3], stream);
24676 return;
24678 /* Bit 2: rounding (vs none). */
24679 case 'O':
24681 HOST_WIDE_INT bits = INTVAL (x);
24682 fputs ((bits & 4) != 0 ? "r" : "", stream);
24684 return;
24686 /* Memory operand for vld1/vst1 instruction. */
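/* For example (illustrative): a 16-byte access through r0 that is known
   to be 16-byte aligned is printed as "[r0:128]"; with post-increment
   addressing a "!" is appended, giving "[r0:128]!". */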
24687 case 'A':
24689 rtx addr;
24690 bool postinc = FALSE;
24691 rtx postinc_reg = NULL;
24692 unsigned align, memsize, align_bits;
24694 gcc_assert (MEM_P (x));
24695 addr = XEXP (x, 0);
24696 if (GET_CODE (addr) == POST_INC)
24698 postinc = 1;
24699 addr = XEXP (addr, 0);
24701 if (GET_CODE (addr) == POST_MODIFY)
24703 postinc_reg = XEXP (XEXP (addr, 1), 1);
24704 addr = XEXP (addr, 0);
24706 asm_fprintf (stream, "[%r", REGNO (addr));
24708 /* We know the alignment of this access, so we can emit a hint in the
24709 instruction (for some alignments) as an aid to the memory subsystem
24710 of the target. */
24711 align = MEM_ALIGN (x) >> 3;
24712 memsize = MEM_SIZE (x);
24714 /* Only certain alignment specifiers are supported by the hardware. */
24715 if (memsize == 32 && (align % 32) == 0)
24716 align_bits = 256;
24717 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24718 align_bits = 128;
24719 else if (memsize >= 8 && (align % 8) == 0)
24720 align_bits = 64;
24721 else
24722 align_bits = 0;
24724 if (align_bits != 0)
24725 asm_fprintf (stream, ":%d", align_bits);
24727 asm_fprintf (stream, "]");
24729 if (postinc)
24730 fputs("!", stream);
24731 if (postinc_reg)
24732 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24734 return;
24736 /* Print a memory operand matched by the "Ux" or "Uj" constraint. Based on
24737 the rtx_code, the output looks like one of the following:
24738 1. [Rn], #+/-<imm>
24739 2. [Rn, #+/-<imm>]!
24740 3. [Rn, #+/-<imm>]
24741 4. [Rn]. */
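/* For example (illustrative, for an SImode access): an address of
   (post_inc r3) prints as "[r3], #4", (pre_dec r3) prints as
   "[r3, #-4]!" and (plus r3 (const_int 8)) prints as "[r3, #8]". */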
24742 case 'E':
24744 rtx addr;
24745 rtx postinc_reg = NULL;
24746 unsigned inc_val = 0;
24747 enum rtx_code code;
24749 gcc_assert (MEM_P (x));
24750 addr = XEXP (x, 0);
24751 code = GET_CODE (addr);
24752 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24753 || code == PRE_DEC)
24755 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24756 inc_val = GET_MODE_SIZE (GET_MODE (x));
24757 if (code == POST_INC || code == POST_DEC)
24758 asm_fprintf (stream, "], #%s%d", (code == POST_INC)
24759 ? "" : "-", inc_val);
24760 else
24761 asm_fprintf (stream, ", #%s%d]!", (code == PRE_INC)
24762 ? "" : "-", inc_val);
24764 else if (code == POST_MODIFY || code == PRE_MODIFY)
24766 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24767 postinc_reg = XEXP (XEXP (addr, 1), 1);
24768 if (postinc_reg && CONST_INT_P (postinc_reg))
24770 if (code == POST_MODIFY)
24771 asm_fprintf (stream, "], #%wd", INTVAL (postinc_reg));
24772 else
24773 asm_fprintf (stream, ", #%wd]!", INTVAL (postinc_reg));
24776 else if (code == PLUS)
24778 rtx base = XEXP (addr, 0);
24779 rtx index = XEXP (addr, 1);
24781 gcc_assert (REG_P (base) && CONST_INT_P (index));
24783 HOST_WIDE_INT offset = INTVAL (index);
24784 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24786 else
24788 gcc_assert (REG_P (addr));
24789 asm_fprintf (stream, "[%r]", REGNO (addr));
24792 return;
24794 case 'C':
24796 rtx addr;
24798 gcc_assert (MEM_P (x));
24799 addr = XEXP (x, 0);
24800 gcc_assert (REG_P (addr));
24801 asm_fprintf (stream, "[%r]", REGNO (addr));
24803 return;
24805 /* Translate an S register number into a D register number and element index. */
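/* For example (illustrative): an SFmode value in s5 prints as "d2[1]",
   since s5 is the odd (upper) half of d2. */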
24806 case 'y':
24808 machine_mode mode = GET_MODE (x);
24809 int regno;
24811 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24813 output_operand_lossage ("invalid operand for code '%c'", code);
24814 return;
24817 regno = REGNO (x);
24818 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24820 output_operand_lossage ("invalid operand for code '%c'", code);
24821 return;
24824 regno = regno - FIRST_VFP_REGNUM;
24825 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24827 return;
24829 case 'v':
24830 gcc_assert (CONST_DOUBLE_P (x));
24831 int result;
24832 result = vfp3_const_double_for_fract_bits (x);
24833 if (result == 0)
24834 result = vfp3_const_double_for_bits (x);
24835 fprintf (stream, "#%d", result);
24836 return;
24838 /* Register specifier for vld1.16/vst1.16. Translate the S register
24839 number into a D register number and element index. */
24840 case 'z':
24842 machine_mode mode = GET_MODE (x);
24843 int regno;
24845 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24847 output_operand_lossage ("invalid operand for code '%c'", code);
24848 return;
24851 regno = REGNO (x);
24852 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24854 output_operand_lossage ("invalid operand for code '%c'", code);
24855 return;
24858 regno = regno - FIRST_VFP_REGNUM;
24859 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24861 return;
24863 default:
24864 if (x == 0)
24866 output_operand_lossage ("missing operand");
24867 return;
24870 switch (GET_CODE (x))
24872 case REG:
24873 asm_fprintf (stream, "%r", REGNO (x));
24874 break;
24876 case MEM:
24877 output_address (GET_MODE (x), XEXP (x, 0));
24878 break;
24880 case CONST_DOUBLE:
24882 char fpstr[20];
24883 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24884 sizeof (fpstr), 0, 1);
24885 fprintf (stream, "#%s", fpstr);
24887 break;
24889 default:
24890 gcc_assert (GET_CODE (x) != NEG);
24891 fputc ('#', stream);
24892 if (GET_CODE (x) == HIGH)
24894 fputs (":lower16:", stream);
24895 x = XEXP (x, 0);
24898 output_addr_const (stream, x);
24899 break;
24904 /* Target hook for printing a memory address. */
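/* For example (illustrative, 32-bit code): (reg r1) prints as "[r1]",
   (plus (reg r1) (const_int 8)) as "[r1, #8]", and for an SImode access
   (pre_dec (reg r2)) prints as "[r2, #-4]!" while (post_inc (reg r2))
   prints as "[r2], #4". */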
24905 static void
24906 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24908 if (TARGET_32BIT)
24910 int is_minus = GET_CODE (x) == MINUS;
24912 if (REG_P (x))
24913 asm_fprintf (stream, "[%r]", REGNO (x));
24914 else if (GET_CODE (x) == PLUS || is_minus)
24916 rtx base = XEXP (x, 0);
24917 rtx index = XEXP (x, 1);
24918 HOST_WIDE_INT offset = 0;
24919 if (!REG_P (base)
24920 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24922 /* Ensure that BASE is a register
24923 (one of them must be).  Also ensure
24924 that SP is not used as an index register. */
24925 std::swap (base, index);
24927 switch (GET_CODE (index))
24929 case CONST_INT:
24930 offset = INTVAL (index);
24931 if (is_minus)
24932 offset = -offset;
24933 asm_fprintf (stream, "[%r, #%wd]",
24934 REGNO (base), offset);
24935 break;
24937 case REG:
24938 asm_fprintf (stream, "[%r, %s%r]",
24939 REGNO (base), is_minus ? "-" : "",
24940 REGNO (index));
24941 break;
24943 case MULT:
24944 case ASHIFTRT:
24945 case LSHIFTRT:
24946 case ASHIFT:
24947 case ROTATERT:
24949 asm_fprintf (stream, "[%r, %s%r",
24950 REGNO (base), is_minus ? "-" : "",
24951 REGNO (XEXP (index, 0)));
24952 arm_print_operand (stream, index, 'S');
24953 fputs ("]", stream);
24954 break;
24957 default:
24958 gcc_unreachable ();
24961 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24962 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24964 gcc_assert (REG_P (XEXP (x, 0)));
24966 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24967 asm_fprintf (stream, "[%r, #%s%d]!",
24968 REGNO (XEXP (x, 0)),
24969 GET_CODE (x) == PRE_DEC ? "-" : "",
24970 GET_MODE_SIZE (mode));
24971 else if (TARGET_HAVE_MVE
24972 && VALID_MVE_STRUCT_MODE (mode))
24973 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24974 else
24975 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24976 GET_CODE (x) == POST_DEC ? "-" : "",
24977 GET_MODE_SIZE (mode));
24979 else if (GET_CODE (x) == PRE_MODIFY)
24981 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24982 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24983 asm_fprintf (stream, "#%wd]!",
24984 INTVAL (XEXP (XEXP (x, 1), 1)));
24985 else
24986 asm_fprintf (stream, "%r]!",
24987 REGNO (XEXP (XEXP (x, 1), 1)));
24989 else if (GET_CODE (x) == POST_MODIFY)
24991 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24992 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24993 asm_fprintf (stream, "#%wd",
24994 INTVAL (XEXP (XEXP (x, 1), 1)));
24995 else
24996 asm_fprintf (stream, "%r",
24997 REGNO (XEXP (XEXP (x, 1), 1)));
24999 else output_addr_const (stream, x);
25001 else
25003 if (REG_P (x))
25004 asm_fprintf (stream, "[%r]", REGNO (x));
25005 else if (GET_CODE (x) == POST_INC)
25006 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
25007 else if (GET_CODE (x) == PLUS)
25009 gcc_assert (REG_P (XEXP (x, 0)));
25010 if (CONST_INT_P (XEXP (x, 1)))
25011 asm_fprintf (stream, "[%r, #%wd]",
25012 REGNO (XEXP (x, 0)),
25013 INTVAL (XEXP (x, 1)));
25014 else
25015 asm_fprintf (stream, "[%r, %r]",
25016 REGNO (XEXP (x, 0)),
25017 REGNO (XEXP (x, 1)));
25019 else
25020 output_addr_const (stream, x);
25024 /* Target hook for indicating whether a punctuation character for
25025 TARGET_PRINT_OPERAND is valid. */
25026 static bool
25027 arm_print_operand_punct_valid_p (unsigned char code)
25029 return (code == '@' || code == '|' || code == '.'
25030 || code == '(' || code == ')' || code == '#'
25031 || (TARGET_32BIT && (code == '?'))
25032 || (TARGET_THUMB2 && (code == '!'))
25033 || (TARGET_THUMB && (code == '_')));
25036 /* Target hook for assembling integer objects. The ARM version needs to
25037 handle word-sized values specially. */
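/* For example (illustrative, non-FDPIC PIC code emitting its constant pool
   when GOT relocations are needed, assuming the default text-relative PIC
   data): a word-sized reference to a preemptible global symbol "foo" is
   emitted as "\t.word\tfoo(GOT)", while a reference to a local symbol is
   emitted as "\t.word\tfoo(GOTOFF)". */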
25038 static bool
25039 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
25041 machine_mode mode;
25043 if (size == UNITS_PER_WORD && aligned_p)
25045 fputs ("\t.word\t", asm_out_file);
25046 output_addr_const (asm_out_file, x);
25048 /* Mark symbols as position independent. We only do this in the
25049 .text segment, not in the .data segment. */
25050 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
25051 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
25053 /* See legitimize_pic_address for an explanation of the
25054 TARGET_VXWORKS_RTP check. */
25055 /* References to weak symbols cannot be resolved locally:
25056 they may be overridden by a non-weak definition at link
25057 time. */
25058 if (!arm_pic_data_is_text_relative
25059 || (SYMBOL_REF_P (x)
25060 && (!SYMBOL_REF_LOCAL_P (x)
25061 || (SYMBOL_REF_DECL (x)
25062 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
25063 || (SYMBOL_REF_FUNCTION_P (x)
25064 && !arm_fdpic_local_funcdesc_p (x)))))
25066 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
25067 fputs ("(GOTFUNCDESC)", asm_out_file);
25068 else
25069 fputs ("(GOT)", asm_out_file);
25071 else
25073 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
25074 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
25075 else
25077 bool is_readonly;
25079 if (!TARGET_FDPIC
25080 || arm_is_segment_info_known (x, &is_readonly))
25081 fputs ("(GOTOFF)", asm_out_file);
25082 else
25083 fputs ("(GOT)", asm_out_file);
25088 /* For FDPIC we also have to mark symbol for .data section. */
25089 if (TARGET_FDPIC
25090 && !making_const_table
25091 && SYMBOL_REF_P (x)
25092 && SYMBOL_REF_FUNCTION_P (x))
25093 fputs ("(FUNCDESC)", asm_out_file);
25095 fputc ('\n', asm_out_file);
25096 return true;
25099 mode = GET_MODE (x);
25101 if (arm_vector_mode_supported_p (mode))
25103 int i, units;
25105 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25107 units = CONST_VECTOR_NUNITS (x);
25108 size = GET_MODE_UNIT_SIZE (mode);
25110 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
25111 for (i = 0; i < units; i++)
25113 rtx elt = CONST_VECTOR_ELT (x, i);
25114 assemble_integer
25115 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
25117 else
25118 for (i = 0; i < units; i++)
25120 rtx elt = CONST_VECTOR_ELT (x, i);
25121 assemble_real
25122 (*CONST_DOUBLE_REAL_VALUE (elt),
25123 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
25124 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
25127 return true;
25130 return default_assemble_integer (x, size, aligned_p);
25133 static void
25134 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
25136 section *s;
25138 if (!TARGET_AAPCS_BASED)
25140 (is_ctor ?
25141 default_named_section_asm_out_constructor
25142 : default_named_section_asm_out_destructor) (symbol, priority);
25143 return;
25146 /* Put these in the .init_array section, using a special relocation. */
25147 if (priority != DEFAULT_INIT_PRIORITY)
25149 char buf[18];
25150 sprintf (buf, "%s.%.5u",
25151 is_ctor ? ".init_array" : ".fini_array",
25152 priority);
25153 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
25155 else if (is_ctor)
25156 s = ctors_section;
25157 else
25158 s = dtors_section;
25160 switch_to_section (s);
25161 assemble_align (POINTER_SIZE);
25162 fputs ("\t.word\t", asm_out_file);
25163 output_addr_const (asm_out_file, symbol);
25164 fputs ("(target1)\n", asm_out_file);
25167 /* Add a function to the list of static constructors. */
25169 static void
25170 arm_elf_asm_constructor (rtx symbol, int priority)
25172 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
25175 /* Add a function to the list of static destructors. */
25177 static void
25178 arm_elf_asm_destructor (rtx symbol, int priority)
25180 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
25183 /* A finite state machine takes care of noticing whether or not instructions
25184 can be conditionally executed, and thus decrease execution time and code
25185 size by deleting branch instructions. The fsm is controlled by
25186 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25188 /* The states of the fsm controlling condition codes are:
25189 0: normal, do nothing special
25190 1: make ASM_OUTPUT_OPCODE not output this instruction
25191 2: make ASM_OUTPUT_OPCODE not output this instruction
25192 3: make instructions conditional
25193 4: make instructions conditional
25195 State transitions (state->state by whom under condition):
25196 0 -> 1 final_prescan_insn if the `target' is a label
25197 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25198 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25199 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25200 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25201 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25202 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25203 (the target insn is arm_target_insn).
25205 If the jump clobbers the conditions then we use states 2 and 4.
25207 A similar thing can be done with conditional return insns.
25209 XXX In case the `target' is an unconditional branch, this conditionalising
25210 of the instructions always reduces code size, but not always execution
25211 time. But then, I want to reduce the code size to somewhere near what
25212 /bin/cc produces. */
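/* For example (illustrative), a conditional branch that skips a single
   instruction:

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is output with the branch suppressed and the skipped instruction
   conditionalised on the inverse condition:

	cmp	r0, #0
	addne	r1, r1, #1  */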
25214 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25215 instructions. When a COND_EXEC instruction is seen the subsequent
25216 instructions are scanned so that multiple conditional instructions can be
25217 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25218 specify the length and true/false mask for the IT block. These will be
25219 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
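/* For example (illustrative): with arm_condexec_masklen == 3,
   arm_condexec_mask == 0b101 and arm_current_cc == ARM_EQ, the IT
   instruction "itet eq" is emitted before the first instruction of the
   block, so the three instructions execute under EQ, NE and EQ
   respectively. */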
25221 /* Returns the index of the ARM condition code string in
25222 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25223 COMPARISON should be an rtx like `(eq (...) (...))'. */
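/* For example (illustrative): (eq (reg:CC CC_REGNUM) (const_int 0))
   yields ARM_EQ, and (ltu (reg:CC_C CC_REGNUM) (const_int 0)) yields
   ARM_CS (the carry-set condition). */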
25225 enum arm_cond_code
25226 maybe_get_arm_condition_code (rtx comparison)
25228 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25229 enum arm_cond_code code;
25230 enum rtx_code comp_code = GET_CODE (comparison);
25232 if (GET_MODE_CLASS (mode) != MODE_CC)
25233 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25234 XEXP (comparison, 1));
25236 switch (mode)
25238 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25239 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25240 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25241 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25242 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25243 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25244 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25245 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25246 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25247 case E_CC_DLTUmode: code = ARM_CC;
25249 dominance:
25250 if (comp_code == EQ)
25251 return ARM_INVERSE_CONDITION_CODE (code);
25252 if (comp_code == NE)
25253 return code;
25254 return ARM_NV;
25256 case E_CC_NZmode:
25257 switch (comp_code)
25259 case NE: return ARM_NE;
25260 case EQ: return ARM_EQ;
25261 case GE: return ARM_PL;
25262 case LT: return ARM_MI;
25263 default: return ARM_NV;
25266 case E_CC_Zmode:
25267 switch (comp_code)
25269 case NE: return ARM_NE;
25270 case EQ: return ARM_EQ;
25271 default: return ARM_NV;
25274 case E_CC_Nmode:
25275 switch (comp_code)
25277 case NE: return ARM_MI;
25278 case EQ: return ARM_PL;
25279 default: return ARM_NV;
25282 case E_CCFPEmode:
25283 case E_CCFPmode:
25284 /* We can handle all cases except UNEQ and LTGT. */
25285 switch (comp_code)
25287 case GE: return ARM_GE;
25288 case GT: return ARM_GT;
25289 case LE: return ARM_LS;
25290 case LT: return ARM_MI;
25291 case NE: return ARM_NE;
25292 case EQ: return ARM_EQ;
25293 case ORDERED: return ARM_VC;
25294 case UNORDERED: return ARM_VS;
25295 case UNLT: return ARM_LT;
25296 case UNLE: return ARM_LE;
25297 case UNGT: return ARM_HI;
25298 case UNGE: return ARM_PL;
25299 /* UNEQ and LTGT do not have a representation. */
25300 case UNEQ: /* Fall through. */
25301 case LTGT: /* Fall through. */
25302 default: return ARM_NV;
25305 case E_CC_SWPmode:
25306 switch (comp_code)
25308 case NE: return ARM_NE;
25309 case EQ: return ARM_EQ;
25310 case GE: return ARM_LE;
25311 case GT: return ARM_LT;
25312 case LE: return ARM_GE;
25313 case LT: return ARM_GT;
25314 case GEU: return ARM_LS;
25315 case GTU: return ARM_CC;
25316 case LEU: return ARM_CS;
25317 case LTU: return ARM_HI;
25318 default: return ARM_NV;
25321 case E_CC_Cmode:
25322 switch (comp_code)
25324 case LTU: return ARM_CS;
25325 case GEU: return ARM_CC;
25326 default: return ARM_NV;
25329 case E_CC_NVmode:
25330 switch (comp_code)
25332 case GE: return ARM_GE;
25333 case LT: return ARM_LT;
25334 default: return ARM_NV;
25337 case E_CC_Bmode:
25338 switch (comp_code)
25340 case GEU: return ARM_CS;
25341 case LTU: return ARM_CC;
25342 default: return ARM_NV;
25345 case E_CC_Vmode:
25346 switch (comp_code)
25348 case NE: return ARM_VS;
25349 case EQ: return ARM_VC;
25350 default: return ARM_NV;
25353 case E_CC_ADCmode:
25354 switch (comp_code)
25356 case GEU: return ARM_CS;
25357 case LTU: return ARM_CC;
25358 default: return ARM_NV;
25361 case E_CCmode:
25362 case E_CC_RSBmode:
25363 switch (comp_code)
25365 case NE: return ARM_NE;
25366 case EQ: return ARM_EQ;
25367 case GE: return ARM_GE;
25368 case GT: return ARM_GT;
25369 case LE: return ARM_LE;
25370 case LT: return ARM_LT;
25371 case GEU: return ARM_CS;
25372 case GTU: return ARM_HI;
25373 case LEU: return ARM_LS;
25374 case LTU: return ARM_CC;
25375 default: return ARM_NV;
25378 default: gcc_unreachable ();
25382 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25383 static enum arm_cond_code
25384 get_arm_condition_code (rtx comparison)
25386 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25387 gcc_assert (code != ARM_NV);
25388 return code;
25391 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25392 code registers when not targeting Thumb1. The VFP condition register
25393 only exists when generating hard-float code. */
25394 static bool
25395 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25397 if (!TARGET_32BIT)
25398 return false;
25400 *p1 = CC_REGNUM;
25401 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25402 return true;
25405 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25406 instructions. */
25407 void
25408 thumb2_final_prescan_insn (rtx_insn *insn)
25410 rtx_insn *first_insn = insn;
25411 rtx body = PATTERN (insn);
25412 rtx predicate;
25413 enum arm_cond_code code;
25414 int n;
25415 int mask;
25416 int max;
25418 /* max_insns_skipped in the tune was already taken into account in the
25419 cost model of the ifcvt pass when generating COND_EXEC insns. At this
25420 stage just emit the IT blocks as best we can. It does not make sense
25421 to split the IT blocks. */
25422 max = MAX_INSN_PER_IT_BLOCK;
25424 /* Remove the previous insn from the count of insns to be output. */
25425 if (arm_condexec_count)
25426 arm_condexec_count--;
25428 /* Nothing to do if we are already inside a conditional block. */
25429 if (arm_condexec_count)
25430 return;
25432 if (GET_CODE (body) != COND_EXEC)
25433 return;
25435 /* Conditional jumps are implemented directly. */
25436 if (JUMP_P (insn))
25437 return;
25439 predicate = COND_EXEC_TEST (body);
25440 arm_current_cc = get_arm_condition_code (predicate);
25442 n = get_attr_ce_count (insn);
25443 arm_condexec_count = 1;
25444 arm_condexec_mask = (1 << n) - 1;
25445 arm_condexec_masklen = n;
25446 /* See if subsequent instructions can be combined into the same block. */
25447 for (;;)
25449 insn = next_nonnote_insn (insn);
25451 /* Jumping into the middle of an IT block is illegal, so a label or
25452 barrier terminates the block. */
25453 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25454 break;
25456 body = PATTERN (insn);
25457 /* USE and CLOBBER aren't really insns, so just skip them. */
25458 if (GET_CODE (body) == USE
25459 || GET_CODE (body) == CLOBBER)
25460 continue;
25462 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25463 if (GET_CODE (body) != COND_EXEC)
25464 break;
25465 /* Maximum number of conditionally executed instructions in a block. */
25466 n = get_attr_ce_count (insn);
25467 if (arm_condexec_masklen + n > max)
25468 break;
25470 predicate = COND_EXEC_TEST (body);
25471 code = get_arm_condition_code (predicate);
25472 mask = (1 << n) - 1;
25473 if (arm_current_cc == code)
25474 arm_condexec_mask |= (mask << arm_condexec_masklen);
25475 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25476 break;
25478 arm_condexec_count++;
25479 arm_condexec_masklen += n;
25481 /* A jump must be the last instruction in a conditional block. */
25482 if (JUMP_P (insn))
25483 break;
25485 /* Restore recog_data (getting the attributes of other insns can
25486 destroy this array, but final.cc assumes that it remains intact
25487 across this call). */
25488 extract_constrain_insn_cached (first_insn);
25491 void
25492 arm_final_prescan_insn (rtx_insn *insn)
25494 /* BODY will hold the body of INSN. */
25495 rtx body = PATTERN (insn);
25497 /* This will be 1 if trying to repeat the trick, and things need to be
25498 reversed if it appears to fail. */
25499 int reverse = 0;
25501 /* If we start with a return insn, we only succeed if we find another one. */
25502 int seeking_return = 0;
25503 enum rtx_code return_code = UNKNOWN;
25505 /* START_INSN will hold the insn from where we start looking. This is the
25506 first insn after the following code_label if REVERSE is true. */
25507 rtx_insn *start_insn = insn;
25509 /* If in state 4, check if the target branch is reached, in order to
25510 change back to state 0. */
25511 if (arm_ccfsm_state == 4)
25513 if (insn == arm_target_insn)
25515 arm_target_insn = NULL;
25516 arm_ccfsm_state = 0;
25518 return;
25521 /* If in state 3, it is possible to repeat the trick, if this insn is an
25522 unconditional branch to a label, and immediately following this branch
25523 is the previous target label which is only used once, and the label this
25524 branch jumps to is not too far off. */
25525 if (arm_ccfsm_state == 3)
25527 if (simplejump_p (insn))
25529 start_insn = next_nonnote_insn (start_insn);
25530 if (BARRIER_P (start_insn))
25532 /* XXX Isn't this always a barrier? */
25533 start_insn = next_nonnote_insn (start_insn);
25535 if (LABEL_P (start_insn)
25536 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25537 && LABEL_NUSES (start_insn) == 1)
25538 reverse = TRUE;
25539 else
25540 return;
25542 else if (ANY_RETURN_P (body))
25544 start_insn = next_nonnote_insn (start_insn);
25545 if (BARRIER_P (start_insn))
25546 start_insn = next_nonnote_insn (start_insn);
25547 if (LABEL_P (start_insn)
25548 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25549 && LABEL_NUSES (start_insn) == 1)
25551 reverse = TRUE;
25552 seeking_return = 1;
25553 return_code = GET_CODE (body);
25555 else
25556 return;
25558 else
25559 return;
25562 gcc_assert (!arm_ccfsm_state || reverse);
25563 if (!JUMP_P (insn))
25564 return;
25566 /* This jump might be paralleled with a clobber of the condition codes;
25567 the jump should always come first. */
25568 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25569 body = XVECEXP (body, 0, 0);
25571 if (reverse
25572 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25573 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25575 int insns_skipped;
25576 int fail = FALSE, succeed = FALSE;
25577 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25578 int then_not_else = TRUE;
25579 rtx_insn *this_insn = start_insn;
25580 rtx label = 0;
25582 /* Register the insn jumped to. */
25583 if (reverse)
25585 if (!seeking_return)
25586 label = XEXP (SET_SRC (body), 0);
25588 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25589 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25590 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25592 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25593 then_not_else = FALSE;
25595 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25597 seeking_return = 1;
25598 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25600 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25602 seeking_return = 1;
25603 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25604 then_not_else = FALSE;
25606 else
25607 gcc_unreachable ();
25609 /* See how many insns this branch skips, and what kind of insns. If all
25610 insns are okay, and the label or unconditional branch to the same
25611 label is not too far away, succeed. */
25612 for (insns_skipped = 0;
25613 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25615 rtx scanbody;
25617 this_insn = next_nonnote_insn (this_insn);
25618 if (!this_insn)
25619 break;
25621 switch (GET_CODE (this_insn))
25623 case CODE_LABEL:
25624 /* Succeed if it is the target label, otherwise fail since
25625 control falls in from somewhere else. */
25626 if (this_insn == label)
25628 arm_ccfsm_state = 1;
25629 succeed = TRUE;
25631 else
25632 fail = TRUE;
25633 break;
25635 case BARRIER:
25636 /* Succeed if the following insn is the target label.
25637 Otherwise fail.
25638 If return insns are used then the last insn in a function
25639 will be a barrier. */
25640 this_insn = next_nonnote_insn (this_insn);
25641 if (this_insn && this_insn == label)
25643 arm_ccfsm_state = 1;
25644 succeed = TRUE;
25646 else
25647 fail = TRUE;
25648 break;
25650 case CALL_INSN:
25651 /* The AAPCS says that conditional calls should not be
25652 used since they make interworking inefficient (the
25653 linker can't transform BL<cond> into BLX). That's
25654 only a problem if the machine has BLX. */
25655 if (arm_arch5t)
25657 fail = TRUE;
25658 break;
25661 /* Succeed if the following insn is the target label, or
25662 if the following two insns are a barrier and the
25663 target label. */
25664 this_insn = next_nonnote_insn (this_insn);
25665 if (this_insn && BARRIER_P (this_insn))
25666 this_insn = next_nonnote_insn (this_insn);
25668 if (this_insn && this_insn == label
25669 && insns_skipped < max_insns_skipped)
25671 arm_ccfsm_state = 1;
25672 succeed = TRUE;
25674 else
25675 fail = TRUE;
25676 break;
25678 case JUMP_INSN:
25679 /* If this is an unconditional branch to the same label, succeed.
25680 If it is to another label, do nothing. If it is conditional,
25681 fail. */
25682 /* XXX Probably, the tests for SET and the PC are
25683 unnecessary. */
25685 scanbody = PATTERN (this_insn);
25686 if (GET_CODE (scanbody) == SET
25687 && GET_CODE (SET_DEST (scanbody)) == PC)
25689 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25690 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25692 arm_ccfsm_state = 2;
25693 succeed = TRUE;
25695 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25696 fail = TRUE;
25698 /* Fail if a conditional return is undesirable (e.g. on a
25699 StrongARM), but still allow this if optimizing for size. */
25700 else if (GET_CODE (scanbody) == return_code
25701 && !use_return_insn (TRUE, NULL)
25702 && !optimize_size)
25703 fail = TRUE;
25704 else if (GET_CODE (scanbody) == return_code)
25706 arm_ccfsm_state = 2;
25707 succeed = TRUE;
25709 else if (GET_CODE (scanbody) == PARALLEL)
25711 switch (get_attr_conds (this_insn))
25713 case CONDS_NOCOND:
25714 break;
25715 default:
25716 fail = TRUE;
25717 break;
25720 else
25721 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25723 break;
25725 case INSN:
25726 /* Check the instruction is explicitly marked as predicable.
25727 Instructions using or affecting the condition codes are not. */
25728 scanbody = PATTERN (this_insn);
25729 if (!(GET_CODE (scanbody) == SET
25730 || GET_CODE (scanbody) == PARALLEL)
25731 || get_attr_predicable (this_insn) != PREDICABLE_YES
25732 || get_attr_conds (this_insn) != CONDS_NOCOND)
25733 fail = TRUE;
25734 break;
25736 default:
25737 break;
25740 if (succeed)
25742 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25743 arm_target_label = CODE_LABEL_NUMBER (label);
25744 else
25746 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25748 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25750 this_insn = next_nonnote_insn (this_insn);
25751 gcc_assert (!this_insn
25752 || (!BARRIER_P (this_insn)
25753 && !LABEL_P (this_insn)));
25755 if (!this_insn)
25757 /* Oh, dear! We ran off the end; give up. */
25758 extract_constrain_insn_cached (insn);
25759 arm_ccfsm_state = 0;
25760 arm_target_insn = NULL;
25761 return;
25763 arm_target_insn = this_insn;
25766 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25767 what it was. */
25768 if (!reverse)
25769 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25771 if (reverse || then_not_else)
25772 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25775 /* Restore recog_data (getting the attributes of other insns can
25776 destroy this array, but final.cc assumes that it remains intact
25777 across this call). */
25778 extract_constrain_insn_cached (insn);
25782 /* Output IT instructions. */
25783 void
25784 thumb2_asm_output_opcode (FILE * stream)
25786 char buff[5];
25787 int n;
25789 if (arm_condexec_mask)
25791 for (n = 0; n < arm_condexec_masklen; n++)
25792 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25793 buff[n] = 0;
25794 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25795 arm_condition_codes[arm_current_cc]);
25796 arm_condexec_mask = 0;
25800 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25801 UNITS_PER_WORD bytes wide. */
25802 static unsigned int
25803 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25805 if (IS_VPR_REGNUM (regno))
25806 return CEIL (GET_MODE_SIZE (mode), 2);
25808 if (TARGET_32BIT
25809 && regno > PC_REGNUM
25810 && regno != FRAME_POINTER_REGNUM
25811 && regno != ARG_POINTER_REGNUM
25812 && !IS_VFP_REGNUM (regno))
25813 return 1;
25815 return ARM_NUM_REGS (mode);
25818 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25819 static bool
25820 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25822 if (GET_MODE_CLASS (mode) == MODE_CC)
25823 return (regno == CC_REGNUM
25824 || (TARGET_VFP_BASE
25825 && regno == VFPCC_REGNUM));
25827 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25828 return false;
25830 if (IS_VPR_REGNUM (regno))
25831 return VALID_MVE_PRED_MODE (mode);
25833 if (TARGET_THUMB1)
25834 /* For the Thumb we only allow values bigger than SImode in
25835 registers 0 - 6, so that there is always a second low
25836 register available to hold the upper part of the value.
25837 We probably ought to ensure that the register is the
25838 start of an even numbered register pair. */
25839 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25841 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25843 if (mode == DFmode || mode == DImode)
25844 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25846 if (mode == HFmode || mode == BFmode || mode == HImode
25847 || mode == SFmode || mode == SImode)
25848 return VFP_REGNO_OK_FOR_SINGLE (regno);
25850 if (TARGET_NEON)
25851 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25852 || (VALID_NEON_QREG_MODE (mode)
25853 && NEON_REGNO_OK_FOR_QUAD (regno))
25854 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25855 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25856 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25857 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25858 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25859 if (TARGET_HAVE_MVE)
25860 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25861 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25862 || (mode == V2x16QImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25863 || (mode == V2x8HImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25864 || (mode == V2x4SImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25865 || (mode == V2x8HFmode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25866 || (mode == V2x4SFmode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25867 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
25868 || (mode == V4x16QImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
25869 || (mode == V4x8HImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
25870 || (mode == V4x4SImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
25871 || (mode == V4x8HFmode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
25872 || (mode == V4x4SFmode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25874 return false;
25877 if (TARGET_REALLY_IWMMXT)
25879 if (IS_IWMMXT_GR_REGNUM (regno))
25880 return mode == SImode;
25882 if (IS_IWMMXT_REGNUM (regno))
25883 return VALID_IWMMXT_REG_MODE (mode);
25886 /* We allow almost any value to be stored in the general registers.
25887 Restrict doubleword quantities to even register pairs in ARM state
25888 so that we can use ldrd. The same restriction applies for MVE
25889 in order to support Armv8.1-M Mainline instructions.
25890 Do not allow very large Neon structure opaque modes in general
25891 registers; they would use too many. */
25892 if (regno <= LAST_ARM_REGNUM)
25894 if (ARM_NUM_REGS (mode) > 4)
25895 return false;
25897 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25898 return true;
25900 return !((TARGET_LDRD || TARGET_CDE)
25901 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25904 if (regno == FRAME_POINTER_REGNUM
25905 || regno == ARG_POINTER_REGNUM)
25906 /* We only allow integers in the fake hard registers. */
25907 return GET_MODE_CLASS (mode) == MODE_INT;
25909 return false;
25912 /* Implement TARGET_MODES_TIEABLE_P. */
25914 static bool
25915 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25917 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25918 return true;
25920 if (TARGET_HAVE_MVE
25921 && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
25922 return true;
25924 /* We specifically want to allow elements of "structure" modes to
25925 be tieable to the structure. This more general condition allows
25926 other rarer situations too. */
25927 if ((TARGET_NEON
25928 && (VALID_NEON_DREG_MODE (mode1)
25929 || VALID_NEON_QREG_MODE (mode1)
25930 || VALID_NEON_STRUCT_MODE (mode1))
25931 && (VALID_NEON_DREG_MODE (mode2)
25932 || VALID_NEON_QREG_MODE (mode2)
25933 || VALID_NEON_STRUCT_MODE (mode2)))
25934 || (TARGET_HAVE_MVE
25935 && (VALID_MVE_MODE (mode1)
25936 || VALID_MVE_STRUCT_MODE (mode1))
25937 && (VALID_MVE_MODE (mode2)
25938 || VALID_MVE_STRUCT_MODE (mode2))))
25939 return true;
25941 return false;
25944 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25945 not used in arm mode. */
25947 enum reg_class
25948 arm_regno_class (int regno)
25950 if (regno == PC_REGNUM)
25951 return NO_REGS;
25953 if (IS_VPR_REGNUM (regno))
25954 return VPR_REG;
25956 if (IS_PAC_REGNUM (regno))
25957 return PAC_REG;
25959 if (TARGET_THUMB1)
25961 if (regno == STACK_POINTER_REGNUM)
25962 return STACK_REG;
25963 if (regno == CC_REGNUM)
25964 return CC_REG;
25965 if (regno < 8)
25966 return LO_REGS;
25967 return HI_REGS;
25970 if (TARGET_THUMB2 && regno < 8)
25971 return LO_REGS;
25973 if ( regno <= LAST_ARM_REGNUM
25974 || regno == FRAME_POINTER_REGNUM
25975 || regno == ARG_POINTER_REGNUM)
25976 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25978 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25979 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25981 if (IS_VFP_REGNUM (regno))
25983 if (regno <= D7_VFP_REGNUM)
25984 return VFP_D0_D7_REGS;
25985 else if (regno <= LAST_LO_VFP_REGNUM)
25986 return VFP_LO_REGS;
25987 else
25988 return VFP_HI_REGS;
25991 if (IS_IWMMXT_REGNUM (regno))
25992 return IWMMXT_REGS;
25994 if (IS_IWMMXT_GR_REGNUM (regno))
25995 return IWMMXT_GR_REGS;
25997 return NO_REGS;
26000 /* Handle a special case when computing the offset
26001 of an argument from the frame pointer. */
26002 int
26003 arm_debugger_arg_offset (int value, rtx addr)
26005 rtx_insn *insn;
26007 /* We are only interested if dbxout_parms() failed to compute the offset. */
26008 if (value != 0)
26009 return 0;
26011 /* We can only cope with the case where the address is held in a register. */
26012 if (!REG_P (addr))
26013 return 0;
26015 /* If we are using the frame pointer to point at the argument, then
26016 an offset of 0 is correct. */
26017 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
26018 return 0;
26020 /* If we are using the stack pointer to point at the
26021 argument, then an offset of 0 is correct. */
26022 /* ??? Check this is consistent with thumb2 frame layout. */
26023 if ((TARGET_THUMB || !frame_pointer_needed)
26024 && REGNO (addr) == SP_REGNUM)
26025 return 0;
26027 /* Oh dear. The argument is pointed to by a register rather
26028 than being held in a register, or being stored at a known
26029 offset from the frame pointer. Since GDB only understands
26030 those two kinds of argument we must translate the address
26031 held in the register into an offset from the frame pointer.
26032 We do this by searching through the insns for the function
26033 looking to see where this register gets its value. If the
26034 register is initialized from the frame pointer plus an offset
26035 then we are in luck and we can continue, otherwise we give up.
26037 This code is exercised by producing debugging information
26038 for a function with arguments like this:
26040 double func (double a, double b, int c, double d) {return d;}
26042 Without this code the stab for parameter 'd' will be set to
26043 an offset of 0 from the frame pointer, rather than 8. */
26045 /* The if() statement says:
26047 If the insn is a normal instruction
26048 and if the insn is setting the value in a register
26049 and if the register being set is the register holding the address of the argument
26050 and if the address is computed by an addition
26051 that involves adding to a register
26052 which is the frame pointer
26053 a constant integer
26055 then... */
26057 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26059 if ( NONJUMP_INSN_P (insn)
26060 && GET_CODE (PATTERN (insn)) == SET
26061 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
26062 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
26063 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
26064 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
26065 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
26068 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
26070 break;
26074 if (value == 0)
26076 debug_rtx (addr);
26077 warning (0, "unable to compute real location of stacked parameter");
26078 value = 8; /* XXX magic hack */
26081 return value;
26084 /* Implement TARGET_PROMOTED_TYPE. */
26086 static tree
26087 arm_promoted_type (const_tree t)
26089 if (SCALAR_FLOAT_TYPE_P (t)
26090 && TYPE_PRECISION (t) == 16
26091 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
26092 return float_type_node;
26093 return NULL_TREE;
26096 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
26097 This simply adds HFmode as a supported mode; even though we don't
26098 implement arithmetic on this type directly, it's supported by
26099 optabs conversions, much the way the double-word arithmetic is
26100 special-cased in the default hook. */
26102 static bool
26103 arm_scalar_mode_supported_p (scalar_mode mode)
26105 if (mode == HFmode)
26106 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
26107 else if (ALL_FIXED_POINT_MODE_P (mode))
26108 return true;
26109 else
26110 return default_scalar_mode_supported_p (mode);
26113 /* Set the value of FLT_EVAL_METHOD.
26114 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
26116 0: evaluate all operations and constants, whose semantic type has at
26117 most the range and precision of type float, to the range and
26118 precision of float; evaluate all other operations and constants to
26119 the range and precision of the semantic type;
26121 N, where _FloatN is a supported interchange floating type
26122 evaluate all operations and constants, whose semantic type has at
26123 most the range and precision of _FloatN type, to the range and
26124 precision of the _FloatN type; evaluate all other operations and
26125 constants to the range and precision of the semantic type;
26127 If we have the ARMv8.2-A extensions then we support _Float16 in native
26128 precision, so we should set this to 16. Otherwise, we support the type,
26129 but want to evaluate expressions in float precision, so set this to
26130 0. */
26132 static enum flt_eval_method
26133 arm_excess_precision (enum excess_precision_type type)
26135 switch (type)
26137 case EXCESS_PRECISION_TYPE_FAST:
26138 case EXCESS_PRECISION_TYPE_STANDARD:
26139 /* We can calculate either in 16-bit range and precision or
26140 32-bit range and precision. Make that decision based on whether
26141 we have native support for the ARMv8.2-A 16-bit floating-point
26142 instructions or not. */
26143 return (TARGET_VFP_FP16INST
26144 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26145 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
26146 case EXCESS_PRECISION_TYPE_IMPLICIT:
26147 case EXCESS_PRECISION_TYPE_FLOAT16:
26148 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26149 default:
26150 gcc_unreachable ();
26152 return FLT_EVAL_METHOD_UNPREDICTABLE;
26156 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
26157 _Float16 if we are using anything other than ieee format for 16-bit
26158 floating point. Otherwise, punt to the default implementation. */
26159 static opt_scalar_float_mode
26160 arm_floatn_mode (int n, bool extended)
26162 if (!extended && n == 16)
26164 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
26165 return HFmode;
26166 return opt_scalar_float_mode ();
26169 return default_floatn_mode (n, extended);
26173 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26174 not to early-clobber SRC registers in the process.
26176 We assume that the operands described by SRC and DEST represent a
26177 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26178 number of components into which the copy has been decomposed. */
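/* For example (illustrative): copying a two-register value from {d0, d1}
   into the overlapping pair {d1, d2} is emitted highest component first
   (d2 <- d1, then d1 <- d0), since a forward order would clobber d1
   before it had been read. */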
26179 void
26180 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
26182 unsigned int i;
26184 if (!reg_overlap_mentioned_p (operands[0], operands[1])
26185 || REGNO (operands[0]) < REGNO (operands[1]))
26187 for (i = 0; i < count; i++)
26189 operands[2 * i] = dest[i];
26190 operands[2 * i + 1] = src[i];
26193 else
26195 for (i = 0; i < count; i++)
26197 operands[2 * i] = dest[count - i - 1];
26198 operands[2 * i + 1] = src[count - i - 1];
26203 /* Split operands into moves from op[1] + op[2] into op[0]. */
26205 void
26206 neon_split_vcombine (rtx operands[3])
26208 unsigned int dest = REGNO (operands[0]);
26209 unsigned int src1 = REGNO (operands[1]);
26210 unsigned int src2 = REGNO (operands[2]);
26211 machine_mode halfmode = GET_MODE (operands[1]);
26212 unsigned int halfregs = REG_NREGS (operands[1]);
26213 rtx destlo, desthi;
26215 if (src1 == dest && src2 == dest + halfregs)
26217 /* No-op move. Can't split to nothing; emit something. */
26218 emit_note (NOTE_INSN_DELETED);
26219 return;
26222 /* Preserve register attributes for variable tracking. */
26223 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
26224 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26225 GET_MODE_SIZE (halfmode));
26227 /* Special case of reversed high/low parts. Use VSWP. */
26228 if (src2 == dest && src1 == dest + halfregs)
26230 rtx x = gen_rtx_SET (destlo, operands[1]);
26231 rtx y = gen_rtx_SET (desthi, operands[2]);
26232 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26233 return;
26236 if (!reg_overlap_mentioned_p (operands[2], destlo))
26238 /* Try to avoid unnecessary moves if part of the result
26239 is in the right place already. */
26240 if (src1 != dest)
26241 emit_move_insn (destlo, operands[1]);
26242 if (src2 != dest + halfregs)
26243 emit_move_insn (desthi, operands[2]);
26245 else
26247 if (src2 != dest + halfregs)
26248 emit_move_insn (desthi, operands[2]);
26249 if (src1 != dest)
26250 emit_move_insn (destlo, operands[1]);
26254 /* Return the number (counting from 0) of
26255 the least significant set bit in MASK. */
26257 inline static int
26258 number_of_first_bit_set (unsigned mask)
26260 return ctz_hwi (mask);
26263 /* Like emit_multi_reg_push, but allowing for a different set of
26264 registers to be described as saved. MASK is the set of registers
26265 to be saved; REAL_REGS is the set of registers to be described as
26266 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26268 static rtx_insn *
26269 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26271 unsigned long regno;
26272 rtx par[10], tmp, reg;
26273 rtx_insn *insn;
26274 int i, j;
26276 /* Build the parallel of the registers actually being stored. */
26277 for (i = 0; mask; ++i, mask &= mask - 1)
26279 regno = ctz_hwi (mask);
26280 reg = gen_rtx_REG (SImode, regno);
26282 if (i == 0)
26283 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26284 else
26285 tmp = gen_rtx_USE (VOIDmode, reg);
26287 par[i] = tmp;
26290 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26291 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26292 tmp = gen_frame_mem (BLKmode, tmp);
26293 tmp = gen_rtx_SET (tmp, par[0]);
26294 par[0] = tmp;
26296 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26297 insn = emit_insn (tmp);
26299 /* Always build the stack adjustment note for unwind info. */
26300 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26301 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26302 par[0] = tmp;
26304 /* Build the parallel of the registers recorded as saved for unwind. */
26305 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26307 regno = ctz_hwi (real_regs);
26308 reg = gen_rtx_REG (SImode, regno);
26310 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26311 tmp = gen_frame_mem (SImode, tmp);
26312 tmp = gen_rtx_SET (tmp, reg);
26313 RTX_FRAME_RELATED_P (tmp) = 1;
26314 par[j + 1] = tmp;
26317 if (j == 0)
26318 tmp = par[0];
26319 else
26321 RTX_FRAME_RELATED_P (par[0]) = 1;
26322 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26325 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26327 return insn;
26330 /* Emit code to push or pop registers to or from the stack. F is the
26331 assembly file. MASK is the registers to pop. */
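/* For example (illustrative): a MASK of r0, r1 and the PC emits
   "pop\t{r0, r1, pc}" when the PC may be popped directly (no
   interworking, backtrace, __builtin_eh_return or CMSE entry);
   otherwise only the low registers are popped here and the return is
   completed by thumb_exit. */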
26332 static void
26333 thumb_pop (FILE *f, unsigned long mask)
26335 int regno;
26336 int lo_mask = mask & 0xFF;
26338 gcc_assert (mask);
26340 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26342 /* Special case. Do not generate a POP PC statement here, do it in
26343 thumb_exit(). */
26344 thumb_exit (f, -1);
26345 return;
26348 fprintf (f, "\tpop\t{");
26350 /* Look at the low registers first. */
26351 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26353 if (lo_mask & 1)
26355 asm_fprintf (f, "%r", regno);
26357 if ((lo_mask & ~1) != 0)
26358 fprintf (f, ", ");
26362 if (mask & (1 << PC_REGNUM))
26364 /* Catch popping the PC. */
26365 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26366 || IS_CMSE_ENTRY (arm_current_func_type ()))
26368 /* The PC is never popped directly; instead
26369 it is popped into r3 and then BX is used. */
26370 fprintf (f, "}\n");
26372 thumb_exit (f, -1);
26374 return;
26376 else
26378 if (mask & 0xFF)
26379 fprintf (f, ", ");
26381 asm_fprintf (f, "%r", PC_REGNUM);
26385 fprintf (f, "}\n");
26388 /* Generate code to return from a thumb function.
26389 If 'reg_containing_return_addr' is -1, then the return address is
26390 actually on the stack, at the stack pointer.
26392 Note: do not forget to update the length attribute of the corresponding insn
26393 pattern when changing the assembly output (e.g. the length attribute of
26394 epilogue_insns when updating the Armv8-M Baseline Security Extensions
26395 register clearing sequences).
26396 static void
26397 thumb_exit (FILE *f, int reg_containing_return_addr)
26399 unsigned regs_available_for_popping;
26400 unsigned regs_to_pop;
26401 int pops_needed;
26402 unsigned available;
26403 unsigned required;
26404 machine_mode mode;
26405 int size;
26406 int restore_a4 = FALSE;
26408 /* Compute the registers we need to pop. */
26409 regs_to_pop = 0;
26410 pops_needed = 0;
26412 if (reg_containing_return_addr == -1)
26414 regs_to_pop |= 1 << LR_REGNUM;
26415 ++pops_needed;
26418 if (TARGET_BACKTRACE)
26420 /* Restore the (ARM) frame pointer and stack pointer. */
26421 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26422 pops_needed += 2;
26425 /* If there is nothing to pop then just emit the BX instruction and
26426 return. */
26427 if (pops_needed == 0)
26429 if (crtl->calls_eh_return)
26430 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26432 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26434 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26435 emitted by cmse_nonsecure_entry_clear_before_return (). */
26436 if (!TARGET_HAVE_FPCXT_CMSE)
26437 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26438 reg_containing_return_addr);
26439 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26441 else
26442 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26443 return;
26445 /* Otherwise if we are not supporting interworking and we have not created
26446 a backtrace structure and the function was not entered in ARM mode then
26447 just pop the return address straight into the PC. */
26448 else if (!TARGET_INTERWORK
26449 && !TARGET_BACKTRACE
26450 && !is_called_in_ARM_mode (current_function_decl)
26451 && !crtl->calls_eh_return
26452 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26454 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26455 return;
26458 /* Find out how many of the (return) argument registers we can corrupt. */
26459 regs_available_for_popping = 0;
26461 /* If returning via __builtin_eh_return, the bottom three registers
26462 all contain information needed for the return. */
26463 if (crtl->calls_eh_return)
26464 size = 12;
26465 else
26467 /* If we can deduce the registers used from the function's
26468 return value. This is more reliable than examining
26469 df_regs_ever_live_p () because that will be set if the register is
26470 ever used in the function, not just if the register is used
26471 to hold a return value. */
26473 if (crtl->return_rtx != 0)
26474 mode = GET_MODE (crtl->return_rtx);
26475 else
26476 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26478 size = GET_MODE_SIZE (mode);
26480 if (size == 0)
26482 /* In a void function we can use any argument register.
26483 In a function that returns a structure on the stack
26484 we can use the second and third argument registers. */
26485 if (mode == VOIDmode)
26486 regs_available_for_popping =
26487 (1 << ARG_REGISTER (1))
26488 | (1 << ARG_REGISTER (2))
26489 | (1 << ARG_REGISTER (3));
26490 else
26491 regs_available_for_popping =
26492 (1 << ARG_REGISTER (2))
26493 | (1 << ARG_REGISTER (3));
26495 else if (size <= 4)
26496 regs_available_for_popping =
26497 (1 << ARG_REGISTER (2))
26498 | (1 << ARG_REGISTER (3));
26499 else if (size <= 8)
26500 regs_available_for_popping =
26501 (1 << ARG_REGISTER (3));
26504 /* Match registers to be popped with registers into which we pop them. */
26505 for (available = regs_available_for_popping,
26506 required = regs_to_pop;
26507 required != 0 && available != 0;
26508 available &= ~(available & - available),
26509 required &= ~(required & - required))
26510 -- pops_needed;
26512 /* If we have any popping registers left over, remove them. */
26513 if (available > 0)
26514 regs_available_for_popping &= ~available;
26516 /* Otherwise if we need another popping register we can use
26517 the fourth argument register. */
26518 else if (pops_needed)
26520 /* If we have not found any free argument registers and
26521 reg a4 contains the return address, we must move it. */
26522 if (regs_available_for_popping == 0
26523 && reg_containing_return_addr == LAST_ARG_REGNUM)
26525 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26526 reg_containing_return_addr = LR_REGNUM;
26528 else if (size > 12)
26530 /* Register a4 is being used to hold part of the return value,
26531 but we have dire need of a free, low register. */
26532 restore_a4 = TRUE;
26534 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26537 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26539 /* The fourth argument register is available. */
26540 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26542 --pops_needed;
26546 /* Pop as many registers as we can. */
26547 thumb_pop (f, regs_available_for_popping);
26549 /* Process the registers we popped. */
26550 if (reg_containing_return_addr == -1)
26552 /* The return address was popped into the lowest numbered register. */
26553 regs_to_pop &= ~(1 << LR_REGNUM);
26555 reg_containing_return_addr =
26556 number_of_first_bit_set (regs_available_for_popping);
26558 /* Remove this register from the mask of available registers, so that
26559 the return address will not be corrupted by further pops. */
26560 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26563 /* If we popped other registers then handle them here. */
26564 if (regs_available_for_popping)
26566 int frame_pointer;
26568 /* Work out which register currently contains the frame pointer. */
26569 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26571 /* Move it into the correct place. */
26572 asm_fprintf (f, "\tmov\t%r, %r\n",
26573 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26575 /* (Temporarily) remove it from the mask of popped registers. */
26576 regs_available_for_popping &= ~(1 << frame_pointer);
26577 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26579 if (regs_available_for_popping)
26581 int stack_pointer;
26583 /* We popped the stack pointer as well,
26584 find the register that contains it. */
26585 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26587 /* Move it into the stack register. */
26588 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26590 /* At this point we have popped all necessary registers, so
26591 do not worry about restoring regs_available_for_popping
26592 to its correct value:
26594 assert (pops_needed == 0)
26595 assert (regs_available_for_popping == (1 << frame_pointer))
26596 assert (regs_to_pop == (1 << STACK_POINTER)) */
26598 else
26600 /* Since we have just moved the popped value into the frame
26601 pointer, the popping register is available for reuse, and
26602 we know that we still have the stack pointer left to pop. */
26603 regs_available_for_popping |= (1 << frame_pointer);
26607 /* If we still have registers left on the stack, but we no longer have
26608 any registers into which we can pop them, then we must move the return
26609 address into the link register and make available the register that
26610 contained it. */
26611 if (regs_available_for_popping == 0 && pops_needed > 0)
26613 regs_available_for_popping |= 1 << reg_containing_return_addr;
26615 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26616 reg_containing_return_addr);
26618 reg_containing_return_addr = LR_REGNUM;
26621 /* If we have registers left on the stack then pop some more.
26622 We know that at most we will want to pop FP and SP. */
26623 if (pops_needed > 0)
26625 int popped_into;
26626 int move_to;
26628 thumb_pop (f, regs_available_for_popping);
26630 /* We have popped either FP or SP.
26631 Move whichever one it is into the correct register. */
26632 popped_into = number_of_first_bit_set (regs_available_for_popping);
26633 move_to = number_of_first_bit_set (regs_to_pop);
26635 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26636 --pops_needed;
26639 /* If we still have not popped everything then we must have only
26640 had one register available to us and we are now popping the SP. */
26641 if (pops_needed > 0)
26643 int popped_into;
26645 thumb_pop (f, regs_available_for_popping);
26647 popped_into = number_of_first_bit_set (regs_available_for_popping);
26649 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26651 /* assert (regs_to_pop == (1 << STACK_POINTER))
26652    assert (pops_needed == 1) */
26656 /* If necessary restore the a4 register. */
26657 if (restore_a4)
26659 if (reg_containing_return_addr != LR_REGNUM)
26661 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26662 reg_containing_return_addr = LR_REGNUM;
26665 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26668 if (crtl->calls_eh_return)
26669 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26671 /* Return to caller. */
26672 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26674 /* This is for the cases where LR is not being used to contain the return
26675 address. It may therefore contain information that we might not want
26676 to leak, hence it must be cleared. The value in R0 will never be a
26677 secret at this point, so it is safe to use it, see the clearing code
26678 in cmse_nonsecure_entry_clear_before_return (). */
26679 if (reg_containing_return_addr != LR_REGNUM)
26680 asm_fprintf (f, "\tmov\tlr, r0\n");
26682 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26683 by cmse_nonsecure_entry_clear_before_return (). */
26684 if (!TARGET_HAVE_FPCXT_CMSE)
26685 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26686 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26688 else
26689 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26692 /* Scan INSN just before assembler is output for it.
26693 For Thumb-1, we track the status of the condition codes; this
26694 information is used in the cbranchsi4_insn pattern. */
26695 void
26696 thumb1_final_prescan_insn (rtx_insn *insn)
26698 if (flag_print_asm_name)
26699 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26700 INSN_ADDRESSES (INSN_UID (insn)));
26701 /* Don't overwrite the previous setter when we get to a cbranch. */
26702 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26704 enum attr_conds conds;
26706 if (cfun->machine->thumb1_cc_insn)
26708 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26709 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26710 CC_STATUS_INIT;
26712 conds = get_attr_conds (insn);
26713 if (conds == CONDS_SET)
26715 rtx set = single_set (insn);
26716 cfun->machine->thumb1_cc_insn = insn;
26717 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26718 cfun->machine->thumb1_cc_op1 = const0_rtx;
26719 cfun->machine->thumb1_cc_mode = CC_NZmode;
26720 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26722 rtx src1 = XEXP (SET_SRC (set), 1);
26723 if (src1 == const0_rtx)
26724 cfun->machine->thumb1_cc_mode = CCmode;
26726 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26728 /* Record the src register operand instead of dest because
26729 cprop_hardreg pass propagates src. */
26730 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26733 else if (conds != CONDS_NOCOND)
26734 cfun->machine->thumb1_cc_insn = NULL_RTX;
26737 /* Check if unexpected far jump is used. */
26738 if (cfun->machine->lr_save_eliminated
26739 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26740 internal_error ("Unexpected thumb1 far jump");
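/* Return nonzero if VAL, viewed as a 32-bit value, consists of an 8-bit
   constant shifted left by between 0 and 24 bits. */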
int
26744 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26746 unsigned HOST_WIDE_INT mask = 0xff;
26747 int i;
26749 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26750 if (val == 0) /* XXX */
26751 return 0;
26753 for (i = 0; i < 25; i++)
26754 if ((val & (mask << i)) == val)
26755 return 1;
26757 return 0;
26760 /* Returns nonzero if the current function contains,
26761 or might contain a far jump. */
26762 static int
26763 thumb_far_jump_used_p (void)
26765 rtx_insn *insn;
26766 bool far_jump = false;
26767 unsigned int func_size = 0;
26769 /* If we have already decided that far jumps may be used,
26770 do not bother checking again, and always return true even if
26771 it turns out that they are not being used. Once we have made
26772 the decision that far jumps are present (and that hence the link
26773 register will be pushed onto the stack) we cannot go back on it. */
26774 if (cfun->machine->far_jump_used)
26775 return 1;
26777 /* If this function is not being called from the prologue/epilogue
26778 generation code then it must be being called from the
26779 INITIAL_ELIMINATION_OFFSET macro. */
26780 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26782 /* In this case we know that we are being asked about the elimination
26783 of the arg pointer register. If that register is not being used,
26784 then there are no arguments on the stack, and we do not have to
26785 worry that a far jump might force the prologue to push the link
26786 register, changing the stack offsets. In this case we can just
26787 return false, since the presence of far jumps in the function will
26788 not affect stack offsets.
26790 If the arg pointer is live (or if it was live, but has now been
26791 eliminated and so set to dead) then we do have to test to see if
26792 the function might contain a far jump. This test can lead to some
26793 false negatives, since before reload is completed, the length of
26794 branch instructions is not known, so gcc defaults to returning their
26795 longest length, which in turn sets the far jump attribute to true.
26797 A false negative will not result in bad code being generated, but it
26798 will result in a needless push and pop of the link register. We
26799 hope that this does not occur too often.
26801 If we need doubleword stack alignment this could affect the other
26802 elimination offsets so we can't risk getting it wrong. */
26803 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26804 cfun->machine->arg_pointer_live = 1;
26805 else if (!cfun->machine->arg_pointer_live)
26806 return 0;
26809 /* We should not change far_jump_used during or after reload, as there is
26810 no chance to change stack frame layout. */
26811 if (reload_in_progress || reload_completed)
26812 return 0;
26814 /* Check to see if the function contains a branch
26815 insn with the far jump attribute set. */
26816 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26818 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26820 far_jump = true;
26822 func_size += get_attr_length (insn);
26825 /* The far_jump attribute is always true for Thumb-1 before the
26826 branch shortening pass, so checking it before that pass is not
26827 very useful.
26829 The following heuristic tries to estimate more accurately whether a
26830 far jump will finally be used. It is very conservative, as there is
26831 no chance to roll back the decision not to use a far jump.
26833 The Thumb-1 long branch offset range is -2048 to 2046. In the worst
26834 case each 2-byte insn is associated with a 4-byte constant pool
26835 entry, so using function size 2048/3 as the threshold is conservative enough. */
26836 if (far_jump)
26838 if ((func_size * 3) >= 2048)
26840 /* Record the fact that we have decided that
26841 the function does use far jumps. */
26842 cfun->machine->far_jump_used = 1;
26843 return 1;
26847 return 0;
26850 /* Return nonzero if FUNC must be entered in ARM mode. */
26851 static bool
26852 is_called_in_ARM_mode (tree func)
26854 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26856 /* Ignore the problem about functions whose address is taken. */
26857 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26858 return true;
26860 return false;
26863 /* Given the stack offsets and register mask in OFFSETS, decide how
26864 many additional registers to push instead of subtracting a constant
26865 from SP. For epilogues the principle is the same except we use pop.
26866 FOR_PROLOGUE indicates which we're generating. */
26867 static int
26868 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26870 HOST_WIDE_INT amount;
26871 unsigned long live_regs_mask = offsets->saved_regs_mask;
26872 /* Extract a mask of the ones we can give to the Thumb's push/pop
26873 instruction. */
26874 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26875 /* Then count how many other high registers will need to be pushed. */
26876 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26877 int n_free, reg_base, size;
26879 if (!for_prologue && frame_pointer_needed)
26880 amount = offsets->locals_base - offsets->saved_regs;
26881 else
26882 amount = offsets->outgoing_args - offsets->saved_regs;
26884 /* If the stack frame size is 512 exactly, we can save one load
26885 instruction, which should make this a win even when optimizing
26886 for speed. */
26887 if (!optimize_size && amount != 512)
26888 return 0;
26890 /* Can't do this if there are high registers to push. */
26891 if (high_regs_pushed != 0)
26892 return 0;
26894 /* Shouldn't do it in the prologue if no registers would normally
26895 be pushed at all. In the epilogue, also allow it if we'll have
26896 a pop insn for the PC. */
26897 if (l_mask == 0
26898 && (for_prologue
26899 || TARGET_BACKTRACE
26900 || (live_regs_mask & 1 << LR_REGNUM) == 0
26901 || TARGET_INTERWORK
26902 || crtl->args.pretend_args_size != 0))
26903 return 0;
26905 /* Don't do this if thumb_expand_prologue wants to emit instructions
26906 between the push and the stack frame allocation. */
26907 if (for_prologue
26908 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26909 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26910 return 0;
26912 reg_base = 0;
26913 n_free = 0;
26914 if (!for_prologue)
26916 size = arm_size_return_regs ();
26917 reg_base = ARM_NUM_INTS (size);
26918 live_regs_mask >>= reg_base;
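/* Count how many consecutive low registers, starting at REG_BASE, are not
   already in the save mask and, for an epilogue, are call-clobbered; these
   are the registers that extra pushes or pops could use. */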
26921 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26922 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26924 live_regs_mask >>= 1;
26925 n_free++;
26928 if (n_free == 0)
26929 return 0;
26930 gcc_assert (amount / 4 * 4 == amount);
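/* Either push just enough extra registers to bring the remaining adjustment
   below 512 (so a single immediate add/sub suffices), or, if the whole
   adjustment fits, replace it entirely with extra pushes or pops. */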
26932 if (amount >= 512 && (amount - n_free * 4) < 512)
26933 return (amount - 508) / 4;
26934 if (amount <= n_free * 4)
26935 return amount / 4;
26936 return 0;
26939 /* The bits which aren't usefully expanded as rtl. */
26940 const char *
26941 thumb1_unexpanded_epilogue (void)
26943 arm_stack_offsets *offsets;
26944 int regno;
26945 unsigned long live_regs_mask = 0;
26946 int high_regs_pushed = 0;
26947 int extra_pop;
26948 int had_to_push_lr;
26949 int size;
26951 if (cfun->machine->return_used_this_function != 0)
26952 return "";
26954 if (IS_NAKED (arm_current_func_type ()))
26955 return "";
26957 offsets = arm_get_frame_offsets ();
26958 live_regs_mask = offsets->saved_regs_mask;
26959 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26961 /* If we can deduce the registers used from the function's return value.
26962 This is more reliable than examining df_regs_ever_live_p () because that
26963 will be set if the register is ever used in the function, not just if
26964 the register is used to hold a return value. */
26965 size = arm_size_return_regs ();
26967 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26968 if (extra_pop > 0)
26970 unsigned long extra_mask = (1 << extra_pop) - 1;
26971 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26974 /* The prologue may have pushed some high registers to use as
26975 work registers, e.g. the testsuite file
26976 gcc/testsuite/gcc.c-torture/execute/complex-2.c
26977 compiles to produce:
26978 push {r4, r5, r6, r7, lr}
26979 mov r7, r9
26980 mov r6, r8
26981 push {r6, r7}
26982 as part of the prolog. We have to undo that pushing here. */
26984 if (high_regs_pushed)
26986 unsigned long mask = live_regs_mask & 0xff;
26987 int next_hi_reg;
26989 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26991 if (mask == 0)
26992 /* Oh dear! We have no low registers into which we can pop
26993 high registers! */
26994 internal_error
26995 ("no low registers available for popping high registers");
26997 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26998 if (live_regs_mask & (1 << next_hi_reg))
26999 break;
27001 while (high_regs_pushed)
27003 /* Find lo register(s) into which the high register(s) can
27004 be popped. */
27005 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
27007 if (mask & (1 << regno))
27008 high_regs_pushed--;
27009 if (high_regs_pushed == 0)
27010 break;
27013 if (high_regs_pushed == 0 && regno >= 0)
27014 mask &= ~((1 << regno) - 1);
27016 /* Pop the values into the low register(s). */
27017 thumb_pop (asm_out_file, mask);
27019 /* Move the value(s) into the high registers. */
27020 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
27022 if (mask & (1 << regno))
27024 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
27025 regno);
27027 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
27028 next_hi_reg--)
27029 if (live_regs_mask & (1 << next_hi_reg))
27030 break;
27034 live_regs_mask &= ~0x0f00;
27037 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
27038 live_regs_mask &= 0xff;
27040 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
27042 /* Pop the return address into the PC. */
27043 if (had_to_push_lr)
27044 live_regs_mask |= 1 << PC_REGNUM;
27046 /* Either no argument registers were pushed or a backtrace
27047 structure was created which includes an adjusted stack
27048 pointer, so just pop everything. */
27049 if (live_regs_mask)
27050 thumb_pop (asm_out_file, live_regs_mask);
27052 /* We have either just popped the return address into the
27053 PC or it was kept in LR for the entire function.
27054 Note that thumb_pop has already called thumb_exit if the
27055 PC was in the list. */
27056 if (!had_to_push_lr)
27057 thumb_exit (asm_out_file, LR_REGNUM);
27059 else
27061 /* Pop everything but the return address. */
27062 if (live_regs_mask)
27063 thumb_pop (asm_out_file, live_regs_mask);
27065 if (had_to_push_lr)
27067 if (size > 12)
27069 /* We have no free low regs, so save one. */
27070 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
27071 LAST_ARG_REGNUM);
27074 /* Get the return address into a temporary register. */
27075 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27077 if (size > 12)
27079 /* Move the return address to lr. */
27080 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27081 LAST_ARG_REGNUM);
27082 /* Restore the low register. */
27083 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27084 IP_REGNUM);
27085 regno = LR_REGNUM;
27087 else
27088 regno = LAST_ARG_REGNUM;
27090 else
27091 regno = LR_REGNUM;
27093 /* Remove the argument registers that were pushed onto the stack. */
27094 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27095 SP_REGNUM, SP_REGNUM,
27096 crtl->args.pretend_args_size);
27098 thumb_exit (asm_out_file, regno);
27101 return "";
27104 /* Functions to save and restore machine-specific function data. */
27105 static struct machine_function *
27106 arm_init_machine_status (void)
27108 struct machine_function *machine;
27109 machine = ggc_cleared_alloc<machine_function> ();
27111 #if ARM_FT_UNKNOWN != 0
27112 machine->func_type = ARM_FT_UNKNOWN;
27113 #endif
27114 machine->static_chain_stack_bytes = -1;
27115 machine->pacspval_needed = 0;
27116 return machine;
27119 /* Return an RTX indicating where the return address to the
27120 calling function can be found. */
rtx
27122 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27124 if (count != 0)
27125 return NULL_RTX;
27127 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27130 /* Do anything needed before RTL is emitted for each function. */
27131 void
27132 arm_init_expanders (void)
27134 /* Arrange to initialize and mark the machine per-function status. */
27135 init_machine_status = arm_init_machine_status;
27137 /* This is to stop the combine pass optimizing away the alignment
27138 adjustment of va_arg. */
27139 /* ??? It is claimed that this should not be necessary. */
27140 if (cfun)
27141 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27144 /* Check whether FUNC is called with a different mode. */
27146 bool
27147 arm_change_mode_p (tree func)
27149 if (TREE_CODE (func) != FUNCTION_DECL)
27150 return false;
27152 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
27154 if (!callee_tree)
27155 callee_tree = target_option_default_node;
27157 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
27158 int flags = callee_opts->x_target_flags;
27160 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
27163 /* Like arm_compute_initial_elimination_offset. Simpler because there
27164 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27165 to point at the base of the local variables after static stack
27166 space for a function has been allocated. */
27168 HOST_WIDE_INT
27169 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27171 arm_stack_offsets *offsets;
27173 offsets = arm_get_frame_offsets ();
27175 switch (from)
27177 case ARG_POINTER_REGNUM:
27178 switch (to)
27180 case STACK_POINTER_REGNUM:
27181 return offsets->outgoing_args - offsets->saved_args;
27183 case FRAME_POINTER_REGNUM:
27184 return offsets->soft_frame - offsets->saved_args;
27186 case ARM_HARD_FRAME_POINTER_REGNUM:
27187 return offsets->saved_regs - offsets->saved_args;
27189 case THUMB_HARD_FRAME_POINTER_REGNUM:
27190 return offsets->locals_base - offsets->saved_args;
27192 default:
27193 gcc_unreachable ();
27195 break;
27197 case FRAME_POINTER_REGNUM:
27198 switch (to)
27200 case STACK_POINTER_REGNUM:
27201 return offsets->outgoing_args - offsets->soft_frame;
27203 case ARM_HARD_FRAME_POINTER_REGNUM:
27204 return offsets->saved_regs - offsets->soft_frame;
27206 case THUMB_HARD_FRAME_POINTER_REGNUM:
27207 return offsets->locals_base - offsets->soft_frame;
27209 default:
27210 gcc_unreachable ();
27212 break;
27214 default:
27215 gcc_unreachable ();
27219 /* Generate the function's prologue. */
27221 void
27222 thumb1_expand_prologue (void)
27224 rtx_insn *insn;
27226 HOST_WIDE_INT amount;
27227 HOST_WIDE_INT size;
27228 arm_stack_offsets *offsets;
27229 unsigned long func_type;
27230 int regno;
27231 unsigned long live_regs_mask;
27232 unsigned long l_mask;
27233 unsigned high_regs_pushed = 0;
27234 bool lr_needs_saving;
27236 func_type = arm_current_func_type ();
27238 /* Naked functions don't have prologues. */
27239 if (IS_NAKED (func_type))
27241 if (flag_stack_usage_info)
27242 current_function_static_stack_size = 0;
27243 return;
27246 if (IS_INTERRUPT (func_type))
27248 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27249 return;
27252 if (is_called_in_ARM_mode (current_function_decl))
27253 emit_insn (gen_prologue_thumb1_interwork ());
27255 offsets = arm_get_frame_offsets ();
27256 live_regs_mask = offsets->saved_regs_mask;
27257 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27259 /* The AAPCS requires the callee to widen integral types narrower
27260 than 32 bits to the full width of the register; but when handling
27261 calls to non-secure space, we cannot trust the callee to have
27262 correctly done so. So forcibly re-widen the result here. */
27263 if (IS_CMSE_ENTRY (func_type))
27265 function_args_iterator args_iter;
27266 CUMULATIVE_ARGS args_so_far_v;
27267 cumulative_args_t args_so_far;
27268 bool first_param = true;
27269 tree arg_type;
27270 tree fndecl = current_function_decl;
27271 tree fntype = TREE_TYPE (fndecl);
27272 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
27273 args_so_far = pack_cumulative_args (&args_so_far_v);
27274 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
27276 rtx arg_rtx;
27278 if (VOID_TYPE_P (arg_type))
27279 break;
27281 function_arg_info arg (arg_type, /*named=*/true);
27282 if (!first_param)
27283 /* We should advance after processing the argument and pass
27284 the argument we're advancing past. */
27285 arm_function_arg_advance (args_so_far, arg);
27286 first_param = false;
27287 arg_rtx = arm_function_arg (args_so_far, arg);
27288 gcc_assert (REG_P (arg_rtx));
27289 if ((TREE_CODE (arg_type) == INTEGER_TYPE
27290 || TREE_CODE (arg_type) == ENUMERAL_TYPE
27291 || TREE_CODE (arg_type) == BOOLEAN_TYPE)
27292 && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4))
27294 rtx res_reg = gen_rtx_REG (SImode, REGNO (arg_rtx));
27295 if (TYPE_UNSIGNED (arg_type))
27296 emit_set_insn (res_reg, gen_rtx_ZERO_EXTEND (SImode, arg_rtx));
27297 else
27299 /* Sign extension is a special case because of
27300 thumb1_extendhisi2. */
27301 if (known_eq (GET_MODE_SIZE (GET_MODE (arg_rtx)), 2))
27302 emit_insn (gen_thumb1_extendhisi2 (res_reg, arg_rtx));
27303 else
27304 emit_set_insn (res_reg,
27305 gen_rtx_SIGN_EXTEND (SImode, arg_rtx));
27311 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
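/* 0x40ff selects r0-r7 plus LR (bit 14). */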
27312 l_mask = live_regs_mask & 0x40ff;
27313 /* Then count how many other high registers will need to be pushed. */
27314 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27316 if (crtl->args.pretend_args_size)
27318 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27320 if (cfun->machine->uses_anonymous_args)
27322 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27323 unsigned long mask;
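/* Build a mask of the highest NUM_PUSHES argument registers (ending at r3);
   these hold the anonymous arguments that need to be pushed. */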
27325 mask = 1ul << (LAST_ARG_REGNUM + 1);
27326 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27328 insn = thumb1_emit_multi_reg_push (mask, 0);
27330 else
27332 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27333 stack_pointer_rtx, x));
27335 RTX_FRAME_RELATED_P (insn) = 1;
27338 if (TARGET_BACKTRACE)
27340 HOST_WIDE_INT offset = 0;
27341 unsigned work_register;
27342 rtx work_reg, x, arm_hfp_rtx;
27344 /* We have been asked to create a stack backtrace structure.
27345 The code looks like this:
27347 0 .align 2
27348 0 func:
27349 0 sub SP, #16 Reserve space for 4 registers.
27350 2 push {R7} Push low registers.
27351 4 add R7, SP, #20 Get the stack pointer before the push.
27352 6 str R7, [SP, #8] Store the stack pointer
27353 (before reserving the space).
27354 8 mov R7, PC Get hold of the start of this code + 12.
27355 10 str R7, [SP, #16] Store it.
27356 12 mov R7, FP Get hold of the current frame pointer.
27357 14 str R7, [SP, #4] Store it.
27358 16 mov R7, LR Get hold of the current return address.
27359 18 str R7, [SP, #12] Store it.
27360 20 add R7, SP, #16 Point at the start of the
27361 backtrace structure.
27362 22 mov FP, R7 Put this value into the frame pointer. */
27364 work_register = thumb_find_work_register (live_regs_mask);
27365 work_reg = gen_rtx_REG (SImode, work_register);
27366 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27368 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27369 stack_pointer_rtx, GEN_INT (-16)));
27370 RTX_FRAME_RELATED_P (insn) = 1;
27372 if (l_mask)
27374 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27375 RTX_FRAME_RELATED_P (insn) = 1;
27376 lr_needs_saving = false;
27378 offset = bit_count (l_mask) * UNITS_PER_WORD;
27381 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27382 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27384 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27385 x = gen_frame_mem (SImode, x);
27386 emit_move_insn (x, work_reg);
27388 /* Make sure that the instruction fetching the PC is in the right place
27389 to calculate "start of backtrace creation code + 12". */
27390 /* ??? The stores using the common WORK_REG ought to be enough to
27391 prevent the scheduler from doing anything weird. Failing that
27392 we could always move all of the following into an UNSPEC_VOLATILE. */
27393 if (l_mask)
27395 x = gen_rtx_REG (SImode, PC_REGNUM);
27396 emit_move_insn (work_reg, x);
27398 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27399 x = gen_frame_mem (SImode, x);
27400 emit_move_insn (x, work_reg);
27402 emit_move_insn (work_reg, arm_hfp_rtx);
27404 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27405 x = gen_frame_mem (SImode, x);
27406 emit_move_insn (x, work_reg);
27408 else
27410 emit_move_insn (work_reg, arm_hfp_rtx);
27412 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27413 x = gen_frame_mem (SImode, x);
27414 emit_move_insn (x, work_reg);
27416 x = gen_rtx_REG (SImode, PC_REGNUM);
27417 emit_move_insn (work_reg, x);
27419 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27420 x = gen_frame_mem (SImode, x);
27421 emit_move_insn (x, work_reg);
27424 x = gen_rtx_REG (SImode, LR_REGNUM);
27425 emit_move_insn (work_reg, x);
27427 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27428 x = gen_frame_mem (SImode, x);
27429 emit_move_insn (x, work_reg);
27431 x = GEN_INT (offset + 12);
27432 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27434 emit_move_insn (arm_hfp_rtx, work_reg);
27436 /* Optimization: If we are not pushing any low registers but we are going
27437 to push some high registers then delay our first push. This will just
27438 be a push of LR and we can combine it with the push of the first high
27439 register. */
27440 else if ((l_mask & 0xff) != 0
27441 || (high_regs_pushed == 0 && lr_needs_saving))
27443 unsigned long mask = l_mask;
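/* Also include the low registers (r0 upwards) that thumb1_extra_regs_pushed
   allows us to push in place of part of the stack adjustment. */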
27444 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27445 insn = thumb1_emit_multi_reg_push (mask, mask);
27446 RTX_FRAME_RELATED_P (insn) = 1;
27447 lr_needs_saving = false;
27450 if (high_regs_pushed)
27452 unsigned pushable_regs;
27453 unsigned next_hi_reg;
27454 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27455 : crtl->args.info.nregs;
27456 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27458 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27459 if (live_regs_mask & (1 << next_hi_reg))
27460 break;
27462 /* Here we need to mask out registers used for passing arguments,
27463 even if they can be pushed, to avoid using them to stash the
27464 high registers; such a stash could clobber argument values that
27465 are still needed. */
27466 pushable_regs = l_mask & (~arg_regs_mask);
27467 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27469 /* Normally, LR can be used as a scratch register once it has been
27470 saved; but if the function examines its own return address then
27471 the value is still live and we need to avoid using it. */
27472 bool return_addr_live
27473 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27474 LR_REGNUM);
27476 if (lr_needs_saving || return_addr_live)
27477 pushable_regs &= ~(1 << LR_REGNUM);
27479 if (pushable_regs == 0)
27480 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27482 while (high_regs_pushed > 0)
27484 unsigned long real_regs_mask = 0;
27485 unsigned long push_mask = 0;
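/* Copy as many high registers as there are free pushable registers,
   working downwards from LR, then push those registers in one go;
   REAL_REGS_MASK records which high registers they actually hold. */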
27487 for (regno = LR_REGNUM; regno >= 0; regno --)
27489 if (pushable_regs & (1 << regno))
27491 emit_move_insn (gen_rtx_REG (SImode, regno),
27492 gen_rtx_REG (SImode, next_hi_reg));
27494 high_regs_pushed --;
27495 real_regs_mask |= (1 << next_hi_reg);
27496 push_mask |= (1 << regno);
27498 if (high_regs_pushed)
27500 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27501 next_hi_reg --)
27502 if (live_regs_mask & (1 << next_hi_reg))
27503 break;
27505 else
27506 break;
27510 /* If we had to find a work register and we have not yet
27511 saved the LR then add it to the list of regs to push. */
27512 if (lr_needs_saving)
27514 push_mask |= 1 << LR_REGNUM;
27515 real_regs_mask |= 1 << LR_REGNUM;
27516 lr_needs_saving = false;
27517 /* If the return address is not live at this point, we
27518 can add LR to the list of registers that we can use
27519 for pushes. */
27520 if (!return_addr_live)
27521 pushable_regs |= 1 << LR_REGNUM;
27524 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27525 RTX_FRAME_RELATED_P (insn) = 1;
27529 /* Load the pic register before setting the frame pointer,
27530 so we can use r7 as a temporary work register. */
27531 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27532 arm_load_pic_register (live_regs_mask, NULL_RTX);
27534 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27535 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27536 stack_pointer_rtx);
27538 size = offsets->outgoing_args - offsets->saved_args;
27539 if (flag_stack_usage_info)
27540 current_function_static_stack_size = size;
27542 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27543 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27544 || flag_stack_clash_protection)
27545 && size)
27546 sorry ("%<-fstack-check=specific%> for Thumb-1");
27548 amount = offsets->outgoing_args - offsets->saved_regs;
27549 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27550 if (amount)
27552 if (amount < 512)
27554 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27555 GEN_INT (- amount)));
27556 RTX_FRAME_RELATED_P (insn) = 1;
27558 else
27560 rtx reg, dwarf;
27562 /* The stack decrement is too big for an immediate value in a single
27563 insn. In theory we could issue multiple subtracts, but after
27564 three of them it becomes more space efficient to place the full
27565 value in the constant pool and load into a register. (Also the
27566 ARM debugger really likes to see only one stack decrement per
27567 function). So instead we look for a scratch register into which
27568 we can load the decrement, and then we subtract this from the
27569 stack pointer. Unfortunately on the thumb the only available
27570 scratch registers are the argument registers, and we cannot use
27571 these as they may hold arguments to the function. Instead we
27572 attempt to locate a call preserved register which is used by this
27573 function. If we can find one, then we know that it will have
27574 been pushed at the start of the prologue and so we can corrupt
27575 it now. */
27576 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27577 if (live_regs_mask & (1 << regno))
27578 break;
27580 gcc_assert (regno <= LAST_LO_REGNUM);
27582 reg = gen_rtx_REG (SImode, regno);
27584 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27586 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27587 stack_pointer_rtx, reg));
27589 dwarf = gen_rtx_SET (stack_pointer_rtx,
27590 plus_constant (Pmode, stack_pointer_rtx,
27591 -amount));
27592 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27593 RTX_FRAME_RELATED_P (insn) = 1;
27597 if (frame_pointer_needed)
27598 thumb_set_frame_pointer (offsets);
27600 /* If we are profiling, make sure no instructions are scheduled before
27601 the call to mcount. Similarly if the user has requested no
27602 scheduling in the prolog. Similarly if we want non-call exceptions
27603 using the EABI unwinder, to prevent faulting instructions from being
27604 swapped with a stack adjustment. */
27605 if (crtl->profile || !TARGET_SCHED_PROLOG
27606 || (arm_except_unwind_info (&global_options) == UI_TARGET
27607 && cfun->can_throw_non_call_exceptions))
27608 emit_insn (gen_blockage ());
27610 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27611 if (live_regs_mask & 0xff)
27612 cfun->machine->lr_save_eliminated = 0;
27615 /* Clear caller-saved registers not used to pass return values and leaked
27616 condition flags before exiting a cmse_nonsecure_entry function. */
27618 void
27619 cmse_nonsecure_entry_clear_before_return (void)
27621 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27622 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27623 uint32_t padding_bits_to_clear = 0;
27624 auto_sbitmap to_clear_bitmap (maxregno + 1);
27625 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27626 tree result_type;
27628 bitmap_clear (to_clear_bitmap);
27629 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27630 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27632 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27633 registers. */
27634 if (clear_vfpregs)
27636 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27638 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27640 if (!TARGET_HAVE_FPCXT_CMSE)
27642 /* Make sure we don't clear the two scratch registers used to clear
27643 the relevant FPSCR bits in output_return_instruction. */
27644 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27645 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27646 emit_use (gen_rtx_REG (SImode, 4));
27647 bitmap_clear_bit (to_clear_bitmap, 4);
27651 /* If the user has defined registers to be caller saved, these are no longer
27652 restored by the function before returning and must thus be cleared for
27653 security purposes. */
27654 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27656 /* We do not touch registers that can be used to pass arguments as per
27657 the AAPCS, since these should never be made callee-saved by user
27658 options. */
27659 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27660 continue;
27661 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27662 continue;
27663 if (!callee_saved_reg_p (regno)
27664 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27665 || TARGET_HARD_FLOAT))
27666 bitmap_set_bit (to_clear_bitmap, regno);
27669 /* Make sure we do not clear the registers the result is returned in. */
27670 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27671 if (!VOID_TYPE_P (result_type))
27673 uint64_t to_clear_return_mask;
27674 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27676 /* No need to check that we return in registers, because we don't
27677 support returning on stack yet. */
27678 gcc_assert (REG_P (result_rtl));
27679 to_clear_return_mask
27680 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27681 &padding_bits_to_clear);
27682 if (to_clear_return_mask)
27684 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27685 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27687 if (to_clear_return_mask & (1ULL << regno))
27688 bitmap_clear_bit (to_clear_bitmap, regno);
27693 if (padding_bits_to_clear != 0)
27695 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27696 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27698 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27699 returning a composite type, which only uses r0. Let's make sure that
27700 r1-r3 are cleared too. */
27701 bitmap_clear (to_clear_arg_regs_bitmap);
27702 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27703 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27706 /* Clear full registers that leak before returning. */
27707 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27708 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27709 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27710 clearing_reg);
27713 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
27714 POP instruction can be generated. LR should be replaced by PC. All
27715 the checks required are already done by USE_RETURN_INSN (). Hence,
27716 all we really need to check here is whether a single register or
27717 multiple registers are to be returned. */
27718 void
27719 thumb2_expand_return (bool simple_return)
27721 int i, num_regs;
27722 unsigned long saved_regs_mask;
27723 arm_stack_offsets *offsets;
27725 offsets = arm_get_frame_offsets ();
27726 saved_regs_mask = offsets->saved_regs_mask;
27728 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27729 if (saved_regs_mask & (1 << i))
27730 num_regs++;
27732 if (!simple_return && saved_regs_mask)
27734 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27735 functions or adapt code to handle according to ACLE. This path should
27736 not be reachable for cmse_nonsecure_entry functions though we prefer
27737 to assert it for now to ensure that future code changes do not silently
27738 change this behavior. */
27739 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27740 if (arm_current_function_pac_enabled_p ())
27742 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27743 arm_emit_multi_reg_pop (saved_regs_mask);
27744 emit_insn (gen_aut_nop ());
27745 emit_jump_insn (simple_return_rtx);
27747 else if (num_regs == 1)
27749 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27750 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27751 rtx addr = gen_rtx_MEM (SImode,
27752 gen_rtx_POST_INC (SImode,
27753 stack_pointer_rtx));
27754 set_mem_alias_set (addr, get_frame_alias_set ());
27755 XVECEXP (par, 0, 0) = ret_rtx;
27756 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27757 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27758 emit_jump_insn (par);
27760 else
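/* Pop the saved LR directly into the PC as part of the multi-register
   pop. */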
27762 saved_regs_mask &= ~ (1 << LR_REGNUM);
27763 saved_regs_mask |= (1 << PC_REGNUM);
27764 arm_emit_multi_reg_pop (saved_regs_mask);
27767 else
27769 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27770 cmse_nonsecure_entry_clear_before_return ();
27771 emit_jump_insn (simple_return_rtx);
27775 void
27776 thumb1_expand_epilogue (void)
27778 HOST_WIDE_INT amount;
27779 arm_stack_offsets *offsets;
27780 int regno;
27782 /* Naked functions don't have epilogues. */
27783 if (IS_NAKED (arm_current_func_type ()))
27784 return;
27786 offsets = arm_get_frame_offsets ();
27787 amount = offsets->outgoing_args - offsets->saved_regs;
27789 if (frame_pointer_needed)
27791 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27792 amount = offsets->locals_base - offsets->saved_regs;
27794 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27796 gcc_assert (amount >= 0);
27797 if (amount)
27799 emit_insn (gen_blockage ());
27801 if (amount < 512)
27802 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27803 GEN_INT (amount)));
27804 else
27806 /* r3 is always free in the epilogue. */
27807 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27809 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27810 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27814 /* Emit a USE (stack_pointer_rtx), so that
27815 the stack adjustment will not be deleted. */
27816 emit_insn (gen_force_register_use (stack_pointer_rtx));
27818 if (crtl->profile || !TARGET_SCHED_PROLOG)
27819 emit_insn (gen_blockage ());
27821 /* Emit a clobber for each insn that will be restored in the epilogue,
27822 so that flow2 will get register lifetimes correct. */
27823 for (regno = 0; regno < 13; regno++)
27824 if (reg_needs_saving_p (regno))
27825 emit_clobber (gen_rtx_REG (SImode, regno));
27827 if (! df_regs_ever_live_p (LR_REGNUM))
27828 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27830 /* Clear all caller-saved regs that are not used to return. */
27831 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27832 cmse_nonsecure_entry_clear_before_return ();
27835 /* Epilogue code for APCS frame. */
27836 static void
27837 arm_expand_epilogue_apcs_frame (bool really_return)
27839 unsigned long func_type;
27840 unsigned long saved_regs_mask;
27841 int num_regs = 0;
27842 int i;
27843 int floats_from_frame = 0;
27844 arm_stack_offsets *offsets;
27846 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27847 func_type = arm_current_func_type ();
27849 /* Get frame offsets for ARM. */
27850 offsets = arm_get_frame_offsets ();
27851 saved_regs_mask = offsets->saved_regs_mask;
27853 /* Find the offset of the floating-point save area in the frame. */
27854 floats_from_frame
27855 = (offsets->saved_args
27856 + arm_compute_static_chain_stack_bytes ()
27857 - offsets->frame);
27859 /* Compute how many core registers are saved and how far away the floats are. */
27860 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27861 if (saved_regs_mask & (1 << i))
27863 num_regs++;
27864 floats_from_frame += 4;
27867 if (TARGET_VFP_BASE)
27869 int start_reg;
27870 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27872 /* The offset is from IP_REGNUM. */
27873 int saved_size = arm_get_vfp_saved_size ();
27874 if (saved_size > 0)
27876 rtx_insn *insn;
27877 floats_from_frame += saved_size;
27878 insn = emit_insn (gen_addsi3 (ip_rtx,
27879 hard_frame_pointer_rtx,
27880 GEN_INT (-floats_from_frame)));
27881 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27882 ip_rtx, hard_frame_pointer_rtx);
27885 /* Generate VFP register multi-pop. */
27886 start_reg = FIRST_VFP_REGNUM;
27888 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27889 /* Look for a case where a reg does not need restoring. */
27890 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27892 if (start_reg != i)
27893 arm_emit_vfp_multi_reg_pop (start_reg,
27894 (i - start_reg) / 2,
27895 gen_rtx_REG (SImode,
27896 IP_REGNUM));
27897 start_reg = i + 2;
27900 /* Restore the remaining regs that we have discovered (or possibly
27901 even all of them, if the conditional in the for loop never
27902 fired). */
27903 if (start_reg != i)
27904 arm_emit_vfp_multi_reg_pop (start_reg,
27905 (i - start_reg) / 2,
27906 gen_rtx_REG (SImode, IP_REGNUM));
27909 if (TARGET_IWMMXT)
27911 /* The frame pointer is guaranteed to be non-double-word aligned, as
27912 it is set to double-word-aligned old_stack_pointer - 4. */
27913 rtx_insn *insn;
27914 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27916 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27917 if (reg_needs_saving_p (i))
27919 rtx addr = gen_frame_mem (V2SImode,
27920 plus_constant (Pmode, hard_frame_pointer_rtx,
27921 - lrm_count * 4));
27922 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27923 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27924 gen_rtx_REG (V2SImode, i),
27925 NULL_RTX);
27926 lrm_count += 2;
27930 /* saved_regs_mask should contain IP which contains old stack pointer
27931 at the time of activation creation. Since SP and IP are adjacent registers,
27932 we can restore the value directly into SP. */
27933 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27934 saved_regs_mask &= ~(1 << IP_REGNUM);
27935 saved_regs_mask |= (1 << SP_REGNUM);
27937 /* There are two registers left in saved_regs_mask - LR and PC. We
27938 only need to restore LR (the return address), but to
27939 save time we can load it directly into PC, unless we need a
27940 special function exit sequence, or we are not really returning. */
27941 if (really_return
27942 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27943 && !crtl->calls_eh_return)
27944 /* Delete LR from the register mask, so that LR on
27945 the stack is loaded into the PC in the register mask. */
27946 saved_regs_mask &= ~(1 << LR_REGNUM);
27947 else
27948 saved_regs_mask &= ~(1 << PC_REGNUM);
27950 num_regs = bit_count (saved_regs_mask);
27951 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27953 rtx_insn *insn;
27954 emit_insn (gen_blockage ());
27955 /* Unwind the stack to just below the saved registers. */
27956 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27957 hard_frame_pointer_rtx,
27958 GEN_INT (- 4 * num_regs)));
27960 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27961 stack_pointer_rtx, hard_frame_pointer_rtx);
27964 arm_emit_multi_reg_pop (saved_regs_mask);
27966 if (IS_INTERRUPT (func_type))
27968 /* Interrupt handlers will have pushed the
27969 IP onto the stack, so restore it now. */
27970 rtx_insn *insn;
27971 rtx addr = gen_rtx_MEM (SImode,
27972 gen_rtx_POST_INC (SImode,
27973 stack_pointer_rtx));
27974 set_mem_alias_set (addr, get_frame_alias_set ());
27975 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27976 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27977 gen_rtx_REG (SImode, IP_REGNUM),
27978 NULL_RTX);
27981 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27982 return;
27984 if (crtl->calls_eh_return)
27985 emit_insn (gen_addsi3 (stack_pointer_rtx,
27986 stack_pointer_rtx,
27987 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27989 if (IS_STACKALIGN (func_type))
27990 /* Restore the original stack pointer. Before prologue, the stack was
27991 realigned and the original stack pointer saved in r0. For details,
27992 see comment in arm_expand_prologue. */
27993 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27995 emit_jump_insn (simple_return_rtx);
27998 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27999 function is not a sibcall. */
28000 void
28001 arm_expand_epilogue (bool really_return)
28003 unsigned long func_type;
28004 unsigned long saved_regs_mask;
28005 int num_regs = 0;
28006 int i;
28007 int amount;
28008 arm_stack_offsets *offsets;
28010 func_type = arm_current_func_type ();
28012 /* Naked functions don't have an epilogue. Hence, generate a return pattern
28013 and let output_return_instruction take care of any instruction emission. */
28014 if (IS_NAKED (func_type)
28015 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
28017 if (really_return)
28018 emit_jump_insn (simple_return_rtx);
28019 return;
28022 /* If we are throwing an exception, then we really must be doing a
28023 return, so we can't tail-call. */
28024 gcc_assert (!crtl->calls_eh_return || really_return);
28026 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
28028 arm_expand_epilogue_apcs_frame (really_return);
28029 return;
28032 /* Get frame offsets for ARM. */
28033 offsets = arm_get_frame_offsets ();
28034 saved_regs_mask = offsets->saved_regs_mask;
28035 num_regs = bit_count (saved_regs_mask);
28037 if (frame_pointer_needed)
28039 rtx_insn *insn;
28040 /* Restore stack pointer if necessary. */
28041 if (TARGET_ARM)
28043 /* In ARM mode, frame pointer points to first saved register.
28044 Restore stack pointer to last saved register. */
28045 amount = offsets->frame - offsets->saved_regs;
28047 /* Force out any pending memory operations that reference stacked data
28048 before stack de-allocation occurs. */
28049 emit_insn (gen_blockage ());
28050 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
28051 hard_frame_pointer_rtx,
28052 GEN_INT (amount)));
28053 arm_add_cfa_adjust_cfa_note (insn, amount,
28054 stack_pointer_rtx,
28055 hard_frame_pointer_rtx);
28057 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
28058 deleted. */
28059 emit_insn (gen_force_register_use (stack_pointer_rtx));
28061 else
28063 /* In Thumb-2 mode, the frame pointer points to the last saved
28064 register. */
28065 amount = offsets->locals_base - offsets->saved_regs;
28066 if (amount)
28068 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
28069 hard_frame_pointer_rtx,
28070 GEN_INT (amount)));
28071 arm_add_cfa_adjust_cfa_note (insn, amount,
28072 hard_frame_pointer_rtx,
28073 hard_frame_pointer_rtx);
28076 /* Force out any pending memory operations that reference stacked data
28077 before stack de-allocation occurs. */
28078 emit_insn (gen_blockage ());
28079 insn = emit_insn (gen_movsi (stack_pointer_rtx,
28080 hard_frame_pointer_rtx));
28081 arm_add_cfa_adjust_cfa_note (insn, 0,
28082 stack_pointer_rtx,
28083 hard_frame_pointer_rtx);
28084 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
28085 deleted. */
28086 emit_insn (gen_force_register_use (stack_pointer_rtx));
28089 else
28091 /* Pop off outgoing args and local frame to adjust stack pointer to
28092 last saved register. */
28093 amount = offsets->outgoing_args - offsets->saved_regs;
28094 if (amount)
28096 rtx_insn *tmp;
28097 /* Force out any pending memory operations that reference stacked data
28098 before stack de-allocation occurs. */
28099 emit_insn (gen_blockage ());
28100 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
28101 stack_pointer_rtx,
28102 GEN_INT (amount)));
28103 arm_add_cfa_adjust_cfa_note (tmp, amount,
28104 stack_pointer_rtx, stack_pointer_rtx);
28105 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
28106 not deleted. */
28107 emit_insn (gen_force_register_use (stack_pointer_rtx));
28111 if (TARGET_VFP_BASE)
28113 /* Generate VFP register multi-pop. */
28114 int end_reg = LAST_VFP_REGNUM + 1;
28116 /* Scan the registers in reverse order. We need to match
28117 any groupings made in the prologue and generate matching
28118 vldm operations. The need to match groups is because,
28119 unlike pop, vldm can only do consecutive regs. */
28120 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
28121 /* Look for a case where a reg does not need restoring. */
28122 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
28124 /* Restore the regs discovered so far (from reg+2 to
28125 end_reg). */
28126 if (end_reg > i + 2)
28127 arm_emit_vfp_multi_reg_pop (i + 2,
28128 (end_reg - (i + 2)) / 2,
28129 stack_pointer_rtx);
28130 end_reg = i;
28133 /* Restore the remaining regs that we have discovered (or possibly
28134 even all of them, if the conditional in the for loop never
28135 fired). */
28136 if (end_reg > i + 2)
28137 arm_emit_vfp_multi_reg_pop (i + 2,
28138 (end_reg - (i + 2)) / 2,
28139 stack_pointer_rtx);
28142 if (TARGET_IWMMXT)
28143 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
28144 if (reg_needs_saving_p (i))
28146 rtx_insn *insn;
28147 rtx addr = gen_rtx_MEM (V2SImode,
28148 gen_rtx_POST_INC (SImode,
28149 stack_pointer_rtx));
28150 set_mem_alias_set (addr, get_frame_alias_set ());
28151 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
28152 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
28153 gen_rtx_REG (V2SImode, i),
28154 NULL_RTX);
28155 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
28156 stack_pointer_rtx, stack_pointer_rtx);
28159 if (saved_regs_mask)
28161 rtx insn;
28162 bool return_in_pc = false;
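/* For a normal function doing a real return, the saved LR can be loaded
   straight into the PC as part of the register pop, folding the return
   into the pop itself. */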
28164 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
28165 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
28166 && !IS_CMSE_ENTRY (func_type)
28167 && !IS_STACKALIGN (func_type)
28168 && really_return
28169 && crtl->args.pretend_args_size == 0
28170 && saved_regs_mask & (1 << LR_REGNUM)
28171 && !crtl->calls_eh_return
28172 && !arm_current_function_pac_enabled_p ())
28174 saved_regs_mask &= ~(1 << LR_REGNUM);
28175 saved_regs_mask |= (1 << PC_REGNUM);
28176 return_in_pc = true;
28179 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
28181 for (i = 0; i <= LAST_ARM_REGNUM; i++)
28182 if (saved_regs_mask & (1 << i))
28184 rtx addr = gen_rtx_MEM (SImode,
28185 gen_rtx_POST_INC (SImode,
28186 stack_pointer_rtx));
28187 set_mem_alias_set (addr, get_frame_alias_set ());
28189 if (i == PC_REGNUM)
28191 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
28192 XVECEXP (insn, 0, 0) = ret_rtx;
28193 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
28194 addr);
28195 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
28196 insn = emit_jump_insn (insn);
28198 else
28200 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
28201 addr));
28202 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
28203 gen_rtx_REG (SImode, i),
28204 NULL_RTX);
28205 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
28206 stack_pointer_rtx,
28207 stack_pointer_rtx);
28211 else
28213 if (TARGET_LDRD
28214 && current_tune->prefer_ldrd_strd
28215 && !optimize_function_for_size_p (cfun))
28217 if (TARGET_THUMB2)
28218 thumb2_emit_ldrd_pop (saved_regs_mask);
28219 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28220 arm_emit_ldrd_pop (saved_regs_mask);
28221 else
28222 arm_emit_multi_reg_pop (saved_regs_mask);
28224 else
28225 arm_emit_multi_reg_pop (saved_regs_mask);
28228 if (return_in_pc)
28229 return;
28232 amount
28233 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
28234 if (amount)
28236 int i, j;
28237 rtx dwarf = NULL_RTX;
28238 rtx_insn *tmp =
28239 emit_insn (gen_addsi3 (stack_pointer_rtx,
28240 stack_pointer_rtx,
28241 GEN_INT (amount)));
28243 RTX_FRAME_RELATED_P (tmp) = 1;
28245 if (cfun->machine->uses_anonymous_args)
28247 /* Restore pretend args.  Refer to arm_expand_prologue for how the
28248 pretend_args are saved on the stack. */
28249 int num_regs = crtl->args.pretend_args_size / 4;
28250 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
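/* Worked example: pretend_args_size == 8 gives num_regs == 2, so
   saved_regs_mask == (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3, which
   are the two argument registers the prologue pushed.  */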
28251 for (j = 0, i = 0; j < num_regs; i++)
28252 if (saved_regs_mask & (1 << i))
28254 rtx reg = gen_rtx_REG (SImode, i);
28255 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28256 j++;
28258 REG_NOTES (tmp) = dwarf;
28260 arm_add_cfa_adjust_cfa_note (tmp, amount,
28261 stack_pointer_rtx, stack_pointer_rtx);
28264 if (IS_CMSE_ENTRY (func_type))
28266 /* CMSE_ENTRY always returns. */
28267 gcc_assert (really_return);
28268 /* Clear all caller-saved regs that are not used to return. */
28269 cmse_nonsecure_entry_clear_before_return ();
28271 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28272 VLDR. */
28273 if (TARGET_HAVE_FPCXT_CMSE)
28275 rtx_insn *insn;
28277 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28278 GEN_INT (FPCXTNS_ENUM)));
28279 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28280 plus_constant (Pmode, stack_pointer_rtx, 4));
28281 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28282 RTX_FRAME_RELATED_P (insn) = 1;
28286 if (arm_current_function_pac_enabled_p ())
28287 emit_insn (gen_aut_nop ());
28289 if (!really_return)
28290 return;
28292 if (crtl->calls_eh_return)
28293 emit_insn (gen_addsi3 (stack_pointer_rtx,
28294 stack_pointer_rtx,
28295 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28297 if (IS_STACKALIGN (func_type))
28298 /* Restore the original stack pointer. Before prologue, the stack was
28299 realigned and the original stack pointer saved in r0. For details,
28300 see comment in arm_expand_prologue. */
28301 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28303 emit_jump_insn (simple_return_rtx);
28306 /* Implementation of insn prologue_thumb1_interwork. This is the first
28307 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28309 const char *
28310 thumb1_output_interwork (void)
28312 const char * name;
28313 FILE *f = asm_out_file;
28315 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28316 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28317 == SYMBOL_REF);
28318 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28320 /* Generate code sequence to switch us into Thumb mode. */
28321 /* The .code 32 directive has already been emitted by
28322 ASM_DECLARE_FUNCTION_NAME. */
28323 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28324 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28326 /* Generate a label, so that the debugger will notice the
28327 change in instruction sets. This label is also used by
28328 the assembler to bypass the ARM code when this function
28329 is called from a Thumb encoded function elsewhere in the
28330 same file. Hence the definition of STUB_NAME here must
28331 agree with the definition in gas/config/tc-arm.c. */
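/* A rough sketch of the stub emitted for a function "foo" (assuming
   an empty user label prefix):

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_offoo
	.thumb_func
   .real_start_offoo:

   The orr/bx pair transfers control to the Thumb code at the stub
   label with the Thumb bit set.  */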
28333 #define STUB_NAME ".real_start_of"
28335 fprintf (f, "\t.code\t16\n");
28336 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28337 fprintf (f, "\t.thumb_func\n");
28338 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28340 return "";
28343 /* Handle the case of a double word load into a low register from
28344 a computed memory address. The computed address may involve a
28345 register which is overwritten by the load. */
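/* E.g. for a doubleword load of r0/r1 from the address r0 + r1, the
   PLUS case below emits

	add	r1, r0, r1
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]

   computing the address into the high destination register first so
   that the base register is not clobbered before both words have
   been loaded.  */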
28346 const char *
28347 thumb_load_double_from_address (rtx *operands)
28349 rtx addr;
28350 rtx base;
28351 rtx offset;
28352 rtx arg1;
28353 rtx arg2;
28355 gcc_assert (REG_P (operands[0]));
28356 gcc_assert (MEM_P (operands[1]));
28358 /* Get the memory address. */
28359 addr = XEXP (operands[1], 0);
28361 /* Work out how the memory address is computed. */
28362 switch (GET_CODE (addr))
28364 case REG:
28365 if (reg_overlap_mentioned_p (addr, operands[0]))
28366 output_asm_insn ("ldmia\t%m1, {%0, %H0}", operands);
28367 else
28369 operands[2] = adjust_address (operands[1], SImode, 4);
28370 output_asm_insn ("ldr\t%0, %1", operands);
28371 output_asm_insn ("ldr\t%H0, %2", operands);
28373 break;
28375 case CONST:
28376 /* Compute <address> + 4 for the high order load. */
28377 operands[2] = adjust_address (operands[1], SImode, 4);
28379 output_asm_insn ("ldr\t%0, %1", operands);
28380 output_asm_insn ("ldr\t%H0, %2", operands);
28381 break;
28383 case PLUS:
28384 arg1 = XEXP (addr, 0);
28385 arg2 = XEXP (addr, 1);
28387 if (CONSTANT_P (arg1))
28388 base = arg2, offset = arg1;
28389 else
28390 base = arg1, offset = arg2;
28392 gcc_assert (REG_P (base));
28394 /* Catch the case of <address> = <reg> + <reg> */
28395 if (REG_P (offset))
28397 int reg_offset = REGNO (offset);
28398 int reg_base = REGNO (base);
28399 int reg_dest = REGNO (operands[0]);
28401 /* Add the base and offset registers together into the
28402 higher destination register. */
28403 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28404 reg_dest + 1, reg_base, reg_offset);
28406 /* Load the lower destination register from the address in
28407 the higher destination register. */
28408 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28409 reg_dest, reg_dest + 1);
28411 /* Load the higher destination register from its own address
28412 plus 4. */
28413 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28414 reg_dest + 1, reg_dest + 1);
28416 else
28418 /* Compute <address> + 4 for the high order load. */
28419 operands[2] = adjust_address (operands[1], SImode, 4);
28421 /* If the computed address is held in the low order register
28422 then load the high order register first, otherwise always
28423 load the low order register first. */
28424 if (REGNO (operands[0]) == REGNO (base))
28426 output_asm_insn ("ldr\t%H0, %2", operands);
28427 output_asm_insn ("ldr\t%0, %1", operands);
28429 else
28431 output_asm_insn ("ldr\t%0, %1", operands);
28432 output_asm_insn ("ldr\t%H0, %2", operands);
28435 break;
28437 case LABEL_REF:
28438 /* With no registers to worry about we can just load the value
28439 directly. */
28440 operands[2] = adjust_address (operands[1], SImode, 4);
28442 output_asm_insn ("ldr\t%H0, %2", operands);
28443 output_asm_insn ("ldr\t%0, %1", operands);
28444 break;
28446 default:
28447 gcc_unreachable ();
28450 return "";
28453 const char *
28454 thumb_output_move_mem_multiple (int n, rtx *operands)
28456 switch (n)
28458 case 2:
28459 if (REGNO (operands[4]) > REGNO (operands[5]))
28460 std::swap (operands[4], operands[5]);
28462 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28463 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28464 break;
28466 case 3:
28467 if (REGNO (operands[4]) > REGNO (operands[5]))
28468 std::swap (operands[4], operands[5]);
28469 if (REGNO (operands[5]) > REGNO (operands[6]))
28470 std::swap (operands[5], operands[6]);
28471 if (REGNO (operands[4]) > REGNO (operands[5]))
28472 std::swap (operands[4], operands[5]);
28474 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28475 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28476 break;
28478 default:
28479 gcc_unreachable ();
28482 return "";
28485 /* Output a call-via instruction for thumb state. */
28486 const char *
28487 thumb_call_via_reg (rtx reg)
28489 int regno = REGNO (reg);
28490 rtx *labelp;
28492 gcc_assert (regno < LR_REGNUM);
28494 /* If we are in the normal text section we can use a single instance
28495 per compilation unit. If we are doing function sections, then we need
28496 an entry per section, since we can't rely on reachability. */
28497 if (in_section == text_section)
28499 thumb_call_reg_needed = 1;
28501 if (thumb_call_via_label[regno] == NULL)
28502 thumb_call_via_label[regno] = gen_label_rtx ();
28503 labelp = thumb_call_via_label + regno;
28505 else
28507 if (cfun->machine->call_via[regno] == NULL)
28508 cfun->machine->call_via[regno] = gen_label_rtx ();
28509 labelp = cfun->machine->call_via + regno;
28512 output_asm_insn ("bl\t%a0", labelp);
28513 return "";
28516 /* Routines for generating rtl. */
28517 void
28518 thumb_expand_cpymemqi (rtx *operands)
28520 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28521 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28522 HOST_WIDE_INT len = INTVAL (operands[2]);
28523 HOST_WIDE_INT offset = 0;
28525 while (len >= 12)
28527 emit_insn (gen_cpymem12b (out, in, out, in));
28528 len -= 12;
28531 if (len >= 8)
28533 emit_insn (gen_cpymem8b (out, in, out, in));
28534 len -= 8;
28537 if (len >= 4)
28539 rtx reg = gen_reg_rtx (SImode);
28540 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28541 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28542 len -= 4;
28543 offset += 4;
28546 if (len >= 2)
28548 rtx reg = gen_reg_rtx (HImode);
28549 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28550 plus_constant (Pmode, in,
28551 offset))));
28552 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28553 offset)),
28554 reg));
28555 len -= 2;
28556 offset += 2;
28559 if (len)
28561 rtx reg = gen_reg_rtx (QImode);
28562 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28563 plus_constant (Pmode, in,
28564 offset))));
28565 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28566 offset)),
28567 reg));
28571 void
28572 thumb_reload_out_hi (rtx *operands)
28574 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28577 /* Return the length of a function name prefix
28578 that starts with the character 'c'. */
28579 static int
28580 arm_get_strip_length (int c)
28582 switch (c)
28584 ARM_NAME_ENCODING_LENGTHS
28585 default: return 0;
28589 /* Return a pointer to a function's name with any
28590 and all prefix encodings stripped from it. */
28591 const char *
28592 arm_strip_name_encoding (const char *name)
28594 int skip;
28596 while ((skip = arm_get_strip_length (* name)))
28597 name += skip;
28599 return name;
28602 /* If there is a '*' anywhere in the name's prefix, then
28603 emit the stripped name verbatim, otherwise prepend an
28604 underscore if leading underscores are being used. */
28605 void
28606 arm_asm_output_labelref (FILE *stream, const char *name)
28608 int skip;
28609 int verbatim = 0;
28611 while ((skip = arm_get_strip_length (* name)))
28613 verbatim |= (*name == '*');
28614 name += skip;
28617 if (verbatim)
28618 fputs (name, stream);
28619 else
28620 asm_fprintf (stream, "%U%s", name);
28623 /* This function is used to emit an EABI tag and its associated value.
28624 We emit the numerical value of the tag in case the assembler does not
28625 support textual tags (e.g. gas prior to 2.20). If requested we include
28626 the tag name in a comment so that anyone reading the assembler output
28627 will know which tag is being set.
28629 This function is not static because arm-c.cc needs it too. */
28631 void
28632 arm_emit_eabi_attribute (const char *name, int num, int val)
28634 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28635 if (flag_verbose_asm || flag_debug_asm)
28636 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28637 asm_fprintf (asm_out_file, "\n");
28640 /* This function is used to print CPU tuning information as comment
28641 in assembler file. Pointers are not printed for now. */
28643 void
28644 arm_print_tune_info (void)
28646 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28647 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28648 current_tune->constant_limit);
28649 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28650 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28651 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28652 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28653 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28654 "prefetch.l1_cache_size:\t%d\n",
28655 current_tune->prefetch.l1_cache_size);
28656 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28657 "prefetch.l1_cache_line_size:\t%d\n",
28658 current_tune->prefetch.l1_cache_line_size);
28659 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28660 "prefer_constant_pool:\t%d\n",
28661 (int) current_tune->prefer_constant_pool);
28662 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28663 "branch_cost:\t(s:speed, p:predictable)\n");
28664 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28665 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28666 current_tune->branch_cost (false, false));
28667 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28668 current_tune->branch_cost (false, true));
28669 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28670 current_tune->branch_cost (true, false));
28671 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28672 current_tune->branch_cost (true, true));
28673 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28674 "prefer_ldrd_strd:\t%d\n",
28675 (int) current_tune->prefer_ldrd_strd);
28676 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28677 "logical_op_non_short_circuit:\t[%d,%d]\n",
28678 (int) current_tune->logical_op_non_short_circuit_thumb,
28679 (int) current_tune->logical_op_non_short_circuit_arm);
28680 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28681 "disparage_flag_setting_t16_encodings:\t%d\n",
28682 (int) current_tune->disparage_flag_setting_t16_encodings);
28683 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28684 "string_ops_prefer_neon:\t%d\n",
28685 (int) current_tune->string_ops_prefer_neon);
28686 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28687 "max_insns_inline_memset:\t%d\n",
28688 current_tune->max_insns_inline_memset);
28689 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28690 current_tune->fusible_ops);
28691 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28692 (int) current_tune->sched_autopref);
28695 /* The last set of target options used to emit .arch directives, etc. This
28696 could be a function-local static if it were not required to expose it as a
28697 root to the garbage collector. */
28698 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28700 /* Print .arch and .arch_extension directives corresponding to the
28701 current architecture configuration. */
28702 static void
28703 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28705 arm_build_target build_target;
28706 /* If the target options haven't changed since the last time we were called
28707 there is nothing to do. This should be sufficient to suppress the
28708 majority of redundant work. */
28709 if (last_asm_targ_options == targ_options)
28710 return;
28712 last_asm_targ_options = targ_options;
28714 build_target.isa = sbitmap_alloc (isa_num_bits);
28715 arm_configure_build_target (&build_target, targ_options, false);
28717 if (build_target.core_name
28718 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28720 const char* truncated_name
28721 = arm_rewrite_selected_cpu (build_target.core_name);
28722 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28725 const arch_option *arch
28726 = arm_parse_arch_option_name (all_architectures, "-march",
28727 build_target.arch_name);
28728 auto_sbitmap opt_bits (isa_num_bits);
28730 gcc_assert (arch);
28732 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28734 /* Keep backward compatibility for assemblers which don't support
28735 armv7ve. Fortunately, none of the following extensions are reset
28736 by a .fpu directive. */
28737 asm_fprintf (stream, "\t.arch armv7-a\n");
28738 asm_fprintf (stream, "\t.arch_extension virt\n");
28739 asm_fprintf (stream, "\t.arch_extension idiv\n");
28740 asm_fprintf (stream, "\t.arch_extension sec\n");
28741 asm_fprintf (stream, "\t.arch_extension mp\n");
28743 else
28744 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28746 /* The .fpu directive will reset any architecture extensions from the
28747 assembler that relate to the fp/vector extensions. So put this out before
28748 any .arch_extension directives. */
28749 const char *fpu_name = (TARGET_SOFT_FLOAT
28750 ? "softvfp"
28751 : arm_identify_fpu_from_isa (build_target.isa));
28752 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28754 if (!arch->common.extensions)
28755 return;
28757 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28758 opt->name != NULL;
28759 opt++)
28761 if (!opt->remove)
28763 arm_initialize_isa (opt_bits, opt->isa_bits);
28765 /* For "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28766 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28767 floating point instructions are disabled. So the following check
28768 restricts the printing of ".arch_extension mve" and
28769 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28770 this special behaviour because the feature bits "mve" and
28771 "mve_float" are not part of the "fpu bits", so they are not cleared
28772 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28773 TARGET_HAVE_MVE_FLOAT are disabled. */
28774 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28775 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28776 && !TARGET_HAVE_MVE_FLOAT))
28777 continue;
28779 /* If every feature bit of this option is set in the target ISA
28780 specification, print out the option name. However, don't print
28781 anything if all the bits are part of the FPU specification. */
28782 if (bitmap_subset_p (opt_bits, build_target.isa)
28783 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28784 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28789 static void
28790 arm_file_start (void)
28792 int val;
28793 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28794 bool bti = (aarch_enable_bti == 1);
28796 arm_print_asm_arch_directives
28797 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28799 if (TARGET_BPABI)
28801 /* If we have a named cpu, but the assembler does not support that
28802 name via .cpu, put out a cpu name attribute; but don't do this if the
28803 name starts with the fictitious prefix, 'generic'. */
28804 if (arm_active_target.core_name
28805 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28806 && !startswith (arm_active_target.core_name, "generic"))
28808 const char* truncated_name
28809 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28810 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28811 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28812 truncated_name);
28815 if (print_tune_info)
28816 arm_print_tune_info ();
28818 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28819 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28821 if (TARGET_HARD_FLOAT_ABI)
28822 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28824 /* Some of these attributes only apply when the corresponding features
28825 are used. However we don't have any easy way of figuring this out.
28826 Conservatively record the setting that would have been used. */
28828 if (flag_rounding_math)
28829 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28831 if (!flag_unsafe_math_optimizations)
28833 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28834 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28836 if (flag_signaling_nans)
28837 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28839 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28840 flag_finite_math_only ? 1 : 3);
28842 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28843 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28844 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28845 flag_short_enums ? 1 : 2);
28847 /* Tag_ABI_optimization_goals. */
28848 if (optimize_size)
28849 val = 4;
28850 else if (optimize >= 2)
28851 val = 2;
28852 else if (optimize)
28853 val = 1;
28854 else
28855 val = 6;
28856 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28858 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28859 unaligned_access);
28861 if (arm_fp16_format)
28862 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28863 (int) arm_fp16_format);
28865 if (TARGET_HAVE_PACBTI)
28867 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28868 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28870 else if (pac || bti)
28872 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28873 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28876 if (bti)
28877 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28878 if (pac)
28879 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28881 if (arm_lang_output_object_attributes_hook)
28882 arm_lang_output_object_attributes_hook();
28885 default_file_start ();
28888 static void
28889 arm_file_end (void)
28891 int regno;
28893 /* Just in case the last function output in the assembler had non-default
28894 architecture directives, we force the assembler state back to the default
28895 set, so that any 'calculated' build attributes are based on the default
28896 options rather than the special options for that function. */
28897 arm_print_asm_arch_directives
28898 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28900 if (NEED_INDICATE_EXEC_STACK)
28901 /* Add .note.GNU-stack. */
28902 file_end_indicate_exec_stack ();
28904 if (! thumb_call_reg_needed)
28905 return;
28907 switch_to_section (text_section);
28908 asm_fprintf (asm_out_file, "\t.code 16\n");
28909 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28911 for (regno = 0; regno < LR_REGNUM; regno++)
28913 rtx label = thumb_call_via_label[regno];
28915 if (label != 0)
28917 targetm.asm_out.internal_label (asm_out_file, "L",
28918 CODE_LABEL_NUMBER (label));
28919 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28924 /* Symbols in the text segment can be accessed without indirecting via the
28925 constant pool; it may take an extra binary operation, but this is still
28926 faster than indirecting via memory. Don't do this when not optimizing,
28927 since we won't be calculating all of the offsets necessary to do this
28928 simplification. */
28930 static void
28931 arm_encode_section_info (tree decl, rtx rtl, int first)
28933 if (optimize > 0 && TREE_CONSTANT (decl))
28934 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28936 default_encode_section_info (decl, rtl, first);
28939 static void
28940 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28942 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28943 && !strcmp (prefix, "L"))
28945 arm_ccfsm_state = 0;
28946 arm_target_insn = NULL;
28948 default_internal_label (stream, prefix, labelno);
28951 /* Define classes to generate code as RTL or output asm to a file.
28952 Using templates then allows us to use the same code to output code
28953 sequences in the two formats. */
28954 class thumb1_const_rtl
28956 public:
28957 thumb1_const_rtl (rtx dst) : dst (dst) {}
28959 void mov (HOST_WIDE_INT val)
28961 emit_set_insn (dst, GEN_INT (val));
28964 void add (HOST_WIDE_INT val)
28966 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28969 void ashift (HOST_WIDE_INT shift)
28971 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28974 void neg ()
28976 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28979 private:
28980 rtx dst;
28983 class thumb1_const_print
28985 public:
28986 thumb1_const_print (FILE *f, int regno)
28988 t_file = f;
28989 dst_regname = reg_names[regno];
28992 void mov (HOST_WIDE_INT val)
28994 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28995 dst_regname, val);
28998 void add (HOST_WIDE_INT val)
29000 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
29001 dst_regname, val);
29004 void ashift (HOST_WIDE_INT shift)
29006 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
29007 dst_regname, shift);
29010 void neg ()
29012 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
29015 private:
29016 FILE *t_file;
29017 const char *dst_regname;
29020 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
29021 Avoid generating useless code when one of the bytes is zero. */
29022 template <class T>
29023 void
29024 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
29026 bool mov_done_p = false;
29027 unsigned HOST_WIDE_INT val = op1;
29028 int shift = 0;
29029 int i;
29031 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
29033 if (val <= 255)
29035 dst.mov (val);
29036 return;
29039 /* For negative numbers with the top nine bits set, build the
29040 opposite of OP1 and then negate it; this is generally shorter and
29041 never longer. */
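/* E.g. for OP1 == -10 (0xfffffff6) the test below succeeds, so we
   emit "movs rd, #10" followed by "rsbs rd, #0", two instructions
   instead of building the value byte by byte.  */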
29042 if ((val & 0xFF800000) == 0xFF800000)
29044 thumb1_gen_const_int_1 (dst, -op1);
29045 dst.neg ();
29046 return;
29049 /* In the general case, we need 7 instructions to build
29050 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
29051 do better if VAL is small enough, or
29052 right-shiftable by a suitable amount. If the
29053 right-shift lets us encode at least one less byte,
29054 it's worth it: we save an adds and an lsls at the
29055 expense of a final lsls. */
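/* E.g. for VAL == 0x44000 the final shift is 14 and VAL >> 14 == 17
   fits in one byte, so we emit "movs rd, #17" then "lsls rd, #14",
   two instructions instead of the four needed to build 0x44000 byte
   by byte.  */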
29056 int final_shift = number_of_first_bit_set (val);
29058 int leading_zeroes = clz_hwi (val);
29059 int number_of_bytes_needed
29060 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
29061 / BITS_PER_UNIT) + 1;
29062 int number_of_bytes_needed2
29063 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
29064 / BITS_PER_UNIT) + 1;
29066 if (number_of_bytes_needed2 < number_of_bytes_needed)
29067 val >>= final_shift;
29068 else
29069 final_shift = 0;
29071 /* If we are in a very small range, we can use either a single movs
29072 or movs+adds. */
29073 if (val <= 510)
29075 if (val > 255)
29077 unsigned HOST_WIDE_INT high = val - 255;
29079 dst.mov (high);
29080 dst.add (255);
29082 else
29083 dst.mov (val);
29085 if (final_shift > 0)
29086 dst.ashift (final_shift);
29088 else
29090 /* General case, emit upper 3 bytes as needed. */
29091 for (i = 0; i < 3; i++)
29093 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
29095 if (byte)
29097 /* We are about to emit new bits, stop accumulating a
29098 shift amount, and left-shift only if we have already
29099 emitted some upper bits. */
29100 if (mov_done_p)
29102 dst.ashift (shift);
29103 dst.add (byte);
29105 else
29106 dst.mov (byte);
29108 /* Stop accumulating shift amount since we've just
29109 emitted some bits. */
29110 shift = 0;
29112 mov_done_p = true;
29115 if (mov_done_p)
29116 shift += 8;
29119 /* Emit lower byte. */
29120 if (!mov_done_p)
29121 dst.mov (val & 0xff);
29122 else
29124 dst.ashift (shift);
29125 if (val & 0xff)
29126 dst.add (val & 0xff);
29129 if (final_shift > 0)
29130 dst.ashift (final_shift);
29134 /* Proxies for thumb1.md, since the thumb1_const_print and
29135 thumb1_const_rtl classes are not exported. */
29136 void
29137 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
29139 thumb1_const_rtl t (dst);
29140 thumb1_gen_const_int_1 (t, op1);
29143 void
29144 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
29146 thumb1_const_print t (asm_out_file, REGNO (dst));
29147 thumb1_gen_const_int_1 (t, op1);
29150 /* Output code to add DELTA to the first argument, and then jump
29151 to FUNCTION. Used for C++ multiple inheritance. */
29153 static void
29154 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29155 HOST_WIDE_INT, tree function)
29157 static int thunk_label = 0;
29158 char label[256];
29159 char labelpc[256];
29160 int mi_delta = delta;
29161 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
29162 int shift = 0;
29163 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
29164 ? 1 : 0);
29165 if (mi_delta < 0)
29166 mi_delta = - mi_delta;
29168 final_start_function (emit_barrier (), file, 1);
29170 if (TARGET_THUMB1)
29172 int labelno = thunk_label++;
29173 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
29174 /* Thunks are entered in arm mode when available. */
29175 if (TARGET_THUMB1_ONLY)
29177 /* push r3 so we can use it as a temporary. */
29178 /* TODO: Omit this save if r3 is not used. */
29179 fputs ("\tpush {r3}\n", file);
29181 /* With -mpure-code, we cannot load the address from the
29182 constant pool: we build it explicitly. */
29183 if (target_pure_code)
29185 fputs ("\tmovs\tr3, #:upper8_15:#", file);
29186 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29187 fputc ('\n', file);
29188 fputs ("\tlsls r3, #8\n", file);
29189 fputs ("\tadds\tr3, #:upper0_7:#", file);
29190 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29191 fputc ('\n', file);
29192 fputs ("\tlsls r3, #8\n", file);
29193 fputs ("\tadds\tr3, #:lower8_15:#", file);
29194 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29195 fputc ('\n', file);
29196 fputs ("\tlsls r3, #8\n", file);
29197 fputs ("\tadds\tr3, #:lower0_7:#", file);
29198 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29199 fputc ('\n', file);
29201 else
29202 fputs ("\tldr\tr3, ", file);
29204 else
29206 fputs ("\tldr\tr12, ", file);
29209 if (!target_pure_code)
29211 assemble_name (file, label);
29212 fputc ('\n', file);
29215 if (flag_pic)
29217 /* If we are generating PIC, the ldr instruction below loads
29218 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29219 the address of the add + 8, so we have:
29221 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29222 = target + 1.
29224 Note that we have "+ 1" because some versions of GNU ld
29225 don't set the low bit of the result for R_ARM_REL32
29226 relocations against thumb function symbols.
29227 On ARMv6M this is +4, not +8. */
29228 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
29229 assemble_name (file, labelpc);
29230 fputs (":\n", file);
29231 if (TARGET_THUMB1_ONLY)
29233 /* This is 2 insns after the start of the thunk, so we know it
29234 is 4-byte aligned. */
29235 fputs ("\tadd\tr3, pc, r3\n", file);
29236 fputs ("\tmov r12, r3\n", file);
29238 else
29239 fputs ("\tadd\tr12, pc, r12\n", file);
29241 else if (TARGET_THUMB1_ONLY)
29242 fputs ("\tmov r12, r3\n", file);
29244 if (TARGET_THUMB1_ONLY)
29246 if (mi_delta > 255)
29248 /* With -mpure-code, we cannot load MI_DELTA from the
29249 constant pool: we build it explicitly. */
29250 if (target_pure_code)
29252 thumb1_const_print r3 (file, 3);
29253 thumb1_gen_const_int_1 (r3, mi_delta);
29255 else
29257 fputs ("\tldr\tr3, ", file);
29258 assemble_name (file, label);
29259 fputs ("+4\n", file);
29261 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
29262 mi_op, this_regno, this_regno);
29264 else if (mi_delta != 0)
29266 /* Thumb1 unified syntax requires s suffix in instruction name when
29267 one of the operands is immediate. */
29268 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29269 mi_op, this_regno, this_regno,
29270 mi_delta);
29273 else
29275 /* TODO: Use movw/movt for large constants when available. */
29276 while (mi_delta != 0)
29278 if ((mi_delta & (3 << shift)) == 0)
29279 shift += 2;
29280 else
29282 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29283 mi_op, this_regno, this_regno,
29284 mi_delta & (0xff << shift));
29285 mi_delta &= ~(0xff << shift);
29286 shift += 8;
29290 if (TARGET_THUMB1)
29292 if (TARGET_THUMB1_ONLY)
29293 fputs ("\tpop\t{r3}\n", file);
29295 fprintf (file, "\tbx\tr12\n");
29297 /* With -mpure-code, we don't need to emit literals for the
29298 function address and delta since we emitted code to build
29299 them. */
29300 if (!target_pure_code)
29302 ASM_OUTPUT_ALIGN (file, 2);
29303 assemble_name (file, label);
29304 fputs (":\n", file);
29305 if (flag_pic)
29307 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29308 rtx tem = XEXP (DECL_RTL (function), 0);
29309 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29310 pipeline offset is four rather than eight. Adjust the offset
29311 accordingly. */
29312 tem = plus_constant (GET_MODE (tem), tem,
29313 TARGET_THUMB1_ONLY ? -3 : -7);
29314 tem = gen_rtx_MINUS (GET_MODE (tem),
29315 tem,
29316 gen_rtx_SYMBOL_REF (Pmode,
29317 ggc_strdup (labelpc)));
29318 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29320 else
29321 /* Output ".word .LTHUNKn". */
29322 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29324 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29325 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29328 else
29330 fputs ("\tb\t", file);
29331 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29332 if (NEED_PLT_RELOC)
29333 fputs ("(PLT)", file);
29334 fputc ('\n', file);
29337 final_end_function ();
29340 /* MI thunk handling for TARGET_32BIT. */
29342 static void
29343 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29344 HOST_WIDE_INT vcall_offset, tree function)
29346 const bool long_call_p = arm_is_long_call_p (function);
29348 /* On ARM, this_regno is R0 or R1 depending on
29349 whether the function returns an aggregate or not. */
29351 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29352 function)
29353 ? R1_REGNUM : R0_REGNUM);
29355 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29356 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29357 reload_completed = 1;
29358 emit_note (NOTE_INSN_PROLOGUE_END);
29360 /* Add DELTA to THIS_RTX. */
29361 if (delta != 0)
29362 arm_split_constant (PLUS, Pmode, NULL_RTX,
29363 delta, this_rtx, this_rtx, false);
29365 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29366 if (vcall_offset != 0)
29368 /* Load *THIS_RTX. */
29369 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29370 /* Compute *THIS_RTX + VCALL_OFFSET. */
29371 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29372 false);
29373 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29374 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29375 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29378 /* Generate a tail call to the target function. */
29379 if (!TREE_USED (function))
29381 assemble_external (function);
29382 TREE_USED (function) = 1;
29384 rtx funexp = XEXP (DECL_RTL (function), 0);
29385 if (long_call_p)
29387 emit_move_insn (temp, funexp);
29388 funexp = temp;
29390 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29391 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29392 SIBLING_CALL_P (insn) = 1;
29393 emit_barrier ();
29395 /* Indirect calls require a bit of fixup in PIC mode. */
29396 if (long_call_p)
29398 split_all_insns_noflow ();
29399 arm_reorg ();
29402 insn = get_insns ();
29403 shorten_branches (insn);
29404 final_start_function (insn, file, 1);
29405 final (insn, file, 1);
29406 final_end_function ();
29408 /* Stop pretending this is a post-reload pass. */
29409 reload_completed = 0;
29412 /* Output code to add DELTA to the first argument, and then jump
29413 to FUNCTION. Used for C++ multiple inheritance. */
29415 static void
29416 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29417 HOST_WIDE_INT vcall_offset, tree function)
29419 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29421 assemble_start_function (thunk, fnname);
29422 if (aarch_bti_enabled ())
29423 emit_insn (aarch_gen_bti_c ());
29424 if (TARGET_32BIT)
29425 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29426 else
29427 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29428 assemble_end_function (thunk, fnname);
29432 arm_emit_vector_const (FILE *file, rtx x)
29434 int i;
29435 const char * pattern;
29437 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29439 switch (GET_MODE (x))
29441 case E_V2SImode: pattern = "%08x"; break;
29442 case E_V4HImode: pattern = "%04x"; break;
29443 case E_V8QImode: pattern = "%02x"; break;
29444 default: gcc_unreachable ();
29447 fprintf (file, "0x");
29448 for (i = CONST_VECTOR_NUNITS (x); i--;)
29450 rtx element;
29452 element = CONST_VECTOR_ELT (x, i);
29453 fprintf (file, pattern, INTVAL (element));
29456 return 1;
29459 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29460 HFmode constant pool entries are actually loaded with ldr. */
29461 void
29462 arm_emit_fp16_const (rtx c)
29464 long bits;
29466 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29467 if (WORDS_BIG_ENDIAN)
29468 assemble_zeros (2);
29469 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29470 if (!WORDS_BIG_ENDIAN)
29471 assemble_zeros (2);
29474 const char *
29475 arm_output_load_gr (rtx *operands)
29477 rtx reg;
29478 rtx offset;
29479 rtx wcgr;
29480 rtx sum;
29482 if (!MEM_P (operands [1])
29483 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29484 || !REG_P (reg = XEXP (sum, 0))
29485 || !CONST_INT_P (offset = XEXP (sum, 1))
29486 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29487 return "wldrw%?\t%0, %1";
29489 /* Fix up an out-of-range load of a GR register. */
29490 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29491 wcgr = operands[0];
29492 operands[0] = reg;
29493 output_asm_insn ("ldr%?\t%0, %1", operands);
29495 operands[0] = wcgr;
29496 operands[1] = reg;
29497 output_asm_insn ("tmcr%?\t%0, %1", operands);
29498 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29500 return "";
29503 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29505 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29506 named arg and all anonymous args onto the stack.
29507 XXX I know the prologue shouldn't be pushing registers, but it is faster
29508 that way. */
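/* For example, for "int f (int a, ...)" under AAPCS only r0 holds a
   named argument, so nregs == 1 below and *pretend_size becomes
   (4 - 1) * UNITS_PER_WORD == 12; the prologue then pushes r1-r3 so
   that the anonymous arguments sit contiguously with any arguments
   passed on the stack.  */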
29510 static void
29511 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29512 const function_arg_info &arg,
29513 int *pretend_size,
29514 int second_time ATTRIBUTE_UNUSED)
29516 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29517 int nregs;
29519 cfun->machine->uses_anonymous_args = 1;
29520 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29522 nregs = pcum->aapcs_ncrn;
29523 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29524 && (nregs & 1))
29526 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29527 if (res < 0 && warn_psabi)
29528 inform (input_location, "parameter passing for argument of "
29529 "type %qT changed in GCC 7.1", arg.type);
29530 else if (res > 0)
29532 nregs++;
29533 if (res > 1 && warn_psabi)
29534 inform (input_location,
29535 "parameter passing for argument of type "
29536 "%qT changed in GCC 9.1", arg.type);
29540 else
29541 nregs = pcum->nregs;
29543 if (nregs < NUM_ARG_REGS)
29544 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29547 /* We can't rely on the caller doing the proper promotion when
29548 using APCS or ATPCS. */
29550 static bool
29551 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29553 return !TARGET_AAPCS_BASED;
29556 static machine_mode
29557 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29558 machine_mode mode,
29559 int *punsignedp ATTRIBUTE_UNUSED,
29560 const_tree fntype ATTRIBUTE_UNUSED,
29561 int for_return ATTRIBUTE_UNUSED)
29563 if (GET_MODE_CLASS (mode) == MODE_INT
29564 && GET_MODE_SIZE (mode) < 4)
29565 return SImode;
29567 return mode;
29571 static bool
29572 arm_default_short_enums (void)
29574 return ARM_DEFAULT_SHORT_ENUMS;
29578 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29580 static bool
29581 arm_align_anon_bitfield (void)
29583 return TARGET_AAPCS_BASED;
29587 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29589 static tree
29590 arm_cxx_guard_type (void)
29592 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29596 /* The EABI says test the least significant bit of a guard variable. */
29598 static bool
29599 arm_cxx_guard_mask_bit (void)
29601 return TARGET_AAPCS_BASED;
29605 /* The EABI specifies that all array cookies are 8 bytes long. */
29607 static tree
29608 arm_get_cookie_size (tree type)
29610 tree size;
29612 if (!TARGET_AAPCS_BASED)
29613 return default_cxx_get_cookie_size (type);
29615 size = build_int_cst (sizetype, 8);
29616 return size;
29620 /* The EABI says that array cookies should also contain the element size. */
29622 static bool
29623 arm_cookie_has_size (void)
29625 return TARGET_AAPCS_BASED;
29629 /* The EABI says constructors and destructors should return a pointer to
29630 the object constructed/destroyed. */
29632 static bool
29633 arm_cxx_cdtor_returns_this (void)
29635 return TARGET_AAPCS_BASED;
29638 /* The EABI says that an inline function may never be the key
29639 method. */
29641 static bool
29642 arm_cxx_key_method_may_be_inline (void)
29644 return !TARGET_AAPCS_BASED;
29647 static void
29648 arm_cxx_determine_class_data_visibility (tree decl)
29650 if (!TARGET_AAPCS_BASED
29651 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29652 return;
29654 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29655 is exported. However, on systems without dynamic vague linkage,
29656 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29657 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29658 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29659 else
29660 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29661 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29664 static bool
29665 arm_cxx_class_data_always_comdat (void)
29667 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29668 vague linkage if the class has no key function. */
29669 return !TARGET_AAPCS_BASED;
29673 /* The EABI says __aeabi_atexit should be used to register static
29674 destructors. */
29676 static bool
29677 arm_cxx_use_aeabi_atexit (void)
29679 return TARGET_AAPCS_BASED;
29683 void
29684 arm_set_return_address (rtx source, rtx scratch)
29686 arm_stack_offsets *offsets;
29687 HOST_WIDE_INT delta;
29688 rtx addr, mem;
29689 unsigned long saved_regs;
29691 offsets = arm_get_frame_offsets ();
29692 saved_regs = offsets->saved_regs_mask;
29694 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29695 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29696 else
29698 if (frame_pointer_needed)
29699 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29700 else
29702 /* LR will be the first saved register. */
29703 delta = offsets->outgoing_args - (offsets->frame + 4);
29706 if (delta >= 4096)
29708 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29709 GEN_INT (delta & ~4095)));
29710 addr = scratch;
29711 delta &= 4095;
29713 else
29714 addr = stack_pointer_rtx;
29716 addr = plus_constant (Pmode, addr, delta);
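/* E.g. if the saved LR slot is 4200 bytes above the stack pointer,
   the code above emits "add scratch, sp, #4096" and the store below
   then uses the address scratch + 104.  */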
29719 /* The store needs to be marked to prevent DSE from deleting
29720 it as dead if it is based on fp. */
29721 mem = gen_frame_mem (Pmode, addr);
29722 MEM_VOLATILE_P (mem) = true;
29723 emit_move_insn (mem, source);
29728 void
29729 thumb_set_return_address (rtx source, rtx scratch)
29731 arm_stack_offsets *offsets;
29732 HOST_WIDE_INT delta;
29733 HOST_WIDE_INT limit;
29734 int reg;
29735 rtx addr, mem;
29736 unsigned long mask;
29738 emit_use (source);
29740 offsets = arm_get_frame_offsets ();
29741 mask = offsets->saved_regs_mask;
29742 if (mask & (1 << LR_REGNUM))
29744 limit = 1024;
29745 /* Find the saved regs. */
29746 if (frame_pointer_needed)
29748 delta = offsets->soft_frame - offsets->saved_args;
29749 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29750 if (TARGET_THUMB1)
29751 limit = 128;
29753 else
29755 delta = offsets->outgoing_args - offsets->saved_args;
29756 reg = SP_REGNUM;
29758 /* Allow for the stack frame. */
29759 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29760 delta -= 16;
29761 /* The link register is always the first saved register. */
29762 delta -= 4;
29764 /* Construct the address. */
29765 addr = gen_rtx_REG (SImode, reg);
29766 if (delta > limit)
29768 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29769 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29770 addr = scratch;
29772 else
29773 addr = plus_constant (Pmode, addr, delta);
29775 /* The store needs to be marked to prevent DSE from deleting
29776 it as dead if it is based on fp. */
29777 mem = gen_frame_mem (Pmode, addr);
29778 MEM_VOLATILE_P (mem) = true;
29779 emit_move_insn (mem, source);
29781 else
29782 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29785 /* Implements target hook vector_mode_supported_p. */
29786 bool
29787 arm_vector_mode_supported_p (machine_mode mode)
29789 /* Neon also supports V2SImode, etc. listed in the clause below. */
29790 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29791 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29792 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29793 || mode == V8BFmode))
29794 return true;
29796 if ((TARGET_NEON || TARGET_IWMMXT)
29797 && ((mode == V2SImode)
29798 || (mode == V4HImode)
29799 || (mode == V8QImode)))
29800 return true;
29802 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29803 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29804 || mode == V2HAmode))
29805 return true;
29807 if (TARGET_HAVE_MVE
29808 && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
29809 return true;
29811 if (TARGET_HAVE_MVE_FLOAT
29812 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29813 return true;
29815 return false;
29818 /* Implements target hook array_mode. */
29819 static opt_machine_mode
29820 arm_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
29822 if (TARGET_HAVE_MVE
29823 /* MVE accepts only tuples of 2 or 4 vectors. */
29824 && (nelems == 2
29825 || nelems == 4))
29827 machine_mode struct_mode;
29828 FOR_EACH_MODE_IN_CLASS (struct_mode, GET_MODE_CLASS (mode))
29830 if (GET_MODE_INNER (struct_mode) == GET_MODE_INNER (mode)
29831 && known_eq (GET_MODE_NUNITS (struct_mode),
29832 GET_MODE_NUNITS (mode) * nelems))
29833 return struct_mode;
29836 return opt_machine_mode ();
29839 /* Implements target hook array_mode_supported_p. */
29841 static bool
29842 arm_array_mode_supported_p (machine_mode mode,
29843 unsigned HOST_WIDE_INT nelems)
29845 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29846 for now, as the lane-swapping logic needs to be extended in the expanders.
29847 See PR target/82518. */
29848 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29849 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29850 && (nelems >= 2 && nelems <= 4))
29851 return true;
29853 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29854 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29855 return true;
29857 return false;
29860 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29861 registers when autovectorizing for Neon, at least until multiple vector
29862 widths are supported properly by the middle-end. */
29864 static machine_mode
29865 arm_preferred_simd_mode (scalar_mode mode)
29867 if (TARGET_NEON)
29868 switch (mode)
29870 case E_HFmode:
29871 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29872 case E_SFmode:
29873 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29874 case E_SImode:
29875 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29876 case E_HImode:
29877 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29878 case E_QImode:
29879 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29880 case E_DImode:
29881 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29882 return V2DImode;
29883 break;
29885 default:;
29888 if (TARGET_REALLY_IWMMXT)
29889 switch (mode)
29891 case E_SImode:
29892 return V2SImode;
29893 case E_HImode:
29894 return V4HImode;
29895 case E_QImode:
29896 return V8QImode;
29898 default:;
29901 if (TARGET_HAVE_MVE)
29902 switch (mode)
29904 case E_QImode:
29905 return V16QImode;
29906 case E_HImode:
29907 return V8HImode;
29908 case E_SImode:
29909 return V4SImode;
29911 default:;
29914 if (TARGET_HAVE_MVE_FLOAT)
29915 switch (mode)
29917 case E_HFmode:
29918 return V8HFmode;
29919 case E_SFmode:
29920 return V4SFmode;
29922 default:;
29925 return word_mode;
29928 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29930 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29931 using r0-r4 for function arguments, r7 for the stack frame, and not have
29932 enough left over to do doubleword arithmetic. For Thumb-2 all the
29933 potentially problematic instructions accept high registers so this is not
29934 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29935 that require many low registers. */
29936 static bool
29937 arm_class_likely_spilled_p (reg_class_t rclass)
29939 if ((TARGET_THUMB1 && rclass == LO_REGS)
29940 || rclass == CC_REG)
29941 return true;
29943 return default_class_likely_spilled_p (rclass);
29946 /* Implements target hook small_register_classes_for_mode_p. */
29947 bool
29948 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29950 return TARGET_THUMB1;
29953 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29954 ARM insns and therefore guarantee that the shift count is modulo 256.
29955 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29956 guarantee no particular behavior for out-of-range counts. */
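/* E.g. an SImode shift by a variable count of 257 behaves like a
   shift by 1, so the middle-end may omit an explicit masking of the
   count with 255; for DImode no such guarantee is given.  */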
29958 static unsigned HOST_WIDE_INT
29959 arm_shift_truncation_mask (machine_mode mode)
29961 return mode == SImode ? 255 : 0;
29965 /* Map internal gcc register numbers to DWARF2 register numbers. */
29967 unsigned int
29968 arm_debugger_regno (unsigned int regno)
29970 if (regno < 16)
29971 return regno;
29973 if (IS_VFP_REGNUM (regno))
29975 /* See comment in arm_dwarf_register_span. */
29976 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29977 return 64 + regno - FIRST_VFP_REGNUM;
29978 else
29979 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29982 if (IS_IWMMXT_GR_REGNUM (regno))
29983 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29985 if (IS_IWMMXT_REGNUM (regno))
29986 return 112 + regno - FIRST_IWMMXT_REGNUM;
29988 if (IS_PAC_REGNUM (regno))
29989 return DWARF_PAC_REGNUM;
29991 return DWARF_FRAME_REGISTERS;
29994 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29995 GCC models them as 64 32-bit registers, so we need to describe this to
29996 the DWARF generation code. Other registers can use the default. */
29997 static rtx
29998 arm_dwarf_register_span (rtx rtl)
30000 machine_mode mode;
30001 unsigned regno;
30002 rtx parts[16];
30003 int nregs;
30004 int i;
30006 regno = REGNO (rtl);
30007 if (!IS_VFP_REGNUM (regno))
30008 return NULL_RTX;
30010 /* XXX FIXME: The EABI defines two VFP register ranges:
30011 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
30012 256-287: D0-D31
30013 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
30014 corresponding D register. Until GDB supports this, we shall use the
30015 legacy encodings. We also use these encodings for D0-D15 for
30016 compatibility with older debuggers. */
30017 mode = GET_MODE (rtl);
30018 if (GET_MODE_SIZE (mode) < 8)
30019 return NULL_RTX;
30021 if (VFP_REGNO_OK_FOR_SINGLE (regno))
30023 nregs = GET_MODE_SIZE (mode) / 4;
30024 for (i = 0; i < nregs; i += 2)
30025 if (TARGET_BIG_END)
30027 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
30028 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
30030 else
30032 parts[i] = gen_rtx_REG (SImode, regno + i);
30033 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
30036 else
30038 nregs = GET_MODE_SIZE (mode) / 8;
30039 for (i = 0; i < nregs; i++)
30040 parts[i] = gen_rtx_REG (DImode, regno + i);
30043 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
30046 #if ARM_UNWIND_INFO
30047 /* Emit unwind directives for a store-multiple instruction or stack pointer
30048 push during alignment.
30049 These should only ever be generated by the function prologue code, so
30050 expect them to have a particular form.
30051 The store-multiple instruction sometimes pushes pc as the last register,
30052 although it should not be tracked into unwind information, or for -Os
30053 sometimes pushes some dummy registers before the first register that needs
30054 to be tracked in unwind information; such dummy registers are there just
30055 to avoid separate stack adjustment, and will not be restored in the
30056 epilogue. */
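/* E.g. a prologue "push {r4, r5, lr}" is annotated below as
   ".save {r4, r5, lr}", and a "vpush {d8, d9}" as ".vsave {d8, d9}";
   a trailing push of pc, or leading dummy pushes, become an extra
   ".pad" adjustment rather than a .save entry.  */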
30058 static void
30059 arm_unwind_emit_sequence (FILE * out_file, rtx p)
30061 int i;
30062 HOST_WIDE_INT offset;
30063 HOST_WIDE_INT nregs;
30064 int reg_size;
30065 unsigned reg;
30066 unsigned lastreg;
30067 unsigned padfirst = 0, padlast = 0;
30068 rtx e;
30070 e = XVECEXP (p, 0, 0);
30071 gcc_assert (GET_CODE (e) == SET);
30073 /* First insn will adjust the stack pointer. */
30074 gcc_assert (GET_CODE (e) == SET
30075 && REG_P (SET_DEST (e))
30076 && REGNO (SET_DEST (e)) == SP_REGNUM
30077 && GET_CODE (SET_SRC (e)) == PLUS);
30079 offset = -INTVAL (XEXP (SET_SRC (e), 1));
30080 nregs = XVECLEN (p, 0) - 1;
30081 gcc_assert (nregs);
30083 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
30084 if (reg < 16 || IS_PAC_REGNUM (reg))
30086 /* For -Os dummy registers can be pushed at the beginning to
30087 avoid separate stack pointer adjustment. */
30088 e = XVECEXP (p, 0, 1);
30089 e = XEXP (SET_DEST (e), 0);
30090 if (GET_CODE (e) == PLUS)
30091 padfirst = INTVAL (XEXP (e, 1));
30092 gcc_assert (padfirst == 0 || optimize_size);
30093 /* The function prologue may also push pc, but not annotate it as it is
30094 never restored. We turn this into a stack pointer adjustment. */
30095 e = XVECEXP (p, 0, nregs);
30096 e = XEXP (SET_DEST (e), 0);
30097 if (GET_CODE (e) == PLUS)
30098 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
30099 else
30100 padlast = offset - 4;
30101 gcc_assert (padlast == 0 || padlast == 4);
30102 if (padlast == 4)
30103 fprintf (out_file, "\t.pad #4\n");
30104 reg_size = 4;
30105 fprintf (out_file, "\t.save {");
30107 else if (IS_VFP_REGNUM (reg))
30109 reg_size = 8;
30110 fprintf (out_file, "\t.vsave {");
30112 else
30113 /* Unknown register type. */
30114 gcc_unreachable ();
30116 /* If the stack increment doesn't match the size of the saved registers,
30117 something has gone horribly wrong. */
30118 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
30120 offset = padfirst;
30121 lastreg = 0;
30122 /* The remaining insns will describe the stores. */
30123 for (i = 1; i <= nregs; i++)
30125 /* Expect (set (mem <addr>) (reg)).
30126 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
30127 e = XVECEXP (p, 0, i);
30128 gcc_assert (GET_CODE (e) == SET
30129 && MEM_P (SET_DEST (e))
30130 && REG_P (SET_SRC (e)));
30132 reg = REGNO (SET_SRC (e));
30133 gcc_assert (reg >= lastreg);
30135 if (i != 1)
30136 fprintf (out_file, ", ");
30137 /* We can't use %r for vfp because we need to use the
30138 double precision register names. */
30139 if (IS_VFP_REGNUM (reg))
30140 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
30141 else if (IS_PAC_REGNUM (reg))
30142 asm_fprintf (asm_out_file, "ra_auth_code");
30143 else
30144 asm_fprintf (out_file, "%r", reg);
30146 if (flag_checking)
30148 /* Check that the addresses are consecutive. */
30149 e = XEXP (SET_DEST (e), 0);
30150 if (GET_CODE (e) == PLUS)
30151 gcc_assert (REG_P (XEXP (e, 0))
30152 && REGNO (XEXP (e, 0)) == SP_REGNUM
30153 && CONST_INT_P (XEXP (e, 1))
30154 && offset == INTVAL (XEXP (e, 1)));
30155 else
30156 gcc_assert (i == 1
30157 && REG_P (e)
30158 && REGNO (e) == SP_REGNUM);
30159 offset += reg_size;
30162 fprintf (out_file, "}\n");
30163 if (padfirst)
30164 fprintf (out_file, "\t.pad #%d\n", padfirst);
30167 /* Emit unwind directives for a SET. */
30169 static void
30170 arm_unwind_emit_set (FILE * out_file, rtx p)
30172 rtx e0;
30173 rtx e1;
30174 unsigned reg;
30176 e0 = XEXP (p, 0);
30177 e1 = XEXP (p, 1);
30178 switch (GET_CODE (e0))
30180 case MEM:
30181 /* Pushing a single register. */
30182 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
30183 || !REG_P (XEXP (XEXP (e0, 0), 0))
30184 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
30185 abort ();
30187 asm_fprintf (out_file, "\t.save ");
30188 if (IS_VFP_REGNUM (REGNO (e1)))
30189 asm_fprintf(out_file, "{d%d}\n",
30190 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
30191 else
30192 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
30193 break;
30195 case REG:
30196 if (REGNO (e0) == SP_REGNUM)
30198 /* A stack increment. */
30199 if (GET_CODE (e1) != PLUS
30200 || !REG_P (XEXP (e1, 0))
30201 || REGNO (XEXP (e1, 0)) != SP_REGNUM
30202 || !CONST_INT_P (XEXP (e1, 1)))
30203 abort ();
30205 asm_fprintf (out_file, "\t.pad #%wd\n",
30206 -INTVAL (XEXP (e1, 1)));
30208 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
30210 HOST_WIDE_INT offset;
30212 if (GET_CODE (e1) == PLUS)
30214 if (!REG_P (XEXP (e1, 0))
30215 || !CONST_INT_P (XEXP (e1, 1)))
30216 abort ();
30217 reg = REGNO (XEXP (e1, 0));
30218 offset = INTVAL (XEXP (e1, 1));
30219 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
30220 HARD_FRAME_POINTER_REGNUM, reg,
30221 offset);
30223 else if (REG_P (e1))
30225 reg = REGNO (e1);
30226 asm_fprintf (out_file, "\t.setfp %r, %r\n",
30227 HARD_FRAME_POINTER_REGNUM, reg);
30229 else
30230 abort ();
30232 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
30234 /* Move from sp to reg. */
30235 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
30237 else if (GET_CODE (e1) == PLUS
30238 && REG_P (XEXP (e1, 0))
30239 && REGNO (XEXP (e1, 0)) == SP_REGNUM
30240 && CONST_INT_P (XEXP (e1, 1)))
30242 /* Set reg to offset from sp. */
30243 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
30244 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
30246 else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
30248 if (cfun->machine->pacspval_needed)
30249 asm_fprintf (out_file, "\t.pacspval\n");
30251 else
30252 abort ();
30253 break;
30255 default:
30256 abort ();
30261 /* Emit unwind directives for the given insn. */
30263 static void
30264 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
30266 rtx note, pat;
30267 bool handled_one = false;
30269 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30270 return;
30272 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30273 && (TREE_NOTHROW (current_function_decl)
30274 || crtl->all_throwers_are_sibcalls))
30275 return;
30277 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
30278 return;
30280 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
30282 switch (REG_NOTE_KIND (note))
30284 case REG_FRAME_RELATED_EXPR:
30285 pat = XEXP (note, 0);
30286 goto found;
30288 case REG_CFA_REGISTER:
30289 pat = XEXP (note, 0);
30290 if (pat == NULL)
30292 pat = PATTERN (insn);
30293 if (GET_CODE (pat) == PARALLEL)
30294 pat = XVECEXP (pat, 0, 0);
30297 /* Only emitted for IS_STACKALIGN re-alignment. */
30299 rtx dest, src;
30300 unsigned reg;
30302 src = SET_SRC (pat);
30303 dest = SET_DEST (pat);
30305 gcc_assert (src == stack_pointer_rtx
30306 || IS_PAC_REGNUM (REGNO (src)));
30307 reg = REGNO (dest);
30309 if (IS_PAC_REGNUM (REGNO (src)))
30310 arm_unwind_emit_set (out_file, PATTERN (insn));
30311 else
30312 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30313 reg + 0x90, reg);
30315 handled_one = true;
30316 break;
30318 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
30319 to get correct dwarf information for shrink-wrapping. We should not
30320 emit unwind information for it because these notes are used either for
30321 pretend arguments or to adjust sp and restore registers from the
30322 stack. */
30323 case REG_CFA_DEF_CFA:
30324 case REG_CFA_ADJUST_CFA:
30325 case REG_CFA_RESTORE:
30326 return;
30328 case REG_CFA_EXPRESSION:
30329 case REG_CFA_OFFSET:
30330 /* ??? Only handling here what we actually emit. */
30331 gcc_unreachable ();
30333 default:
30334 break;
30337 if (handled_one)
30338 return;
30339 pat = PATTERN (insn);
30340 found:
30342 switch (GET_CODE (pat))
30344 case SET:
30345 arm_unwind_emit_set (out_file, pat);
30346 break;
30348 case SEQUENCE:
30349 /* Store multiple. */
30350 arm_unwind_emit_sequence (out_file, pat);
30351 break;
30353 default:
30354 abort();
30359 /* Output a reference from a function exception table to the type_info
30360 object X. The EABI specifies that the symbol should be relocated by
30361 an R_ARM_TARGET2 relocation. */
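/* For a hypothetical type_info symbol _ZTI3Foo this emits
       .word   _ZTI3Foo(TARGET2)
   which the assembler turns into an R_ARM_TARGET2 relocation.  */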
30363 static bool
30364 arm_output_ttype (rtx x)
30366 fputs ("\t.word\t", asm_out_file);
30367 output_addr_const (asm_out_file, x);
30368 /* Use special relocations for symbol references. */
30369 if (!CONST_INT_P (x))
30370 fputs ("(TARGET2)", asm_out_file);
30371 fputc ('\n', asm_out_file);
30373 return TRUE;
30376 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30378 static void
30379 arm_asm_emit_except_personality (rtx personality)
30381 fputs ("\t.personality\t", asm_out_file);
30382 output_addr_const (asm_out_file, personality);
30383 fputc ('\n', asm_out_file);
30385 #endif /* ARM_UNWIND_INFO */
30387 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30389 static void
30390 arm_asm_init_sections (void)
30392 #if ARM_UNWIND_INFO
30393 exception_section = get_unnamed_section (0, output_section_asm_op,
30394 "\t.handlerdata");
30395 #endif /* ARM_UNWIND_INFO */
30397 #ifdef OBJECT_FORMAT_ELF
30398 if (target_pure_code)
30399 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30400 #endif
30403 /* Output unwind directives for the start/end of a function. */
30405 void
30406 arm_output_fn_unwind (FILE * f, bool prologue)
30408 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30409 return;
30411 if (prologue)
30412 fputs ("\t.fnstart\n", f);
30413 else
30415 /* If this function will never be unwound, then mark it as such.
30416 The same condition is used in arm_unwind_emit to suppress
30417 the frame annotations. */
30418 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30419 && (TREE_NOTHROW (current_function_decl)
30420 || crtl->all_throwers_are_sibcalls))
30421 fputs("\t.cantunwind\n", f);
30423 fputs ("\t.fnend\n", f);
30427 static bool
30428 arm_emit_tls_decoration (FILE *fp, rtx x)
30430 enum tls_reloc reloc;
30431 rtx val;
30433 val = XVECEXP (x, 0, 0);
30434 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30436 output_addr_const (fp, val);
30438 switch (reloc)
30440 case TLS_GD32:
30441 fputs ("(tlsgd)", fp);
30442 break;
30443 case TLS_GD32_FDPIC:
30444 fputs ("(tlsgd_fdpic)", fp);
30445 break;
30446 case TLS_LDM32:
30447 fputs ("(tlsldm)", fp);
30448 break;
30449 case TLS_LDM32_FDPIC:
30450 fputs ("(tlsldm_fdpic)", fp);
30451 break;
30452 case TLS_LDO32:
30453 fputs ("(tlsldo)", fp);
30454 break;
30455 case TLS_IE32:
30456 fputs ("(gottpoff)", fp);
30457 break;
30458 case TLS_IE32_FDPIC:
30459 fputs ("(gottpoff_fdpic)", fp);
30460 break;
30461 case TLS_LE32:
30462 fputs ("(tpoff)", fp);
30463 break;
30464 case TLS_DESCSEQ:
30465 fputs ("(tlsdesc)", fp);
30466 break;
30467 default:
30468 gcc_unreachable ();
30471 switch (reloc)
30473 case TLS_GD32:
30474 case TLS_LDM32:
30475 case TLS_IE32:
30476 case TLS_DESCSEQ:
30477 fputs (" + (. - ", fp);
30478 output_addr_const (fp, XVECEXP (x, 0, 2));
30479 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30480 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30481 output_addr_const (fp, XVECEXP (x, 0, 3));
30482 fputc (')', fp);
30483 break;
30484 default:
30485 break;
30488 return TRUE;
30491 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30493 static void
30494 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30496 gcc_assert (size == 4);
30497 fputs ("\t.word\t", file);
30498 output_addr_const (file, x);
30499 fputs ("(tlsldo)", file);
30502 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30504 static bool
30505 arm_output_addr_const_extra (FILE *fp, rtx x)
30507 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30508 return arm_emit_tls_decoration (fp, x);
30509 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30511 char label[256];
30512 int labelno = INTVAL (XVECEXP (x, 0, 0));
30514 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30515 assemble_name_raw (fp, label);
30517 return TRUE;
30519 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30521 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30522 if (GOT_PCREL)
30523 fputs ("+.", fp);
30524 fputs ("-(", fp);
30525 output_addr_const (fp, XVECEXP (x, 0, 0));
30526 fputc (')', fp);
30527 return TRUE;
30529 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30531 output_addr_const (fp, XVECEXP (x, 0, 0));
30532 if (GOT_PCREL)
30533 fputs ("+.", fp);
30534 fputs ("-(", fp);
30535 output_addr_const (fp, XVECEXP (x, 0, 1));
30536 fputc (')', fp);
30537 return TRUE;
30539 else if (GET_CODE (x) == CONST_VECTOR)
30540 return arm_emit_vector_const (fp, x);
30542 return FALSE;
30545 /* Output assembly for a shift instruction.
30546 SET_FLAGS determines how the instruction modifies the condition codes.
30547 0 - Do not set condition codes.
30548 1 - Set condition codes.
30549 2 - Use smallest instruction. */
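/* As a sketch, for a left shift by a constant amount shift_op returns "lsl"
   and the template built below is "lsl%?\t%0, %1, %2" for SET_FLAGS == 0,
   "lsl%.\t%0, %1, %2" for SET_FLAGS == 1 and "lsl%!\t%0, %1, %2" for
   SET_FLAGS == 2; when shift_op returns no operator, a plain
   "mov%?\t%0, %1" form (with the matching flag character) is used instead.  */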
30550 const char *
30551 arm_output_shift(rtx * operands, int set_flags)
30553 char pattern[100];
30554 static const char flag_chars[3] = {'?', '.', '!'};
30555 const char *shift;
30556 HOST_WIDE_INT val;
30557 char c;
30559 c = flag_chars[set_flags];
30560 shift = shift_op(operands[3], &val);
30561 if (shift)
30563 if (val != -1)
30564 operands[2] = GEN_INT(val);
30565 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30567 else
30568 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30570 output_asm_insn (pattern, operands);
30571 return "";
30574 /* Output assembly for a WMMX immediate shift instruction. */
30575 const char *
30576 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30578 int shift = INTVAL (operands[2]);
30579 char templ[50];
30580 machine_mode opmode = GET_MODE (operands[0]);
30582 gcc_assert (shift >= 0);
30584 /* If the shift value in the register versions is > 63 (for the D qualifier),
30585 31 (for the W qualifier) or 15 (for the H qualifier), handle the overflow below. */
30586 if (((opmode == V4HImode) && (shift > 15))
30587 || ((opmode == V2SImode) && (shift > 31))
30588 || ((opmode == DImode) && (shift > 63)))
30590 if (wror_or_wsra)
30592 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30593 output_asm_insn (templ, operands);
30594 if (opmode == DImode)
30596 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30597 output_asm_insn (templ, operands);
30600 else
30602 /* The destination register will contain all zeros. */
30603 sprintf (templ, "wzero\t%%0");
30604 output_asm_insn (templ, operands);
30606 return "";
30609 if ((opmode == DImode) && (shift > 32))
30611 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30612 output_asm_insn (templ, operands);
30613 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30614 output_asm_insn (templ, operands);
30616 else
30618 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30619 output_asm_insn (templ, operands);
30621 return "";
30624 /* Output assembly for a WMMX tinsr instruction. */
30625 const char *
30626 arm_output_iwmmxt_tinsr (rtx *operands)
30628 int mask = INTVAL (operands[3]);
30629 int i;
30630 char templ[50];
30631 int units = mode_nunits[GET_MODE (operands[0])];
30632 gcc_assert ((mask & (mask - 1)) == 0);
30633 for (i = 0; i < units; ++i)
30635 if ((mask & 0x01) == 1)
30637 break;
30639 mask >>= 1;
30641 gcc_assert (i < units);
30643 switch (GET_MODE (operands[0]))
30645 case E_V8QImode:
30646 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30647 break;
30648 case E_V4HImode:
30649 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30650 break;
30651 case E_V2SImode:
30652 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30653 break;
30654 default:
30655 gcc_unreachable ();
30656 break;
30658 output_asm_insn (templ, operands);
30660 return "";
30663 /* Output an arm casesi dispatch sequence. Used by arm_casesi_internal insn.
30664 Responsible for the handling of switch statements in arm. */
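/* As an example (register numbers purely illustrative), a QImode dispatch
   table expands to a sequence along the lines of
       cmp    r0, #NCASES
       bhi    .Ldefault
       ldrb   r4, [r5, r0]
       add    pc, pc, r4, lsl #2
   followed by the branch table itself; NCASES and .Ldefault are stand-ins
   for the bound and default-label operands (ldrsb is used for signed
   offsets).  */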
30665 const char *
30666 arm_output_casesi (rtx *operands)
30668 char label[100];
30669 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30670 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30671 output_asm_insn ("cmp\t%0, %1", operands);
30672 output_asm_insn ("bhi\t%l3", operands);
30673 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
30674 switch (GET_MODE (diff_vec))
30676 case E_QImode:
30677 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30678 output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
30679 else
30680 output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
30681 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30682 break;
30683 case E_HImode:
30684 if (REGNO (operands[4]) != REGNO (operands[5]))
30686 output_asm_insn ("add\t%4, %0, %0", operands);
30687 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30688 output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
30689 else
30690 output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
30692 else
30694 output_asm_insn ("add\t%4, %5, %0", operands);
30695 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30696 output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
30697 else
30698 output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
30700 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30701 break;
30702 case E_SImode:
30703 if (flag_pic)
30705 output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
30706 output_asm_insn ("add\t%|pc, %|pc, %4", operands);
30708 else
30709 output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
30710 break;
30711 default:
30712 gcc_unreachable ();
30714 assemble_label (asm_out_file, label);
30715 output_asm_insn ("nop", operands);
30716 return "";
30719 /* Output a Thumb-1 casesi dispatch sequence. */
30720 const char *
30721 thumb1_output_casesi (rtx *operands)
30723 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30725 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30727 switch (GET_MODE(diff_vec))
30729 case E_QImode:
30730 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30731 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30732 case E_HImode:
30733 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30734 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30735 case E_SImode:
30736 return "bl\t%___gnu_thumb1_case_si";
30737 default:
30738 gcc_unreachable ();
30742 /* Output a Thumb-2 casesi instruction. */
30743 const char *
30744 thumb2_output_casesi (rtx *operands)
30746 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30748 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30750 output_asm_insn ("cmp\t%0, %1", operands);
30751 output_asm_insn ("bhi\t%l3", operands);
30752 switch (GET_MODE(diff_vec))
30754 case E_QImode:
30755 return "tbb\t[%|pc, %0]";
30756 case E_HImode:
30757 return "tbh\t[%|pc, %0, lsl #1]";
30758 case E_SImode:
30759 if (flag_pic)
30761 output_asm_insn ("adr\t%4, %l2", operands);
30762 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30763 output_asm_insn ("add\t%4, %4, %5", operands);
30764 return "bx\t%4";
30766 else
30768 output_asm_insn ("adr\t%4, %l2", operands);
30769 return "ldr\t%|pc, [%4, %0, lsl #2]";
30771 default:
30772 gcc_unreachable ();
30776 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30777 per-core tuning structs. */
30778 static int
30779 arm_issue_rate (void)
30781 return current_tune->issue_rate;
30784 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30785 static int
30786 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30788 if (DEBUG_INSN_P (insn))
30789 return more;
30791 rtx_code code = GET_CODE (PATTERN (insn));
30792 if (code == USE || code == CLOBBER)
30793 return more;
30795 if (get_attr_type (insn) == TYPE_NO_INSN)
30796 return more;
30798 return more - 1;
30801 /* Return how many instructions the scheduler should look ahead to choose
30802 the best one. */
30803 static int
30804 arm_first_cycle_multipass_dfa_lookahead (void)
30806 int issue_rate = arm_issue_rate ();
30808 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30811 /* Enable modeling of L2 auto-prefetcher. */
30812 static int
30813 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30815 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30818 const char *
30819 arm_mangle_type (const_tree type)
30821 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30822 has to be mangled as if it were in the "std" namespace. */
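/* E.g. a declaration such as "void f (__builtin_va_list);" therefore
   mangles as "_Z1fSt9__va_list" on AAPCS targets (illustrative only).  */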
30823 if (TARGET_AAPCS_BASED
30824 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30825 return "St9__va_list";
30827 /* Half-precision floating point types. */
30828 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
30830 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30831 return NULL;
30832 if (TYPE_MODE (type) == BFmode)
30833 return "u6__bf16";
30834 else
30835 return "Dh";
30838 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30839 builtin type. */
30840 if (TYPE_NAME (type) != NULL)
30841 return arm_mangle_builtin_type (type);
30843 /* Use the default mangling. */
30844 return NULL;
30847 /* Order of allocation of core registers for Thumb: this allocation is
30848 written over the corresponding initial entries of the array
30849 initialized with REG_ALLOC_ORDER. We allocate all low registers
30850 first. Saving and restoring a low register is usually cheaper than
30851 using a call-clobbered high register. */
30853 static const int thumb_core_reg_alloc_order[] =
30855 3, 2, 1, 0, 4, 5, 6, 7,
30856 12, 14, 8, 9, 10, 11
30859 /* Adjust register allocation order when compiling for Thumb. */
30861 void
30862 arm_order_regs_for_local_alloc (void)
30864 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30865 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30866 if (TARGET_THUMB)
30867 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30868 sizeof (thumb_core_reg_alloc_order));
30871 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30873 bool
30874 arm_frame_pointer_required (void)
30876 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30877 return true;
30879 /* If the function receives nonlocal gotos, it needs to save the frame
30880 pointer in the nonlocal_goto_save_area object. */
30881 if (cfun->has_nonlocal_label)
30882 return true;
30884 /* The frame pointer is required for non-leaf APCS frames. */
30885 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30886 return true;
30888 /* If we are probing the stack in the prologue, we will have a faulting
30889 instruction prior to the stack adjustment and this requires a frame
30890 pointer if we want to catch the exception using the EABI unwinder. */
30891 if (!IS_INTERRUPT (arm_current_func_type ())
30892 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30893 || flag_stack_clash_protection)
30894 && arm_except_unwind_info (&global_options) == UI_TARGET
30895 && cfun->can_throw_non_call_exceptions)
30897 HOST_WIDE_INT size = get_frame_size ();
30899 /* That's irrelevant if there is no stack adjustment. */
30900 if (size <= 0)
30901 return false;
30903 /* That's relevant only if there is a stack probe. */
30904 if (crtl->is_leaf && !cfun->calls_alloca)
30906 /* We don't have the final size of the frame so adjust. */
30907 size += 32 * UNITS_PER_WORD;
30908 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30909 return true;
30911 else
30912 return true;
30915 return false;
30918 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30919 All modes except THUMB1 have conditional execution.
30920 If we have conditional arithmetic, return false before reload to
30921 enable some ifcvt transformations. */
30922 static bool
30923 arm_have_conditional_execution (void)
30925 bool has_cond_exec, enable_ifcvt_trans;
30927 /* Only THUMB1 cannot support conditional execution. */
30928 has_cond_exec = !TARGET_THUMB1;
30930 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30931 before reload. */
30932 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30934 return has_cond_exec && !enable_ifcvt_trans;
30937 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30938 static HOST_WIDE_INT
30939 arm_vector_alignment (const_tree type)
30941 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30943 if (TARGET_AAPCS_BASED)
30944 align = MIN (align, 64);
30946 return align;
30949 static unsigned int
30950 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30952 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30954 modes->safe_push (V16QImode);
30955 modes->safe_push (V8QImode);
30957 return 0;
30960 static bool
30961 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30963 /* Vectors which aren't in packed structures will not be less aligned than
30964 the natural alignment of their element type, so this is safe. */
30965 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30966 return !is_packed;
30968 return default_builtin_vector_alignment_reachable (type, is_packed);
30971 static bool
30972 arm_builtin_support_vector_misalignment (machine_mode mode,
30973 const_tree type, int misalignment,
30974 bool is_packed)
30976 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30978 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30980 if (is_packed)
30981 return align == 1;
30983 /* If the misalignment is unknown, we should be able to handle the access
30984 so long as it is not to a member of a packed data structure. */
30985 if (misalignment == -1)
30986 return true;
30988 /* Return true if the misalignment is a multiple of the natural alignment
30989 of the vector's element type. This is probably always going to be
30990 true in practice, since we've already established that this isn't a
30991 packed access. */
30992 return ((misalignment % align) == 0);
30995 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30996 is_packed);
30999 static void
31000 arm_conditional_register_usage (void)
31002 int regno;
31004 if (TARGET_THUMB1 && optimize_size)
31006 /* When optimizing for size on Thumb-1, it's better not
31007 to use the HI regs, because of the overhead of
31008 stacking them. */
31009 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
31010 fixed_regs[regno] = call_used_regs[regno] = 1;
31013 /* The link register can be clobbered by any branch insn,
31014 but we have no way to track that at present, so mark
31015 it as unavailable. */
31016 if (TARGET_THUMB1)
31017 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
31019 if (TARGET_32BIT && TARGET_VFP_BASE)
31021 /* VFPv3 registers are disabled when earlier VFP
31022 versions are selected due to the definition of
31023 LAST_VFP_REGNUM. */
31024 for (regno = FIRST_VFP_REGNUM;
31025 regno <= LAST_VFP_REGNUM; ++ regno)
31027 fixed_regs[regno] = 0;
31028 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
31029 || regno >= FIRST_VFP_REGNUM + 32;
31031 if (TARGET_HAVE_MVE)
31032 fixed_regs[VPR_REGNUM] = 0;
31035 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
31037 regno = FIRST_IWMMXT_GR_REGNUM;
31038 /* The 2002/10/09 revision of the XScale ABI has wCG0
31039 and wCG1 as call-preserved registers. The 2002/11/21
31040 revision changed this so that all wCG registers are
31041 scratch registers. */
31042 for (regno = FIRST_IWMMXT_GR_REGNUM;
31043 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
31044 fixed_regs[regno] = 0;
31045 /* The XScale ABI has wR0 - wR9 as scratch registers,
31046 the rest as call-preserved registers. */
31047 for (regno = FIRST_IWMMXT_REGNUM;
31048 regno <= LAST_IWMMXT_REGNUM; ++ regno)
31050 fixed_regs[regno] = 0;
31051 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
31055 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
31057 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
31058 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
31060 else if (TARGET_APCS_STACK)
31062 fixed_regs[10] = 1;
31063 call_used_regs[10] = 1;
31065 /* -mcaller-super-interworking reserves r11 for calls to
31066 _interwork_r11_call_via_rN(). Making the register global
31067 is an easy way of ensuring that it remains valid for all
31068 calls. */
31069 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
31070 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
31072 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
31073 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
31074 if (TARGET_CALLER_INTERWORKING)
31075 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
31078 /* The Q and GE bits are only accessed via special ACLE patterns. */
31079 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
31080 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
31082 SUBTARGET_CONDITIONAL_REGISTER_USAGE
31085 static reg_class_t
31086 arm_preferred_rename_class (reg_class_t rclass)
31088 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
31089 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
31090 so that code size can be reduced. */
31091 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
31092 return LO_REGS;
31093 else
31094 return NO_REGS;
31097 /* Compute the attribute "length" of insn "*push_multi".
31098 So this function MUST be kept in sync with that insn pattern. */
31100 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
31102 int i, regno, hi_reg;
31103 int num_saves = XVECLEN (parallel_op, 0);
31105 /* ARM mode. */
31106 if (TARGET_ARM)
31107 return 4;
31108 /* Thumb1 mode. */
31109 if (TARGET_THUMB1)
31110 return 2;
31112 /* Thumb2 mode. */
31113 regno = REGNO (first_op);
31114 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
31115 list fits in 8 bits. Normally this means all registers in the list must be
31116 LO_REGS, that is (R0-R7). If any HI_REGS are used, we must use a 32-bit
31117 encoding. The one exception is PUSH, where LR (a HI_REG) can still be used
31118 with the 16-bit encoding. */
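/* For instance, "push {r0-r7, lr}" still has a 16-bit encoding, whereas a
   hypothetical "push {r0, r8}" or "stmdb sp!, {r4, r9}" needs the 32-bit
   form, so the lengths computed below are 2 and 4 bytes respectively.  */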
31119 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
31120 for (i = 1; i < num_saves && !hi_reg; i++)
31122 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
31123 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
31126 if (!hi_reg)
31127 return 2;
31128 return 4;
31131 /* Compute the attribute "length" of insn. Currently, this function is used
31132 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
31133 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
31134 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
31135 true if OPERANDS contains an insn which explicitly updates the base register. */
31138 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
31140 /* ARM mode. */
31141 if (TARGET_ARM)
31142 return 4;
31143 /* Thumb1 mode. */
31144 if (TARGET_THUMB1)
31145 return 2;
31147 rtx parallel_op = operands[0];
31148 /* Initialize to elements number of PARALLEL. */
31149 unsigned indx = XVECLEN (parallel_op, 0) - 1;
31150 /* Initialize the value to base register. */
31151 unsigned regno = REGNO (operands[1]);
31152 /* Skip the return and write-back patterns;
31153 we only need the register pop pattern for the later analysis. */
31154 unsigned first_indx = 0;
31155 first_indx += return_pc ? 1 : 0;
31156 first_indx += write_back_p ? 1 : 0;
31158 /* A pop operation can be done through LDM or POP. If the base register is SP
31159 and write-back is used, then LDM is an alias of POP. */
31160 bool pop_p = (regno == SP_REGNUM && write_back_p);
31161 bool ldm_p = !pop_p;
31163 /* Check base register for LDM. */
31164 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
31165 return 4;
31167 /* Check each register in the list. */
31168 for (; indx >= first_indx; indx--)
31170 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
31171 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
31172 comment in arm_attr_length_push_multi. */
31173 if (REGNO_REG_CLASS (regno) == HI_REGS
31174 && (regno != PC_REGNUM || ldm_p))
31175 return 4;
31178 return 2;
31181 /* Compute the number of instructions emitted by output_move_double. */
31183 arm_count_output_move_double_insns (rtx *operands)
31185 int count;
31186 rtx ops[2];
31187 /* output_move_double may modify the operands array, so call it
31188 here on a copy of the array. */
31189 ops[0] = operands[0];
31190 ops[1] = operands[1];
31191 output_move_double (ops, false, &count);
31192 return count;
31195 /* Same as above, but operands are a register/memory pair in SImode.
31196 Assumes operands has the base register in position 0 and memory in position
31197 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
31199 arm_count_ldrdstrd_insns (rtx *operands, bool load)
31201 int count;
31202 rtx ops[2];
31203 int regnum, memnum;
31204 if (load)
31205 regnum = 0, memnum = 1;
31206 else
31207 regnum = 1, memnum = 0;
31208 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
31209 ops[memnum] = adjust_address (operands[2], DImode, 0);
31210 output_move_double (ops, false, &count);
31211 return count;
31216 vfp3_const_double_for_fract_bits (rtx operand)
31218 REAL_VALUE_TYPE r0;
31220 if (!CONST_DOUBLE_P (operand))
31221 return 0;
31223 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
31224 if (exact_real_inverse (DFmode, &r0)
31225 && !REAL_VALUE_NEGATIVE (r0))
31227 if (exact_real_truncate (DFmode, &r0))
31229 HOST_WIDE_INT value = real_to_integer (&r0);
31230 value = value & 0xffffffff;
31231 if ((value != 0) && ( (value & (value - 1)) == 0))
31233 int ret = exact_log2 (value);
31234 gcc_assert (IN_RANGE (ret, 0, 31));
31235 return ret;
31239 return 0;
31242 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
31243 log2 is in [1, 32], return that log2. Otherwise return -1.
31244 This is used in the patterns for vcvt.s32.f32 floating-point to
31245 fixed-point conversions. */
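/* For example, a CONST_DOUBLE of 4.0 yields 2 and 65536.0 yields 16, both
   powers of two with log2 in [1, 32], whereas 3.0, -8.0 or NaN yield -1.  */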
31248 vfp3_const_double_for_bits (rtx x)
31250 const REAL_VALUE_TYPE *r;
31252 if (!CONST_DOUBLE_P (x))
31253 return -1;
31255 r = CONST_DOUBLE_REAL_VALUE (x);
31257 if (REAL_VALUE_NEGATIVE (*r)
31258 || REAL_VALUE_ISNAN (*r)
31259 || REAL_VALUE_ISINF (*r)
31260 || !real_isinteger (r, SFmode))
31261 return -1;
31263 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
31265 /* The exact_log2 above will have returned -1 if this is
31266 not an exact log2. */
31267 if (!IN_RANGE (hwint, 1, 32))
31268 return -1;
31270 return hwint;
31274 /* Emit a memory barrier around an atomic sequence according to MODEL. */
31276 static void
31277 arm_pre_atomic_barrier (enum memmodel model)
31279 if (need_atomic_barrier_p (model, true))
31280 emit_insn (gen_memory_barrier ());
31283 static void
31284 arm_post_atomic_barrier (enum memmodel model)
31286 if (need_atomic_barrier_p (model, false))
31287 emit_insn (gen_memory_barrier ());
31290 /* Emit the load-exclusive and store-exclusive instructions.
31291 Use acquire and release versions if necessary. */
31293 static void
31294 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
31296 rtx (*gen) (rtx, rtx);
31298 if (acq)
31300 switch (mode)
31302 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
31303 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
31304 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
31305 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
31306 default:
31307 gcc_unreachable ();
31310 else
31312 switch (mode)
31314 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
31315 case E_HImode: gen = gen_arm_load_exclusivehi; break;
31316 case E_SImode: gen = gen_arm_load_exclusivesi; break;
31317 case E_DImode: gen = gen_arm_load_exclusivedi; break;
31318 default:
31319 gcc_unreachable ();
31323 emit_insn (gen (rval, mem));
31326 static void
31327 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
31328 rtx mem, bool rel)
31330 rtx (*gen) (rtx, rtx, rtx);
31332 if (rel)
31334 switch (mode)
31336 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
31337 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
31338 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
31339 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
31340 default:
31341 gcc_unreachable ();
31344 else
31346 switch (mode)
31348 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
31349 case E_HImode: gen = gen_arm_store_exclusivehi; break;
31350 case E_SImode: gen = gen_arm_store_exclusivesi; break;
31351 case E_DImode: gen = gen_arm_store_exclusivedi; break;
31352 default:
31353 gcc_unreachable ();
31357 emit_insn (gen (bval, rval, mem));
31360 /* Mark the previous jump instruction as unlikely. */
31362 static void
31363 emit_unlikely_jump (rtx insn)
31365 rtx_insn *jump = emit_jump_insn (insn);
31366 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31369 /* Expand a compare and swap pattern. */
31371 void
31372 arm_expand_compare_and_swap (rtx operands[])
31374 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31375 machine_mode mode, cmp_mode;
31377 bval = operands[0];
31378 rval = operands[1];
31379 mem = operands[2];
31380 oldval = operands[3];
31381 newval = operands[4];
31382 is_weak = operands[5];
31383 mod_s = operands[6];
31384 mod_f = operands[7];
31385 mode = GET_MODE (mem);
31387 /* Normally the succ memory model must be stronger than fail, but in the
31388 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31389 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31391 if (TARGET_HAVE_LDACQ
31392 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31393 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31394 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31396 switch (mode)
31398 case E_QImode:
31399 case E_HImode:
31400 /* For narrow modes, we're going to perform the comparison in SImode,
31401 so do the zero-extension now. */
31402 rval = gen_reg_rtx (SImode);
31403 oldval = convert_modes (SImode, mode, oldval, true);
31404 /* FALLTHRU */
31406 case E_SImode:
31407 /* Force the value into a register if needed. We waited until after
31408 the zero-extension above to do this properly. */
31409 if (!arm_add_operand (oldval, SImode))
31410 oldval = force_reg (SImode, oldval);
31411 break;
31413 case E_DImode:
31414 if (!cmpdi_operand (oldval, mode))
31415 oldval = force_reg (mode, oldval);
31416 break;
31418 default:
31419 gcc_unreachable ();
31422 if (TARGET_THUMB1)
31423 cmp_mode = E_SImode;
31424 else
31425 cmp_mode = CC_Zmode;
31427 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31428 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31429 oldval, newval, is_weak, mod_s, mod_f));
31431 if (mode == QImode || mode == HImode)
31432 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31434 /* In all cases, we arrange for success to be signaled by Z set.
31435 This arrangement allows for the boolean result to be used directly
31436 in a subsequent branch, post optimization. For Thumb-1 targets, the
31437 boolean negation of the result is also stored in bval because the Thumb-1
31438 backend lacks dependency tracking for the CC flag, flag-setting not
31439 being represented at the RTL level. */
31440 if (TARGET_THUMB1)
31441 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31442 else
31444 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31445 emit_insn (gen_rtx_SET (bval, x));
31449 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31450 another memory store between the load-exclusive and store-exclusive can
31451 reset the monitor from Exclusive to Open state. This means we must wait
31452 until after reload to split the pattern, lest we get a register spill in
31453 the middle of the atomic sequence. Success of the compare and swap is
31454 indicated by the Z flag being set for 32-bit targets and by neg_bval being
31455 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by the
31456 atomic_compare_and_swapmode standard pattern in operand 0). */
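/* Roughly, for a strong 32-bit compare-and-swap the split code follows the
   classic exclusive-access loop (memory-model barriers elided):
       1:  ldrex   rval, [mem]
           cmp     rval, oldval
           bne     2f
           strex   tmp, newval, [mem]
           cmp     tmp, #0
           bne     1b
       2:
   where rval, tmp and the labels stand for the operands created below.  */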
31458 void
31459 arm_split_compare_and_swap (rtx operands[])
31461 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31462 machine_mode mode;
31463 enum memmodel mod_s, mod_f;
31464 bool is_weak;
31465 rtx_code_label *label1, *label2;
31466 rtx x, cond;
31468 rval = operands[1];
31469 mem = operands[2];
31470 oldval = operands[3];
31471 newval = operands[4];
31472 is_weak = (operands[5] != const0_rtx);
31473 mod_s_rtx = operands[6];
31474 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31475 mod_f = memmodel_from_int (INTVAL (operands[7]));
31476 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31477 mode = GET_MODE (mem);
31479 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31481 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31482 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31484 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31485 a full barrier is emitted after the store-release. */
31486 if (is_armv8_sync)
31487 use_acquire = false;
31489 /* Checks whether a barrier is needed and emits one accordingly. */
31490 if (!(use_acquire || use_release))
31491 arm_pre_atomic_barrier (mod_s);
31493 label1 = NULL;
31494 if (!is_weak)
31496 label1 = gen_label_rtx ();
31497 emit_label (label1);
31499 label2 = gen_label_rtx ();
31501 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31503 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31504 as required to communicate with arm_expand_compare_and_swap. */
31505 if (TARGET_32BIT)
31507 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31508 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31509 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31510 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31511 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31513 else
31515 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31516 if (thumb1_cmpneg_operand (oldval, SImode))
31518 rtx src = rval;
31519 if (!satisfies_constraint_L (oldval))
31521 gcc_assert (satisfies_constraint_J (oldval));
31523 /* For such immediates, ADDS needs the source and destination regs
31524 to be the same.
31526 Normally this would be handled by RA, but this is all happening
31527 after RA. */
31528 emit_move_insn (neg_bval, rval);
31529 src = neg_bval;
31532 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31533 label2, cond));
31535 else
31537 emit_move_insn (neg_bval, const1_rtx);
31538 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31542 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31544 /* Weak or strong, we want EQ to be true for success, so that we
31545 match the flags that we got from the compare above. */
31546 if (TARGET_32BIT)
31548 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31549 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31550 emit_insn (gen_rtx_SET (cond, x));
31553 if (!is_weak)
31555 /* Z is set to boolean value of !neg_bval, as required to communicate
31556 with arm_expand_compare_and_swap. */
31557 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31558 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31561 if (!is_mm_relaxed (mod_f))
31562 emit_label (label2);
31564 /* Checks whether a barrier is needed and emits one accordingly. */
31565 if (is_armv8_sync
31566 || !(use_acquire || use_release))
31567 arm_post_atomic_barrier (mod_s);
31569 if (is_mm_relaxed (mod_f))
31570 emit_label (label2);
31573 /* Split an atomic operation pattern. Operation is given by CODE and is one
31574 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31575 operation). Operation is performed on the content at MEM and on VALUE
31576 following the memory model MODEL_RTX. The content at MEM before and after
31577 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31578 success of the operation is returned in COND. Using a scratch register or
31579 an operand register for these determines what result is returned for that
31580 pattern. */
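/* As a sketch, an __atomic_fetch_add on a 32-bit object is split into a
   loop of the shape (barriers elided):
       1:  ldrex   old_out, [mem]
           add     new_out, old_out, value
           strex   cond, new_out, [mem]
           cmp     cond, #0
           bne     1b  */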
31582 void
31583 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31584 rtx value, rtx model_rtx, rtx cond)
31586 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31587 machine_mode mode = GET_MODE (mem);
31588 machine_mode wmode = (mode == DImode ? DImode : SImode);
31589 rtx_code_label *label;
31590 bool all_low_regs, bind_old_new;
31591 rtx x;
31593 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31595 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31596 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31598 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31599 a full barrier is emitted after the store-release. */
31600 if (is_armv8_sync)
31601 use_acquire = false;
31603 /* Checks whether a barrier is needed and emits one accordingly. */
31604 if (!(use_acquire || use_release))
31605 arm_pre_atomic_barrier (model);
31607 label = gen_label_rtx ();
31608 emit_label (label);
31610 if (new_out)
31611 new_out = gen_lowpart (wmode, new_out);
31612 if (old_out)
31613 old_out = gen_lowpart (wmode, old_out);
31614 else
31615 old_out = new_out;
31616 value = simplify_gen_subreg (wmode, value, mode, 0);
31618 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31620 /* Does the operation require the destination and first operand to use the
31621 same register? This is decided by the register constraints of the relevant
31622 insn patterns in thumb1.md. */
31623 gcc_assert (!new_out || REG_P (new_out));
31624 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31625 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31626 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31627 bind_old_new =
31628 (TARGET_THUMB1
31629 && code != SET
31630 && code != MINUS
31631 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31633 /* We want to return the old value while putting the result of the operation
31634 in the same register as the old value, so copy the old value over to the
31635 destination register and use that register for the operation. */
31636 if (old_out && bind_old_new)
31638 emit_move_insn (new_out, old_out);
31639 old_out = new_out;
31642 switch (code)
31644 case SET:
31645 new_out = value;
31646 break;
31648 case NOT:
31649 x = gen_rtx_AND (wmode, old_out, value);
31650 emit_insn (gen_rtx_SET (new_out, x));
31651 x = gen_rtx_NOT (wmode, new_out);
31652 emit_insn (gen_rtx_SET (new_out, x));
31653 break;
31655 case MINUS:
31656 if (CONST_INT_P (value))
31658 value = gen_int_mode (-INTVAL (value), wmode);
31659 code = PLUS;
31661 /* FALLTHRU */
31663 case PLUS:
31664 if (mode == DImode)
31666 /* DImode plus/minus need to clobber flags. */
31667 /* The adddi3 and subdi3 patterns are incorrectly written so that
31668 they require matching operands, even when we could easily support
31669 three operands. Thankfully, this can be fixed up post-splitting,
31670 as the individual add+adc patterns do accept three operands and
31671 post-reload cprop can make these moves go away. */
31672 emit_move_insn (new_out, old_out);
31673 if (code == PLUS)
31674 x = gen_adddi3 (new_out, new_out, value);
31675 else
31676 x = gen_subdi3 (new_out, new_out, value);
31677 emit_insn (x);
31678 break;
31680 /* FALLTHRU */
31682 default:
31683 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31684 emit_insn (gen_rtx_SET (new_out, x));
31685 break;
31688 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31689 use_release);
31691 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31692 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31694 /* Checks whether a barrier is needed and emits one accordingly. */
31695 if (is_armv8_sync
31696 || !(use_acquire || use_release))
31697 arm_post_atomic_barrier (model);
31700 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
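/* E.g. V16QImode maps to V16BImode and V4SImode to V4BImode; a mode with
   an unsupported number of lanes yields no predicate mode.  */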
31701 opt_machine_mode
31702 arm_mode_to_pred_mode (machine_mode mode)
31704 switch (GET_MODE_NUNITS (mode))
31706 case 16: return V16BImode;
31707 case 8: return V8BImode;
31708 case 4: return V4BImode;
31709 case 2: return V2QImode;
31711 return opt_machine_mode ();
31714 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31715 If CAN_INVERT, store either the result or its inverse in TARGET
31716 and return true if TARGET contains the inverse. If !CAN_INVERT,
31717 always store the result in TARGET, never its inverse.
31719 Note that the handling of floating-point comparisons is not
31720 IEEE compliant. */
31722 bool
31723 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31724 bool can_invert)
31726 machine_mode cmp_result_mode = GET_MODE (target);
31727 machine_mode cmp_mode = GET_MODE (op0);
31729 bool inverted;
31731 /* MVE supports more comparisons than Neon. */
31732 if (TARGET_HAVE_MVE)
31733 inverted = false;
31734 else
31735 switch (code)
31737 /* For these we need to compute the inverse of the requested
31738 comparison. */
31739 case UNORDERED:
31740 case UNLT:
31741 case UNLE:
31742 case UNGT:
31743 case UNGE:
31744 case UNEQ:
31745 case NE:
31746 code = reverse_condition_maybe_unordered (code);
31747 if (!can_invert)
31749 /* Recursively emit the inverted comparison into a temporary
31750 and then store its inverse in TARGET. This avoids reusing
31751 TARGET (which for integer NE could be one of the inputs). */
31752 rtx tmp = gen_reg_rtx (cmp_result_mode);
31753 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31754 gcc_unreachable ();
31755 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31756 return false;
31758 inverted = true;
31759 break;
31761 default:
31762 inverted = false;
31763 break;
31766 switch (code)
31768 /* These are natively supported by Neon for zero comparisons, but otherwise
31769 require the operands to be swapped. For MVE, we can only compare
31770 registers. */
31771 case LE:
31772 case LT:
31773 if (!TARGET_HAVE_MVE)
31774 if (op1 != CONST0_RTX (cmp_mode))
31776 code = swap_condition (code);
31777 std::swap (op0, op1);
31779 /* Fall through. */
31781 /* These are natively supported by Neon for both register and zero
31782 operands. MVE supports registers only. */
31783 case EQ:
31784 case GE:
31785 case GT:
31786 case NE:
31787 if (TARGET_HAVE_MVE)
31789 switch (GET_MODE_CLASS (cmp_mode))
31791 case MODE_VECTOR_INT:
31792 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31793 op0, force_reg (cmp_mode, op1)));
31794 break;
31795 case MODE_VECTOR_FLOAT:
31796 if (TARGET_HAVE_MVE_FLOAT)
31797 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31798 op0, force_reg (cmp_mode, op1)));
31799 else
31800 gcc_unreachable ();
31801 break;
31802 default:
31803 gcc_unreachable ();
31806 else
31807 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31808 return inverted;
31810 /* These are natively supported for register operands only.
31811 Comparisons with zero aren't useful and should be folded
31812 or canonicalized by target-independent code. */
31813 case GEU:
31814 case GTU:
31815 if (TARGET_HAVE_MVE)
31816 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31817 op0, force_reg (cmp_mode, op1)));
31818 else
31819 emit_insn (gen_neon_vc (code, cmp_mode, target,
31820 op0, force_reg (cmp_mode, op1)));
31821 return inverted;
31823 /* These require the operands to be swapped and likewise do not
31824 support comparisons with zero. */
31825 case LEU:
31826 case LTU:
31827 if (TARGET_HAVE_MVE)
31828 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31829 force_reg (cmp_mode, op1), op0));
31830 else
31831 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31832 target, force_reg (cmp_mode, op1), op0));
31833 return inverted;
31835 /* These need a combination of two comparisons. */
31836 case LTGT:
31837 case ORDERED:
31839 /* Operands are LTGT iff (a > b || a < b).
31840 Operands are ORDERED iff (a > b || a <= b). */
31841 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31842 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31843 rtx_code alt_code = (code == LTGT ? LT : LE);
31844 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31845 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31846 gcc_unreachable ();
31847 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31848 gt_res, alt_res)));
31849 return inverted;
31852 default:
31853 gcc_unreachable ();
31858 #define MAX_VECT_LEN 16
31860 struct expand_vec_perm_d
31862 rtx target, op0, op1;
31863 vec_perm_indices perm;
31864 machine_mode vmode;
31865 bool one_vector_p;
31866 bool testing_p;
31869 /* Generate a variable permutation. */
31871 static void
31872 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31874 machine_mode vmode = GET_MODE (target);
31875 bool one_vector_p = rtx_equal_p (op0, op1);
31877 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31878 gcc_checking_assert (GET_MODE (op0) == vmode);
31879 gcc_checking_assert (GET_MODE (op1) == vmode);
31880 gcc_checking_assert (GET_MODE (sel) == vmode);
31881 gcc_checking_assert (TARGET_NEON);
31883 if (one_vector_p)
31885 if (vmode == V8QImode)
31886 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31887 else
31888 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31890 else
31892 rtx pair;
31894 if (vmode == V8QImode)
31896 pair = gen_reg_rtx (V16QImode);
31897 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31898 pair = gen_lowpart (TImode, pair);
31899 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31901 else
31903 pair = gen_reg_rtx (OImode);
31904 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31905 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31910 void
31911 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31913 machine_mode vmode = GET_MODE (target);
31914 unsigned int nelt = GET_MODE_NUNITS (vmode);
31915 bool one_vector_p = rtx_equal_p (op0, op1);
31916 rtx mask;
31918 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31919 numbering of elements for big-endian, we must reverse the order. */
31920 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31922 /* The VTBL instruction does not use a modulo index, so we must take care
31923 of that ourselves. */
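/* For a single V8QImode input, for instance, the mask built below is 7, so
   a selector element of 9 selects lane 1, matching the modulo semantics
   required by VEC_PERM_EXPR.  */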
31924 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31925 mask = gen_const_vec_duplicate (vmode, mask);
31926 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31928 arm_expand_vec_perm_1 (target, op0, op1, sel);
31931 /* Map lane ordering between architectural lane order, and GCC lane order,
31932 taking into account ABI. See comment above output_move_neon for details. */
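/* Under this mapping, big-endian V4SImode lanes 0, 1, 2, 3 become
   architectural lanes 1, 0, 3, 2 (reversed within each D register); on
   little-endian targets lanes are returned unchanged.  */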
31934 static int
31935 neon_endian_lane_map (machine_mode mode, int lane)
31937 if (BYTES_BIG_ENDIAN)
31939 int nelems = GET_MODE_NUNITS (mode);
31940 /* Reverse lane order. */
31941 lane = (nelems - 1 - lane);
31942 /* Reverse D register order, to match ABI. */
31943 if (GET_MODE_SIZE (mode) == 16)
31944 lane = lane ^ (nelems / 2);
31946 return lane;
31949 /* Some permutations index into pairs of vectors; this is a helper function
31950 to map indexes into those pairs of vectors. */
31952 static int
31953 neon_pair_endian_lane_map (machine_mode mode, int lane)
31955 int nelem = GET_MODE_NUNITS (mode);
31956 if (BYTES_BIG_ENDIAN)
31957 lane =
31958 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31959 return lane;
31962 /* Generate or test for an insn that supports a constant permutation. */
31964 /* Recognize patterns for the VUZP insns. */
31966 static bool
31967 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31969 unsigned int i, odd, mask, nelt = d->perm.length ();
31970 rtx out0, out1, in0, in1;
31971 int first_elem;
31972 int swap_nelt;
31974 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31975 return false;
31977 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31978 big endian pattern on 64 bit vectors, so we correct for that. */
31979 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31980 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31982 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31984 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31985 odd = 0;
31986 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31987 odd = 1;
31988 else
31989 return false;
31990 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31992 for (i = 0; i < nelt; i++)
31994 unsigned elt =
31995 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31996 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31997 return false;
32000 /* Success! */
32001 if (d->testing_p)
32002 return true;
32004 in0 = d->op0;
32005 in1 = d->op1;
32006 if (swap_nelt != 0)
32007 std::swap (in0, in1);
32009 out0 = d->target;
32010 out1 = gen_reg_rtx (d->vmode);
32011 if (odd)
32012 std::swap (out0, out1);
32014 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
32015 return true;
32018 /* Recognize patterns for the VZIP insns. */
32020 static bool
32021 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
32023 unsigned int i, high, mask, nelt = d->perm.length ();
32024 rtx out0, out1, in0, in1;
32025 int first_elem;
32026 bool is_swapped;
32028 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
32029 return false;
32031 is_swapped = BYTES_BIG_ENDIAN;
32033 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
32035 high = nelt / 2;
32036 if (first_elem == neon_endian_lane_map (d->vmode, high))
32037 ;
32038 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
32039 high = 0;
32040 else
32041 return false;
32042 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
32044 for (i = 0; i < nelt / 2; i++)
32046 unsigned elt =
32047 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
32048 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
32049 != elt)
32050 return false;
32051 elt =
32052 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
32053 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
32054 != elt)
32055 return false;
32058 /* Success! */
32059 if (d->testing_p)
32060 return true;
32062 in0 = d->op0;
32063 in1 = d->op1;
32064 if (is_swapped)
32065 std::swap (in0, in1);
32067 out0 = d->target;
32068 out1 = gen_reg_rtx (d->vmode);
32069 if (high)
32070 std::swap (out0, out1);
32072 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
32073 return true;
32076 /* Recognize patterns for the VREV insns. */
32077 static bool
32078 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
32080 unsigned int i, j, diff, nelt = d->perm.length ();
32081 rtx (*gen) (machine_mode, rtx, rtx);
32083 if (!d->one_vector_p)
32084 return false;
32086 diff = d->perm[0];
32087 switch (diff)
32089 case 7:
32090 switch (d->vmode)
32092 case E_V16QImode:
32093 case E_V8QImode:
32094 gen = gen_neon_vrev64;
32095 break;
32096 default:
32097 return false;
32099 break;
32100 case 3:
32101 switch (d->vmode)
32103 case E_V16QImode:
32104 case E_V8QImode:
32105 gen = gen_neon_vrev32;
32106 break;
32107 case E_V8HImode:
32108 case E_V4HImode:
32109 case E_V8HFmode:
32110 case E_V4HFmode:
32111 gen = gen_neon_vrev64;
32112 break;
32113 default:
32114 return false;
32116 break;
32117 case 1:
32118 switch (d->vmode)
32120 case E_V16QImode:
32121 case E_V8QImode:
32122 gen = gen_neon_vrev16;
32123 break;
32124 case E_V8HImode:
32125 case E_V4HImode:
32126 gen = gen_neon_vrev32;
32127 break;
32128 case E_V4SImode:
32129 case E_V2SImode:
32130 case E_V4SFmode:
32131 case E_V2SFmode:
32132 gen = gen_neon_vrev64;
32133 break;
32134 default:
32135 return false;
32137 break;
32138 default:
32139 return false;
32142 for (i = 0; i < nelt ; i += diff + 1)
32143 for (j = 0; j <= diff; j += 1)
32145 /* This is guaranteed to be true as the value of diff
32146 is 7, 3 or 1, and we should have enough elements in the
32147 queue to generate this. Getting a vector mask with any
32148 other value of diff implies that something has gone
32149 wrong by the time we get here. */
32150 gcc_assert (i + j < nelt);
32151 if (d->perm[i + j] != i + diff - j)
32152 return false;
32155 /* Success! */
32156 if (d->testing_p)
32157 return true;
32159 emit_insn (gen (d->vmode, d->target, d->op0));
32160 return true;
32163 /* Recognize patterns for the VTRN insns. */
32165 static bool
32166 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
32168 unsigned int i, odd, mask, nelt = d->perm.length ();
32169 rtx out0, out1, in0, in1;
32171 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
32172 return false;
32174 /* Note that these are little-endian tests. Adjust for big-endian later. */
32175 if (d->perm[0] == 0)
32176 odd = 0;
32177 else if (d->perm[0] == 1)
32178 odd = 1;
32179 else
32180 return false;
32181 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
32183 for (i = 0; i < nelt; i += 2)
32185 if (d->perm[i] != i + odd)
32186 return false;
32187 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
32188 return false;
32191 /* Success! */
32192 if (d->testing_p)
32193 return true;
32195 in0 = d->op0;
32196 in1 = d->op1;
32197 if (BYTES_BIG_ENDIAN)
32199 std::swap (in0, in1);
32200 odd = !odd;
32203 out0 = d->target;
32204 out1 = gen_reg_rtx (d->vmode);
32205 if (odd)
32206 std::swap (out0, out1);
32208 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
32209 return true;
32212 /* Recognize patterns for the VEXT insns. */
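/* For illustration, on little-endian V8QImode with two input vectors
   the selector { 3, 4, 5, 6, 7, 8, 9, 10 } is a run of consecutive
   indices starting at 3, so it is matched as VEXT with #3 as the
   extraction offset.  */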
32214 static bool
32215 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
32217 unsigned int i, nelt = d->perm.length ();
32218 rtx offset;
32220 unsigned int location;
32222 unsigned int next = d->perm[0] + 1;
32224 /* TODO: Handle GCC's numbering of elements for big-endian. */
32225 if (BYTES_BIG_ENDIAN)
32226 return false;
32228 /* Check if the extracted indexes are increasing by one. */
32229 for (i = 1; i < nelt; next++, i++)
32231 /* If we hit the most significant element of the 2nd vector in
32232 the previous iteration, no need to test further. */
32233 if (next == 2 * nelt)
32234 return false;
32236 /* If we are operating on only one vector: it could be a
32237 rotation. If there are only two elements of size < 64, let
32238 arm_evpc_neon_vrev catch it. */
32239 if (d->one_vector_p && (next == nelt))
32241 if ((nelt == 2) && (d->vmode != V2DImode))
32242 return false;
32243 else
32244 next = 0;
32247 if (d->perm[i] != next)
32248 return false;
32251 location = d->perm[0];
32253 /* Success! */
32254 if (d->testing_p)
32255 return true;
32257 offset = GEN_INT (location);
32259 if (d->vmode == E_DImode)
32260 return false;
32262 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
32263 return true;
32266 /* The NEON VTBL instruction is a fully variable permutation that's even
32267 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32268 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32269 can do slightly better by expanding this as a constant where we don't
32270 have to apply a mask. */
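/* For illustration (an arbitrary selector, chosen only as an example):
   a V8QImode mask such as { 0, 3, 3, 9, 12, 1, 7, 15 } has none of the
   vuzp/vzip/vrev/vtrn/vext structure, so it is loaded as a constant
   index vector and handed to arm_expand_vec_perm_1 below.  */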
32272 static bool
32273 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32275 rtx rperm[MAX_VECT_LEN], sel;
32276 machine_mode vmode = d->vmode;
32277 unsigned int i, nelt = d->perm.length ();
32279 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32280 numbering of elements for big-endian, we must reverse the order. */
32281 if (BYTES_BIG_ENDIAN)
32282 return false;
32284 if (d->testing_p)
32285 return true;
32287 /* Generic code will try constant permutation twice. Once with the
32288 original mode and again with the elements lowered to QImode.
32289 So wait and don't do the selector expansion ourselves. */
32290 if (vmode != V8QImode && vmode != V16QImode)
32291 return false;
32293 for (i = 0; i < nelt; ++i)
32294 rperm[i] = GEN_INT (d->perm[i]);
32295 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32296 sel = force_reg (vmode, sel);
32298 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32299 return true;
32302 static bool
32303 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32305 /* Check if the input mask matches vext before reordering the
32306 operands. */
32307 if (TARGET_NEON)
32308 if (arm_evpc_neon_vext (d))
32309 return true;
32311 /* The pattern matching functions above are written to look for a small
32312 number to begin the sequence (0, 1, N/2). If we begin with an index
32313 from the second operand, we can swap the operands. */
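/* For example, a V8QImode selector { 8, 0, 9, 1, 10, 2, 11, 3 } starts
   with an index from the second operand; rotating the inputs turns it
   into { 0, 8, 1, 9, 2, 10, 3, 11 } with OP0 and OP1 exchanged, which
   the VZIP matcher above can then recognize.  */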
32314 unsigned int nelt = d->perm.length ();
32315 if (d->perm[0] >= nelt)
32317 d->perm.rotate_inputs (1);
32318 std::swap (d->op0, d->op1);
32321 if (TARGET_NEON)
32323 if (arm_evpc_neon_vuzp (d))
32324 return true;
32325 if (arm_evpc_neon_vzip (d))
32326 return true;
32327 if (arm_evpc_neon_vrev (d))
32328 return true;
32329 if (arm_evpc_neon_vtrn (d))
32330 return true;
32331 return arm_evpc_neon_vtbl (d);
32333 return false;
32336 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32338 static bool
32339 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32340 rtx target, rtx op0, rtx op1,
32341 const vec_perm_indices &sel)
32343 if (vmode != op_mode)
32344 return false;
32346 struct expand_vec_perm_d d;
32347 int i, nelt, which;
32349 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32350 return false;
32352 d.target = target;
32353 if (op0)
32355 rtx nop0 = force_reg (vmode, op0);
32356 if (op0 == op1)
32357 op1 = nop0;
32358 op0 = nop0;
32360 if (op1)
32361 op1 = force_reg (vmode, op1);
32362 d.op0 = op0;
32363 d.op1 = op1;
32365 d.vmode = vmode;
32366 gcc_assert (VECTOR_MODE_P (d.vmode));
32367 d.testing_p = !target;
32369 nelt = GET_MODE_NUNITS (d.vmode);
32370 for (i = which = 0; i < nelt; ++i)
32372 int ei = sel[i] & (2 * nelt - 1);
32373 which |= (ei < nelt ? 1 : 2);
32376 switch (which)
32378 default:
32379 gcc_unreachable ();
32381 case 3:
32382 d.one_vector_p = false;
32383 if (d.testing_p || !rtx_equal_p (op0, op1))
32384 break;
32386 /* The elements of PERM do not suggest that only the first operand
32387 is used, but both operands are identical. Allow easier matching
32388 of the permutation by folding the permutation into the single
32389 input vector. */
32390 /* FALLTHRU */
32391 case 2:
32392 d.op0 = op1;
32393 d.one_vector_p = true;
32394 break;
32396 case 1:
32397 d.op1 = op0;
32398 d.one_vector_p = true;
32399 break;
32402 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32404 if (!d.testing_p)
32405 return arm_expand_vec_perm_const_1 (&d);
32407 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32408 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32409 if (!d.one_vector_p)
32410 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32412 start_sequence ();
32413 bool ret = arm_expand_vec_perm_const_1 (&d);
32414 end_sequence ();
32416 return ret;
32419 bool
32420 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32422 /* If we are soft float and either ldrd is available or the mode
32423 fits in a single word, then all auto increment forms are ok. */
32424 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32425 return true;
32427 switch (code)
32429 /* Post increment and Pre Decrement are supported for all
32430 instruction forms except for vector forms. */
32431 case ARM_POST_INC:
32432 case ARM_PRE_DEC:
32433 if (VECTOR_MODE_P (mode))
32435 if (code != ARM_PRE_DEC)
32436 return true;
32437 else
32438 return false;
32441 return true;
32443 case ARM_POST_DEC:
32444 case ARM_PRE_INC:
32445 /* Without LDRD, if the mode size is greater than the
32446 word size there is no point in auto-incrementing because
32447 ldm and stm will not have these forms. */
32448 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32449 return false;
32451 /* Vector and floating point modes do not support
32452 these auto increment forms. */
32453 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32454 return false;
32456 return true;
32458 default:
32459 return false;
32463 return false;
32466 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32467 on ARM, since we know that shifts by negative amounts are no-ops.
32468 Additionally, the default expansion code is not available or suitable
32469 for post-reload insn splits (this can occur when the register allocator
32470 chooses not to do a shift in NEON).
32472 This function is used in both initial expand and post-reload splits, and
32473 handles all kinds of 64-bit shifts.
32475 Input requirements:
32476 - It is safe for the input and output to be the same register, but
32477 early-clobber rules apply for the shift amount and scratch registers.
32478 - Shift by register requires both scratch registers. In all other cases
32479 the scratch registers may be NULL.
32480 - Ashiftrt by a register also clobbers the CC register. */
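/* As an illustrative sketch (shift amounts chosen arbitrarily), a
   constant logical right shift of a DImode value by 10 is emitted as
   roughly:
     out_low  = in_low >> 10;
     out_low |= in_high << 22;   /+ i.e. 32 - 10 +/
     out_high = in_high >> 10;
   while shifts by 32 or more only use the high input word.  */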
32481 void
32482 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32483 rtx amount, rtx scratch1, rtx scratch2)
32485 rtx out_high = gen_highpart (SImode, out);
32486 rtx out_low = gen_lowpart (SImode, out);
32487 rtx in_high = gen_highpart (SImode, in);
32488 rtx in_low = gen_lowpart (SImode, in);
32490 /* Terminology:
32491 in = the register pair containing the input value.
32492 out = the destination register pair.
32493 up = the high- or low-part of each pair.
32494 down = the opposite part to "up".
32495 In a shift, we can consider bits to shift from "up"-stream to
32496 "down"-stream, so in a left-shift "up" is the low-part and "down"
32497 is the high-part of each register pair. */
32499 rtx out_up = code == ASHIFT ? out_low : out_high;
32500 rtx out_down = code == ASHIFT ? out_high : out_low;
32501 rtx in_up = code == ASHIFT ? in_low : in_high;
32502 rtx in_down = code == ASHIFT ? in_high : in_low;
32504 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32505 gcc_assert (out
32506 && (REG_P (out) || SUBREG_P (out))
32507 && GET_MODE (out) == DImode);
32508 gcc_assert (in
32509 && (REG_P (in) || SUBREG_P (in))
32510 && GET_MODE (in) == DImode);
32511 gcc_assert (amount
32512 && (((REG_P (amount) || SUBREG_P (amount))
32513 && GET_MODE (amount) == SImode)
32514 || CONST_INT_P (amount)));
32515 gcc_assert (scratch1 == NULL
32516 || (GET_CODE (scratch1) == SCRATCH)
32517 || (GET_MODE (scratch1) == SImode
32518 && REG_P (scratch1)));
32519 gcc_assert (scratch2 == NULL
32520 || (GET_CODE (scratch2) == SCRATCH)
32521 || (GET_MODE (scratch2) == SImode
32522 && REG_P (scratch2)));
32523 gcc_assert (!REG_P (out) || !REG_P (amount)
32524 || !HARD_REGISTER_P (out)
32525 || (REGNO (out) != REGNO (amount)
32526 && REGNO (out) + 1 != REGNO (amount)));
32528 /* Macros to make following code more readable. */
32529 #define SUB_32(DEST,SRC) \
32530 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32531 #define RSB_32(DEST,SRC) \
32532 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32533 #define SUB_S_32(DEST,SRC) \
32534 gen_addsi3_compare0 ((DEST), (SRC), \
32535 GEN_INT (-32))
32536 #define SET(DEST,SRC) \
32537 gen_rtx_SET ((DEST), (SRC))
32538 #define SHIFT(CODE,SRC,AMOUNT) \
32539 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32540 #define LSHIFT(CODE,SRC,AMOUNT) \
32541 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32542 SImode, (SRC), (AMOUNT))
32543 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32544 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32545 SImode, (SRC), (AMOUNT))
32546 #define ORR(A,B) \
32547 gen_rtx_IOR (SImode, (A), (B))
32548 #define BRANCH(COND,LABEL) \
32549 gen_arm_cond_branch ((LABEL), \
32550 gen_rtx_ ## COND (CCmode, cc_reg, \
32551 const0_rtx), \
32552 cc_reg)
32554 /* Shifts by register and shifts by constant are handled separately. */
32555 if (CONST_INT_P (amount))
32557 /* We have a shift-by-constant. */
32559 /* First, handle out-of-range shift amounts.
32560 In both cases we try to match the result that an ARM
32561 shift-by-register instruction would give.  This helps reduce
32562 execution differences between optimization levels, but it won't
32563 stop other parts of the compiler doing different things.  This is
32564 "undefined" behavior, in any case. */
32565 if (INTVAL (amount) <= 0)
32566 emit_insn (gen_movdi (out, in));
32567 else if (INTVAL (amount) >= 64)
32569 if (code == ASHIFTRT)
32571 rtx const31_rtx = GEN_INT (31);
32572 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32573 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32575 else
32576 emit_insn (gen_movdi (out, const0_rtx));
32579 /* Now handle valid shifts. */
32580 else if (INTVAL (amount) < 32)
32582 /* Shifts by a constant less than 32. */
32583 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32585 /* Clearing the out register in DImode first avoids lots
32586 of spilling and results in less stack usage.
32587 Later this redundant insn is completely removed.
32588 Do that only if "in" and "out" are different registers. */
32589 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32590 emit_insn (SET (out, const0_rtx));
32591 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32592 emit_insn (SET (out_down,
32593 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32594 out_down)));
32595 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32597 else
32599 /* Shifts by a constant greater than 31. */
32600 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32602 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32603 emit_insn (SET (out, const0_rtx));
32604 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32605 if (code == ASHIFTRT)
32606 emit_insn (gen_ashrsi3 (out_up, in_up,
32607 GEN_INT (31)));
32608 else
32609 emit_insn (SET (out_up, const0_rtx));
32612 else
32614 /* We have a shift-by-register. */
32615 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32617 /* This alternative requires the scratch registers. */
32618 gcc_assert (scratch1 && REG_P (scratch1));
32619 gcc_assert (scratch2 && REG_P (scratch2));
32621 /* We will need the values "amount-32" and "32-amount" later.
32622 Swapping them around now allows the later code to be more general. */
32623 switch (code)
32625 case ASHIFT:
32626 emit_insn (SUB_32 (scratch1, amount));
32627 emit_insn (RSB_32 (scratch2, amount));
32628 break;
32629 case ASHIFTRT:
32630 emit_insn (RSB_32 (scratch1, amount));
32631 /* Also set CC = amount > 32. */
32632 emit_insn (SUB_S_32 (scratch2, amount));
32633 break;
32634 case LSHIFTRT:
32635 emit_insn (RSB_32 (scratch1, amount));
32636 emit_insn (SUB_32 (scratch2, amount));
32637 break;
32638 default:
32639 gcc_unreachable ();
32642 /* Emit code like this:
32644 arithmetic-left:
32645 out_down = in_down << amount;
32646 out_down = (in_up << (amount - 32)) | out_down;
32647 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32648 out_up = in_up << amount;
32650 arithmetic-right:
32651 out_down = in_down >> amount;
32652 out_down = (in_up << (32 - amount)) | out_down;
32653 if (amount < 32)
32654 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32655 out_up = in_up << amount;
32657 logical-right:
32658 out_down = in_down >> amount;
32659 out_down = (in_up << (32 - amount)) | out_down;
32660 if (amount < 32)
32661 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32662 out_up = in_up << amount;
32664 The ARM and Thumb2 variants are the same but implemented slightly
32665 differently. If this were only called during expand we could just
32666 use the Thumb2 case and let combine do the right thing, but this
32667 can also be called from post-reload splitters. */
32669 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32671 if (!TARGET_THUMB2)
32673 /* Emit code for ARM mode. */
32674 emit_insn (SET (out_down,
32675 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32676 if (code == ASHIFTRT)
32678 rtx_code_label *done_label = gen_label_rtx ();
32679 emit_jump_insn (BRANCH (LT, done_label));
32680 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32681 out_down)));
32682 emit_label (done_label);
32684 else
32685 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32686 out_down)));
32688 else
32690 /* Emit code for Thumb2 mode.
32691 Thumb2 can't do shift and or in one insn. */
32692 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32693 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32695 if (code == ASHIFTRT)
32697 rtx_code_label *done_label = gen_label_rtx ();
32698 emit_jump_insn (BRANCH (LT, done_label));
32699 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32700 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32701 emit_label (done_label);
32703 else
32705 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32706 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32710 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32713 #undef SUB_32
32714 #undef RSB_32
32715 #undef SUB_S_32
32716 #undef SET
32717 #undef SHIFT
32718 #undef LSHIFT
32719 #undef REV_LSHIFT
32720 #undef ORR
32721 #undef BRANCH
32724 /* Returns true if the pattern is a valid symbolic address, which is either a
32725 symbol_ref or (symbol_ref + addend).
32727 According to the ARM ELF ABI, the initial addend of REL-type relocations
32728 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32729 literal field of the instruction as a 16-bit signed value in the range
32730 -32768 <= A < 32768.
32732 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32733 unsigned range of 0 <= A < 256 as described in the AAELF32
32734 relocation handling documentation: REL-type relocations are encoded
32735 as unsigned in this case. */
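/* For illustration, both (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 12))) are accepted here,
   the latter only while the addend fits the relocation range described
   above; everything is rejected under -mword-relocations.  */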
32737 bool
32738 arm_valid_symbolic_address_p (rtx addr)
32740 rtx xop0, xop1 = NULL_RTX;
32741 rtx tmp = addr;
32743 if (target_word_relocations)
32744 return false;
32746 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32747 return true;
32749 /* (const (plus: symbol_ref const_int)) */
32750 if (GET_CODE (addr) == CONST)
32751 tmp = XEXP (addr, 0);
32753 if (GET_CODE (tmp) == PLUS)
32755 xop0 = XEXP (tmp, 0);
32756 xop1 = XEXP (tmp, 1);
32758 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32760 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32761 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32762 else
32763 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32767 return false;
32770 /* Returns true if *COMPARISON is a valid comparison operation, and
32771 puts *OP1 and *OP2 into a form that is valid for it. */
32772 bool
32773 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32775 enum rtx_code code = GET_CODE (*comparison);
32776 int code_int;
32777 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32778 ? GET_MODE (*op2) : GET_MODE (*op1);
32780 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32782 if (code == UNEQ || code == LTGT)
32783 return false;
32785 code_int = (int)code;
32786 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32787 PUT_CODE (*comparison, (enum rtx_code)code_int);
32789 switch (mode)
32791 case E_SImode:
32792 if (!arm_add_operand (*op1, mode))
32793 *op1 = force_reg (mode, *op1);
32794 if (!arm_add_operand (*op2, mode))
32795 *op2 = force_reg (mode, *op2);
32796 return true;
32798 case E_DImode:
32799 /* gen_compare_reg() will sort out any invalid operands. */
32800 return true;
32802 case E_HFmode:
32803 if (!TARGET_VFP_FP16INST)
32804 break;
32805 /* FP16 comparisons are done in SF mode. */
32806 mode = SFmode;
32807 *op1 = convert_to_mode (mode, *op1, 1);
32808 *op2 = convert_to_mode (mode, *op2, 1);
32809 /* Fall through. */
32810 case E_SFmode:
32811 case E_DFmode:
32812 if (!vfp_compare_operand (*op1, mode))
32813 *op1 = force_reg (mode, *op1);
32814 if (!vfp_compare_operand (*op2, mode))
32815 *op2 = force_reg (mode, *op2);
32816 return true;
32817 default:
32818 break;
32821 return false;
32825 /* Maximum number of instructions to set block of memory. */
32826 static int
32827 arm_block_set_max_insns (void)
32829 if (optimize_function_for_size_p (cfun))
32830 return 4;
32831 else
32832 return current_tune->max_insns_inline_memset;
32835 /* Return TRUE if it's profitable to set block of memory for
32836 non-vectorized case. VAL is the value to set the memory
32837 with. LENGTH is the number of bytes to set. ALIGN is the
32838 alignment of the destination memory in bytes. UNALIGNED_P
32839 is TRUE if we can only set the memory with instructions
32840 meeting alignment requirements. USE_STRD_P is TRUE if we
32841 can use strd to set the memory. */
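/* Illustrative arithmetic (assuming the constant needs one instruction
   to materialize): LENGTH == 15 with a word-aligned destination and no
   strd gives (15 >> 2) + leftover[3] = 3 + 2 stores, minus one if the
   trailing STRH/STRB pair can be merged into an STR, so 5 instructions
   in total, which is compared against arm_block_set_max_insns ().  */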
32842 static bool
32843 arm_block_set_non_vect_profit_p (rtx val,
32844 unsigned HOST_WIDE_INT length,
32845 unsigned HOST_WIDE_INT align,
32846 bool unaligned_p, bool use_strd_p)
32848 int num = 0;
32849 /* For a leftover of 0-7 bytes, we can finish setting the memory
32850 block using strb/strh/str with the minimum number of instructions. */
32851 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32853 if (unaligned_p)
32855 num = arm_const_inline_cost (SET, val);
32856 num += length / align + length % align;
32858 else if (use_strd_p)
32860 num = arm_const_double_inline_cost (val);
32861 num += (length >> 3) + leftover[length & 7];
32863 else
32865 num = arm_const_inline_cost (SET, val);
32866 num += (length >> 2) + leftover[length & 3];
32869 /* We may be able to combine the last STRH/STRB pair into a single
32870 STR by shifting one byte back. */
32871 if (unaligned_access && length > 3 && (length & 3) == 3)
32872 num--;
32874 return (num <= arm_block_set_max_insns ());
32877 /* Return TRUE if it's profitable to set block of memory for
32878 vectorized case. LENGTH is the number of bytes to set.
32879 ALIGN is the alignment of destination memory in bytes.
32880 MODE is the vector mode used to set the memory. */
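/* Illustrative arithmetic: LENGTH == 19 with a word-aligned destination
   and MODE == V16QImode gives 1 (load the constant) + 2 (stores)
   + 1 (address adjustment for the 3 leftover bytes) - 1 (the first
   vst1:v16qi needs no adjustment) = 3 instructions, which is compared
   against arm_block_set_max_insns ().  */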
32881 static bool
32882 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32883 unsigned HOST_WIDE_INT align,
32884 machine_mode mode)
32886 int num;
32887 bool unaligned_p = ((align & 3) != 0);
32888 unsigned int nelt = GET_MODE_NUNITS (mode);
32890 /* Instruction loading constant value. */
32891 num = 1;
32892 /* Instructions storing the memory. */
32893 num += (length + nelt - 1) / nelt;
32894 /* Instructions adjusting the address expression. Only need to
32895 adjust address expression if it's 4 bytes aligned and bytes
32896 leftover can only be stored by mis-aligned store instruction. */
32897 if (!unaligned_p && (length & 3) != 0)
32898 num++;
32900 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32901 if (!unaligned_p && mode == V16QImode)
32902 num--;
32904 return (num <= arm_block_set_max_insns ());
32907 /* Set a block of memory using vectorization instructions for the
32908 unaligned case. We fill the first LENGTH bytes of the memory
32909 area starting from DSTBASE with byte constant VALUE. ALIGN is
32910 the alignment requirement of memory. Return TRUE if succeeded. */
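/* For illustration, LENGTH == 20 with a misaligned destination emits a
   misaligned v16qi store for bytes 0-15, then narrows the value to
   v8qi and emits a second misaligned store at offset 12, so the last
   store overlaps the first and ends exactly at byte 19.  */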
32911 static bool
32912 arm_block_set_unaligned_vect (rtx dstbase,
32913 unsigned HOST_WIDE_INT length,
32914 unsigned HOST_WIDE_INT value,
32915 unsigned HOST_WIDE_INT align)
32917 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32918 rtx dst, mem;
32919 rtx val_vec, reg;
32920 rtx (*gen_func) (rtx, rtx);
32921 machine_mode mode;
32922 unsigned HOST_WIDE_INT v = value;
32923 unsigned int offset = 0;
32924 gcc_assert ((align & 0x3) != 0);
32925 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32926 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32927 if (length >= nelt_v16)
32929 mode = V16QImode;
32930 gen_func = gen_movmisalignv16qi;
32932 else
32934 mode = V8QImode;
32935 gen_func = gen_movmisalignv8qi;
32937 nelt_mode = GET_MODE_NUNITS (mode);
32938 gcc_assert (length >= nelt_mode);
32939 /* Skip if it isn't profitable. */
32940 if (!arm_block_set_vect_profit_p (length, align, mode))
32941 return false;
32943 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32944 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32946 v = sext_hwi (v, BITS_PER_WORD);
32948 reg = gen_reg_rtx (mode);
32949 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32950 /* Emit instruction loading the constant value. */
32951 emit_move_insn (reg, val_vec);
32953 /* Handle nelt_mode bytes in a vector. */
32954 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32956 emit_insn ((*gen_func) (mem, reg));
32957 if (i + 2 * nelt_mode <= length)
32959 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32960 offset += nelt_mode;
32961 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32965 /* If at least nelt_v8 bytes are left over, we must be using
32966 V16QImode. */
32967 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32969 /* Handle (8, 16) bytes leftover. */
32970 if (i + nelt_v8 < length)
32972 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32973 offset += length - i;
32974 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32976 /* We are shifting bytes back, set the alignment accordingly. */
32977 if ((length & 1) != 0 && align >= 2)
32978 set_mem_align (mem, BITS_PER_UNIT);
32980 emit_insn (gen_movmisalignv16qi (mem, reg));
32982 /* Handle (0, 8] bytes leftover. */
32983 else if (i < length && i + nelt_v8 >= length)
32985 if (mode == V16QImode)
32986 reg = gen_lowpart (V8QImode, reg);
32988 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32989 + (nelt_mode - nelt_v8))));
32990 offset += (length - i) + (nelt_mode - nelt_v8);
32991 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32993 /* We are shifting bytes back, set the alignment accordingly. */
32994 if ((length & 1) != 0 && align >= 2)
32995 set_mem_align (mem, BITS_PER_UNIT);
32997 emit_insn (gen_movmisalignv8qi (mem, reg));
33000 return true;
33003 /* Set a block of memory using vectorization instructions for the
33004 aligned case. We fill the first LENGTH bytes of the memory area
33005 starting from DSTBASE with byte constant VALUE. ALIGN is the
33006 alignment requirement of memory. Return TRUE if succeeded. */
33007 static bool
33008 arm_block_set_aligned_vect (rtx dstbase,
33009 unsigned HOST_WIDE_INT length,
33010 unsigned HOST_WIDE_INT value,
33011 unsigned HOST_WIDE_INT align)
33013 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
33014 rtx dst, addr, mem;
33015 rtx val_vec, reg;
33016 machine_mode mode;
33017 unsigned int offset = 0;
33019 gcc_assert ((align & 0x3) == 0);
33020 nelt_v8 = GET_MODE_NUNITS (V8QImode);
33021 nelt_v16 = GET_MODE_NUNITS (V16QImode);
33022 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
33023 mode = V16QImode;
33024 else
33025 mode = V8QImode;
33027 nelt_mode = GET_MODE_NUNITS (mode);
33028 gcc_assert (length >= nelt_mode);
33029 /* Skip if it isn't profitable. */
33030 if (!arm_block_set_vect_profit_p (length, align, mode))
33031 return false;
33033 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33035 reg = gen_reg_rtx (mode);
33036 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
33037 /* Emit instruction loading the constant value. */
33038 emit_move_insn (reg, val_vec);
33040 i = 0;
33041 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
33042 if (mode == V16QImode)
33044 mem = adjust_automodify_address (dstbase, mode, dst, offset);
33045 emit_insn (gen_movmisalignv16qi (mem, reg));
33046 i += nelt_mode;
33047 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
33048 if (i + nelt_v8 < length && i + nelt_v16 > length)
33050 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
33051 offset += length - nelt_mode;
33052 mem = adjust_automodify_address (dstbase, mode, dst, offset);
33053 /* We are shifting bytes back, set the alignment accordingly. */
33054 if ((length & 0x3) == 0)
33055 set_mem_align (mem, BITS_PER_UNIT * 4);
33056 else if ((length & 0x1) == 0)
33057 set_mem_align (mem, BITS_PER_UNIT * 2);
33058 else
33059 set_mem_align (mem, BITS_PER_UNIT);
33061 emit_insn (gen_movmisalignv16qi (mem, reg));
33062 return true;
33064 /* Fall through for bytes leftover. */
33065 mode = V8QImode;
33066 nelt_mode = GET_MODE_NUNITS (mode);
33067 reg = gen_lowpart (V8QImode, reg);
33070 /* Handle 8 bytes in a vector. */
33071 for (; (i + nelt_mode <= length); i += nelt_mode)
33073 addr = plus_constant (Pmode, dst, i);
33074 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
33075 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
33076 emit_move_insn (mem, reg);
33077 else
33078 emit_insn (gen_unaligned_storev8qi (mem, reg));
33081 /* Handle single word leftover by shifting 4 bytes back. We can
33082 use aligned access for this case. */
33083 if (i + UNITS_PER_WORD == length)
33085 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
33086 offset += i - UNITS_PER_WORD;
33087 mem = adjust_automodify_address (dstbase, mode, addr, offset);
33088 /* We are shifting 4 bytes back, set the alignment accordingly. */
33089 if (align > UNITS_PER_WORD)
33090 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
33092 emit_insn (gen_unaligned_storev8qi (mem, reg));
33094 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
33095 We have to use unaligned access for this case. */
33096 else if (i < length)
33098 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
33099 offset += length - nelt_mode;
33100 mem = adjust_automodify_address (dstbase, mode, dst, offset);
33101 /* We are shifting bytes back, set the alignment accordingly. */
33102 if ((length & 1) == 0)
33103 set_mem_align (mem, BITS_PER_UNIT * 2);
33104 else
33105 set_mem_align (mem, BITS_PER_UNIT);
33107 emit_insn (gen_movmisalignv8qi (mem, reg));
33110 return true;
33113 /* Set a block of memory using plain strh/strb instructions, only
33114 using instructions allowed by ALIGN on the processor. We fill the
33115 first LENGTH bytes of the memory area starting from DSTBASE
33116 with byte constant VALUE. ALIGN is the alignment requirement
33117 of memory. */
33118 static bool
33119 arm_block_set_unaligned_non_vect (rtx dstbase,
33120 unsigned HOST_WIDE_INT length,
33121 unsigned HOST_WIDE_INT value,
33122 unsigned HOST_WIDE_INT align)
33124 unsigned int i;
33125 rtx dst, addr, mem;
33126 rtx val_exp, val_reg, reg;
33127 machine_mode mode;
33128 HOST_WIDE_INT v = value;
33130 gcc_assert (align == 1 || align == 2);
33132 if (align == 2)
33133 v |= (value << BITS_PER_UNIT);
33135 v = sext_hwi (v, BITS_PER_WORD);
33136 val_exp = GEN_INT (v);
33137 /* Skip if it isn't profitable. */
33138 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33139 align, true, false))
33140 return false;
33142 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33143 mode = (align == 2 ? HImode : QImode);
33144 val_reg = force_reg (SImode, val_exp);
33145 reg = gen_lowpart (mode, val_reg);
33147 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
33149 addr = plus_constant (Pmode, dst, i);
33150 mem = adjust_automodify_address (dstbase, mode, addr, i);
33151 emit_move_insn (mem, reg);
33154 /* Handle single byte leftover. */
33155 if (i + 1 == length)
33157 reg = gen_lowpart (QImode, val_reg);
33158 addr = plus_constant (Pmode, dst, i);
33159 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33160 emit_move_insn (mem, reg);
33161 i++;
33164 gcc_assert (i == length);
33165 return true;
33168 /* Set a block of memory using plain strd/str/strh/strb instructions,
33169 to permit unaligned copies on processors which support unaligned
33170 semantics for those instructions. We fill the first LENGTH bytes
33171 of the memory area starting from DSTBASE with byte constant VALUE.
33172 ALIGN is the alignment requirement of memory. */
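/* For illustration (VALUE == 0x41 chosen arbitrarily), the byte is
   replicated to 0x41414141 for str, or to the corresponding
   double-word constant when strd is used; a trailing 3-byte leftover
   can be merged into one str placed a byte back when unaligned access
   is available.  */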
33173 static bool
33174 arm_block_set_aligned_non_vect (rtx dstbase,
33175 unsigned HOST_WIDE_INT length,
33176 unsigned HOST_WIDE_INT value,
33177 unsigned HOST_WIDE_INT align)
33179 unsigned int i;
33180 rtx dst, addr, mem;
33181 rtx val_exp, val_reg, reg;
33182 unsigned HOST_WIDE_INT v;
33183 bool use_strd_p;
33185 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
33186 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
33188 v = (value | (value << 8) | (value << 16) | (value << 24));
33189 if (length < UNITS_PER_WORD)
33190 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
33192 if (use_strd_p)
33193 v |= (v << BITS_PER_WORD);
33194 else
33195 v = sext_hwi (v, BITS_PER_WORD);
33197 val_exp = GEN_INT (v);
33198 /* Skip if it isn't profitable. */
33199 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33200 align, false, use_strd_p))
33202 if (!use_strd_p)
33203 return false;
33205 /* Try without strd. */
33206 v = (v >> BITS_PER_WORD);
33207 v = sext_hwi (v, BITS_PER_WORD);
33208 val_exp = GEN_INT (v);
33209 use_strd_p = false;
33210 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33211 align, false, use_strd_p))
33212 return false;
33215 i = 0;
33216 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33217 /* Handle double words using strd if possible. */
33218 if (use_strd_p)
33220 val_reg = force_reg (DImode, val_exp);
33221 reg = val_reg;
33222 for (; (i + 8 <= length); i += 8)
33224 addr = plus_constant (Pmode, dst, i);
33225 mem = adjust_automodify_address (dstbase, DImode, addr, i);
33226 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
33227 emit_move_insn (mem, reg);
33228 else
33229 emit_insn (gen_unaligned_storedi (mem, reg));
33232 else
33233 val_reg = force_reg (SImode, val_exp);
33235 /* Handle words. */
33236 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
33237 for (; (i + 4 <= length); i += 4)
33239 addr = plus_constant (Pmode, dst, i);
33240 mem = adjust_automodify_address (dstbase, SImode, addr, i);
33241 if ((align & 3) == 0)
33242 emit_move_insn (mem, reg);
33243 else
33244 emit_insn (gen_unaligned_storesi (mem, reg));
33247 /* Merge last pair of STRH and STRB into a STR if possible. */
33248 if (unaligned_access && i > 0 && (i + 3) == length)
33250 addr = plus_constant (Pmode, dst, i - 1);
33251 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33252 /* We are shifting one byte back, set the alignment accordingly. */
33253 if ((align & 1) == 0)
33254 set_mem_align (mem, BITS_PER_UNIT);
33256 /* Most likely this is an unaligned access, and we can't tell at
33257 compilation time. */
33258 emit_insn (gen_unaligned_storesi (mem, reg));
33259 return true;
33262 /* Handle half word leftover. */
33263 if (i + 2 <= length)
33265 reg = gen_lowpart (HImode, val_reg);
33266 addr = plus_constant (Pmode, dst, i);
33267 mem = adjust_automodify_address (dstbase, HImode, addr, i);
33268 if ((align & 1) == 0)
33269 emit_move_insn (mem, reg);
33270 else
33271 emit_insn (gen_unaligned_storehi (mem, reg));
33273 i += 2;
33276 /* Handle single byte leftover. */
33277 if (i + 1 == length)
33279 reg = gen_lowpart (QImode, val_reg);
33280 addr = plus_constant (Pmode, dst, i);
33281 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33282 emit_move_insn (mem, reg);
33285 return true;
33288 /* Set a block of memory using vectorization instructions for both
33289 aligned and unaligned cases. We fill the first LENGTH bytes of
33290 the memory area starting from DSTBASE with byte constant VALUE.
33291 ALIGN is the alignment requirement of memory. */
33292 static bool
33293 arm_block_set_vect (rtx dstbase,
33294 unsigned HOST_WIDE_INT length,
33295 unsigned HOST_WIDE_INT value,
33296 unsigned HOST_WIDE_INT align)
33298 /* Check whether we need to use unaligned store instruction. */
33299 if (((align & 3) != 0 || (length & 3) != 0)
33300 /* Check whether unaligned store instruction is available. */
33301 && (!unaligned_access || BYTES_BIG_ENDIAN))
33302 return false;
33304 if ((align & 3) == 0)
33305 return arm_block_set_aligned_vect (dstbase, length, value, align);
33306 else
33307 return arm_block_set_unaligned_vect (dstbase, length, value, align);
33310 /* Expand a string store (setmem) operation.  First we try to do it
33311 using vectorization instructions, then fall back to ARM unaligned
33312 access and double-word stores if profitable.  OPERANDS[0] is the
33313 destination, OPERANDS[1] is the number of bytes, OPERANDS[2] is the
33314 value used to initialize the memory, and OPERANDS[3] is the known
33315 alignment of the destination. */
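/* For illustration (length and value chosen arbitrarily), a call such
   as memset (p, 0x55, 15) with a known 4-byte alignment is first
   offered to the NEON path when LENGTH >= 8 and the tuning prefers
   NEON string ops, and otherwise falls through to
   arm_block_set_aligned_non_vect; a destination with smaller known
   alignment and no unaligned access support goes to
   arm_block_set_unaligned_non_vect instead.  */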
33316 bool
33317 arm_gen_setmem (rtx *operands)
33319 rtx dstbase = operands[0];
33320 unsigned HOST_WIDE_INT length;
33321 unsigned HOST_WIDE_INT value;
33322 unsigned HOST_WIDE_INT align;
33324 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33325 return false;
33327 length = UINTVAL (operands[1]);
33328 if (length > 64)
33329 return false;
33331 value = (UINTVAL (operands[2]) & 0xFF);
33332 align = UINTVAL (operands[3]);
33333 if (TARGET_NEON && length >= 8
33334 && current_tune->string_ops_prefer_neon
33335 && arm_block_set_vect (dstbase, length, value, align))
33336 return true;
33338 if (!unaligned_access && (align & 3) != 0)
33339 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33341 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33345 static bool
33346 arm_macro_fusion_p (void)
33348 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33351 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33352 for MOVW / MOVT macro fusion. */
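/* For illustration, the pair
     movw r0, #:lower16:sym
     movt r0, #:upper16:sym
   (or the equivalent immediate forms building a 32-bit constant) is the
   kind of sequence this hook asks the scheduler to keep adjacent.  */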
33354 static bool
33355 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33357 /* We are trying to fuse
33358 movw imm / movt imm
33359 instructions as a group that gets scheduled together. */
33361 rtx set_dest = SET_DEST (curr_set);
33363 if (GET_MODE (set_dest) != SImode)
33364 return false;
33366 /* We are trying to match:
33367 prev (movw) == (set (reg r0) (const_int imm16))
33368 curr (movt) == (set (zero_extract (reg r0)
33369 (const_int 16)
33370 (const_int 16))
33371 (const_int imm16_1))
33373 prev (movw) == (set (reg r1)
33374 (high (symbol_ref ("SYM"))))
33375 curr (movt) == (set (reg r0)
33376 (lo_sum (reg r1)
33377 (symbol_ref ("SYM")))) */
33379 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33381 if (CONST_INT_P (SET_SRC (curr_set))
33382 && CONST_INT_P (SET_SRC (prev_set))
33383 && REG_P (XEXP (set_dest, 0))
33384 && REG_P (SET_DEST (prev_set))
33385 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33386 return true;
33389 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33390 && REG_P (SET_DEST (curr_set))
33391 && REG_P (SET_DEST (prev_set))
33392 && GET_CODE (SET_SRC (prev_set)) == HIGH
33393 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33394 return true;
33396 return false;
33399 static bool
33400 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33402 rtx prev_set = single_set (prev);
33403 rtx curr_set = single_set (curr);
33405 if (!prev_set
33406 || !curr_set)
33407 return false;
33409 if (any_condjump_p (curr))
33410 return false;
33412 if (!arm_macro_fusion_p ())
33413 return false;
33415 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33416 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33417 return true;
33419 return false;
33422 /* Return true iff the instruction fusion described by OP is enabled. */
33423 bool
33424 arm_fusion_enabled_p (tune_params::fuse_ops op)
33426 return current_tune->fusible_ops & op;
33429 /* Return TRUE if return address signing mechanism is enabled. */
33430 bool
33431 arm_current_function_pac_enabled_p (void)
33433 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33434 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33435 && !crtl->is_leaf));
33438 /* Raise an error if the current target arch is not bti compatible. */
33439 void aarch_bti_arch_check (void)
33441 if (!arm_arch8m_main)
33442 error ("This architecture does not support branch protection instructions");
33445 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33446 bool
33447 aarch_bti_enabled (void)
33449 return aarch_enable_bti != 0;
33452 /* Check if INSN is a BTI J insn. */
33453 bool
33454 aarch_bti_j_insn_p (rtx_insn *insn)
33456 if (!insn || !INSN_P (insn))
33457 return false;
33459 rtx pat = PATTERN (insn);
33460 return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33463 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33464 bool
33465 aarch_pac_insn_p (rtx x)
33467 if (!x || !INSN_P (x))
33468 return false;
33470 rtx pat = PATTERN (x);
33472 if (GET_CODE (pat) == SET)
33474 rtx tmp = XEXP (pat, 1);
33475 if (tmp
33476 && ((GET_CODE (tmp) == UNSPEC
33477 && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33478 || (GET_CODE (tmp) == UNSPEC_VOLATILE
33479 && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33480 return true;
33483 return false;
33486 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33487 For Arm, both of these map to a simple BTI instruction. */
33490 aarch_gen_bti_c (void)
33492 return gen_bti_nop ();
33496 aarch_gen_bti_j (void)
33498 return gen_bti_nop ();
33501 /* For AArch32, we always return false because the indirect_return attribute
33502 is only supported on AArch64 targets. */
33504 bool
33505 aarch_fun_is_indirect_return (rtx_insn *)
33507 return false;
33510 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33511 scheduled for speculative execution. Reject the long-running division
33512 and square-root instructions. */
33514 static bool
33515 arm_sched_can_speculate_insn (rtx_insn *insn)
33517 switch (get_attr_type (insn))
33519 case TYPE_SDIV:
33520 case TYPE_UDIV:
33521 case TYPE_FDIVS:
33522 case TYPE_FDIVD:
33523 case TYPE_FSQRTS:
33524 case TYPE_FSQRTD:
33525 case TYPE_NEON_FP_SQRT_S:
33526 case TYPE_NEON_FP_SQRT_D:
33527 case TYPE_NEON_FP_SQRT_S_Q:
33528 case TYPE_NEON_FP_SQRT_D_Q:
33529 case TYPE_NEON_FP_DIV_S:
33530 case TYPE_NEON_FP_DIV_D:
33531 case TYPE_NEON_FP_DIV_S_Q:
33532 case TYPE_NEON_FP_DIV_D_Q:
33533 return false;
33534 default:
33535 return true;
33539 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33541 static unsigned HOST_WIDE_INT
33542 arm_asan_shadow_offset (void)
33544 return HOST_WIDE_INT_1U << 29;
33548 /* This is a temporary fix for PR60655. Ideally we need
33549 to handle most of these cases in the generic part but
33550 currently we reject minus (..) (sym_ref). We try to
33551 ameliorate the case with minus (sym_ref1) (sym_ref2)
33552 where they are in the same section. */
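/* For illustration, (minus (symbol_ref "a") (symbol_ref "b")) is kept
   for debug output only when both symbols resolve to decls placed in
   the same section (or to labels in the same function); otherwise it
   is rejected here.  */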
33554 static bool
33555 arm_const_not_ok_for_debug_p (rtx p)
33557 tree decl_op0 = NULL;
33558 tree decl_op1 = NULL;
33560 if (GET_CODE (p) == UNSPEC)
33561 return true;
33562 if (GET_CODE (p) == MINUS)
33564 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33566 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33567 if (decl_op1
33568 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33569 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33571 if ((VAR_P (decl_op1)
33572 || TREE_CODE (decl_op1) == CONST_DECL)
33573 && (VAR_P (decl_op0)
33574 || TREE_CODE (decl_op0) == CONST_DECL))
33575 return (get_variable_section (decl_op1, false)
33576 != get_variable_section (decl_op0, false));
33578 if (TREE_CODE (decl_op1) == LABEL_DECL
33579 && TREE_CODE (decl_op0) == LABEL_DECL)
33580 return (DECL_CONTEXT (decl_op1)
33581 != DECL_CONTEXT (decl_op0));
33584 return true;
33588 return false;
33591 /* Return TRUE if X is a reference to a value in a constant pool. */
33592 extern bool
33593 arm_is_constant_pool_ref (rtx x)
33595 return (MEM_P (x)
33596 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33597 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33600 /* Remember the last target of arm_set_current_function. */
33601 static GTY(()) tree arm_previous_fndecl;
33603 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33605 void
33606 save_restore_target_globals (tree new_tree)
33608 /* If we have a previous state, use it. */
33609 if (TREE_TARGET_GLOBALS (new_tree))
33610 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33611 else if (new_tree == target_option_default_node)
33612 restore_target_globals (&default_target_globals);
33613 else
33615 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33616 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33619 arm_option_params_internal ();
33622 /* Invalidate arm_previous_fndecl. */
33624 void
33625 arm_reset_previous_fndecl (void)
33627 arm_previous_fndecl = NULL_TREE;
33630 /* Establish appropriate back-end context for processing the function
33631 FNDECL. The argument might be NULL to indicate processing at top
33632 level, outside of any function scope. */
33634 static void
33635 arm_set_current_function (tree fndecl)
33637 if (!fndecl || fndecl == arm_previous_fndecl)
33638 return;
33640 tree old_tree = (arm_previous_fndecl
33641 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33642 : NULL_TREE);
33644 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33646 /* If current function has no attributes but previous one did,
33647 use the default node. */
33648 if (! new_tree && old_tree)
33649 new_tree = target_option_default_node;
33651 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33652 the default have been handled by save_restore_target_globals from
33653 arm_pragma_target_parse. */
33654 if (old_tree == new_tree)
33655 return;
33657 arm_previous_fndecl = fndecl;
33659 /* First set the target options. */
33660 cl_target_option_restore (&global_options, &global_options_set,
33661 TREE_TARGET_OPTION (new_tree));
33663 save_restore_target_globals (new_tree);
33665 arm_override_options_after_change_1 (&global_options, &global_options_set);
33668 /* Implement TARGET_OPTION_PRINT. */
33670 static void
33671 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33673 int flags = ptr->x_target_flags;
33674 const char *fpu_name;
33676 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33677 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33679 fprintf (file, "%*sselected isa %s\n", indent, "",
33680 TARGET_THUMB2_P (flags) ? "thumb2" :
33681 TARGET_THUMB_P (flags) ? "thumb1" :
33682 "arm");
33684 if (ptr->x_arm_arch_string)
33685 fprintf (file, "%*sselected architecture %s\n", indent, "",
33686 ptr->x_arm_arch_string);
33688 if (ptr->x_arm_cpu_string)
33689 fprintf (file, "%*sselected CPU %s\n", indent, "",
33690 ptr->x_arm_cpu_string);
33692 if (ptr->x_arm_tune_string)
33693 fprintf (file, "%*sselected tune %s\n", indent, "",
33694 ptr->x_arm_tune_string);
33696 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33699 /* Hook to determine if one function can safely inline another. */
33701 static bool
33702 arm_can_inline_p (tree caller, tree callee)
33704 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33705 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33706 bool can_inline = true;
33708 struct cl_target_option *caller_opts
33709 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33710 : target_option_default_node);
33712 struct cl_target_option *callee_opts
33713 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33714 : target_option_default_node);
33716 if (callee_opts == caller_opts)
33717 return true;
33719 /* Callee's ISA features should be a subset of the caller's. */
33720 struct arm_build_target caller_target;
33721 struct arm_build_target callee_target;
33722 caller_target.isa = sbitmap_alloc (isa_num_bits);
33723 callee_target.isa = sbitmap_alloc (isa_num_bits);
33725 arm_configure_build_target (&caller_target, caller_opts, false);
33726 arm_configure_build_target (&callee_target, callee_opts, false);
33727 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33728 can_inline = false;
33730 sbitmap_free (caller_target.isa);
33731 sbitmap_free (callee_target.isa);
33733 /* OK to inline between different modes.
33734 Functions with mode-specific instructions, e.g. using asm,
33735 must be explicitly protected with noinline. */
33736 return can_inline;
33739 /* Hook to fix function's alignment affected by target attribute. */
33741 static void
33742 arm_relayout_function (tree fndecl)
33744 if (DECL_USER_ALIGN (fndecl))
33745 return;
33747 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33749 if (!callee_tree)
33750 callee_tree = target_option_default_node;
33752 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33753 SET_DECL_ALIGN
33754 (fndecl,
33755 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33758 /* Inner function to process the attribute((target(...))), take an argument and
33759 set the current options from the argument. If we have a list, recursively
33760 go over the list. */
33762 static bool
33763 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33765 if (TREE_CODE (args) == TREE_LIST)
33767 bool ret = true;
33769 for (; args; args = TREE_CHAIN (args))
33770 if (TREE_VALUE (args)
33771 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33772 ret = false;
33773 return ret;
33776 else if (TREE_CODE (args) != STRING_CST)
33778 error ("attribute %<target%> argument not a string");
33779 return false;
33782 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33783 char *q;
33785 while ((q = strtok (argstr, ",")) != NULL)
33787 argstr = NULL;
33788 if (!strcmp (q, "thumb"))
33790 opts->x_target_flags |= MASK_THUMB;
33791 if (TARGET_FDPIC && !arm_arch_thumb2)
33792 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33795 else if (!strcmp (q, "arm"))
33796 opts->x_target_flags &= ~MASK_THUMB;
33798 else if (!strcmp (q, "general-regs-only"))
33799 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33801 else if (startswith (q, "fpu="))
33803 int fpu_index;
33804 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33805 &fpu_index, CL_TARGET))
33807 error ("invalid fpu for target attribute or pragma %qs", q);
33808 return false;
33810 if (fpu_index == TARGET_FPU_auto)
33812 /* This doesn't really make sense until we support
33813 general dynamic selection of the architecture and all
33814 sub-features. */
33815 sorry ("auto fpu selection not currently permitted here");
33816 return false;
33818 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33820 else if (startswith (q, "arch="))
33822 char *arch = q + 5;
33823 const arch_option *arm_selected_arch
33824 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33826 if (!arm_selected_arch)
33828 error ("invalid architecture for target attribute or pragma %qs",
33830 return false;
33833 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33835 else if (q[0] == '+')
33837 opts->x_arm_arch_string
33838 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33840 else
33842 error ("unknown target attribute or pragma %qs", q);
33843 return false;
33847 return true;
33850 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33852 tree
33853 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33854 struct gcc_options *opts_set)
33856 struct cl_target_option cl_opts;
33858 if (!arm_valid_target_attribute_rec (args, opts))
33859 return NULL_TREE;
33861 cl_target_option_save (&cl_opts, opts, opts_set);
33862 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33863 arm_option_check_internal (opts);
33864 /* Do any overrides, such as global options arch=xxx.
33865 We do this since arm_active_target was overridden. */
33866 arm_option_reconfigure_globals ();
33867 arm_options_perform_arch_sanity_checks ();
33868 arm_option_override_internal (opts, opts_set);
33870 return build_target_option_node (opts, opts_set);
33873 static void
33874 add_attribute (const char * mode, tree *attributes)
33876 size_t len = strlen (mode);
33877 tree value = build_string (len, mode);
33879 TREE_TYPE (value) = build_array_type (char_type_node,
33880 build_index_type (size_int (len)));
33882 *attributes = tree_cons (get_identifier ("target"),
33883 build_tree_list (NULL_TREE, value),
33884 *attributes);
33887 /* For testing. Insert thumb or arm modes alternately on functions. */
33889 static void
33890 arm_insert_attributes (tree fndecl, tree * attributes)
33892 const char *mode;
33894 if (! TARGET_FLIP_THUMB)
33895 return;
33897 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33898 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33899 return;
33901 /* Nested definitions must inherit mode. */
33902 if (current_function_decl)
33904 mode = TARGET_THUMB ? "thumb" : "arm";
33905 add_attribute (mode, attributes);
33906 return;
33909 /* If there is already a setting don't change it. */
33910 if (lookup_attribute ("target", *attributes) != NULL)
33911 return;
33913 mode = thumb_flipper ? "thumb" : "arm";
33914 add_attribute (mode, attributes);
33916 thumb_flipper = !thumb_flipper;
33919 /* Hook to validate attribute((target("string"))). */
33921 static bool
33922 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33923 tree args, int ARG_UNUSED (flags))
33925 bool ret = true;
33926 struct gcc_options func_options, func_options_set;
33927 tree cur_tree, new_optimize;
33928 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33930 /* Get the optimization options of the current function. */
33931 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33933 /* If the function changed the optimization levels as well as setting target
33934 options, start with the optimizations specified. */
33935 if (!func_optimize)
33936 func_optimize = optimization_default_node;
33938 /* Init func_options. */
33939 memset (&func_options, 0, sizeof (func_options));
33940 init_options_struct (&func_options, NULL);
33941 lang_hooks.init_options_struct (&func_options);
33942 memset (&func_options_set, 0, sizeof (func_options_set));
33944 /* Initialize func_options to the defaults. */
33945 cl_optimization_restore (&func_options, &func_options_set,
33946 TREE_OPTIMIZATION (func_optimize));
33948 cl_target_option_restore (&func_options, &func_options_set,
33949 TREE_TARGET_OPTION (target_option_default_node));
33951 /* Set func_options flags with new target mode. */
33952 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33953 &func_options_set);
33955 if (cur_tree == NULL_TREE)
33956 ret = false;
33958 new_optimize = build_optimization_node (&func_options, &func_options_set);
33960 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33962 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33964 return ret;
33967 /* Match an ISA feature bitmap to a named FPU. We always use the
33968 first entry that exactly matches the feature set, so that we
33969 effectively canonicalize the FPU name for the assembler. */
33970 static const char*
33971 arm_identify_fpu_from_isa (sbitmap isa)
33973 auto_sbitmap fpubits (isa_num_bits);
33974 auto_sbitmap cand_fpubits (isa_num_bits);
33976 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33978 /* If there are no ISA feature bits relating to the FPU, we must be
33979 doing soft-float. */
33980 if (bitmap_empty_p (fpubits))
33981 return "softvfp";
33983 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33985 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33986 if (bitmap_equal_p (fpubits, cand_fpubits))
33987 return all_fpus[i].name;
33989 /* We must find an entry, or things have gone wrong. */
33990 gcc_unreachable ();
33993 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33994 by the function fndecl. */
33995 void
33996 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33998 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
34000 struct cl_target_option *targ_options;
34001 if (target_parts)
34002 targ_options = TREE_TARGET_OPTION (target_parts);
34003 else
34004 targ_options = TREE_TARGET_OPTION (target_option_current_node);
34005 gcc_assert (targ_options);
34007 arm_print_asm_arch_directives (stream, targ_options);
34009 fprintf (stream, "\t.syntax unified\n");
34011 if (TARGET_THUMB)
34013 if (is_called_in_ARM_mode (decl)
34014 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
34015 && cfun->is_thunk))
34016 fprintf (stream, "\t.code 32\n");
34017 else if (TARGET_THUMB1)
34018 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
34019 else
34020 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
34022 else
34023 fprintf (stream, "\t.arm\n");
34025 if (TARGET_POKE_FUNCTION_NAME)
34026 arm_poke_function_name (stream, (const char *) name);
34029 /* If MEM is in the form of [base+offset], extract the two parts
34030 of the address and store them in BASE and OFFSET, otherwise return false
34031 after clearing BASE and OFFSET. */
34033 static bool
34034 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
34036 rtx addr;
34038 gcc_assert (MEM_P (mem));
34040 addr = XEXP (mem, 0);
34042 /* Strip off const from addresses like (const (addr)). */
34043 if (GET_CODE (addr) == CONST)
34044 addr = XEXP (addr, 0);
34046 if (REG_P (addr))
34048 *base = addr;
34049 *offset = const0_rtx;
34050 return true;
34053 if (GET_CODE (addr) == PLUS
34054 && GET_CODE (XEXP (addr, 0)) == REG
34055 && CONST_INT_P (XEXP (addr, 1)))
34057 *base = XEXP (addr, 0);
34058 *offset = XEXP (addr, 1);
34059 return true;
34062 *base = NULL_RTX;
34063 *offset = NULL_RTX;
34065 return false;
34068 /* If INSN is a load or store whose address is in the form [base+offset],
34069 extract the two parts into BASE and OFFSET.  IS_LOAD is set
34070 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
34071 otherwise return FALSE. */
34073 static bool
34074 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
34076 rtx x, dest, src;
34078 gcc_assert (INSN_P (insn));
34079 x = PATTERN (insn);
34080 if (GET_CODE (x) != SET)
34081 return false;
34083 src = SET_SRC (x);
34084 dest = SET_DEST (x);
34085 if (REG_P (src) && MEM_P (dest))
34087 *is_load = false;
34088 extract_base_offset_in_addr (dest, base, offset);
34090 else if (MEM_P (src) && REG_P (dest))
34092 *is_load = true;
34093 extract_base_offset_in_addr (src, base, offset);
34095 else
34096 return false;
34098 return (*base != NULL_RTX && *offset != NULL_RTX);
34101 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
34103 Currently we only support fusing ldr or str instructions, so FUSION_PRI
34104 and PRI are only calculated for these instructions.  For other instructions,
34105 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
34106 instruction fusion can be supported by returning different priorities.
34108 It's important that irrelevant instructions get the largest FUSION_PRI. */
34110 static void
34111 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
34112 int *fusion_pri, int *pri)
34114 int tmp, off_val;
34115 bool is_load;
34116 rtx base, offset;
34118 gcc_assert (INSN_P (insn));
34120 tmp = max_pri - 1;
34121 if (!fusion_load_store (insn, &base, &offset, &is_load))
34123 *pri = tmp;
34124 *fusion_pri = tmp;
34125 return;
34128 /* Load goes first. */
34129 if (is_load)
34130 *fusion_pri = tmp - 1;
34131 else
34132 *fusion_pri = tmp - 2;
34134 tmp /= 2;
34136 /* INSN with smaller base register goes first. */
34137 tmp -= ((REGNO (base) & 0xff) << 20);
34139 /* INSN with smaller offset goes first. */
34140 off_val = (int)(INTVAL (offset));
34141 if (off_val >= 0)
34142 tmp -= (off_val & 0xfffff);
34143 else
34144 tmp += ((- off_val) & 0xfffff);
34146 *pri = tmp;
34147 return;
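/* Illustrative example (not real scheduler output): two loads from
   [r2, #4] and [r2, #8] receive the same FUSION_PRI (all loads share one
   value, all stores another, and everything else gets MAX_PRI), while the
   smaller offset gives the larger PRI, so the [r2, #4] access is ordered
   first and the two end up adjacent, where later passes may merge them,
   e.g. into an LDRD or LDM form.  */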
34151 /* Construct and return a PARALLEL RTX vector with elements numbering the
34152 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
34153 the vector - from the perspective of the architecture. This does not
34154 line up with GCC's perspective on lane numbers, so we end up with
34155 different masks depending on our target endian-ness. The diagram
34156 below may help. We must draw the distinction when building masks
34157 which select one half of the vector. An instruction selecting
34158 architectural low-lanes for a big-endian target must be described using
34159 a mask selecting GCC high-lanes.
34161 Big-Endian Little-Endian
34163 GCC 0 1 2 3 3 2 1 0
34164 | x | x | x | x | | x | x | x | x |
34165 Architecture 3 2 1 0 3 2 1 0
34167 Low Mask: { 2, 3 } { 0, 1 }
34168 High Mask: { 0, 1 } { 2, 3 }
34172 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
34174 int nunits = GET_MODE_NUNITS (mode);
34175 rtvec v = rtvec_alloc (nunits / 2);
34176 int high_base = nunits / 2;
34177 int low_base = 0;
34178 int base;
34179 rtx t1;
34180 int i;
34182 if (BYTES_BIG_ENDIAN)
34183 base = high ? low_base : high_base;
34184 else
34185 base = high ? high_base : low_base;
34187 for (i = 0; i < nunits / 2; i++)
34188 RTVEC_ELT (v, i) = GEN_INT (base + i);
34190 t1 = gen_rtx_PARALLEL (mode, v);
34191 return t1;
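/* Worked example (illustrative): for V4SImode, NUNITS is 4, so the result
   has two elements.  On little-endian, HIGH == true gives
   (parallel [(const_int 2) (const_int 3)]) and HIGH == false gives
   (parallel [(const_int 0) (const_int 1)]); on big-endian the two masks are
   swapped, matching the diagram above.  */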
34194 /* Check OP for validity as a PARALLEL RTX vector with elements
34195 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
34196 from the perspective of the architecture. See the diagram above
34197 arm_simd_vect_par_cnst_half_p for more details. */
34199 bool
34200 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
34201 bool high)
34203 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
34204 HOST_WIDE_INT count_op = XVECLEN (op, 0);
34205 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
34206 int i = 0;
34208 if (!VECTOR_MODE_P (mode))
34209 return false;
34211 if (count_op != count_ideal)
34212 return false;
34214 for (i = 0; i < count_ideal; i++)
34216 rtx elt_op = XVECEXP (op, 0, i);
34217 rtx elt_ideal = XVECEXP (ideal, 0, i);
34219 if (!CONST_INT_P (elt_op)
34220 || INTVAL (elt_ideal) != INTVAL (elt_op))
34221 return false;
34223 return true;
34226 /* Can output mi_thunk for all cases except for non-zero vcall_offset
34227 in Thumb1. */
34228 static bool
34229 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
34230 const_tree)
34232 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
34233 if (vcall_offset && TARGET_THUMB1)
34234 return false;
34236 /* Otherwise ok. */
34237 return true;
34240 /* Generate RTL for a conditional branch with rtx comparison CODE in
34241 mode CC_MODE. The destination of the unlikely conditional branch
34242 is LABEL_REF. */
34244 void
34245 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
34246 rtx label_ref)
34248 rtx x;
34249 x = gen_rtx_fmt_ee (code, VOIDmode,
34250 gen_rtx_REG (cc_mode, CC_REGNUM),
34251 const0_rtx);
34253 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
34254 gen_rtx_LABEL_REF (VOIDmode, label_ref),
34255 pc_rtx);
34256 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
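/* The emitted jump has the usual conditional-branch shape, roughly
     (set (pc) (if_then_else (<code> (reg CC_REGNUM) (const_int 0))
                             (label_ref <label>)
                             (pc)))
   with the branch marked as unlikely to be taken.  */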
34259 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34261 For pure-code sections there is no letter code for this attribute, so
34262 output all the section flags numerically when this is needed. */
34264 static bool
34265 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
34268 if (flags & SECTION_ARM_PURECODE)
34270 *num = 0x20000000;
34272 if (!(flags & SECTION_DEBUG))
34273 *num |= 0x2;
34274 if (flags & SECTION_EXCLUDE)
34275 *num |= 0x80000000;
34276 if (flags & SECTION_WRITE)
34277 *num |= 0x1;
34278 if (flags & SECTION_CODE)
34279 *num |= 0x4;
34280 if (flags & SECTION_MERGE)
34281 *num |= 0x10;
34282 if (flags & SECTION_STRINGS)
34283 *num |= 0x20;
34284 if (flags & SECTION_TLS)
34285 *num |= 0x400;
34286 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
34287 *num |= 0x200;
34289 return true;
34292 return false;
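/* For example, a typical pure-code text section (allocated, executable, not
   writable, not a debug section) comes out as
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE together with
   the usual SHF_ALLOC and SHF_EXECINSTR bits, printed numerically.  */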
34295 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34297 If pure-code is passed as an option, make sure all functions are in
34298 sections that have the SHF_ARM_PURECODE attribute. */
34300 static section *
34301 arm_function_section (tree decl, enum node_frequency freq,
34302 bool startup, bool exit)
34304 const char * section_name;
34305 section * sec;
34307 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
34308 return default_function_section (decl, freq, startup, exit);
34310 if (!target_pure_code)
34311 return default_function_section (decl, freq, startup, exit);
34314 section_name = DECL_SECTION_NAME (decl);
34316 /* If a function is not in a named section then it falls under the 'default'
34317 text section, also known as '.text'. We can preserve previous behavior as
34318 the default text section already has the SHF_ARM_PURECODE section
34319 attribute. */
34320 if (!section_name)
34322 section *default_sec = default_function_section (decl, freq, startup,
34323 exit);
34325 /* If default_sec is not null, then it must be a special section like for
34326 example .text.startup. We set the pure-code attribute and return the
34327 same section to preserve existing behavior. */
34328 if (default_sec)
34329 default_sec->common.flags |= SECTION_ARM_PURECODE;
34330 return default_sec;
34333 /* Otherwise look whether a section has already been created with
34334 'section_name'. */
34335 sec = get_named_section (decl, section_name, 0);
34336 if (!sec)
34337 /* If that is not the case, passing NULL as the section's name to
34338 'get_named_section' will create a section with the declaration's
34339 section name. */
34340 sec = get_named_section (decl, NULL, 0);
34342 /* Set the SHF_ARM_PURECODE attribute. */
34343 sec->common.flags |= SECTION_ARM_PURECODE;
34345 return sec;
34348 /* Implements the TARGET_SECTION_FLAGS hook.
34350 If DECL is a function declaration and pure-code is passed as an option
34351 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
34352 section's name and RELOC indicates whether the declaration's initializer may
34353 contain runtime relocations. */
34355 static unsigned int
34356 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
34358 unsigned int flags = default_section_type_flags (decl, name, reloc);
34360 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
34361 flags |= SECTION_ARM_PURECODE;
34363 return flags;
34366 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34368 static void
34369 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
34370 rtx op0, rtx op1,
34371 rtx *quot_p, rtx *rem_p)
34373 if (mode == SImode)
34374 gcc_assert (!TARGET_IDIV);
34376 scalar_int_mode libval_mode
34377 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode)).require ();
34379 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34380 libval_mode, op0, mode, op1, mode);
34382 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34383 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34384 GET_MODE_SIZE (mode));
34386 gcc_assert (quotient);
34387 gcc_assert (remainder);
34389 *quot_p = quotient;
34390 *rem_p = remainder;
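/* For example, for SImode operands the call returns a DImode value in which
   the quotient occupies the first SImode word and the remainder the word at
   byte offset GET_MODE_SIZE (SImode); the two subregs above simply pick
   those halves apart, mirroring the {quotient, remainder} return convention
   of the __aeabi_*divmod helpers.  */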
34393 /* This function checks for the availability of the coprocessor builtin passed
34394 in BUILTIN for the current target. Returns true if it is available and
34395 false otherwise. If a BUILTIN is passed for which this function has not
34396 been implemented it will cause an exception. */
34398 bool
34399 arm_coproc_builtin_available (enum unspecv builtin)
34401 /* None of these builtins are available in Thumb mode if the target only
34402 supports Thumb-1. */
34403 if (TARGET_THUMB1)
34404 return false;
34406 switch (builtin)
34408 case VUNSPEC_CDP:
34409 case VUNSPEC_LDC:
34410 case VUNSPEC_LDCL:
34411 case VUNSPEC_STC:
34412 case VUNSPEC_STCL:
34413 case VUNSPEC_MCR:
34414 case VUNSPEC_MRC:
34415 if (arm_arch4)
34416 return true;
34417 break;
34418 case VUNSPEC_CDP2:
34419 case VUNSPEC_LDC2:
34420 case VUNSPEC_LDC2L:
34421 case VUNSPEC_STC2:
34422 case VUNSPEC_STC2L:
34423 case VUNSPEC_MCR2:
34424 case VUNSPEC_MRC2:
34425 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34426 ARMv8-{A,M}. */
34427 if (arm_arch5t)
34428 return true;
34429 break;
34430 case VUNSPEC_MCRR:
34431 case VUNSPEC_MRRC:
34432 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34433 ARMv8-{A,M}. */
34434 if (arm_arch6 || arm_arch5te)
34435 return true;
34436 break;
34437 case VUNSPEC_MCRR2:
34438 case VUNSPEC_MRRC2:
34439 if (arm_arch6)
34440 return true;
34441 break;
34442 default:
34443 gcc_unreachable ();
34445 return false;
34448 /* This function returns true if OP is a valid memory operand for the ldc and
34449 stc coprocessor instructions and false otherwise. */
34451 bool
34452 arm_coproc_ldc_stc_legitimate_address (rtx op)
34454 HOST_WIDE_INT range;
34455 /* Has to be a memory operand. */
34456 if (!MEM_P (op))
34457 return false;
34459 op = XEXP (op, 0);
34461 /* We accept registers. */
34462 if (REG_P (op))
34463 return true;
34465 switch (GET_CODE (op))
34467 case PLUS:
34469 /* Or registers with an offset. */
34470 if (!REG_P (XEXP (op, 0)))
34471 return false;
34473 op = XEXP (op, 1);
34475 /* The offset must be an immediate though. */
34476 if (!CONST_INT_P (op))
34477 return false;
34479 range = INTVAL (op);
34481 /* Within the range of [-1020,1020]. */
34482 if (!IN_RANGE (range, -1020, 1020))
34483 return false;
34485 /* And a multiple of 4. */
34486 return (range % 4) == 0;
34488 case PRE_INC:
34489 case POST_INC:
34490 case PRE_DEC:
34491 case POST_DEC:
34492 return REG_P (XEXP (op, 0));
34493 default:
34494 gcc_unreachable ();
34496 return false;
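/* A few illustrative cases: [r0], [r1, #8] and [r1, #-1020] are accepted,
   while [r1, #6] (not a multiple of 4), [r1, #1024] (outside +/-1020) and
   [r1, r2] (register offset) are not.  Pre/post increment and decrement
   addresses are accepted whenever their base is a plain register.  */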
34499 /* Return true if OP is a valid memory operand for LDRD/STRD without any
34500 register overlap restrictions. Allow [base] and [base, imm] for now. */
34501 bool
34502 arm_ldrd_legitimate_address (rtx op)
34504 if (!MEM_P (op))
34505 return false;
34507 op = XEXP (op, 0);
34508 if (REG_P (op))
34509 return true;
34511 if (GET_CODE (op) != PLUS)
34512 return false;
34513 if (!REG_P (XEXP (op, 0)) || !CONST_INT_P (XEXP (op, 1)))
34514 return false;
34516 HOST_WIDE_INT val = INTVAL (XEXP (op, 1));
34518 if (TARGET_ARM)
34519 return IN_RANGE (val, -255, 255);
34520 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
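/* E.g. in ARM state an offset of 200 is accepted but 260 is outside the
   +/-255 range; otherwise (Thumb-2) 260 is fine, 258 is rejected for not
   being a multiple of 4, and 1024 exceeds the +/-1020 range.  (Illustrative
   values only.)  */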
34523 /* Return the diagnostic message string if conversion from FROMTYPE to
34524 TOTYPE is not allowed, NULL otherwise. */
34526 static const char *
34527 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34529 if (element_mode (fromtype) != element_mode (totype))
34531 /* Do not allow conversions to/from BFmode scalar types.  */
34532 if (TYPE_MODE (fromtype) == BFmode)
34533 return N_("invalid conversion from type %<bfloat16_t%>");
34534 if (TYPE_MODE (totype) == BFmode)
34535 return N_("invalid conversion to type %<bfloat16_t%>");
34538 /* Conversion allowed. */
34539 return NULL;
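/* For instance, given a bfloat16_t value b, an implicit conversion such as
   "float f = b;" is rejected with "invalid conversion from type bfloat16_t",
   and the reverse assignment with the corresponding "conversion to" message,
   whereas conversions whose element modes already match are allowed.  */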
34542 /* Return the diagnostic message string if the unary operation OP is
34543 not permitted on TYPE, NULL otherwise. */
34545 static const char *
34546 arm_invalid_unary_op (int op, const_tree type)
34548 /* Reject all single-operand operations on BFmode except for &. */
34549 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34550 return N_("operation not permitted on type %<bfloat16_t%>");
34552 /* Operation allowed. */
34553 return NULL;
34556 /* Return the diagnostic message string if the binary operation OP is
34557 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34559 static const char *
34560 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34561 const_tree type2)
34563 /* Reject all 2-operand operations on BFmode. */
34564 if (element_mode (type1) == BFmode
34565 || element_mode (type2) == BFmode)
34566 return N_("operation not permitted on type %<bfloat16_t%>");
34568 /* Operation allowed. */
34569 return NULL;
34572 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34574 In VFPv1, VFP registers could only be accessed in the mode they were
34575 set, so subregs would be invalid there. However, we don't support
34576 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34578 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34579 VFP registers in little-endian order. We can't describe that accurately to
34580 GCC, so avoid taking subregs of such values.
34582 The only exception is going from a 128-bit to a 64-bit type. In that
34583 case the data layout happens to be consistent for big-endian, so we
34584 explicitly allow that case. */
34586 static bool
34587 arm_can_change_mode_class (machine_mode from, machine_mode to,
34588 reg_class_t rclass)
34590 if (TARGET_BIG_END
34591 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34592 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34593 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34594 && reg_classes_intersect_p (VFP_REGS, rclass))
34595 return false;
34596 return true;
34599 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34600 strcpy from constants will be faster. */
34602 static HOST_WIDE_INT
34603 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34605 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34606 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34607 return MAX (align, BITS_PER_WORD * factor);
34608 return align;
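/* E.g. when not optimising for size, a string constant that would otherwise
   be byte-aligned is raised to at least 32-bit (BITS_PER_WORD) alignment
   here, and to 64-bit when tuning for XScale in ARM state, so strcpy-style
   copies from it can use wider aligned loads.  */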
34611 /* Emit a speculation barrier on target architectures that do not have
34612 DSB/ISB directly. Such systems probably don't need a barrier
34613 themselves, but if the code is ever run on a later architecture, it
34614 might become a problem. */
34615 void
34616 arm_emit_speculation_barrier_function ()
34618 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34621 /* Have we recorded an explicit access to the Q bit of APSR?  */
34622 bool
34623 arm_q_bit_access (void)
34625 if (cfun && cfun->decl)
34626 return lookup_attribute ("acle qbit",
34627 DECL_ATTRIBUTES (cfun->decl));
34628 return true;
34631 /* Have we recorded an explicit access to the GE bits of PSTATE?  */
34632 bool
34633 arm_ge_bits_access (void)
34635 if (cfun && cfun->decl)
34636 return lookup_attribute ("acle gebits",
34637 DECL_ATTRIBUTES (cfun->decl));
34638 return true;
34641 /* NULL if insn INSN is valid within a low-overhead loop.
34642 Otherwise return why doloop cannot be applied. */
34644 static const char *
34645 arm_invalid_within_doloop (const rtx_insn *insn)
34647 if (!TARGET_HAVE_LOB)
34648 return default_invalid_within_doloop (insn);
34650 if (CALL_P (insn))
34651 return "Function call in the loop.";
34653 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34654 return "LR is used inside loop.";
34656 return NULL;
34659 bool
34660 arm_target_bb_ok_for_lob (basic_block bb)
34662 /* Make sure the basic block is a simple latch whose single predecessor
34663 and single successor are the body of the loop itself.
34664 Only simple loops with a single basic block as the body are supported
34665 for low-overhead loops; this makes sure that the LE target is above the
34666 LE instruction in the generated code. */
34667 return (single_succ_p (bb)
34668 && single_pred_p (bb)
34669 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src);
34672 /* Utility function: Given a VCTP or a VCTP_M insn, return the number of MVE
34673 lanes based on the machine mode being used. */
34675 static int
34676 arm_mve_get_vctp_lanes (rtx_insn *insn)
34678 rtx insn_set = single_set (insn);
34679 if (insn_set
34680 && GET_CODE (SET_SRC (insn_set)) == UNSPEC
34681 && (XINT (SET_SRC (insn_set), 1) == VCTP
34682 || XINT (SET_SRC (insn_set), 1) == VCTP_M))
34684 machine_mode mode = GET_MODE (SET_SRC (insn_set));
34685 return ((VECTOR_MODE_P (mode) && VALID_MVE_PRED_MODE (mode))
34686 ? GET_MODE_NUNITS (mode) : 0);
34688 return 0;
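/* For instance, a vctp32 whose predicate result has V4BImode reports 4
   lanes and a vctp8 producing a V16BImode predicate reports 16, while any
   insn that is not a single_set VCTP/VCTP_M unspec reports 0.  */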
34691 enum arm_dl_usage_type { DL_USAGE_ANY = 0,
34692 DL_USAGE_READ = 1,
34693 DL_USAGE_WRITE = 2 };
34695 /* Check if INSN requires the use of the VPR reg; if it does, return the
34696 sub-rtx of the VPR reg. The TYPE argument controls whether
34697 this function should:
34698 * For TYPE == DL_USAGE_ANY, check all operands, including the OUT operands,
34699 and return the first occurrence of the VPR reg.
34700 * For TYPE == DL_USAGE_READ, only check the input operands.
34701 * For TYPE == DL_USAGE_WRITE, only check the output operands.
34702 (INOUT operands are considered both as input and output operands)
34704 static rtx
34705 arm_get_required_vpr_reg (rtx_insn *insn,
34706 arm_dl_usage_type type = DL_USAGE_ANY)
34708 gcc_assert (type < 3);
34709 if (!NONJUMP_INSN_P (insn))
34710 return NULL_RTX;
34712 bool requires_vpr;
34713 extract_constrain_insn (insn);
34714 int n_operands = recog_data.n_operands;
34715 if (recog_data.n_alternatives == 0)
34716 return NULL_RTX;
34718 /* Fill in recog_op_alt with information about the constraints of
34719 this insn. */
34720 preprocess_constraints (insn);
34722 for (int op = 0; op < n_operands; op++)
34724 requires_vpr = true;
34725 if (type == DL_USAGE_READ
34726 && recog_data.operand_type[op] == OP_OUT)
34727 continue;
34728 else if (type == DL_USAGE_WRITE
34729 && recog_data.operand_type[op] == OP_IN)
34730 continue;
34732 /* Iterate through alternatives of operand "op" in recog_op_alt and
34733 identify if the operand is required to be the VPR. */
34734 for (int alt = 0; alt < recog_data.n_alternatives; alt++)
34736 const operand_alternative *op_alt
34737 = &recog_op_alt[alt * n_operands];
34738 /* Fetch the reg_class for each entry and check it against the
34739 VPR_REG reg_class. */
34740 if (alternative_class (op_alt, op) != VPR_REG)
34741 requires_vpr = false;
34743 /* If all alternatives of the insn require the VPR reg for this operand,
34744 it means that either this is a VPR-generating instruction, like a vctp,
34745 vcmp, etc., or it is a VPT-predicated instruction.  Return the subrtx
34746 of the VPR reg operand. */
34747 if (requires_vpr)
34748 return recog_data.operand[op];
34750 return NULL_RTX;
34753 /* Wrapper function of arm_get_required_vpr_reg with TYPE == DL_USAGE_READ,
34754 so return the VPR only if it is an input operand to the insn. */
34756 static rtx
34757 arm_get_required_vpr_reg_param (rtx_insn *insn)
34759 return arm_get_required_vpr_reg (insn, DL_USAGE_READ);
34762 /* Wrapper function of arm_get_required_vpr_reg with TYPE == DL_USAGE_WRITE,
34763 so return the VPR only if it is the return value, an output of, or is
34764 clobbered by the insn. */
34766 static rtx
34767 arm_get_required_vpr_reg_ret_val (rtx_insn *insn)
34769 return arm_get_required_vpr_reg (insn, DL_USAGE_WRITE);
34772 /* Return the first VCTP instruction in BB, if it exists, or NULL otherwise. */
34774 static rtx_insn *
34775 arm_mve_get_loop_vctp (basic_block bb)
34777 rtx_insn *insn = BB_HEAD (bb);
34779 /* Now scan through all the instruction patterns and pick out the VCTP
34780 instruction. We require arm_get_required_vpr_reg_param to be false
34781 to make sure we pick up a VCTP, rather than a VCTP_M. */
34782 FOR_BB_INSNS (bb, insn)
34783 if (NONDEBUG_INSN_P (insn))
34784 if (arm_get_required_vpr_reg_ret_val (insn)
34785 && (arm_mve_get_vctp_lanes (insn) != 0)
34786 && !arm_get_required_vpr_reg_param (insn))
34787 return insn;
34788 return NULL;
34791 /* Return true if INSN is a MVE instruction that is VPT-predicable and is
34792 predicated on VPR_REG. */
34794 static bool
34795 arm_mve_insn_predicated_by (rtx_insn *insn, rtx vpr_reg)
34797 rtx insn_vpr_reg_operand = (MVE_VPT_PREDICATED_INSN_P (insn)
34798 ? arm_get_required_vpr_reg_param (insn)
34799 : NULL_RTX);
34800 return (insn_vpr_reg_operand
34801 && rtx_equal_p (vpr_reg, insn_vpr_reg_operand));
34804 /* Utility function to identify if INSN is an MVE instruction that performs
34805 some across lane operation (and as a result does not align with normal
34806 lane predication rules).  All such instructions give only one scalar
34807 output, except for vshlcq which gives a PARALLEL of a vector and a scalar
34808 (one vector result and one carry output). */
34810 static bool
34811 arm_mve_across_lane_insn_p (rtx_insn* insn)
34813 df_ref insn_defs = NULL;
34814 if (!MVE_VPT_PREDICABLE_INSN_P (insn))
34815 return false;
34817 FOR_EACH_INSN_DEF (insn_defs, insn)
34818 if (!VALID_MVE_MODE (GET_MODE (DF_REF_REG (insn_defs)))
34819 && !arm_get_required_vpr_reg_ret_val (insn))
34820 return true;
34822 return false;
34825 /* Utility function to identify if INSN is an MVE load or store instruction.
34826 * For TYPE == DL_USAGE_ANY, check all operands. If the function returns
34827 true, INSN is a load or a store insn.
34828 * For TYPE == DL_USAGE_READ, only check the input operands. If the
34829 function returns true, INSN is a load insn.
34830 * For TYPE == DL_USAGE_WRITE, only check the output operands. If the
34831 function returns true, INSN is a store insn. */
34833 static bool
34834 arm_mve_load_store_insn_p (rtx_insn* insn,
34835 arm_dl_usage_type type = DL_USAGE_ANY)
34837 gcc_assert (type < 3);
34838 int n_operands = recog_data.n_operands;
34839 extract_insn (insn);
34841 for (int op = 0; op < n_operands; op++)
34843 if (type == DL_USAGE_READ && recog_data.operand_type[op] == OP_OUT)
34844 continue;
34845 else if (type == DL_USAGE_WRITE && recog_data.operand_type[op] == OP_IN)
34846 continue;
34847 if (mve_memory_operand (recog_data.operand[op],
34848 GET_MODE (recog_data.operand[op])))
34849 return true;
34851 return false;
34854 /* Return TRUE if INSN is validated for implicit predication by how its outputs
34855 are used.
34857 If INSN is an MVE operation across lanes that is not predicated by
34858 VCTP_VPR_GENERATED, it cannot be validated by the use of its outputs.
34860 Any other INSN is safe to implicit predicate if we don't use its outputs
34861 outside the loop. The instructions that use this INSN's outputs will be
34862 validated as we go through the analysis. */
34864 static bool
34865 arm_mve_impl_pred_on_outputs_p (rtx_insn *insn, rtx vctp_vpr_generated)
34867 /* Reject any unpredicated across lane operation. */
34868 if (!arm_mve_insn_predicated_by (insn, vctp_vpr_generated)
34869 && arm_mve_across_lane_insn_p (insn))
34870 return false;
34872 /* Next, scan forward to the various USEs of the DEFs in this insn. */
34873 df_ref insn_def = NULL;
34874 basic_block insn_bb = BLOCK_FOR_INSN (insn);
34875 FOR_EACH_INSN_DEF (insn_def, insn)
34877 for (df_ref use = DF_REG_USE_CHAIN (DF_REF_REGNO (insn_def));
34878 use;
34879 use = DF_REF_NEXT_REG (use))
34881 rtx_insn *next_use_insn = DF_REF_INSN (use);
34882 if (!INSN_P (next_use_insn) || DEBUG_INSN_P (next_use_insn))
34883 continue;
34885 if (insn_bb != BLOCK_FOR_INSN (next_use_insn))
34886 return false;
34889 return true;
34893 /* Return the prevailing definition of OP before CUR_INSN in the same
34894 basic block as CUR_INSN if one exists; return NULL otherwise. */
34896 static rtx_insn*
34897 arm_last_vect_def_insn (rtx op, rtx_insn *cur_insn)
34899 if (!REG_P (op)
34900 || !BLOCK_FOR_INSN (cur_insn))
34901 return NULL;
34903 df_ref def_insns;
34904 rtx_insn *last_def = NULL;
34905 for (def_insns = DF_REG_DEF_CHAIN (REGNO (op));
34906 def_insns;
34907 def_insns = DF_REF_NEXT_REG (def_insns))
34909 rtx_insn *def_insn = DF_REF_INSN (def_insns);
34910 /* Definition not in the loop body or after the current insn. */
34911 if (DF_REF_BB (def_insns) != BLOCK_FOR_INSN (cur_insn)
34912 || INSN_UID (def_insn) >= INSN_UID (cur_insn))
34913 continue;
34915 if (!last_def || INSN_UID (def_insn) > INSN_UID (last_def))
34916 last_def = def_insn;
34918 return last_def;
34922 /* This function returns TRUE if we can validate the implicit predication of
34923 INSN_IN with VCTP_VPR_GENERATED based on the definition of the instruction's
34924 input operands.
34926 If INSN_IN is an MVE operation across lanes, then all of its MVE vector
34927 operands must have their tail-predicated lanes zeroed.  We keep track of any
34928 instructions that define vector operands for which this is true in
34929 PROPS_ZERO_SET.
34931 For any other INSN_IN, all of its operands must be defined inside the loop
34932 body by an instruction that comes before INSN_IN and that is not
34933 an MVE load predicated by a different VPR.  These instructions have all been
34934 validated for explicit or implicit predication.
34937 static bool
34938 arm_mve_impl_pred_on_inputs_p (vec <rtx_insn *> *props_zero_set,
34939 rtx_insn *insn_in, rtx vctp_vpr_generated)
34941 /* If all inputs come from instructions that are explicitly or
34942 implicitly predicated by the same predicate then it is safe to
34943 implicitly predicate this instruction. */
34944 df_ref insn_uses = NULL;
34945 bool across_lane = arm_mve_across_lane_insn_p (insn_in);
34946 FOR_EACH_INSN_USE (insn_uses, insn_in)
34948 rtx op = DF_REF_REG (insn_uses);
34949 rtx_insn *def_insn = arm_last_vect_def_insn (op, insn_in);
34950 if (across_lane)
34952 if (!VALID_MVE_MODE (GET_MODE (op)))
34953 continue;
34954 if (!def_insn || !props_zero_set->contains (def_insn))
34955 return false;
34957 continue;
34960 if (!def_insn
34961 || (!arm_mve_insn_predicated_by (def_insn, vctp_vpr_generated)
34962 && arm_mve_load_store_insn_p (def_insn, DL_USAGE_READ)))
34963 return false;
34966 return true;
34970 /* Determine whether INSN_IN is safe to implicitly predicate based on the type
34971 of instruction and where needed the definition of its inputs and the uses of
34972 its outputs.
34973 Return TRUE if it is safe to implicitly predicate and FALSE otherwise.
34975 * If INSN_IN is a store, then it is always unsafe to implicitly predicate it.
34976 * If INSN_IN is a load, only reject implicit predication if its uses
34977 directly invalidate it.
34978 * If INSN_IN operates across vector lanes and does not have the
34979 "mve_safe_imp_xlane_pred" attribute, then it is always unsafe to implicitly
34980 predicate.
34981 * If INSN_IN operates on Floating Point elements and we are not compiling
34982 with -Ofast, then it is unsafe to implicitly predicate it as we may be
34983 changing exception and cumulative bits behaviour.
34984 * If INSN_IN is a VCTP instruction, then it is safe to implicitly predicate,
34985 but instructions that use this predicate will need to be checked
34986 just like any other UNPREDICATED MVE instruction.
34987 * Otherwise check if INSN_IN's inputs or uses of outputs can validate its
34988 implicit predication.
34990 * If all inputs come from instructions that are explicitly or implicitly
34991 predicated by the same predicate then it is safe to implicitly predicate
34992 this instruction.
34993 * If INSN_IN is an operation across lanes with the "mve_safe_imp_xlane_pred"
34994 attribute, then all its operands must have zeroed falsely predicated tail
34995 lanes.
34997 * Otherwise, check if the implicit predication of INSN_IN can be validated
34998 based on its inputs, and if not check whether it can be validated based on
34999 how its outputs are used. */
35001 static bool
35002 arm_mve_impl_predicated_p (vec <rtx_insn *> *props_zero_set,
35003 rtx_insn *insn_in, rtx vctp_vpr_generated)
35006 /* If INSN_IN is a store, then it is always unsafe to implicitly
35007 predicate it. */
35008 if (arm_mve_load_store_insn_p (insn_in, DL_USAGE_WRITE))
35009 return false;
35011 /* If INSN_IN is a load, only reject implicit predication if its uses
35012 directly invalidate it. */
35013 if (arm_mve_load_store_insn_p (insn_in, DL_USAGE_READ))
35015 if (!arm_mve_impl_pred_on_outputs_p (insn_in, vctp_vpr_generated))
35016 return false;
35017 return true;
35020 /* If INSN_IN operates across vector lanes and does not have the
35021 "mve_safe_imp_xlane_pred" attribute, then it is always unsafe to implicitly
35022 predicate. */
35023 if (arm_mve_across_lane_insn_p (insn_in)
35024 && (get_attr_mve_safe_imp_xlane_pred (insn_in)
35025 != MVE_SAFE_IMP_XLANE_PRED_YES))
35026 return false;
35028 /* If INSN_IN operates on Floating Point elements and we are not compiling
35029 with -Ofast, then it is unsafe to implicitly predicate it as we may be
35030 changing exception and cumulative bits behaviour. */
35031 if (!flag_unsafe_math_optimizations
35032 && flag_trapping_math
35033 && MVE_VPT_UNPREDICATED_INSN_P (insn_in))
35035 df_ref def;
35036 FOR_EACH_INSN_DEF (def, insn_in)
35037 if (DF_REF_TYPE (def) == DF_REF_REG_DEF
35038 && FLOAT_MODE_P (GET_MODE (DF_REF_REG (def))))
35039 return false;
35040 FOR_EACH_INSN_USE (def, insn_in)
35041 if (DF_REF_TYPE (def) == DF_REF_REG_DEF
35042 && FLOAT_MODE_P (GET_MODE (DF_REF_REG (def))))
35043 return false;
35046 /* If INSN_IN is a VCTP instruction, then it is safe to implicitly predicate,
35047 but instructions that use this predicate will need to be checked
35048 just like any other UNPREDICATED MVE instruction. */
35049 if (arm_get_required_vpr_reg_ret_val (insn_in)
35050 && (arm_mve_get_vctp_lanes (insn_in) != 0))
35051 return true;
35053 /* Otherwise, check if the implicit predication of INSN_IN can be validated
35054 based on its inputs, and if not check whether it can be validated based on
35055 how its outputs are used. */
35056 return (arm_mve_impl_pred_on_inputs_p (props_zero_set, insn_in, vctp_vpr_generated)
35057 || arm_mve_impl_pred_on_outputs_p (insn_in, vctp_vpr_generated));
35060 /* Helper function to `arm_mve_dlstp_check_inc_counter` and to
35061 `arm_mve_dlstp_check_dec_counter`. In the situations where the loop counter
35062 is incrementing by 1 or decrementing by 1 in each iteration, ensure that the
35063 number of iterations, the value of REG, going into the loop, was calculated as
35065 REG = (N + [1, VCTP_STEP - 1]) / VCTP_STEP
35067 where N is equivalent to the VCTP_REG.
35070 static bool
35071 arm_mve_check_reg_origin_is_num_elems (loop *loop, rtx reg, rtx vctp_step,
35072 rtx vctp_reg)
35074 df_ref counter_max_last_def = NULL;
35076 /* More than one reaching definition. */
35077 if (DF_REG_DEF_COUNT (REGNO (reg)) > 2)
35078 return false;
35080 /* Look for a single definition of REG going into the loop.  The DEF_CHAIN will
35081 have at least two values, as this is a loop induction variable that is
35082 defined outside the loop. */
35083 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
35084 def;
35085 def = DF_REF_NEXT_REG (def))
35087 /* Skip the update inside the loop, this has already been checked by the
35088 iv_analyze call earlier. */
35089 if (DF_REF_BB (def) == loop->header)
35090 continue;
35092 counter_max_last_def = def;
35093 break;
35096 if (!counter_max_last_def)
35097 return false;
35099 rtx counter_max_last_set = single_set (DF_REF_INSN (counter_max_last_def));
35101 if (!counter_max_last_set)
35102 return false;
35104 /* If we encounter a simple SET from a REG, follow it through. */
35105 if (REG_P (SET_SRC (counter_max_last_set)))
35107 if (DF_REG_DEF_COUNT (REGNO (SET_SRC (counter_max_last_set))) != 1)
35108 return false;
35110 counter_max_last_def
35111 = DF_REG_DEF_CHAIN (REGNO (SET_SRC (counter_max_last_set)));
35112 counter_max_last_set
35113 = single_set (DF_REF_INSN (counter_max_last_def));
35115 if (!counter_max_last_set)
35116 return false;
35119 /* We are looking for:
35120 COUNTER_MAX_LAST_SET = (N + VCTP_STEP - 1) / VCTP_STEP.
35121 We currently only support the unsigned VCTP_OP case. */
35122 rtx division = SET_SRC (counter_max_last_set);
35123 if (GET_CODE (division) != LSHIFTRT)
35124 return false;
35126 /* Now check that we are dividing by VCTP_STEP, i.e. the number of lanes. */
35127 rtx divisor = XEXP (division, 1);
35128 unsigned vctp_step_cst = abs_hwi (INTVAL (vctp_step));
35129 if (!CONST_INT_P (divisor)
35130 || (1U << INTVAL (divisor) != vctp_step_cst))
35131 return false;
35133 rtx dividend = XEXP (division, 0);
35134 if (!REG_P (dividend))
35135 /* Subreg? */
35136 return false;
35138 /* For now, only support the simple case: this only works for unsigned N;
35139 any signed N will have further computations to deal with overflow. */
35140 if (DF_REG_DEF_COUNT (REGNO (dividend)) != 1)
35141 return false;
35143 rtx_insn *dividend_insn = DF_REF_INSN (DF_REG_DEF_CHAIN (REGNO (dividend)));
35144 rtx dividend_op = single_set (dividend_insn);
35145 if (!dividend_op
35146 && GET_CODE (SET_SRC (dividend_op)) != PLUS)
35147 return false;
35149 /* Check if PLUS_OP is (VCTP_OP + VAL), where VAL = [1, VCTP_STEP - 1]. */
35150 rtx plus_op = SET_SRC (dividend_op);
35151 if (!REG_P (XEXP (plus_op, 0))
35152 || !CONST_INT_P (XEXP (plus_op, 1))
35153 || !IN_RANGE (INTVAL (XEXP (plus_op, 1)), 1, vctp_step_cst - 1))
35154 return false;
35156 /* VCTP_REG may have been copied before entering the loop; let's see if we can
35157 trace such a copy back. If we have more than one reaching definition then
35158 bail out as analysis will be too difficult. */
35159 if (DF_REG_DEF_COUNT (REGNO (vctp_reg)) > 2)
35160 return false;
35162 /* Look for the definition of N. */
35163 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (vctp_reg));
35164 def;
35165 def = DF_REF_NEXT_REG (def))
35167 if (DF_REF_BB (def) == loop->header)
35168 continue;
35169 rtx set = single_set (DF_REF_INSN (def));
35170 if (set
35171 && REG_P (SET_SRC (set))
35172 && !HARD_REGISTER_P (SET_SRC (set)))
35173 vctp_reg = SET_SRC (set);
35176 return rtx_equal_p (vctp_reg, XEXP (plus_op, 0));
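/* Worked example (purely illustrative): for a vctp32 loop, VCTP_STEP is 4,
   so for N = 10 elements the iteration count fed into the loop is expected
   to be computed as (10 + 3) >> 2 == 3, i.e. a PLUS of a value in [1, 3]
   followed by an LSHIFTRT by 2, which is exactly the shape matched above.  */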
35179 /* If we have identified the loop to have an incrementing counter, we need to
35180 make sure that it increments by 1 and that the loop is structured correctly:
35181 * The counter starts from 0
35182 * The counter terminates at (num_of_elem + num_of_lanes - 1) / num_of_lanes
35183 * The vctp insn uses a reg that decrements appropriately in each iteration.
35186 static rtx_insn*
35187 arm_mve_dlstp_check_inc_counter (loop *loop, rtx_insn* vctp_insn,
35188 rtx condconst, rtx condcount)
35190 rtx vctp_reg = XVECEXP (XEXP (PATTERN (vctp_insn), 1), 0, 0);
35191 /* The loop latch has to be empty. When compiling all the known MVE LoLs in
35192 user applications, none of those with incrementing counters had any real
35193 insns in the loop latch. As such, this function has only been tested with
35194 an empty latch and may misbehave or ICE if we somehow get here with an
35195 increment in the latch, so, for correctness, error out early. */
35196 if (!empty_block_p (loop->latch))
35197 return NULL;
35199 class rtx_iv vctp_reg_iv;
35200 /* For loops of DLSTP_TYPE_B, the loop counter is independent of the decrement
35201 of the reg used in the vctp_insn. So run iv analysis on that reg. This
35202 has to succeed for such loops to be supported. */
35203 if (!iv_analyze (vctp_insn, as_a<scalar_int_mode> (GET_MODE (vctp_reg)),
35204 vctp_reg, &vctp_reg_iv))
35205 return NULL;
35207 /* Extract the decrementnum of the vctp reg from the iv.  This decrementnum
35208 is the number of lanes/elements it decrements from the remaining number of
35209 lanes/elements to process in the loop.  For this reason it is always a
35210 negative number, but to simplify later checks we use its absolute value. */
35211 HOST_WIDE_INT decrementnum = INTVAL (vctp_reg_iv.step);
35212 if (decrementnum >= 0)
35213 return NULL;
35214 decrementnum = abs_hwi (decrementnum);
35216 /* Find where both of those are modified in the loop header bb. */
35217 df_ref condcount_reg_set_df = df_bb_regno_only_def_find (loop->header,
35218 REGNO (condcount));
35219 df_ref vctp_reg_set_df = df_bb_regno_only_def_find (loop->header,
35220 REGNO (vctp_reg));
35221 if (!condcount_reg_set_df || !vctp_reg_set_df)
35222 return NULL;
35223 rtx condcount_reg_set = single_set (DF_REF_INSN (condcount_reg_set_df));
35224 rtx vctp_reg_set = single_set (DF_REF_INSN (vctp_reg_set_df));
35225 if (!condcount_reg_set || !vctp_reg_set)
35226 return NULL;
35228 /* Ensure the modification of the vctp reg from df is consistent with
35229 the iv and the number of lanes on the vctp insn. */
35230 if (GET_CODE (SET_SRC (vctp_reg_set)) != PLUS
35231 || !REG_P (SET_DEST (vctp_reg_set))
35232 || !REG_P (XEXP (SET_SRC (vctp_reg_set), 0))
35233 || REGNO (SET_DEST (vctp_reg_set))
35234 != REGNO (XEXP (SET_SRC (vctp_reg_set), 0))
35235 || !CONST_INT_P (XEXP (SET_SRC (vctp_reg_set), 1))
35236 || INTVAL (XEXP (SET_SRC (vctp_reg_set), 1)) >= 0
35237 || decrementnum != abs_hwi (INTVAL (XEXP (SET_SRC (vctp_reg_set), 1)))
35238 || decrementnum != arm_mve_get_vctp_lanes (vctp_insn))
35239 return NULL;
35241 if (REG_P (condcount) && REG_P (condconst))
35243 /* First we need to prove that the loop is going 0..condconst with an
35244 inc of 1 in each iteration. */
35245 if (GET_CODE (SET_SRC (condcount_reg_set)) == PLUS
35246 && CONST_INT_P (XEXP (SET_SRC (condcount_reg_set), 1))
35247 && INTVAL (XEXP (SET_SRC (condcount_reg_set), 1)) == 1)
35249 rtx counter_reg = SET_DEST (condcount_reg_set);
35250 /* Check that the counter did indeed start from zero. */
35251 df_ref this_set = DF_REG_DEF_CHAIN (REGNO (counter_reg));
35252 if (!this_set)
35253 return NULL;
35254 df_ref last_set_def = DF_REF_NEXT_REG (this_set);
35255 if (!last_set_def)
35256 return NULL;
35257 rtx_insn* last_set_insn = DF_REF_INSN (last_set_def);
35258 rtx last_set = single_set (last_set_insn);
35259 if (!last_set)
35260 return NULL;
35261 rtx counter_orig_set;
35262 counter_orig_set = SET_SRC (last_set);
35263 if (!CONST_INT_P (counter_orig_set)
35264 || (INTVAL (counter_orig_set) != 0))
35265 return NULL;
35266 /* And finally check that the target value of the counter,
35267 condconst, is of the correct shape. */
35268 if (!arm_mve_check_reg_origin_is_num_elems (loop, condconst,
35269 vctp_reg_iv.step,
35270 vctp_reg))
35271 return NULL;
35273 else
35274 return NULL;
35276 else
35277 return NULL;
35279 /* Everything looks valid. */
35280 return vctp_insn;
35283 /* Helper function to 'arm_mve_dlstp_check_dec_counter' to make sure DEC_INSN
35284 is of the expected form:
35285 (set (reg a) (plus (reg a) (const_int)))
35286 where (reg a) is the same as CONDCOUNT.
35287 Return a rtx with the set if it is in the right format or NULL_RTX
35288 otherwise. */
35290 static rtx
35291 check_dec_insn (rtx_insn *dec_insn, rtx condcount)
35293 if (!NONDEBUG_INSN_P (dec_insn))
35294 return NULL_RTX;
35295 rtx dec_set = single_set (dec_insn);
35296 if (!dec_set
35297 || !REG_P (SET_DEST (dec_set))
35298 || GET_CODE (SET_SRC (dec_set)) != PLUS
35299 || !REG_P (XEXP (SET_SRC (dec_set), 0))
35300 || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
35301 || REGNO (SET_DEST (dec_set))
35302 != REGNO (XEXP (SET_SRC (dec_set), 0))
35303 || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
35304 return NULL_RTX;
35306 return dec_set;
35309 /* Helper function to `arm_mve_loop_valid_for_dlstp`. In the case of a
35310 counter that is decrementing, ensure that it is decrementing by the
35311 right amount in each iteration and that the target condition is what
35312 we expect. */
35314 static rtx_insn*
35315 arm_mve_dlstp_check_dec_counter (loop *loop, rtx_insn* vctp_insn,
35316 rtx condconst, rtx condcount)
35318 rtx vctp_reg = XVECEXP (XEXP (PATTERN (vctp_insn), 1), 0, 0);
35319 class rtx_iv vctp_reg_iv;
35320 HOST_WIDE_INT decrementnum;
35321 /* For decrementing loops of DLSTP_TYPE_A, the counter is usually present in the
35322 loop latch. Here we simply need to verify that this counter is the same
35323 reg that is also used in the vctp_insn and that it is not otherwise
35324 modified. */
35325 rtx dec_set = check_dec_insn (BB_END (loop->latch), condcount);
35326 /* If not in the loop latch, try to find the decrement in the loop header. */
35327 if (dec_set == NULL_RTX)
35329 df_ref temp = df_bb_regno_only_def_find (loop->header, REGNO (condcount));
35330 /* If we haven't been able to find the decrement, bail out. */
35331 if (!temp)
35332 return NULL;
35333 dec_set = check_dec_insn (DF_REF_INSN (temp), condcount);
35335 if (dec_set == NULL_RTX)
35336 return NULL;
35339 decrementnum = INTVAL (XEXP (SET_SRC (dec_set), 1));
35341 /* This decrementnum is the number of lanes/elements it decrements from the
35342 remaining number of lanes/elements to process in the loop; for this reason
35343 it is always a negative number, but to simplify later checks we use its
35344 absolute value. */
35345 if (decrementnum >= 0)
35346 return NULL;
35347 decrementnum = -decrementnum;
35349 /* If the decrementnum is a 1, then we need to look at the loop vctp_reg and
35350 verify that it also decrements correctly.
35351 Then, we need to establish that the starting value of the loop decrement
35352 originates from the starting value of the vctp decrement. */
35353 if (decrementnum == 1)
35355 class rtx_iv vctp_reg_iv, condcount_reg_iv;
35356 /* The loop counter is found to be independent of the decrement
35357 of the reg used in the vctp_insn, again. Ensure that IV analysis
35358 succeeds and check the step. */
35359 if (!iv_analyze (vctp_insn, as_a<scalar_int_mode> (GET_MODE (vctp_reg)),
35360 vctp_reg, &vctp_reg_iv))
35361 return NULL;
35362 /* Ensure it matches the number of lanes of the vctp instruction. */
35363 if (abs (INTVAL (vctp_reg_iv.step))
35364 != arm_mve_get_vctp_lanes (vctp_insn))
35365 return NULL;
35367 if (!arm_mve_check_reg_origin_is_num_elems (loop, condcount,
35368 vctp_reg_iv.step,
35369 vctp_reg))
35370 return NULL;
35372 /* If the decrements are the same, then the situation is simple: either they
35373 are also the same reg, which is safe, or they are different registers, in
35374 which case make sure that there is only a simple SET from one to the
35375 other inside the loop.  */
35376 else if (decrementnum == arm_mve_get_vctp_lanes (vctp_insn))
35378 if (REGNO (condcount) != REGNO (vctp_reg))
35380 /* It wasn't the same reg, but it could be behind a
35381 (set (vctp_reg) (condcount)), so instead find where
35382 the VCTP insn is DEF'd inside the loop. */
35383 rtx_insn *vctp_reg_insn
35384 = DF_REF_INSN (df_bb_regno_only_def_find (loop->header,
35385 REGNO (vctp_reg)));
35386 rtx vctp_reg_set = single_set (vctp_reg_insn);
35387 /* This must just be a simple SET from the condcount. */
35388 if (!vctp_reg_set
35389 || !REG_P (SET_DEST (vctp_reg_set))
35390 || !REG_P (SET_SRC (vctp_reg_set))
35391 || REGNO (SET_SRC (vctp_reg_set)) != REGNO (condcount))
35392 return NULL;
35395 else
35396 return NULL;
35398 /* We now only need to find out that the loop terminates with a LE
35399 zero condition. If condconst is a const_int, then this is easy.
35400 If it's a REG, look at the last condition+jump in a bb before
35401 the loop, because that usually will have a branch jumping over
35402 the loop header. */
35403 rtx_insn *jump_insn = BB_END (loop->header);
35404 if (CONST_INT_P (condconst)
35405 && !(INTVAL (condconst) == 0 && JUMP_P (jump_insn)
35406 && GET_CODE (XEXP (PATTERN (jump_insn), 1)) == IF_THEN_ELSE
35407 && (GET_CODE (XEXP (XEXP (PATTERN (jump_insn), 1), 0)) == NE
35408 || GET_CODE (XEXP (XEXP (PATTERN (jump_insn), 1), 0)) == GT)))
35409 return NULL;
35410 else if (REG_P (condconst))
35412 basic_block preheader_b = loop_preheader_edge (loop)->src;
35413 if (!single_pred_p (preheader_b))
35414 return NULL;
35415 basic_block pre_loop_bb = single_pred (preheader_b);
35417 rtx initial_compare = NULL_RTX;
35418 if (!(prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb))
35419 && INSN_P (prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb)))))
35420 return NULL;
35421 else
35422 initial_compare
35423 = single_set (prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb)));
35424 if (!(initial_compare
35425 && cc_register (SET_DEST (initial_compare), VOIDmode)
35426 && GET_CODE (SET_SRC (initial_compare)) == COMPARE
35427 && CONST_INT_P (XEXP (SET_SRC (initial_compare), 1))
35428 && INTVAL (XEXP (SET_SRC (initial_compare), 1)) == 0))
35429 return NULL;
35431 /* Usually this is a LE condition, but it can also just be a GT or an EQ
35432 condition (if the value is unsigned or the compiler knows it's not negative).  */
35433 rtx_insn *loop_jumpover = BB_END (pre_loop_bb);
35434 if (!(JUMP_P (loop_jumpover)
35435 && GET_CODE (XEXP (PATTERN (loop_jumpover), 1)) == IF_THEN_ELSE
35436 && (GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == LE
35437 || GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == GT
35438 || GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == EQ)))
35439 return NULL;
35442 /* Everything looks valid. */
35443 return vctp_insn;
35446 /* Function to check a loop's structure to see if it is a valid candidate for
35447 an MVE Tail Predicated Low-Overhead Loop. Returns the loop's VCTP_INSN if
35448 it is valid, or NULL if it isn't. */
35450 static rtx_insn*
35451 arm_mve_loop_valid_for_dlstp (loop *loop)
35453 /* Doloop can only be done "elementwise" with predicated dlstp/letp if it
35454 contains a VCTP on the number of elements processed by the loop.
35455 Find the VCTP predicate generation inside the loop body BB. */
35456 rtx_insn *vctp_insn = arm_mve_get_loop_vctp (loop->header);
35457 if (!vctp_insn)
35458 return NULL;
35460 /* We only support two loop forms for tail predication:
35461 DLSTP_TYPE_A) Loops of the form:
35462 int num_of_lanes = 128 / elem_size;
35463 while (num_of_elem > 0)
35465 p = vctp<size> (num_of_elem);
35466 num_of_elem -= num_of_lanes;
35468 DLSTP_TYPE_B) Loops of the form:
35469 int num_of_lanes = 128 / elem_size;
35470 int num_of_iters = (num_of_elem + num_of_lanes - 1) / num_of_lanes;
35471 for (i = 0; i < num_of_iters; i++)
35473 p = vctp<size> (num_of_elem);
35474 num_of_elem -= num_of_lanes;
35477 Then, depending on the type of loop above, we will need to do
35478 different sets of checks. */
35479 iv_analysis_loop_init (loop);
35481 /* In order to find out if the loop is of DLSTP_TYPE_A or DLSTP_TYPE_B above
35482 look for the loop counter: it will either be incrementing by one per
35483 iteration or it will be decrementing by num_of_lanes. We can find the
35484 loop counter in the condition at the end of the loop. */
35485 rtx_insn *loop_cond = prev_nonnote_nondebug_insn_bb (BB_END (loop->header));
35486 if (!(cc_register (XEXP (PATTERN (loop_cond), 0), VOIDmode)
35487 && GET_CODE (XEXP (PATTERN (loop_cond), 1)) == COMPARE))
35488 return NULL;
35490 /* The operands in the condition: Try to identify which one is the
35491 constant and which is the counter and run IV analysis on the latter. */
35492 rtx cond_arg_1 = XEXP (XEXP (PATTERN (loop_cond), 1), 0);
35493 rtx cond_arg_2 = XEXP (XEXP (PATTERN (loop_cond), 1), 1);
35495 rtx loop_cond_constant;
35496 rtx loop_counter;
35497 class rtx_iv cond_counter_iv, cond_temp_iv;
35499 if (CONST_INT_P (cond_arg_1))
35501 /* cond_arg_1 is the constant and cond_arg_2 is the counter. */
35502 loop_cond_constant = cond_arg_1;
35503 loop_counter = cond_arg_2;
35504 iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_2)),
35505 cond_arg_2, &cond_counter_iv);
35507 else if (CONST_INT_P (cond_arg_2))
35509 /* cond_arg_2 is the constant and cond_arg_1 is the counter. */
35510 loop_cond_constant = cond_arg_2;
35511 loop_counter = cond_arg_1;
35512 iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_1)),
35513 cond_arg_1, &cond_counter_iv);
35515 else if (REG_P (cond_arg_1) && REG_P (cond_arg_2))
35517 /* If both operands to the compare are REGs, we can safely
35518 run IV analysis on both and then determine which is the
35519 constant by looking at the step.
35520 First assume cond_arg_1 is the counter. */
35521 loop_counter = cond_arg_1;
35522 loop_cond_constant = cond_arg_2;
35523 iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_1)),
35524 cond_arg_1, &cond_counter_iv);
35525 iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_2)),
35526 cond_arg_2, &cond_temp_iv);
35528 /* Look at the steps and swap around the rtx's if needed. Error out if
35529 one of them cannot be identified as constant. */
35530 if (!CONST_INT_P (cond_counter_iv.step) || !CONST_INT_P (cond_temp_iv.step))
35531 return NULL;
35532 if (INTVAL (cond_counter_iv.step) != 0 && INTVAL (cond_temp_iv.step) != 0)
35533 return NULL;
35534 if (INTVAL (cond_counter_iv.step) == 0 && INTVAL (cond_temp_iv.step) != 0)
35536 loop_counter = cond_arg_2;
35537 loop_cond_constant = cond_arg_1;
35538 cond_counter_iv = cond_temp_iv;
35541 else
35542 return NULL;
35544 if (!REG_P (loop_counter))
35545 return NULL;
35546 if (!(REG_P (loop_cond_constant) || CONST_INT_P (loop_cond_constant)))
35547 return NULL;
35549 /* Now we have extracted the IV step of the loop counter, call the
35550 appropriate checking function. */
35551 if (INTVAL (cond_counter_iv.step) > 0)
35552 return arm_mve_dlstp_check_inc_counter (loop, vctp_insn,
35553 loop_cond_constant, loop_counter);
35554 else if (INTVAL (cond_counter_iv.step) < 0)
35555 return arm_mve_dlstp_check_dec_counter (loop, vctp_insn,
35556 loop_cond_constant, loop_counter);
35557 else
35558 return NULL;
35561 /* Predict whether the given loop in gimple will be transformed in the RTL
35562 doloop_optimize pass. It could be argued that turning large enough loops
35563 into low-overhead loops would not show a significant performance boost.
35564 However, in the case of tail predication we would still avoid using VPT/VPST
35565 instructions inside the loop, and in either case using low-overhead loops
35566 would not be detrimental, so we decided to not consider size, avoiding the
35567 need of a heuristic to determine what an appropriate size boundary is. */
35569 static bool
35570 arm_predict_doloop_p (struct loop *loop)
35572 gcc_assert (loop);
35573 /* On arm, targetm.can_use_doloop_p is actually
35574 can_use_doloop_if_innermost.  Ensure the loop is innermost,
35575 that it is valid as per arm_target_bb_ok_for_lob, and that the
35576 correct architecture flags are enabled. */
35577 if (!(TARGET_HAVE_LOB && optimize > 0))
35579 if (dump_file && (dump_flags & TDF_DETAILS))
35580 fprintf (dump_file, "Predict doloop failure due to"
35581 " target architecture or optimisation flags.\n");
35582 return false;
35584 else if (loop->inner != NULL)
35586 if (dump_file && (dump_flags & TDF_DETAILS))
35587 fprintf (dump_file, "Predict doloop failure due to"
35588 " loop nesting.\n");
35589 return false;
35591 else if (!arm_target_bb_ok_for_lob (loop->header->next_bb))
35593 if (dump_file && (dump_flags & TDF_DETAILS))
35594 fprintf (dump_file, "Predict doloop failure due to"
35595 " loop bb complexity.\n");
35596 return false;
35598 else
35600 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
35601 while (!gsi_end_p (gsi))
35603 if (is_gimple_call (gsi_stmt (gsi))
35604 && !gimple_call_builtin_p (gsi_stmt (gsi)))
35606 if (dump_file && (dump_flags & TDF_DETAILS))
35607 fprintf (dump_file, "Predict doloop failure due to"
35608 " call in loop.\n");
35609 return false;
35611 gsi_next (&gsi);
35615 return true;
35618 /* Implement targetm.loop_unroll_adjust. Use this to block unrolling of loops
35619 that may later be turned into MVE Tail Predicated Low Overhead Loops. The
35620 performance benefit of an MVE LoL is likely to be much higher than that of
35621 the unrolling. */
35623 unsigned
35624 arm_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
35626 if (TARGET_HAVE_MVE
35627 && arm_target_bb_ok_for_lob (loop->latch)
35628 && arm_mve_loop_valid_for_dlstp (loop))
35629 return 0;
35630 else
35631 return nunroll;
35634 /* Function to handle emitting a VPT-unpredicated version of a VPT-predicated
35635 insn to a sequence. */
35637 static bool
35638 arm_emit_mve_unpredicated_insn_to_seq (rtx_insn* insn)
35640 rtx insn_vpr_reg_operand = arm_get_required_vpr_reg_param (insn);
35641 int new_icode = get_attr_mve_unpredicated_insn (insn);
35642 if (!in_sequence_p ()
35643 || !MVE_VPT_PREDICATED_INSN_P (insn)
35644 || (!insn_vpr_reg_operand)
35645 || (!new_icode))
35646 return false;
35648 extract_insn (insn);
35649 rtx arr[8];
35650 int j = 0;
35652 /* When transforming a VPT-predicated instruction into its unpredicated
35653 equivalent we need to drop the VPR operand and we may need to also drop a
35654 merge "vuninit" input operand, depending on the instruction pattern. Here
35655 ensure that we have at most a two-operand difference between the two
35656 instructions. */
35657 int n_operands_diff
35658 = recog_data.n_operands - insn_data[new_icode].n_operands;
35659 if (!(n_operands_diff > 0 && n_operands_diff <= 2))
35660 return false;
35662 rtx move = NULL_RTX;
35663 /* Then, loop through the operands of the predicated
35664 instruction, and retain the ones that map to the
35665 unpredicated instruction. */
35666 for (int i = 0; i < recog_data.n_operands; i++)
35668 /* Ignore the VPR and, if needed, the vuninit
35669 operand. */
35670 if (insn_vpr_reg_operand == recog_data.operand[i])
35671 continue;
35672 if (n_operands_diff == 2
35673 && !strcmp (recog_data.constraints[i], "0"))
35675 move = gen_rtx_SET (arr[0], recog_data.operand[i]);
35676 arr[0] = recog_data.operand[i];
35678 else
35679 arr[j++] = recog_data.operand[i];
35682 /* Finally, emit the unpredicated instruction. */
35683 rtx_insn *new_insn;
35684 switch (j)
35686 case 1:
35687 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0]));
35688 break;
35689 case 2:
35690 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1]));
35691 break;
35692 case 3:
35693 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2]));
35694 break;
35695 case 4:
35696 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
35697 arr[3]));
35698 break;
35699 case 5:
35700 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
35701 arr[3], arr[4]));
35702 break;
35703 case 6:
35704 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
35705 arr[3], arr[4], arr[5]));
35706 break;
35707 case 7:
35708 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
35709 arr[3], arr[4], arr[5],
35710 arr[6]));
35711 break;
35712 default:
35713 gcc_unreachable ();
35715 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
35716 if (move)
35718 new_insn = emit_insn (move);
35719 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
35721 return true;
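/* Sketch of the effect (illustrative): a VPT-predicated insn such as the
   pattern behind "vaddt.i32 q0, q1, q2" is re-emitted through its
   unpredicated icode (a plain "vadd.i32"), dropping the VPR operand and,
   where present, handling the merge operand tied by the "0" constraint with
   an extra move.  Inside a dlstp/letp loop the tail predication is then
   provided by the loop itself rather than by each individual insn.  */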
35724 /* Return TRUE if INSN defines an MVE vector operand that has zeroed
35725 tail-predicated lanes.  This is true if either:
35726 * INSN is predicated by VCTP_VPR_GENERATED and the 'invalid lanes' operand
35727 is in the PROPS_ZERO_SET,
35728 * all MVE vector operands are in the PROPS_ZERO_SET
35731 static bool
35732 arm_mve_propagate_zero_pred_p (vec <rtx_insn *> *props_zero_set,
35733 rtx_insn *insn, rtx vctp_vpr_generated)
35735 if (arm_mve_load_store_insn_p (insn, DL_USAGE_READ))
35736 return true;
35737 if (arm_mve_load_store_insn_p (insn, DL_USAGE_WRITE))
35738 return false;
35740 int inactive_idx = -1;
35742 extract_insn (insn);
35743 /* If INSN is predicated by VCTP_VPR_GENERATED, then all tail-predicated
35744 lanes will keep the value that is in the 'invalid lanes' register which we
35745 identify by the "0" constraint, to ensure it is the same as the 'result'
35746 register of this instruction. */
35747 if (arm_mve_insn_predicated_by (insn, vctp_vpr_generated))
35749 for (int i = 0; i < recog_data.n_operands; i++)
35751 if (strcmp (recog_data.constraints[i], "0") == 0
35752 && VALID_MVE_MODE (GET_MODE (recog_data.operand[i])))
35754 inactive_idx = i;
35755 break;
35760 if (inactive_idx > 0)
35762 rtx op = recog_data.operand[inactive_idx];
35763 rtx_insn *def_insn = arm_last_vect_def_insn (op, insn);
35764 return def_insn != NULL_RTX && props_zero_set->contains (def_insn);
35767 /* If this instruction is not predicated by VCTP_VPR_GENERATED, then we must
35768 check that all vector operands have zeroed tail-predicated lanes, and that
35769 it has at least one vector operand. */
35770 bool at_least_one_vector = false;
35771 df_ref insn_uses;
35772 FOR_EACH_INSN_USE (insn_uses, insn)
35774 rtx reg = DF_REF_REG (insn_uses);
35775 if (!VALID_MVE_MODE (GET_MODE (reg)))
35776 continue;
35778 rtx_insn *def_insn = arm_last_vect_def_insn (reg, insn);
35779 if (def_insn && props_zero_set->contains (def_insn))
35780 at_least_one_vector |= true;
35781 else
35782 return false;
35785 return at_least_one_vector;
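/* For example (purely illustrative assembly), given

	vpst
	vldrwt.u32	q0, [r0]	@ inactive lanes of q0 are zeroed
	vpst
	vaddt.i32	q1, q0, q0	@ inactive lanes keep the old q1

   the predicated load always qualifies (which is why DL_USAGE_READ insns are
   accepted unconditionally above), while the predicated add qualifies only if
   the previous definition of its "0"-tied operand q1 is itself in
   PROPS_ZERO_SET.  */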
35789 /* Attempt to transform the contents of the loop basic block from VPT
35790 predicated insns into unpredicated insns for a dlstp/letp loop. Returns
35791 the number to decrement from the total number of elements each iteration.
35792 Returns 1 if tail predication cannot be performed, falling back to scalar
35793 low-overhead loops. */
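/* As a sketch of the intended transformation (illustrative assembly only,
   loop set-up omitted), a VPT-predicated low-overhead loop of the form

	loop:
	  vctp.32	r3
	  vpst
	  vldrwt.u32	q0, [r0], #16
	  vpst
	  vaddt.i32	q1, q1, q0
	  subs		r3, r3, #4
	  le		lr, loop

   becomes a tail-predicated dlstp/letp loop in which the element count is
   decremented implicitly and the MVE insns are emitted unpredicated:

	  dlstp.32	lr, r3
	loop:
	  vldrw.u32	q0, [r0], #16
	  vadd.i32	q1, q1, q0
	  letp		lr, loop  */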
35796 arm_attempt_dlstp_transform (rtx label)
35798 if (!dlstp_enabled)
35799 return 1;
35801 basic_block body = single_succ (BLOCK_FOR_INSN (label));
35803 /* Ensure that the bb is within a loop that has all required metadata. */
35804 if (!body->loop_father || !body->loop_father->header
35805 || !body->loop_father->simple_loop_desc)
35806 return 1;
35808 loop *loop = body->loop_father;
35809 /* Instruction that sets the predicate mask depending on how many elements
35810 are left to process. */
35811 rtx_insn *vctp_insn = arm_mve_loop_valid_for_dlstp (loop);
35812 if (!vctp_insn)
35813 return 1;
35815 gcc_assert (single_set (vctp_insn));
35817 rtx vctp_vpr_generated = single_set (vctp_insn);
35818 if (!vctp_vpr_generated)
35819 return 1;
35821 vctp_vpr_generated = SET_DEST (vctp_vpr_generated);
35823 if (!vctp_vpr_generated || !REG_P (vctp_vpr_generated)
35824 || !VALID_MVE_PRED_MODE (GET_MODE (vctp_vpr_generated)))
35825 return 1;
35827 /* decrementnum is already known to be valid at this point. */
35828 int decrementnum = arm_mve_get_vctp_lanes (vctp_insn);
35830 rtx_insn *insn = 0;
35831 rtx_insn *cur_insn = 0;
35832 rtx_insn *seq;
35833 auto_vec <rtx_insn *> props_zero_set;
35835 /* Scan through the insns in the loop bb and emit the transformed bb
35836 insns to a sequence. */
35837 start_sequence ();
35838 FOR_BB_INSNS (body, insn)
35840 if (GET_CODE (insn) == CODE_LABEL || NOTE_INSN_BASIC_BLOCK_P (insn))
35841 continue;
35842 else if (NOTE_P (insn))
35843 emit_note ((enum insn_note)NOTE_KIND (insn));
35844 else if (DEBUG_INSN_P (insn))
35845 emit_debug_insn (PATTERN (insn));
35846 else if (!INSN_P (insn))
35848 end_sequence ();
35849 return 1;
35851 /* If the transformation is successful we no longer need the vctp
35852 instruction. */
35853 else if (insn == vctp_insn)
35854 continue;
35855 /* If the insn pattern requires the use of the VPR value from the
35856 vctp as an input parameter for predication. */
35857 else if (arm_mve_insn_predicated_by (insn, vctp_vpr_generated))
35859 /* Check whether this INSN propagates the zeroed tail-predication
35860 lanes. */
35861 if (arm_mve_propagate_zero_pred_p (&props_zero_set, insn,
35862 vctp_vpr_generated))
35863 props_zero_set.safe_push (insn);
35864 bool success = arm_emit_mve_unpredicated_insn_to_seq (insn);
35865 if (!success)
35867 end_sequence ();
35868 return 1;
35871 /* If the insn isn't VPT predicated on vctp_vpr_generated, we need to
35872 make sure that it is still valid within the dlstp/letp loop. */
35873 else
35875 /* If this instruction USE-s the vctp_vpr_generated other than for
35876 predication, this blocks the transformation as we are not allowed
35877 to optimise the VPR value away. */
35878 df_ref insn_uses = NULL;
35879 FOR_EACH_INSN_USE (insn_uses, insn)
35881 if (reg_overlap_mentioned_p (vctp_vpr_generated,
35882 DF_REF_REG (insn_uses)))
35884 end_sequence ();
35885 return 1;
35888 /* If within the loop we have an MVE vector instruction that is
35889 unpredicated, the dlstp/letp looping will add implicit
35890 predication to it. This will result in a change in behaviour
35891 of the instruction, so we need to find out if any instructions
35892 that feed into the current instruction were implicitly
35893 predicated. */
35894 if (MVE_VPT_PREDICABLE_INSN_P (insn)
35895 && !arm_mve_impl_predicated_p (&props_zero_set, insn,
35896 vctp_vpr_generated))
35898 end_sequence ();
35899 return 1;
35901 emit_insn (PATTERN (insn));
35904 seq = get_insns ();
35905 end_sequence ();
35907 /* Re-write the entire BB contents with the transformed
35908 sequence. */
35909 FOR_BB_INSNS_SAFE (body, insn, cur_insn)
35910 if (!(GET_CODE (insn) == CODE_LABEL || NOTE_INSN_BASIC_BLOCK_P (insn)))
35911 delete_insn (insn);
35913 emit_insn_after (seq, BB_END (body));
35915 /* The transformation has succeeded, so now modify the "count"
35916 (a.k.a. niter_expr) for the middle-end. Also set noloop_assumptions
35917 to NULL to stop the middle-end from making assumptions about the
35918 number of iterations. */
35919 simple_loop_desc (body->loop_father)->niter_expr
35920 = XVECEXP (SET_SRC (PATTERN (vctp_insn)), 0, 0);
35921 simple_loop_desc (body->loop_father)->noloop_assumptions = NULL_RTX;
35922 return decrementnum;
35925 #if CHECKING_P
35926 namespace selftest {
35928 /* Scan the static data tables generated by parsecpu.awk looking for
35929 potential issues with the data. We primarily check for
35930 inconsistencies in the option extensions at present (extensions
35931 that duplicate others but aren't marked as aliases). Furthermore,
35932 for correct canonicalization later options must never be a subset
35933 of an earlier option. Any extension should also only specify other
35934 feature bits and never an architecture bit. The architecture is inferred
35935 from the declaration of the extension. */
35936 static void
35937 arm_test_cpu_arch_data (void)
35939 const arch_option *arch;
35940 const cpu_option *cpu;
35941 auto_sbitmap target_isa (isa_num_bits);
35942 auto_sbitmap isa1 (isa_num_bits);
35943 auto_sbitmap isa2 (isa_num_bits);
35945 for (arch = all_architectures; arch->common.name != NULL; ++arch)
35947 const cpu_arch_extension *ext1, *ext2;
35949 if (arch->common.extensions == NULL)
35950 continue;
35952 arm_initialize_isa (target_isa, arch->common.isa_bits);
35954 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
35956 if (ext1->alias)
35957 continue;
35959 arm_initialize_isa (isa1, ext1->isa_bits);
35960 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
35962 if (ext2->alias || ext1->remove != ext2->remove)
35963 continue;
35965 arm_initialize_isa (isa2, ext2->isa_bits);
35966 /* If the option is a subset of the parent option, it doesn't
35967 add anything and so isn't useful. */
35968 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
35970 /* If the extension specifies any architectural bits then
35971 disallow it. Extensions should only specify feature bits. */
35972 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
35977 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
35979 const cpu_arch_extension *ext1, *ext2;
35981 if (cpu->common.extensions == NULL)
35982 continue;
35984 arm_initialize_isa (target_isa, arch->common.isa_bits);
35986 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
35988 if (ext1->alias)
35989 continue;
35991 arm_initialize_isa (isa1, ext1->isa_bits);
35992 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
35994 if (ext2->alias || ext1->remove != ext2->remove)
35995 continue;
35997 arm_initialize_isa (isa2, ext2->isa_bits);
35998 /* If the option is a subset of the parent option, it doesn't
35999 add anything and so isn't useful. */
36000 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
36002 /* If the extension specifies any architectural bits then
36003 disallow it. Extensions should only specify feature bits. */
36004 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
36010 /* Scan the static data tables generated by parsecpu.awk looking for
36011 potential issues with the data. Here we check for consistency between the
36012 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
36013 a feature bit that is not defined by any FPU flag. */
36014 static void
36015 arm_test_fpu_data (void)
36017 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
36018 auto_sbitmap fpubits (isa_num_bits);
36019 auto_sbitmap tmpset (isa_num_bits);
36021 static const enum isa_feature fpu_bitlist_internal[]
36022 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
36023 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
36025 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
36027 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
36028 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
36029 bitmap_clear (isa_all_fpubits_internal);
36030 bitmap_copy (isa_all_fpubits_internal, tmpset);
36033 if (!bitmap_empty_p (isa_all_fpubits_internal))
36035 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
36036 " group that are not defined by any FPU.\n"
36037 " Check your arm-cpus.in.\n");
36038 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
36042 static void
36043 arm_run_selftests (void)
36045 arm_test_cpu_arch_data ();
36046 arm_test_fpu_data ();
36048 } /* Namespace selftest. */
36050 #undef TARGET_RUN_TARGET_SELFTESTS
36051 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
36052 #endif /* CHECKING_P */
36054 /* Implement TARGET_STACK_PROTECT_GUARD. In the case of a
36055 global-variable-based guard, use the default implementation;
36056 otherwise return a null tree. */
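/* A minimal illustration (option spellings assumed from arm.opt):

     -mstack-protector-guard=global
	selects SSP_GLOBAL, so the default global-variable guard is used;
     -mstack-protector-guard=tls -mstack-protector-guard-offset=<off>
	makes this hook return a null tree, and the guard is instead loaded
	relative to the thread pointer by the stack-protector patterns.  */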
36057 static tree
36058 arm_stack_protect_guard (void)
36060 if (arm_stack_protector_guard == SSP_GLOBAL)
36061 return default_stack_protect_guard ();
36063 return NULL_TREE;
36066 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
36067 Unlike the arm version, we do NOT implement asm flag outputs. */
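/* For example (illustrative source), the flag-output constraint syntax that
   the arm version accepts, e.g.

     int eq;
     asm ("cmp %1, %2" : "=@cceq" (eq) : "r" (a), "r" (b));

   is diagnosed with a "sorry" here rather than being silently mishandled.  */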
36069 rtx_insn *
36070 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
36071 vec<machine_mode> & /*input_modes*/,
36072 vec<const char *> &constraints,
36073 vec<rtx> &, vec<rtx> & /*clobbers*/,
36074 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
36076 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
36077 if (startswith (constraints[i], "=@cc"))
36079 sorry ("%<asm%> flags not supported in thumb1 mode");
36080 break;
36082 return NULL;
36085 /* Generate code to enable conditional branches in functions over 1 MiB.
36086 Parameters are:
36087 operands: the operands list of the asm insn (see arm_cond_branch or
36088 arm_cond_branch_reversed).
36089 pos_label: an index into the operands array; operands[pos_label] is
36090 the asm label of the final jump destination.
36091 dest: a string used to generate the asm label of the intermediate
36092 destination.
36093 branch_format: a string denoting the intermediate branch format, e.g.
36094 "beq", "bne", etc. */
36096 const char *
36097 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
36098 const char * branch_format)
36100 rtx_code_label * tmp_label = gen_label_rtx ();
36101 char label_buf[256];
36102 char buffer[128];
36103 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
36104 CODE_LABEL_NUMBER (tmp_label));
36105 const char *label_ptr = arm_strip_name_encoding (label_buf);
36106 rtx dest_label = operands[pos_label];
36107 operands[pos_label] = tmp_label;
36109 snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
36110 output_asm_insn (buffer, operands);
36112 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
36113 operands[pos_label] = dest_label;
36114 output_asm_insn (buffer, operands);
36115 return "";
36118 /* If the given mode matches, load from memory to LO_REGS
36119 (i.e. [Rn], Rn <= LO_REGS). */
36120 enum reg_class
36121 arm_mode_base_reg_class (machine_mode mode)
36123 if (TARGET_HAVE_MVE
36124 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
36125 return LO_REGS;
36127 return MODE_BASE_REG_REG_CLASS (mode);
36130 #undef TARGET_DOCUMENTATION_NAME
36131 #define TARGET_DOCUMENTATION_NAME "ARM"
36133 struct gcc_target targetm = TARGET_INITIALIZER;
36135 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
36137 opt_machine_mode
36138 arm_get_mask_mode (machine_mode mode)
36140 if (TARGET_HAVE_MVE)
36141 return arm_mode_to_pred_mode (mode);
36143 return default_get_mask_mode (mode);
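/* For example (illustrative), with MVE enabled the mask mode chosen for
   V4SImode is the predicate mode V4BImode; without MVE the generic
   default_get_mask_mode result is used.  */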
36146 /* Helper function to determine whether SEQ represents a sequence of
36147 instructions corresponding to the vsel<cond> floating point instructions.
36148 This is a heuristic to check whether the proposed optimisation is desired;
36149 the choice has no consequence for correctness. */
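/* A rough sketch (assumed RTL shape, not taken from a dump) of a sequence
   this accepts:

     (set (reg:SF d) (reg:SF s))                   ;; optional register copies
     (set (reg:CCFP cc) (compare:CCFP ...))        ;; the FP compare
     (set (reg:SF r)
	  (if_then_else:SF (cond) (...) (...)))    ;; the final selection  */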
36150 static bool
36151 arm_is_vsel_fp_insn (rtx_insn *seq)
36153 rtx_insn *curr_insn = seq;
36154 rtx set = NULL_RTX;
36155 /* The pattern may start with a simple set with register operands. Skip
36156 through any of those. */
36157 while (curr_insn)
36159 set = single_set (curr_insn);
36160 if (!set
36161 || !REG_P (SET_DEST (set)))
36162 return false;
36164 if (!REG_P (SET_SRC (set)))
36165 break;
36166 curr_insn = NEXT_INSN (curr_insn);
36169 if (!set)
36170 return false;
36172 /* The next instruction should be a compare. */
36173 if (!REG_P (SET_DEST (set))
36174 || GET_CODE (SET_SRC (set)) != COMPARE)
36175 return false;
36177 curr_insn = NEXT_INSN (curr_insn);
36178 if (!curr_insn)
36179 return false;
36181 /* And the last instruction should be an IF_THEN_ELSE. */
36182 set = single_set (curr_insn);
36183 if (!set
36184 || !REG_P (SET_DEST (set))
36185 || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
36186 return false;
36188 return !NEXT_INSN (curr_insn);
36192 /* Helper function to determine whether SEQ represents a sequence of
36193 instructions corresponding to the Armv8.1-M Mainline conditional arithmetic
36194 instructions: csinc, csneg and csinv. The cinc instruction is generated
36195 using a different mechanism.
36196 This is a heuristic to check whether the proposed optimisation is desired;
36197 the choice has no consequence for correctness. */
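/* For example (illustrative), source such as

     x = (a == b) ? y : z + 1;

   is if-converted by "noce" into a (plus ...) set, a compare and an
   if_then_else, which can later match the csinc pattern; csneg and csinv
   arise analogously when one arm is a negation or a bitwise-not.  */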
36199 static bool
36200 arm_is_v81m_cond_insn (rtx_insn *seq)
36202 rtx_insn *curr_insn = seq;
36203 rtx set = NULL_RTX;
36204 /* The pattern may start with a simple set with register operands. Skip
36205 through any of those. */
36206 while (curr_insn)
36208 set = single_set (curr_insn);
36209 if (!set
36210 || !REG_P (SET_DEST (set)))
36211 return false;
36213 if (!REG_P (SET_SRC (set)))
36214 break;
36215 curr_insn = NEXT_INSN (curr_insn);
36218 if (!set)
36219 return false;
36221 /* The next instruction should be one of:
36222 NEG: for csneg,
36223 PLUS: for csinc,
36224 NOT: for csinv. */
36225 if (GET_CODE (SET_SRC (set)) != NEG
36226 && GET_CODE (SET_SRC (set)) != PLUS
36227 && GET_CODE (SET_SRC (set)) != NOT)
36228 return false;
36230 curr_insn = NEXT_INSN (curr_insn);
36231 if (!curr_insn)
36232 return false;
36234 /* The next instruction should be a COMPARE. */
36235 set = single_set (curr_insn);
36236 if (!set
36237 || !REG_P (SET_DEST (set))
36238 || GET_CODE (SET_SRC (set)) != COMPARE)
36239 return false;
36241 curr_insn = NEXT_INSN (curr_insn);
36242 if (!curr_insn)
36243 return false;
36245 /* And the last instruction should be an IF_THEN_ELSE. */
36246 set = single_set (curr_insn);
36247 if (!set
36248 || !REG_P (SET_DEST (set))
36249 || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
36250 return false;
36252 return !NEXT_INSN (curr_insn);
36255 /* For Armv8.1-M Mainline we have both conditional execution through IT blocks,
36256 as well as conditional arithmetic instructions controlled by
36257 TARGET_COND_ARITH. To generate the latter we rely on a special part of the
36258 "ce" pass that generates code for targets that don't support conditional
36259 execution of general instructions known as "noce". These transformations
36260 happen before 'reload_completed'. However, "noce" also triggers for some
36261 unwanted patterns [PR 116444] that prevent "ce" optimisations after reload.
36262 To make sure we can get both we use the TARGET_NOCE_CONVERSION_PROFITABLE_P
36263 hook to only allow "noce" to generate the patterns that are profitable. */
36265 bool
36266 arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
36268 if (!TARGET_COND_ARITH
36269 || reload_completed)
36270 return default_noce_conversion_profitable_p (seq, if_info);
36272 if (arm_is_v81m_cond_insn (seq))
36273 return true;
36275 /* Look for vsel<cond> opportunities as we still want to codegen these for
36276 Armv8.1-M Mainline targets. */
36277 if (arm_is_vsel_fp_insn (seq))
36278 return true;
36280 return false;
36283 /* Output assembly to read the thread pointer from the appropriate TPIDR
36284 register into DEST. If PRED_P, also emit the %? that can be used to
36285 output the predication code. */
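/* For example (illustrative, assuming the -mtp=tpidruro spelling), reading
   TPIDRURO into r0 would emit

	mrc	p15, 0, r0, c13, c0, 3	@ load_tp_hard

   with "%?" inserted after the mnemonic when PRED_P is set.  */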
36287 const char *
36288 arm_output_load_tpidr (rtx dst, bool pred_p)
36290 char buf[64];
36291 int tpidr_coproc_num = -1;
36292 switch (target_thread_pointer)
36294 case TP_TPIDRURW:
36295 tpidr_coproc_num = 2;
36296 break;
36297 case TP_TPIDRURO:
36298 tpidr_coproc_num = 3;
36299 break;
36300 case TP_TPIDRPRW:
36301 tpidr_coproc_num = 4;
36302 break;
36303 default:
36304 gcc_unreachable ();
36306 snprintf (buf, sizeof (buf),
36307 "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
36308 pred_p ? "%?" : "", tpidr_coproc_num);
36309 output_asm_insn (buf, &dst);
36310 return "";
36313 /* Return the MVE vector mode that has NUNITS elements of mode INNER_MODE. */
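/* For example (illustrative), arm_mve_data_mode (SImode, 4) yields V4SImode
   and arm_mve_data_mode (HFmode, 8) yields V8HFmode.  */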
36314 opt_machine_mode
36315 arm_mve_data_mode (scalar_mode inner_mode, poly_uint64 nunits)
36317 enum mode_class mclass
36318 = (SCALAR_FLOAT_MODE_P (inner_mode) ? MODE_VECTOR_FLOAT : MODE_VECTOR_INT);
36319 machine_mode mode;
36320 FOR_EACH_MODE_IN_CLASS (mode, mclass)
36321 if (inner_mode == GET_MODE_INNER (mode)
36322 && known_eq (nunits, GET_MODE_NUNITS (mode)))
36323 return mode;
36324 return opt_machine_mode ();
36327 #include "gt-arm.h"