/* Output routines for GCC for ARM.
   Copyright (C) 1991-2025 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch-common.h"
#include "aarch-common-protos.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_insn_cost (rtx_insn *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static void arm_emit_multi_reg_pop (unsigned long);
static int vfp_emit_fstmd (int, int);
static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_restore (struct gcc_options *, struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static opt_machine_mode arm_array_mode (machine_mode, unsigned HOST_WIDE_INT);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalign,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
					  rtx, const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx,
				       rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
				       vec<machine_mode> &,
				       vec<const char *> &, vec<rtx> &,
				       vec<rtx> &, HARD_REG_SET &, location_t);
static const char *arm_identify_fpu_from_isa (sbitmap);
/* Table of machine attributes.  */
static const attribute_spec arm_gnu_attributes[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call", 0, 0, false, true, true, false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call", 0, 0, false, true, true, false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute, NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute, NULL },
  { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked", 0, 0, true, false, false, false,
    arm_handle_fndecl_attribute, NULL },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared", 0, 0, false, true, false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, false, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
};

static const scoped_attribute_specs arm_gnu_attribute_table =
{
  "gnu", { arm_gnu_attributes }
};

static const scoped_attribute_specs *const arm_attribute_table[] =
{
  &arm_gnu_attribute_table
};
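
/* Illustrative note (not from the original sources): in user code these
   machine attributes are requested with GNU attribute syntax, for example

     void far_handler (void) __attribute__ ((long_call));
     void uart_isr (void) __attribute__ ((interrupt ("IRQ")));
     int entry_fn (int) __attribute__ ((cmse_nonsecure_entry));

   and the handler functions registered in the table above are what
   validate such uses and attach the attribute to the decl or type.  */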
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_CHECK_BUILTIN_CALL
#define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
#undef TARGET_INSN_COST
#define TARGET_INSN_COST arm_insn_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE
#define TARGET_ARRAY_MODE arm_array_mode
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  arm_autovectorize_vector_modes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST arm_loop_unroll_adjust

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P arm_predict_doloop_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
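
/* Illustrative note (not from the original sources): the two offsets above
   give each anchor a usable byte range of 4088 + 1 + 4095 = 8184 bytes,
   and 8184 is divisible by eight, so consecutive anchors can be kept
   naturally aligned.  */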
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION arm_invalid_conversion

#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP arm_invalid_unary_op

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP arm_invalid_binary_op

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P arm_noce_conversion_profitable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char * minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* Which floating point hardware to schedule for.  */

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */

/* Nonzero if this chip supports the ARM 6K extensions.  */

/* Nonzero if this chip supports the ARM 6KZ extensions.  */

/* Nonzero if instructions present in ARMv6-M can be used.  */

/* Nonzero if this chip supports the ARM 7 extensions.  */

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */

/* Nonzero if instructions present in ARMv8 can be used.  */

/* Nonzero if this chip supports the ARMv8.1 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 8-M Mainline
   extensions.  */
int arm_arch8m_main = 0;

/* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
   extensions.  */
int arm_arch8_1m_main = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 1.  */

/* Nonzero if chip supports Thumb 2.  */

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
int arm_arch_i8mm = 0;

/* Nonzero if chip supports the BFloat16 instructions.  */
int arm_arch_bf16 = 0;

/* Nonzero if chip supports the Custom Datapath Extension.  */
int arm_arch_cde = 0;
int arm_arch_cde_coproc = 0;
const int arm_arch_cde_coproc_bits[] = {
  0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
};
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
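
/* Illustrative note (not from the original sources): the table follows the
   architectural condition-code encoding, so the inverse of a condition is
   found by flipping the low bit of its index, e.g. "eq" (0) <-> "ne" (1),
   "hi" (8) <-> "ls" (9).  */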
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define DEF_FP_SYSREG(reg) #reg,
const char *fp_sysreg_names[NB_FP_SYSREGS] = {
  FP_SYSREGS
};
#undef DEF_FP_SYSREG

#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS				\
  (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM)	\
	    | (1 << SP_REGNUM)				\
	    | (1 << PC_REGNUM)				\
	    | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
	       ? (1 << PIC_OFFSET_TABLE_REGNUM)		\
	       : 0)))
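
/* Illustrative note (not from the original sources): with the usual ARM
   register numbering (Thumb hard frame pointer r7, SP r13, PC r15) and no
   PIC register, THUMB2_WORK_REGS evaluates to 0xff & ~0x80 = 0x7f, i.e. the
   low registers r0-r6.  */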
/* Initialization code.  */

  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;

#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
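
/* Illustrative note (not from the original sources): the three fields are
   the number of prefetch slots, the L1 cache size and the L1 line size, so
   a tuning that considers prefetch worthwhile might use, hypothetically,
   ARM_PREFETCH_BENEFICIAL (4, 32768, 64) in its tune_params below.  */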
/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  1, /* vec_unalign_load_cost. */
  1, /* vec_unalign_store_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1), /* shift_reg. */
  COSTS_N_INSNS (1), /* arith_shift. */
  COSTS_N_INSNS (2), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (2), /* extend_arith. */
  COSTS_N_INSNS (1), /* bfi. */
  COSTS_N_INSNS (1), /* bfx. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (3), /* simple. */
  COSTS_N_INSNS (3), /* flag_setting. */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (3), /* add. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (30) /* idiv.  No HW div on Cortex A9. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (4), /* extend. */
  COSTS_N_INSNS (4), /* extend_add. */
  COSTS_N_INSNS (2), /* load. */
  COSTS_N_INSNS (2), /* load_sign_extend. */
  COSTS_N_INSNS (2), /* ldrd. */
  COSTS_N_INSNS (2), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (5), /* loadf. */
  COSTS_N_INSNS (5), /* loadd. */
  COSTS_N_INSNS (1), /* load_unaligned. */
  COSTS_N_INSNS (2), /* store. */
  COSTS_N_INSNS (2), /* strd. */
  COSTS_N_INSNS (2), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (1), /* storef. */
  COSTS_N_INSNS (1), /* stored. */
  COSTS_N_INSNS (1), /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (14), /* div. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (7), /* mult_addsub. */
  COSTS_N_INSNS (30), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (1), /* fpconst. */
  COSTS_N_INSNS (1), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (24), /* div. */
  COSTS_N_INSNS (5), /* mult. */
  COSTS_N_INSNS (8), /* mult_addsub. */
  COSTS_N_INSNS (30), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (1), /* fpconst. */
  COSTS_N_INSNS (1), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (1), /* alu. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (1), /* movi. */
  COSTS_N_INSNS (2), /* dup. */
  COSTS_N_INSNS (2) /* extract. */
const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1), /* shift. */
  COSTS_N_INSNS (1), /* arith_shift. */
  0, /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift. */
  0, /* log_shift_reg. */
  0, /* extend_arith. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (1), /* simple. */
  COSTS_N_INSNS (1), /* flag_setting. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (1), /* add. */
  COSTS_N_INSNS (1), /* extend_add. */
  COSTS_N_INSNS (30) /* idiv.  No HW div on Cortex A8. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (1), /* load. */
  COSTS_N_INSNS (1), /* load_sign_extend. */
  COSTS_N_INSNS (1), /* ldrd. */
  COSTS_N_INSNS (1), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (1), /* loadf. */
  COSTS_N_INSNS (1), /* loadd. */
  COSTS_N_INSNS (1), /* load_unaligned. */
  COSTS_N_INSNS (1), /* store. */
  COSTS_N_INSNS (1), /* strd. */
  COSTS_N_INSNS (1), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (1), /* storef. */
  COSTS_N_INSNS (1), /* stored. */
  COSTS_N_INSNS (1), /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (36), /* div. */
  COSTS_N_INSNS (11), /* mult. */
  COSTS_N_INSNS (20), /* mult_addsub. */
  COSTS_N_INSNS (30), /* fma. */
  COSTS_N_INSNS (9), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (6), /* compare. */
  COSTS_N_INSNS (4), /* widen. */
  COSTS_N_INSNS (4), /* narrow. */
  COSTS_N_INSNS (8), /* toint. */
  COSTS_N_INSNS (8), /* fromint. */
  COSTS_N_INSNS (8) /* roundint. */
  COSTS_N_INSNS (64), /* div. */
  COSTS_N_INSNS (16), /* mult. */
  COSTS_N_INSNS (25), /* mult_addsub. */
  COSTS_N_INSNS (30), /* fma. */
  COSTS_N_INSNS (9), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (6), /* compare. */
  COSTS_N_INSNS (6), /* widen. */
  COSTS_N_INSNS (6), /* narrow. */
  COSTS_N_INSNS (8), /* toint. */
  COSTS_N_INSNS (8), /* fromint. */
  COSTS_N_INSNS (8) /* roundint. */
  COSTS_N_INSNS (1), /* alu. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (1), /* movi. */
  COSTS_N_INSNS (2), /* dup. */
  COSTS_N_INSNS (2) /* extract. */
const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1), /* shift. */
  COSTS_N_INSNS (1), /* shift_reg. */
  COSTS_N_INSNS (1), /* arith_shift. */
  COSTS_N_INSNS (1), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (1), /* extend_arith. */
  COSTS_N_INSNS (1), /* bfi. */
  COSTS_N_INSNS (1), /* bfx. */
  COSTS_N_INSNS (1), /* clz. */
  COSTS_N_INSNS (1), /* rev. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (1), /* flag_setting. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (1), /* add. */
  COSTS_N_INSNS (1), /* extend_add. */
  COSTS_N_INSNS (7) /* idiv. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (1), /* load. */
  COSTS_N_INSNS (1), /* load_sign_extend. */
  COSTS_N_INSNS (6), /* ldrd. */
  COSTS_N_INSNS (1), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* loadf. */
  COSTS_N_INSNS (4), /* loadd. */
  COSTS_N_INSNS (1), /* load_unaligned. */
  COSTS_N_INSNS (1), /* store. */
  COSTS_N_INSNS (3), /* strd. */
  COSTS_N_INSNS (1), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* storef. */
  COSTS_N_INSNS (2), /* stored. */
  COSTS_N_INSNS (1), /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (15), /* div. */
  COSTS_N_INSNS (3), /* mult. */
  COSTS_N_INSNS (7), /* mult_addsub. */
  COSTS_N_INSNS (7), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (30), /* div. */
  COSTS_N_INSNS (6), /* mult. */
  COSTS_N_INSNS (10), /* mult_addsub. */
  COSTS_N_INSNS (7), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (1), /* alu. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (1), /* movi. */
  COSTS_N_INSNS (2), /* dup. */
  COSTS_N_INSNS (2) /* extract. */
const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1), /* shift. */
  COSTS_N_INSNS (1), /* shift_reg. */
  COSTS_N_INSNS (1), /* arith_shift. */
  COSTS_N_INSNS (1), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (1), /* extend_arith. */
  COSTS_N_INSNS (1), /* bfi. */
  COSTS_N_INSNS (1), /* bfx. */
  COSTS_N_INSNS (1), /* clz. */
  COSTS_N_INSNS (1), /* rev. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (1), /* flag_setting. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (1), /* add. */
  COSTS_N_INSNS (1), /* extend_add. */
  COSTS_N_INSNS (7) /* idiv. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (1), /* load. */
  COSTS_N_INSNS (1), /* load_sign_extend. */
  COSTS_N_INSNS (3), /* ldrd. */
  COSTS_N_INSNS (1), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* loadf. */
  COSTS_N_INSNS (2), /* loadd. */
  COSTS_N_INSNS (1), /* load_unaligned. */
  COSTS_N_INSNS (1), /* store. */
  COSTS_N_INSNS (3), /* strd. */
  COSTS_N_INSNS (1), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* storef. */
  COSTS_N_INSNS (2), /* stored. */
  COSTS_N_INSNS (1), /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (15), /* div. */
  COSTS_N_INSNS (3), /* mult. */
  COSTS_N_INSNS (7), /* mult_addsub. */
  COSTS_N_INSNS (7), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (30), /* div. */
  COSTS_N_INSNS (6), /* mult. */
  COSTS_N_INSNS (10), /* mult_addsub. */
  COSTS_N_INSNS (7), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (1), /* alu. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (1), /* movi. */
  COSTS_N_INSNS (2), /* dup. */
  COSTS_N_INSNS (2) /* extract. */
const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1), /* shift_reg. */
  COSTS_N_INSNS (1), /* arith_shift. */
  COSTS_N_INSNS (1), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend_arith. */
  COSTS_N_INSNS (1), /* bfx. */
  COSTS_N_INSNS (1), /* clz. */
  COSTS_N_INSNS (1), /* rev. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (2), /* simple. */
  COSTS_N_INSNS (3), /* flag_setting. */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (3), /* add. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (18) /* idiv. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (3), /* extend. */
  COSTS_N_INSNS (3), /* extend_add. */
  COSTS_N_INSNS (3), /* load. */
  COSTS_N_INSNS (3), /* load_sign_extend. */
  COSTS_N_INSNS (3), /* ldrd. */
  COSTS_N_INSNS (3), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (3), /* loadf. */
  COSTS_N_INSNS (3), /* loadd. */
  0, /* load_unaligned. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* storef. */
  COSTS_N_INSNS (2), /* stored. */
  0, /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (17), /* div. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (8), /* mult_addsub. */
  COSTS_N_INSNS (8), /* fma. */
  COSTS_N_INSNS (4), /* addsub. */
  COSTS_N_INSNS (2), /* fpconst. */
  COSTS_N_INSNS (2), /* neg. */
  COSTS_N_INSNS (2), /* compare. */
  COSTS_N_INSNS (4), /* widen. */
  COSTS_N_INSNS (4), /* narrow. */
  COSTS_N_INSNS (4), /* toint. */
  COSTS_N_INSNS (4), /* fromint. */
  COSTS_N_INSNS (4) /* roundint. */
  COSTS_N_INSNS (31), /* div. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (8), /* mult_addsub. */
  COSTS_N_INSNS (8), /* fma. */
  COSTS_N_INSNS (4), /* addsub. */
  COSTS_N_INSNS (2), /* fpconst. */
  COSTS_N_INSNS (2), /* neg. */
  COSTS_N_INSNS (2), /* compare. */
  COSTS_N_INSNS (4), /* widen. */
  COSTS_N_INSNS (4), /* narrow. */
  COSTS_N_INSNS (4), /* toint. */
  COSTS_N_INSNS (4), /* fromint. */
  COSTS_N_INSNS (4) /* roundint. */
  COSTS_N_INSNS (1), /* alu. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (1), /* movi. */
  COSTS_N_INSNS (2), /* dup. */
  COSTS_N_INSNS (2) /* extract. */
const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1), /* arith_shift. */
  COSTS_N_INSNS (1), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend_arith. */
  COSTS_N_INSNS (1), /* bfi. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (2), /* simple. */
  COSTS_N_INSNS (3), /* flag_setting. */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (2), /* add. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (18) /* idiv. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (3), /* extend. */
  COSTS_N_INSNS (3), /* extend_add. */
  COSTS_N_INSNS (3), /* load. */
  COSTS_N_INSNS (3), /* load_sign_extend. */
  COSTS_N_INSNS (3), /* ldrd. */
  COSTS_N_INSNS (4), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (4), /* loadf. */
  COSTS_N_INSNS (4), /* loadd. */
  0, /* load_unaligned. */
  COSTS_N_INSNS (1), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  0, /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (17), /* div. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (8), /* mult_addsub. */
  COSTS_N_INSNS (8), /* fma. */
  COSTS_N_INSNS (4), /* addsub. */
  COSTS_N_INSNS (2), /* fpconst. */
  COSTS_N_INSNS (2), /* neg. */
  COSTS_N_INSNS (5), /* compare. */
  COSTS_N_INSNS (4), /* widen. */
  COSTS_N_INSNS (4), /* narrow. */
  COSTS_N_INSNS (4), /* toint. */
  COSTS_N_INSNS (4), /* fromint. */
  COSTS_N_INSNS (4) /* roundint. */
  COSTS_N_INSNS (31), /* div. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (8), /* mult_addsub. */
  COSTS_N_INSNS (8), /* fma. */
  COSTS_N_INSNS (4), /* addsub. */
  COSTS_N_INSNS (2), /* fpconst. */
  COSTS_N_INSNS (2), /* neg. */
  COSTS_N_INSNS (2), /* compare. */
  COSTS_N_INSNS (4), /* widen. */
  COSTS_N_INSNS (4), /* narrow. */
  COSTS_N_INSNS (4), /* toint. */
  COSTS_N_INSNS (4), /* fromint. */
  COSTS_N_INSNS (4) /* roundint. */
  COSTS_N_INSNS (1), /* alu. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (1), /* movi. */
  COSTS_N_INSNS (2), /* dup. */
  COSTS_N_INSNS (2) /* extract. */
const struct cpu_cost_table v7m_extra_costs =
  0, /* arith_shift. */
  COSTS_N_INSNS (1), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend_arith. */
  COSTS_N_INSNS (1), /* non_exec. */
  false /* non_exec_costs_exec. */
  COSTS_N_INSNS (1), /* simple. */
  COSTS_N_INSNS (1), /* flag_setting. */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (1), /* add. */
  COSTS_N_INSNS (3), /* extend_add. */
  COSTS_N_INSNS (8) /* idiv. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (3), /* extend_add. */
  COSTS_N_INSNS (2), /* load. */
  0, /* load_sign_extend. */
  COSTS_N_INSNS (3), /* ldrd. */
  COSTS_N_INSNS (2), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  1, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* loadf. */
  COSTS_N_INSNS (3), /* loadd. */
  COSTS_N_INSNS (1), /* load_unaligned. */
  COSTS_N_INSNS (2), /* store. */
  COSTS_N_INSNS (3), /* strd. */
  COSTS_N_INSNS (2), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  1, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* storef. */
  COSTS_N_INSNS (3), /* stored. */
  COSTS_N_INSNS (1), /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (7), /* div. */
  COSTS_N_INSNS (2), /* mult. */
  COSTS_N_INSNS (5), /* mult_addsub. */
  COSTS_N_INSNS (3), /* fma. */
  COSTS_N_INSNS (1), /* addsub. */
  COSTS_N_INSNS (15), /* div. */
  COSTS_N_INSNS (5), /* mult. */
  COSTS_N_INSNS (7), /* mult_addsub. */
  COSTS_N_INSNS (7), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (1), /* alu. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (1), /* movi. */
  COSTS_N_INSNS (2), /* dup. */
  COSTS_N_INSNS (2) /* extract. */
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  }
};
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */
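/* As an illustration of the trade-off described above (a sketch only; the
   label and constant are made up): loading the 32-bit constant 0x56781234
   can be done either with a single literal-pool load,

	ldr	r0, .LCP	@ .LCP: .word 0x56781234

   or with a MOVW/MOVT immediate pair,

	movw	r0, #0x1234
	movt	r0, #0x5678

   and the PREF_CONST_POOL_TRUE setting below biases the cost model towards
   the first form.  */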
const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   other baseline M-profile cores.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */
char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
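/* For example, when targeting Armv8-A the "PROFILE" placeholder is
   overwritten with "8A", so the macro that ends up being defined is
   __ARM_ARCH_8A__.  */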
/* Supported TLS relocations.  */
  TLS_DESCSEQ	/* GNU scheme */
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
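/* For example, emit_set_insn (stack_pointer_rtx, hard_frame_pointer_rtx)
   emits a single (set (reg sp) (reg fp)) insn on the current sequence and
   returns it.  */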
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
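/* A worked example: for VALUE == 0x90000005 the loop sees 0x90000005,
   0x90000004, 0x90000000, 0x80000000 and then 0, i.e. one iteration per
   set bit, so bit_count returns 4.  */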
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;

  return count;
}

/* A fixed-point machine mode together with the name fragment used to build
   the corresponding __gnu_* libfunc names.  */
typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
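/* For instance, arm_set_fixed_optab_libfunc (add_optab, E_UQQmode, "add",
   "uqq", 3) registers the libcall name "__gnu_adduqq3": the name is simply
   "__gnu_" + FUNCNAME + MODENAME + NUM_SUFFIX, with the numeric suffix
   omitted when NUM_SUFFIX is 0.  */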
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
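/* An example of the "2" suffix rule: converting between the unsigned accum
   modes UHA and UDA, e.g. arm_set_fixed_conv_libfunc (fract_optab,
   E_UDAmode, E_UHAmode, "fract", "uda", "uha"), produces the name
   "__gnu_fractuhauda2", because both modes are fixed-point, both are
   unsigned and both are accum modes; a conversion involving a
   non-fixed-point mode keeps the plain, unsuffixed name.  */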
static GTY(()) rtx speculation_barrier_libfunc;

/* Record that we have no arithmetic or comparison libfuncs for
   machine mode MODE.  */
static void
arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
{
  /* Arithmetic.  */
  set_optab_libfunc (add_optab, mode, NULL);
  set_optab_libfunc (sdiv_optab, mode, NULL);
  set_optab_libfunc (smul_optab, mode, NULL);
  set_optab_libfunc (neg_optab, mode, NULL);
  set_optab_libfunc (sub_optab, mode, NULL);

  /* Comparisons.  */
  set_optab_libfunc (eq_optab, mode, NULL);
  set_optab_libfunc (ne_optab, mode, NULL);
  set_optab_libfunc (lt_optab, mode, NULL);
  set_optab_libfunc (le_optab, mode, NULL);
  set_optab_libfunc (ge_optab, mode, NULL);
  set_optab_libfunc (gt_optab, mode, NULL);
  set_optab_libfunc (unord_optab, mode, NULL);
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  machine_mode mode_iter;

  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
  /* There are no special library functions unless we are using the
     AAPCS-based ABI.  */
  if (!TARGET_AAPCS_BASED)
    return;
2578 ABI for the ARM architecture", Version 1.0. */
2580 /* Double-precision floating-point arithmetic. Table 2. */
2581 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2582 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2583 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2584 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2585 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2587 /* Double-precision comparisons. Table 3. */
2588 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2589 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2590 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2591 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2592 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2593 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2594 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2596 /* Single-precision floating-point arithmetic. Table 4. */
2597 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2598 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2599 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2600 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2601 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2603 /* Single-precision comparisons. Table 5. */
2604 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2605 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2606 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2607 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2608 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2609 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2610 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2612 /* Floating-point to integer conversions. Table 6. */
2613 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2614 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2615 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2616 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2617 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2618 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2619 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2620 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2622 /* Conversions between floating types. Table 7. */
2623 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2624 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2626 /* Integer to floating-point conversions. Table 8. */
2627 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2628 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2629 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2630 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2631 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2632 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2633 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2634 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2636 /* Long long. Table 9. */
2637 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2638 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2639 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2640 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2641 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2642 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2643 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2644 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2646 /* Integer (32/32->32) division. \S 4.3.1. */
2647 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2648 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
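  /* For reference, the AEABI division helpers registered just above have
     ordinary C signatures, e.g.

	int __aeabi_idiv (int numerator, int denominator);
	unsigned __aeabi_uidiv (unsigned numerator, unsigned denominator);

     (parameter names are illustrative), so a plain "a / b" on a core
     without a hardware divider becomes a normal call to one of them.  */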
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      arm_block_arith_comp_libfuncs_for_mode (HFmode);
      break;

    default:
      break;
    }
  /* For all possible libcalls in BFmode, record NULL.  */
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
    {
      set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
      set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
    }
  arm_block_arith_comp_libfuncs_for_mode (BFmode);
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  const arm_fixed_mode_set fixed_arith_modes[] =
    {
      { E_UQQmode, "uqq" },
      { E_UHQmode, "uhq" },
      { E_USQmode, "usq" },
      { E_UDQmode, "udq" },
      { E_UTQmode, "utq" },
      { E_UHAmode, "uha" },
      { E_USAmode, "usa" },
      { E_UDAmode, "uda" },
      { E_UTAmode, "uta" }
    };
  const arm_fixed_mode_set fixed_conv_modes[] =
    {
      { E_UQQmode, "uqq" },
      { E_UHQmode, "uhq" },
      { E_USQmode, "usq" },
      { E_UDQmode, "udq" },
      { E_UTQmode, "utq" },
      { E_UHAmode, "uha" },
      { E_USAmode, "usa" },
      { E_UDAmode, "uda" },
      { E_UTAmode, "uta" },
    };
  unsigned int i, j;

  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				   "cmp", fixed_arith_modes[i].name, 2);
    }
  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
	if (i == j
	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	  continue;

	arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfract_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (fractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
      }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");

  speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}
/* Implement TARGET_GIMPLE_FOLD_BUILTIN.  */
static bool
arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
  tree fndecl = gimple_call_fndecl (stmt);
  unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
  unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
  gimple *new_stmt = NULL;
  switch (code & ARM_BUILTIN_CLASS)
    {
    case ARM_BUILTIN_GENERAL:
      break;
    case ARM_BUILTIN_MVE:
      new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
      break;
    }

  if (!new_stmt)
    return false;

  gsi_replace (gsi, new_stmt, true);
  return true;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a specific
     structure type, a definition that the C Library ABI further
     reinforces.  We must follow this definition exactly.  The
     structure tag name is visible in C++ mangled names, and thus
     forms a part of the ABI.  The field name may be used by people
     who #include <stdarg.h>.  */
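  /* Concretely, the type built below is equivalent to the C declaration

	 struct __va_list { void *__ap; };

     which is the layout the AAPCS requires for va_list.  */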
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TREE_PUBLIC (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
	     "debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  if (target_pure_code || target_slow_flash_data)
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");
      bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;

      /* We only support -mslow-flash-data on M-profile targets with
	 MOVT.  */
      if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
	error ("%qs only supports non-pic code on M-profile targets with the "
	       "MOVT instruction", flag);

      /* We only support -mpure-code on M-profile targets.  */
      if (target_pure_code && common_unsupported_modes)
	error ("%qs only supports non-pic code on M-profile targets", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
	 -mword-relocations forbids relocation of MOVT/MOVW.  */
      if (target_word_relocations)
	error ("%qs is incompatible with %<-mword-relocations%>", flag);
    }
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
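      /* The 5/6/7-bit figures above are byte ranges after the Thumb-1
	 scaling of the 5-bit offset field: LDRB reaches 0..31 bytes,
	 LDRH 0..62 and LDR 0..124 from the base register.  */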
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
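      /* 248 + 1 + 4095 = 4344 bytes in total, and 4344 = 8 * 543, which is
	 the divisibility-by-eight property mentioned above.  */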
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);

  if (TARGET_THUMB1)
    targetm.md_asm_adjust = thumb1_md_asm_adjust;
  else
    targetm.md_asm_adjust = arm_md_asm_adjust;
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;
static void
arm_override_options_after_change_1 (struct gcc_options *opts,
				     struct gcc_options *opts_set)
{
  /* -falign-functions without argument: supply one.  */
  if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
				  && opts->x_optimize_size ? "2" : "4";
}
/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_override_options_after_change_1 (&global_options, &global_options_set);
}
/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options */* opts */,
		    struct gcc_options */* opts_set */,
		    struct cl_target_option *ptr)
{
  arm_configure_build_target (&arm_active_target, ptr, false);
  arm_option_reconfigure_globals ();
}
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts, opts_set);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with
	 eg, -march=armv4.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* Need to remember initial values so combinations of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
  if (!opts_set->x_arm_restrict_it
      && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
       i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-base processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6 && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_inline_asm_unified = true;

  if (arm_stack_protector_guard == SSP_GLOBAL
      && opts->x_arm_stack_protector_guard_offset_str)
    {
      error ("incompatible options %<-mstack-protector-guard=global%> and "
	     "%<-mstack-protector-guard-offset=%s%>",
	     arm_stack_protector_guard_offset_str);
    }

  if (opts->x_arm_stack_protector_guard_offset_str)
    {
      char *end;
      const char *str = arm_stack_protector_guard_offset_str;
      long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
      if (!*str || *end || errno)
	error ("%qs is not a valid offset in %qs", str,
	       "-mstack-protector-guard-offset=");
      arm_stack_protector_guard_offset = offs;
    }

  if (arm_current_function_pac_enabled_p ())
    {
      if (!arm_arch8m_main)
	error ("This architecture does not support branch protection "
	       "instructions");
      if (TARGET_TPCS_FRAME)
	sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
    }

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
static sbitmap isa_all_fpubits_internal;
static sbitmap isa_all_fpbits;
static sbitmap isa_quirkbits;
static void
arm_handle_no_branch_protection (void)
{
  aarch_ra_sign_scope = AARCH_FUNCTION_NONE;
  aarch_enable_bti = 0;
}

static void
arm_handle_standard_branch_protection (void)
{
  aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF;
  aarch_enable_bti = 1;
}

static void
arm_handle_pac_ret_protection (void)
{
  aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF;
}

static void
arm_handle_pac_ret_leaf (void)
{
  aarch_ra_sign_scope = AARCH_FUNCTION_ALL;
}

static void
arm_handle_bti_protection (void)
{
  aarch_enable_bti = 1;
}

static const struct aarch_branch_protect_type arm_pac_ret_subtypes[] = {
  { "leaf", false, arm_handle_pac_ret_leaf, NULL, 0 },
  { NULL, false, NULL, NULL, 0 }
};

static const struct aarch_branch_protect_type arm_branch_protect_types[] = {
  { "none", true, arm_handle_no_branch_protection, NULL, 0 },
  { "standard", true, arm_handle_standard_branch_protection, NULL, 0 },
  { "pac-ret", false, arm_handle_pac_ret_protection, arm_pac_ret_subtypes,
    ARRAY_SIZE (arm_pac_ret_subtypes) },
  { "bti", false, arm_handle_bti_protection, NULL, 0 },
  { NULL, false, NULL, NULL, 0 }
};
3301 /* Configure a build target TARGET from the user-specified options OPTS and
3302 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3303 architecture have been specified, but the two are not identical. */
3305 arm_configure_build_target (struct arm_build_target
*target
,
3306 struct cl_target_option
*opts
,
3307 bool warn_compatible
)
3309 const cpu_option
*arm_selected_tune
= NULL
;
3310 const arch_option
*arm_selected_arch
= NULL
;
3311 const cpu_option
*arm_selected_cpu
= NULL
;
3312 const arm_fpu_desc
*arm_selected_fpu
= NULL
;
3313 const char *tune_opts
= NULL
;
3314 const char *arch_opts
= NULL
;
3315 const char *cpu_opts
= NULL
;
3317 bitmap_clear (target
->isa
);
3318 target
->core_name
= NULL
;
3319 target
->arch_name
= NULL
;
3321 if (opts
->x_arm_arch_string
)
3323 arm_selected_arch
= arm_parse_arch_option_name (all_architectures
,
3325 opts
->x_arm_arch_string
);
3326 arch_opts
= strchr (opts
->x_arm_arch_string
, '+');
3329 if (opts
->x_arm_cpu_string
)
3331 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "-mcpu",
3332 opts
->x_arm_cpu_string
);
3333 cpu_opts
= strchr (opts
->x_arm_cpu_string
, '+');
3334 arm_selected_tune
= arm_selected_cpu
;
3335 /* If taking the tuning from -mcpu, we don't need to rescan the
3336 options for tuning. */
3339 if (opts
->x_arm_tune_string
)
3341 arm_selected_tune
= arm_parse_cpu_option_name (all_cores
, "-mtune",
3342 opts
->x_arm_tune_string
);
3343 tune_opts
= strchr (opts
->x_arm_tune_string
, '+');
3346 if (opts
->x_arm_branch_protection_string
)
3348 aarch_validate_mbranch_protection (arm_branch_protect_types
,
3349 opts
->x_arm_branch_protection_string
,
3350 "-mbranch-protection=");
3353 if (arm_selected_arch
)
3355 arm_initialize_isa (target
->isa
, arm_selected_arch
->common
.isa_bits
);
3356 arm_parse_option_features (target
->isa
, &arm_selected_arch
->common
,
3359 if (arm_selected_cpu
)
3361 auto_sbitmap
cpu_isa (isa_num_bits
);
3362 auto_sbitmap
isa_delta (isa_num_bits
);
3364 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->common
.isa_bits
);
3365 arm_parse_option_features (cpu_isa
, &arm_selected_cpu
->common
,
3367 bitmap_xor (isa_delta
, cpu_isa
, target
->isa
);
3368 /* Ignore any bits that are quirk bits. */
3369 bitmap_and_compl (isa_delta
, isa_delta
, isa_quirkbits
);
3370 /* If the user (or the default configuration) has specified a
3371 specific FPU, then ignore any bits that depend on the FPU
3372 configuration. Do similarly if using the soft-float
3374 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
3375 || arm_float_abi
== ARM_FLOAT_ABI_SOFT
)
3376 bitmap_and_compl (isa_delta
, isa_delta
, isa_all_fpbits
);
3378 if (!bitmap_empty_p (isa_delta
))
3380 if (warn_compatible
)
3381 warning (0, "switch %<-mcpu=%s%> conflicts "
3382 "with switch %<-march=%s%>",
3383 opts
->x_arm_cpu_string
,
3384 opts
->x_arm_arch_string
);
3386 /* -march wins for code generation.
3387 -mcpu wins for default tuning. */
3388 if (!arm_selected_tune
)
3389 arm_selected_tune
= arm_selected_cpu
;
3391 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3392 target
->arch_name
= arm_selected_arch
->common
.name
;
3396 /* Architecture and CPU are essentially the same.
3397 Prefer the CPU setting. */
3398 arm_selected_arch
= all_architectures
	+ arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks.  */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them.  */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	bitmap_set_bit (sought_isa, isa_bit_thumb);

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}
	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      /* This should clear out ALL bits relating to the FPU/simd
	 extensions, to avoid potentially invalid combinations later on
	 that we can't match.  At present we only clear out those bits
	 that can be set by -mfpu.  This should be fixed in GCC-12.  */
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  /* If we have the soft-float ABI, clear any feature bits relating to use of
     floating-point operations.  They'll just confuse things later on.  */
  if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
    bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);

  /* There may be implied bits which we still need to enable.  These are
     non-named features which are needed to complete other sets of features,
     but cannot be enabled from arm-cpus.in due to being shared between
     multiple fgroups.  Each entry in all_implied_fbits is of the form
     ante -> cons, meaning that if the feature "ante" is enabled, we should
     implicitly enable "cons".  */
  const struct fbit_implication *impl = all_implied_fbits;
  while (impl->ante)
    {
      if (bitmap_bit_p (target->isa, impl->ante))
	bitmap_set_bit (target->isa, impl->cons);
      impl++;
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  if (!target->arch_name)
    target->arch_name = arm_selected_arch->common.name;
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
}
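/* For example, "-march=armv8-a" on its own selects a representative core
   through the architecture's tune_id, while "-mcpu=cortex-a53" on its own
   derives both the architecture and the default tuning from the named CPU;
   "-mtune" only replaces the tuning data chosen above.  */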
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main.  */
  static const enum isa_feature fp_bitlist[]
    = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
  isa_all_fpbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
  arm_initialize_isa (isa_all_fpbits, fp_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!OPTION_SET_P (arm_fpu_index))
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options, &global_options_set);
  arm_configure_build_target (&arm_active_target, &opts, true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags.  */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
	       "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!OPTION_SET_P (arm_structure_size_boundary))
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (TARGET_VXWORKS_RTP)
    {
      if (!OPTION_SET_P (arm_pic_data_is_text_relative))
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  /* If in FDPIC mode then force arm_pic_register to be r9.  */
  if (TARGET_FDPIC)
    {
      arm_pic_register = FDPIC_REGNUM;
      if (TARGET_THUMB1)
	sorry ("FDPIC mode is not supported in Thumb-1 mode");
    }

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use %qs for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  target_word_relocations = 1;

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Enable fix_vlldm by default if required.  */
  if (fix_vlldm == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
	fix_vlldm = 1;
      else
	fix_vlldm = 0;
    }

  /* Enable fix_aes by default if required.  */
  if (fix_aes_erratum_1742098 == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
	fix_aes_erratum_1742098 = 1;
      else
	fix_aes_erratum_1742098 = 0;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> not supported "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_gcse_unrestricted_cost, 2);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_simultaneous_prefetches,
			 current_tune->prefetch.num_slots);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_line_size,
			 current_tune->prefetch.l1_cache_line_size);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    {
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_destruct_interfere_size,
			   current_tune->prefetch.l1_cache_line_size);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_construct_interfere_size,
			   current_tune->prefetch.l1_cache_line_size);
    }
  else
    {
      /* For a generic ARM target, JF Bastien proposed using 64 for both.  */
      /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
	 these parameters?  */
      /* More recent Cortex chips have a 64-byte cache line, but are marked
	 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults.  */
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_destruct_interfere_size, 64);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			   param_construct_interfere_size, 64);
    }

  if (current_tune->prefetch.l1_cache_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_size,
			 current_tune->prefetch.l1_cache_size);

  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
		       param_sched_autopref_queue_depth,
		       sched_autopref_queue_depth);

  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options,
					   &global_options_set);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options, &global_options_set);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
  arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
				    isa_bit_armv8_1m_main);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_arch8m_main = arm_arch7 && arm_arch_cmse;
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
  arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);

  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  arm_arch_cde = 0;
  arm_arch_cde_coproc = 0;
  int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
		    isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
		    isa_bit_cdecp6, isa_bit_cdecp7};
  for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
    {
      int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
      if (cde_bit)
	{
	  arm_arch_cde |= cde_bit;
	  arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
	}
    }

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
					    isa_bit_quirk_armv6kz);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_TPIDRURO;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
    error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
}
/* Perform some validation between the desired architecture and the rest of
   the options.  */
static void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support %<-mcaller-super-interworking%>");
      else if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support %<-mcallee-super-interworking%>");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("%<__fp16%> and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
	      && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
	    error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("%<-mfloat-abi=hard%> and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
}
/* Test whether a local function descriptor is canonical, i.e.,
   whether we can use GOTOFFFUNCDESC to compute the address of the
   function.  */
static bool
arm_fdpic_local_funcdesc_p (rtx fnx)
{
  tree fn;
  enum symbol_visibility vis;
  bool ret;

  if (!TARGET_FDPIC)
    return true;

  if (! SYMBOL_REF_LOCAL_P (fnx))
    return false;

  fn = SYMBOL_REF_DECL (fnx);

  if (! fn)
    return false;

  vis = DECL_VISIBILITY (fn);

  if (vis == VISIBILITY_PROTECTED)
    /* Private function descriptors for protected functions are not
       canonical.  Temporarily change the visibility to global so that
       we can ensure uniqueness of funcdesc pointers.  */
    DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;

  ret = default_binds_local_p_1 (fn, flag_pic);

  DECL_VISIBILITY (fn) = vis;

  return ret;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
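/* For example, a handler declared as

     void ext_irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   maps to ARM_FT_ISR through the table above; an unrecognized string
   yields ARM_FT_UNKNOWN from arm_isr_value below.  */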
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg *ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.

   In FDPIC mode, the trampoline looks like:
	   .word	trampoline address
	   .word	trampoline GOT address
	   ldr		r12, [pc, #8] ; #4 for Arm mode
	   ldr		r9,  [pc, #8] ; #4 for Arm mode
	   ldr		pc,  [pc, #8] ; #4 for Arm mode
	   .word	static chain value
	   .word	GOT address
	   .word	function's address
*/
static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_FDPIC)
    {
      /* The first two words are a function descriptor pointing to the
	 trampoline code just below.  */
      if (TARGET_ARM)
	fprintf (f, "\t.arm\n");
      else if (TARGET_THUMB2)
	fprintf (f, "\t.thumb\n");
      else
	/* Only ARM and Thumb-2 are supported.  */
	gcc_unreachable ();

      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      /* Trampoline code which sets the static chain register but also
	 PIC register before jumping into real code.  */
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PC_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
    }
  else if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  if (TARGET_FDPIC)
    {
      rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
      rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
      rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
      /* The function start address is at offset 8, but in Thumb mode
	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
	 below.  */
      rtx trampoline_code_start
	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);

      /* Write initial funcdesc which points to the trampoline.  */
      mem = adjust_address (m_tramp, SImode, 0);
      emit_move_insn (mem, trampoline_code_start);
      mem = adjust_address (m_tramp, SImode, 4);
      emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
      /* Setup static chain.  */
      mem = adjust_address (m_tramp, SImode, 20);
      emit_move_insn (mem, chain_value);
      /* GOT + real function entry point.  */
      mem = adjust_address (m_tramp, SImode, 24);
      emit_move_insn (mem, gotaddr);
      mem = adjust_address (m_tramp, SImode, 28);
      emit_move_insn (mem, fnaddr);
    }
  else
    {
      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
      emit_move_insn (mem, chain_value);

      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
      fnaddr = XEXP (DECL_RTL (fndecl), 0);
      emit_move_insn (mem, fnaddr);
    }

  a_tramp = XEXP (m_tramp, 0);
  maybe_emit_call_builtin___clear_cache (a_tramp,
					 plus_constant (ptr_mode,
							a_tramp,
							TRAMPOLINE_SIZE));
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  /* For FDPIC don't fix trampoline address since it's a function
     descriptor and not a function address.  */
  if (TARGET_THUMB && !TARGET_FDPIC)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if REG needs to be saved.  For interrupt handlers, this
   includes call-clobbered registers too.  If this is a leaf function
   we can just examine the registers used by the RTL, but otherwise we
   have to assume that whatever function is called might clobber
   anything, and so we have to save all the call-clobbered registers
   as well.  */
static inline bool reg_needs_saving_p (unsigned reg)
{
  unsigned long func_type = arm_current_func_type ();

  if (IS_INTERRUPT (func_type))
    if (df_regs_ever_live_p (reg)
	/* Save call-clobbered core registers.  */
	|| (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)
	    && reg < FIRST_VFP_REGNUM))
      return true;
    else
      return false;
  else
    if (!df_regs_ever_live_p (reg)
	|| call_used_or_fixed_reg_p (reg))
      return false;
    else
      return true;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  /* Never use a return instruction when return address signing
     mechanism is enabled as it requires more than one
     instruction.  */
  if (arm_current_function_pac_enabled_p ())
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes () != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5t here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_or_fixed_reg_p (3))
	return 0;

      /* ... that it isn't being used for a return value ...  */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  Armv8.1-M Mainline
     also needs several instructions to save and restore FP context.  */
  if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_VFP_BASE)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (reg_needs_saving_p (regno))
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
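/* For example, 0x0000ff00 (0xff placed in bits 8..15 by an even rotation)
   and 0xff000000 are valid ARM immediates, while 0x00012345 spans more than
   eight significant bits and must instead be synthesized from several
   instructions (see arm_gen_constant below).  */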
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
	     || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
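/* For example, with CODE == SET and VAL == 0x12345678 on a target with
   MOVW/MOVT the value is typically emitted as a movw/movt pair by
   arm_emit_movpair above; without MOVT it is synthesized by
   arm_gen_constant from a short series of mov/orr (or mvn/bic)
   instructions built out of rotated 8-bit immediates.  */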
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      int loc;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		result = remainder & 0xff00ff00;
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		result = remainder & 0x00ff00ff;
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       to that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }
  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /* Convert
	  x = y | constant (which is composed of set_sign_bit_copies of leading 1s
			    and the remainder 0s, e.g. 0xfff00000)
	   to
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  E.g. for
	  x = x | 0xfff00000;
	  we generate
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones)
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  E.g. for r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12
      */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;
    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }
5594 normally, negated, or inverted. */
5596 /* AND cannot be split into multiple insns, so invert and use BIC. */
5599 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5602 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5607 if (can_invert
|| final_invert
)
5608 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5613 immediates
= &pos_immediates
;
5615 /* Is the negated immediate sequence more efficient? */
5616 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5619 immediates
= &neg_immediates
;
5624 /* Is the inverted immediate sequence more efficient?
5625 We must allow for an extra NOT instruction for XOR operations, although
5626 there is some chance that the final 'mvn' will get optimized later. */
5627 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5630 immediates
= &inv_immediates
;
5638 /* Now output the chosen sequence as instructions. */
5641 for (i
= 0; i
< insns
; i
++)
5643 rtx new_src
, temp1_rtx
;
5645 temp1
= immediates
->i
[i
];
5647 if (code
== SET
|| code
== MINUS
)
5648 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5649 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5650 new_src
= gen_reg_rtx (mode
);
5656 else if (can_negate
)
5659 temp1
= trunc_int_for_mode (temp1
, mode
);
5660 temp1_rtx
= GEN_INT (temp1
);
5664 else if (code
== MINUS
)
5665 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5667 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5669 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5674 can_negate
= can_invert
;
5678 else if (code
== MINUS
)
5686 emit_constant_insn (cond
, gen_rtx_SET (target
,
5687 gen_rtx_NOT (mode
, source
)));
/* Return TRUE if op is a constant where both the low and top words are
   suitable for RSB/RSC instructions.  This is never true for Thumb, since
   we do not have RSC in that case.  */
static bool
arm_const_double_prefer_rsbs_rsc (rtx op)
{
  /* Thumb lacks RSC, so we never prefer that sequence.  */
  if (TARGET_THUMB || !CONST_INT_P (op))
    return false;
  HOST_WIDE_INT hi, lo;
  lo = UINTVAL (op) & 0xffffffffULL;
  hi = UINTVAL (op) >> 32;
  return const_ok_for_arm (lo) && const_ok_for_arm (hi);
}
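/* Editorial note: illustrative sketch only, not part of the original
   source; the helper name is hypothetical.  It shows how a 64-bit constant
   is split into the two 32-bit halves that the function above tests
   individually with const_ok_for_arm.  */
static inline void
example_split_di_constant (unsigned long long val,
                           unsigned int *lo, unsigned int *hi)
{
  *lo = (unsigned int) (val & 0xffffffffULL);   /* low word  */
  *hi = (unsigned int) (val >> 32);             /* high word */
  /* E.g. val == 0x00ff000000000ff0ULL gives *lo == 0x00000ff0 and
     *hi == 0x00ff0000; each half is an 8-bit value rotated by an even
     amount, so each would be accepted as an ARM immediate.  */
}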
5709 /* Canonicalize a comparison so that we are more likely to recognize it.
5710 This can be done for a few constant compares, where we can make the
5711 immediate value easier to load. */
5714 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5715 bool op0_preserve_value
)
5718 unsigned HOST_WIDE_INT i
, maxval
;
5720 mode
= GET_MODE (*op0
);
5721 if (mode
== VOIDmode
)
5722 mode
= GET_MODE (*op1
);
5724 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5726 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5727 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5728 either reversed or (for constant OP1) adjusted to GE/LT.
5729 Similarly for GTU/LEU in Thumb mode. */
5733 if (*code
== GT
|| *code
== LE
5734 || *code
== GTU
|| *code
== LEU
)
5736 /* Missing comparison. First try to use an available
5738 if (CONST_INT_P (*op1
))
5747 /* Try to convert to GE/LT, unless that would be more
5749 if (!arm_const_double_by_immediates (GEN_INT (i
+ 1))
5750 && arm_const_double_prefer_rsbs_rsc (*op1
))
5752 *op1
= GEN_INT (i
+ 1);
5753 *code
= *code
== GT
? GE
: LT
;
5757 /* GT maxval is always false, LE maxval is always true.
5758 We can't fold that away here as we must make a
5759 comparison, but we can fold them to comparisons
5760 with the same result that can be handled:
5761 op0 GT maxval -> op0 LT minval
5762 op0 LE maxval -> op0 GE minval
5763 where minval = (-maxval - 1). */
5764 *op1
= GEN_INT (-maxval
- 1);
5765 *code
= *code
== GT
? LT
: GE
;
5771 if (i
!= ~((unsigned HOST_WIDE_INT
) 0))
5773 /* Try to convert to GEU/LTU, unless that would
5774 be more expensive. */
5775 if (!arm_const_double_by_immediates (GEN_INT (i
+ 1))
5776 && arm_const_double_prefer_rsbs_rsc (*op1
))
5778 *op1
= GEN_INT (i
+ 1);
5779 *code
= *code
== GTU
? GEU
: LTU
;
5783 /* GTU ~0 is always false, LEU ~0 is always true.
5784 We can't fold that away here as we must make a
5785 comparison, but we can fold them to comparisons
5786 with the same result that can be handled:
5787 op0 GTU ~0 -> op0 LTU 0
5788 op0 LEU ~0 -> op0 GEU 0. */
5790 *code
= *code
== GTU
? LTU
: GEU
;
5799 if (!op0_preserve_value
)
5801 std::swap (*op0
, *op1
);
5802 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5808 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5809 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5810 to facilitate possible combining with a cmp into 'ands'. */
5812 && GET_CODE (*op0
) == ZERO_EXTEND
5813 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5814 && GET_MODE (XEXP (*op0
, 0)) == QImode
5815 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5816 && subreg_lowpart_p (XEXP (*op0
, 0))
5817 && *op1
== const0_rtx
)
5818 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5821 /* Comparisons smaller than DImode. Only adjust comparisons against
5822 an out-of-range constant. */
5823 if (!CONST_INT_P (*op1
)
5824 || const_ok_for_arm (INTVAL (*op1
))
5825 || const_ok_for_arm (- INTVAL (*op1
)))
5839 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5841 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5842 *code
= *code
== GT
? GE
: LT
;
5850 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5852 *op1
= GEN_INT (i
- 1);
5853 *code
= *code
== GE
? GT
: LE
;
5860 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5861 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5863 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5864 *code
= *code
== GTU
? GEU
: LTU
;
5872 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5874 *op1
= GEN_INT (i
- 1);
5875 *code
= *code
== GEU
? GTU
: LEU
;
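/* Editorial note: illustrative sketch only, not part of the original
   source; the helper name is hypothetical.  It restates the basic identity
   behind the constant adjustments above: for a constant C below the
   maximum value, "x > C" is equivalent to "x >= C + 1", which can turn an
   awkward immediate into one that fits an ARM data-processing insn.  */
static inline int
example_gt_as_ge (int x, int c)         /* requires c < INT_MAX */
{
  return x >= c + 1;                    /* same truth value as (x > c) */
}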
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
                    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
        }
    }

  return arm_libcall_value_1 (mode);
}
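/* Editorial note: illustrative sketch only, not part of the original
   source; the helper name is hypothetical and 4 stands in for
   UNITS_PER_WORD.  It shows the rounding performed above for big-endian
   AAPCS, which pads a small return value up to a whole number of words.  */
static inline int
example_round_up_to_word (int size_in_bytes)
{
  const int units_per_word = 4;
  if (size_in_bytes % units_per_word != 0)
    size_in_bytes += units_per_word - size_in_bytes % units_per_word;
  return size_in_bytes;         /* 3 -> 4, 6 -> 8, 8 -> 8 */
}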
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
5950 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5952 static bool init_done
= false;
5953 static libcall_table_type
*libcall_htab
= NULL
;
5959 libcall_htab
= new libcall_table_type (31);
5960 add_libcall (libcall_htab
,
5961 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5962 add_libcall (libcall_htab
,
5963 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5964 add_libcall (libcall_htab
,
5965 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5966 add_libcall (libcall_htab
,
5967 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5969 add_libcall (libcall_htab
,
5970 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5971 add_libcall (libcall_htab
,
5972 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5973 add_libcall (libcall_htab
,
5974 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5975 add_libcall (libcall_htab
,
5976 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5978 add_libcall (libcall_htab
,
5979 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5980 add_libcall (libcall_htab
,
5981 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5982 add_libcall (libcall_htab
,
5983 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5984 add_libcall (libcall_htab
,
5985 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5986 add_libcall (libcall_htab
,
5987 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5988 add_libcall (libcall_htab
,
5989 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5990 add_libcall (libcall_htab
,
5991 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5992 add_libcall (libcall_htab
,
5993 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5994 add_libcall (libcall_htab
,
5995 convert_optab_libfunc (sfix_optab
, SImode
, SFmode
));
5996 add_libcall (libcall_htab
,
5997 convert_optab_libfunc (ufix_optab
, SImode
, SFmode
));
5999 /* Values from double-precision helper functions are returned in core
6000 registers if the selected core only supports single-precision
6001 arithmetic, even if we are using the hard-float ABI. The same is
6002 true for single-precision helpers except in case of MVE, because in
6003 MVE we will be using the hard-float ABI on a CPU which doesn't support
6004 single-precision operations in hardware. In MVE the following check
6005 enables use of emulation for the single-precision arithmetic
6007 if (TARGET_HAVE_MVE
)
6009 add_libcall (libcall_htab
, optab_libfunc (add_optab
, SFmode
));
6010 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, SFmode
));
6011 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, SFmode
));
6012 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, SFmode
));
6013 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, SFmode
));
6014 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, SFmode
));
6015 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, SFmode
));
6016 add_libcall (libcall_htab
, optab_libfunc (le_optab
, SFmode
));
6017 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, SFmode
));
6018 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, SFmode
));
6019 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, SFmode
));
6021 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
6022 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
6023 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
6024 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
6025 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
6026 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
6027 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
6028 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
6029 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
6030 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
6031 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
6032 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
6034 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
6036 add_libcall (libcall_htab
,
6037 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
6040 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
           && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
         even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
        return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
6074 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6077 arm_function_value_regno_p (const unsigned int regno
)
6079 if (regno
== ARG_REGISTER (1)
6081 && TARGET_AAPCS_BASED
6082 && TARGET_HARD_FLOAT
6083 && regno
== FIRST_VFP_REGNUM
)
6084 || (TARGET_IWMMXT_ABI
6085 && regno
== FIRST_IWMMXT_REGNUM
))
6091 /* Determine the amount of memory needed to store the possible return
6092 registers of an untyped call. */
6094 arm_apply_result_size (void)
6100 if (TARGET_HARD_FLOAT_ABI
)
6102 if (TARGET_IWMMXT_ABI
)
6109 /* Decide whether TYPE should be returned in memory (true)
6110 or in a register (false). FNTYPE is the type of the function making
6113 arm_return_in_memory (const_tree type
, const_tree fntype
)
6117 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
6119 if (TARGET_AAPCS_BASED
)
6121 /* Simple, non-aggregate types (ie not including vectors and
6122 complex) are always returned in a register (or registers).
6123 We don't care about which register here, so we can short-cut
6124 some of the detail. */
6125 if (!AGGREGATE_TYPE_P (type
)
6126 && TREE_CODE (type
) != VECTOR_TYPE
6127 && TREE_CODE (type
) != COMPLEX_TYPE
)
6130 /* Any return value that is no larger than one word can be
6132 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
6135 /* Check any available co-processors to see if they accept the
6136 type as a register candidate (VFP, for example, can return
6137 some aggregates in consecutive registers). These aren't
6138 available if the call is variadic. */
6139 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
6142 /* Vector values should be returned using ARM registers, not
6143 memory (unless they're over 16 bytes, which will break since
6144 we only have four call-clobbered registers to play with). */
6145 if (TREE_CODE (type
) == VECTOR_TYPE
)
6146 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
6148 /* The rest go in memory. */
6152 if (TREE_CODE (type
) == VECTOR_TYPE
)
6153 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
6155 if (!AGGREGATE_TYPE_P (type
) &&
6156 (TREE_CODE (type
) != VECTOR_TYPE
))
6157 /* All simple types are returned in registers. */
6160 if (arm_abi
!= ARM_ABI_APCS
)
6162 /* ATPCS and later return aggregate types in memory only if they are
6163 larger than a word (or are variable size). */
6164 return (size
< 0 || size
> UNITS_PER_WORD
);
6167 /* For the arm-wince targets we choose to be compatible with Microsoft's
6168 ARM and Thumb compilers, which always return aggregates in memory. */
6170 /* All structures/unions bigger than one word are returned in memory.
6171 Also catch the case where int_size_in_bytes returns -1. In this case
6172 the aggregate is either huge or of variable size, and in either case
6173 we will want to return it via memory and not in a register. */
6174 if (size
< 0 || size
> UNITS_PER_WORD
)
6177 if (TREE_CODE (type
) == RECORD_TYPE
)
6181 /* For a struct the APCS says that we only return in a register
6182 if the type is 'integer like' and every addressable element
6183 has an offset of zero. For practical purposes this means
6184 that the structure can have at most one non bit-field element
6185 and that this element must be the first one in the structure. */
6187 /* Find the first field, ignoring non FIELD_DECL things which will
6188 have been created by C++. */
6189 /* NOTE: This code is deprecated and has not been updated to handle
6190 DECL_FIELD_ABI_IGNORED. */
6191 for (field
= TYPE_FIELDS (type
);
6192 field
&& TREE_CODE (field
) != FIELD_DECL
;
6193 field
= DECL_CHAIN (field
))
6197 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6199 /* Check that the first field is valid for returning in a register. */
6201 /* ... Floats are not allowed */
6202 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
6205 /* ... Aggregates that are not themselves valid for returning in
6206 a register are not allowed. */
6207 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
6210 /* Now check the remaining fields, if any. Only bitfields are allowed,
6211 since they are not addressable. */
6212 for (field
= DECL_CHAIN (field
);
6214 field
= DECL_CHAIN (field
))
6216 if (TREE_CODE (field
) != FIELD_DECL
)
6219 if (!DECL_BIT_FIELD_TYPE (field
))
6226 if (TREE_CODE (type
) == UNION_TYPE
)
6230 /* Unions can be returned in registers if every element is
6231 integral, or can be returned in an integer register. */
6232 for (field
= TYPE_FIELDS (type
);
6234 field
= DECL_CHAIN (field
))
6236 if (TREE_CODE (field
) != FIELD_DECL
)
6239 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
6242 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
6248 #endif /* not ARM_WINCE */
6250 /* Return all other types in memory. */
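/* Editorial note: the declarations below are illustrative only and are not
   part of the original source.  They are hypothetical test cases showing
   how the AAPCS branch above classifies a few return types (assuming
   UNITS_PER_WORD == 4).  */
struct example_word  { char c[4]; };    /* <= one word: returned in r0.  */
struct example_large { char c[12]; };   /* aggregate larger than a word and
                                           not a co-processor candidate:
                                           returned in memory.  */
struct example_hfa   { float a, b; };   /* homogeneous float aggregate: on
                                           the hard-float PCS the VFP
                                           co-processor rules return it in
                                           s0-s1 rather than memory.  */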
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
6294 /* Get the PCS variant to use for this call. TYPE is the function's type
specification, DECL is the specific declaration.  DECL may be null if
6296 the call could be indirect or if this is a library call. */
6298 arm_get_pcs_model (const_tree type
, const_tree decl ATTRIBUTE_UNUSED
)
6300 bool user_convention
= false;
6301 enum arm_pcs user_pcs
= arm_pcs_default
;
6306 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
6309 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
6310 user_convention
= true;
6313 if (TARGET_AAPCS_BASED
)
6315 /* Detect varargs functions. These always use the base rules
6316 (no argument is ever a candidate for a co-processor
6318 bool base_rules
= stdarg_p (type
);
6320 if (user_convention
)
6322 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
6323 sorry ("non-AAPCS derived PCS variant");
6324 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
6325 error ("variadic functions must use the base AAPCS variant");
6329 return ARM_PCS_AAPCS
;
6330 else if (user_convention
)
6333 /* Unfortunately, this is not safe and can lead to wrong code
6334 being generated (PR96882). Not all calls into the back-end
6335 pass the DECL, so it is unsafe to make any PCS-changing
6336 decisions based on it. In particular the RETURN_IN_MEMORY
6337 hook is only ever passed a TYPE. This needs revisiting to
6338 see if there are any partial improvements that can be
6340 else if (decl
&& flag_unit_at_a_time
)
6342 /* Local functions never leak outside this compilation unit,
6343 so we are free to use whatever conventions are
6345 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6346 cgraph_node
*local_info_node
6347 = cgraph_node::local_info_node (CONST_CAST_TREE (decl
));
6348 if (local_info_node
&& local_info_node
->local
)
6349 return ARM_PCS_AAPCS_LOCAL
;
6353 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
6354 sorry ("PCS variant");
6356 /* For everything else we use the target's default. */
6357 return arm_pcs_default
;
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
                    const_tree fntype  ATTRIBUTE_UNUSED,
                    rtx libcall  ATTRIBUTE_UNUSED,
                    const_tree fndecl  ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
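/* Editorial note: illustrative sketch only, not part of the original
   source; the helper name is hypothetical.  It shows the bitmask built
   above: one bit per 32-bit VFP argument register, all set (free) at the
   start of a call.  */
static inline unsigned int
example_initial_vfp_free_mask (int num_vfp_arg_regs)
{
  return (1u << num_vfp_arg_regs) - 1;  /* 16 registers (s0-s15) -> 0xffff */
}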
6372 /* Bitmasks that indicate whether earlier versions of GCC would have
6373 taken a different path through the ABI logic. This should result in
6374 a -Wpsabi warning if the earlier path led to a different ABI decision.
6376 WARN_PSABI_EMPTY_CXX17_BASE
6377 Indicates that the type includes an artificial empty C++17 base field
6378 that, prior to GCC 10.1, would prevent the type from being treated as
6379 a HFA or HVA. See PR94711 for details.
6381 WARN_PSABI_NO_UNIQUE_ADDRESS
6382 Indicates that the type includes an empty [[no_unique_address]] field
6383 that, prior to GCC 10.1, would prevent the type from being treated as
6385 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE
= 1U << 0;
6386 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS
= 1U << 1;
6387 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD
= 1U << 2;
6389 /* Walk down the type tree of TYPE counting consecutive base elements.
6390 If *MODEP is VOIDmode, then set it to the first valid floating point
6391 type. If a non-floating point type is found, or if a floating point
6392 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6393 otherwise return the count in the sub-tree.
6395 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6396 function has changed its behavior relative to earlier versions of GCC.
6397 Normally the argument should be nonnull and point to a zero-initialized
6398 variable. The function then records whether the ABI decision might
6399 be affected by a known fix to the ABI logic, setting the associated
6400 WARN_PSABI_* bits if so.
6402 When the argument is instead a null pointer, the function tries to
6403 simulate the behavior of GCC before all such ABI fixes were made.
6404 This is useful to check whether the function returns something
6405 different after the ABI fixes. */
6407 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
,
6408 unsigned int *warn_psabi_flags
)
6413 switch (TREE_CODE (type
))
6416 mode
= TYPE_MODE (type
);
6417 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
&& mode
!= BFmode
)
6420 if (*modep
== VOIDmode
)
6429 mode
= TYPE_MODE (TREE_TYPE (type
));
6430 if (mode
!= DFmode
&& mode
!= SFmode
)
6433 if (*modep
== VOIDmode
)
6442 /* Use V2SImode and V4SImode as representatives of all 64-bit
6443 and 128-bit vector types, whether or not those modes are
6444 supported with the present options. */
6445 size
= int_size_in_bytes (type
);
6458 if (*modep
== VOIDmode
)
6461 /* Vector modes are considered to be opaque: two vectors are
6462 equivalent for the purposes of being homogeneous aggregates
6463 if they are the same size. */
6472 tree index
= TYPE_DOMAIN (type
);
6474 /* Can't handle incomplete types nor sizes that are not
6476 if (!COMPLETE_TYPE_P (type
)
6477 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6480 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
,
6484 || !TYPE_MAX_VALUE (index
)
6485 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
6486 || !TYPE_MIN_VALUE (index
)
6487 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
6491 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
6492 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
6494 /* There must be no padding. */
6495 if (wi::to_wide (TYPE_SIZE (type
))
6496 != count
* GET_MODE_BITSIZE (*modep
))
6508 /* Can't handle incomplete types nor sizes that are not
6510 if (!COMPLETE_TYPE_P (type
)
6511 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6514 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6516 if (TREE_CODE (field
) != FIELD_DECL
)
6519 if (DECL_FIELD_ABI_IGNORED (field
))
6521 /* See whether this is something that earlier versions of
6522 GCC failed to ignore. */
6524 if (lookup_attribute ("no_unique_address",
6525 DECL_ATTRIBUTES (field
)))
6526 flag
= WARN_PSABI_NO_UNIQUE_ADDRESS
;
6527 else if (cxx17_empty_base_field_p (field
))
6528 flag
= WARN_PSABI_EMPTY_CXX17_BASE
;
6530 /* No compatibility problem. */
6533 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6534 if (warn_psabi_flags
)
6536 *warn_psabi_flags
|= flag
;
6540 /* A zero-width bitfield may affect layout in some
6541 circumstances, but adds no members. The determination
6542 of whether or not a type is an HFA is performed after
6543 layout is complete, so if the type still looks like an
6544 HFA afterwards, it is still classed as one. This is
6545 potentially an ABI break for the hard-float ABI. */
6546 else if (DECL_BIT_FIELD (field
)
6547 && integer_zerop (DECL_SIZE (field
)))
/* Prior to GCC-12 these fields were stripped early,
6550 hiding them from the back-end entirely and
6551 resulting in the correct behaviour for argument
6552 passing. Simulate that old behaviour without
6553 generating a warning. */
6554 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field
))
6556 if (warn_psabi_flags
)
6558 *warn_psabi_flags
|= WARN_PSABI_ZERO_WIDTH_BITFIELD
;
6563 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
,
6570 /* There must be no padding. */
6571 if (wi::to_wide (TYPE_SIZE (type
))
6572 != count
* GET_MODE_BITSIZE (*modep
))
6579 case QUAL_UNION_TYPE
:
6581 /* These aren't very interesting except in a degenerate case. */
6586 /* Can't handle incomplete types nor sizes that are not
6588 if (!COMPLETE_TYPE_P (type
)
6589 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6592 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6594 if (TREE_CODE (field
) != FIELD_DECL
)
6597 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
,
6601 count
= count
> sub_count
? count
: sub_count
;
6604 /* There must be no padding. */
6605 if (wi::to_wide (TYPE_SIZE (type
))
6606 != count
* GET_MODE_BITSIZE (*modep
))
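/* Editorial note: the declarations below are illustrative only and are not
   part of the original source.  They are hypothetical types showing what
   the walk above reports when classifying homogeneous aggregates.  */
struct example_hfa3     { float x, y, z; };     /* 3 elements, SFmode      */
struct example_hva4     { float v[4]; };        /* 4 elements, SFmode      */
struct example_mixed    { double d; float f; }; /* mismatched base modes:
                                                   rejected (-1)           */
struct example_too_many { float v[5]; };        /* count 5; the caller only
                                                   accepts 1..4 elements   */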
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
        {
          sorry ("Thumb-1 %<hard-float%> VFP ABI");
          /* sorry() is not immediately fatal, so only display this once.  */
          seen_thumb1_vfp = true;
        }

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}
6644 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6645 suitable for passing or returning in VFP registers for the PCS
6646 variant selected. If it is, then *BASE_MODE is updated to contain
6647 a machine mode describing each element of the argument's type and
6648 *COUNT to hold the number of such elements. */
6650 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
6651 machine_mode mode
, const_tree type
,
6652 machine_mode
*base_mode
, int *count
)
6654 machine_mode new_mode
= VOIDmode
;
6656 /* If we have the type information, prefer that to working things
6657 out from the mode. */
6660 unsigned int warn_psabi_flags
= 0;
6661 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
,
6663 if (ag_count
> 0 && ag_count
<= 4)
6665 static unsigned last_reported_type_uid
;
6666 unsigned uid
= TYPE_UID (TYPE_MAIN_VARIANT (type
));
6670 && uid
!= last_reported_type_uid
6671 && ((alt
= aapcs_vfp_sub_candidate (type
, &new_mode
, NULL
))
6675 = CHANGES_ROOT_URL
"gcc-10/changes.html#empty_base";
6677 = CHANGES_ROOT_URL
"gcc-12/changes.html#zero_width_bitfields";
6678 gcc_assert (alt
== -1);
6679 last_reported_type_uid
= uid
;
6680 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6682 if (warn_psabi_flags
& WARN_PSABI_NO_UNIQUE_ADDRESS
)
6683 inform (input_location
, "parameter passing for argument of "
6684 "type %qT with %<[[no_unique_address]]%> members "
6685 "changed %{in GCC 10.1%}",
6686 TYPE_MAIN_VARIANT (type
), url10
);
6687 else if (warn_psabi_flags
& WARN_PSABI_EMPTY_CXX17_BASE
)
6688 inform (input_location
, "parameter passing for argument of "
6689 "type %qT when C++17 is enabled changed to match "
6690 "C++14 %{in GCC 10.1%}",
6691 TYPE_MAIN_VARIANT (type
), url10
);
6692 else if (warn_psabi_flags
& WARN_PSABI_ZERO_WIDTH_BITFIELD
)
6693 inform (input_location
, "parameter passing for argument of "
6694 "type %qT changed %{in GCC 12.1%}",
6695 TYPE_MAIN_VARIANT (type
), url12
);
6702 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
6703 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6704 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6709 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6712 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
6718 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
6721 *base_mode
= new_mode
;
6723 if (TARGET_GENERAL_REGS_ONLY
)
6724 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6731 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
6732 machine_mode mode
, const_tree type
)
6734 int count ATTRIBUTE_UNUSED
;
6735 machine_mode ag_mode ATTRIBUTE_UNUSED
;
6737 if (!use_vfp_abi (pcs_variant
, false))
6739 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6744 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6747 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6750 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6751 &pcum
->aapcs_vfp_rmode
,
6752 &pcum
->aapcs_vfp_rcount
);
6755 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6756 for the behaviour of this function. */
6759 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6760 const_tree type ATTRIBUTE_UNUSED
)
6763 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6764 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6765 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6768 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6769 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6771 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6773 || (mode
== TImode
&& ! (TARGET_NEON
|| TARGET_HAVE_MVE
))
6774 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6777 int rcount
= pcum
->aapcs_vfp_rcount
;
6779 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6781 if (!(TARGET_NEON
|| TARGET_HAVE_MVE
))
6783 /* Avoid using unsupported vector modes. */
6784 if (rmode
== V2SImode
)
6786 else if (rmode
== V4SImode
)
6793 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6794 for (i
= 0; i
< rcount
; i
++)
6796 rtx tmp
= gen_rtx_REG (rmode
,
6797 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6798 tmp
= gen_rtx_EXPR_LIST
6800 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6801 XVECEXP (par
, 0, i
) = tmp
;
6804 pcum
->aapcs_reg
= par
;
6807 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6813 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6814 comment there for the behaviour of this function. */
6817 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6819 const_tree type ATTRIBUTE_UNUSED
)
6821 if (!use_vfp_abi (pcs_variant
, false))
6825 || (GET_MODE_CLASS (mode
) == MODE_INT
6826 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6827 && !(TARGET_NEON
|| TARGET_HAVE_MVE
)))
6830 machine_mode ag_mode
;
6835 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6838 if (!(TARGET_NEON
|| TARGET_HAVE_MVE
))
6840 if (ag_mode
== V2SImode
)
6842 else if (ag_mode
== V4SImode
)
6848 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6849 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6850 for (i
= 0; i
< count
; i
++)
6852 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6853 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6854 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6855 XVECEXP (par
, 0, i
) = tmp
;
6861 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
                   machine_mode mode  ATTRIBUTE_UNUSED,
                   const_tree type  ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
6874 #define AAPCS_CP(X) \
6876 aapcs_ ## X ## _cum_init, \
6877 aapcs_ ## X ## _is_call_candidate, \
6878 aapcs_ ## X ## _allocate, \
6879 aapcs_ ## X ## _is_return_candidate, \
6880 aapcs_ ## X ## _allocate_return_reg, \
6881 aapcs_ ## X ## _advance \
/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
6892 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6893 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6895 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6896 BLKmode) is a candidate for this co-processor's registers; this
6897 function should ignore any position-dependent state in
6898 CUMULATIVE_ARGS and only use call-type dependent information. */
6899 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6901 /* Return true if the argument does get a co-processor register; it
6902 should set aapcs_reg to an RTX of the register allocated as is
6903 required for a return from FUNCTION_ARG. */
6904 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6906 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6907 be returned in this co-processor's registers. */
6908 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6910 /* Allocate and return an RTX element to hold the return type of a call. This
6911 routine must not fail and will only be called if is_return_candidate
6912 returned true with the same parameters. */
6913 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6915 /* Finish processing this argument and prepare to start processing
6917 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6918 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6926 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6931 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6932 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6939 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6941 /* We aren't passed a decl, so we can't check that a call is local.
6942 However, it isn't clear that that would be a win anyway, since it
6943 might limit some tail-calling opportunities. */
6944 enum arm_pcs pcs_variant
;
6948 const_tree fndecl
= NULL_TREE
;
6950 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6953 fntype
= TREE_TYPE (fntype
);
6956 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6959 pcs_variant
= arm_pcs_default
;
6961 if (pcs_variant
!= ARM_PCS_AAPCS
)
6965 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6966 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6975 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6978 /* We aren't passed a decl, so we can't check that a call is local.
6979 However, it isn't clear that that would be a win anyway, since it
6980 might limit some tail-calling opportunities. */
6981 enum arm_pcs pcs_variant
;
6982 int unsignedp ATTRIBUTE_UNUSED
;
6986 const_tree fndecl
= NULL_TREE
;
6988 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6991 fntype
= TREE_TYPE (fntype
);
6994 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6997 pcs_variant
= arm_pcs_default
;
6999 /* Promote integer types. */
7000 if (type
&& INTEGRAL_TYPE_P (type
))
7001 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
7003 if (pcs_variant
!= ARM_PCS_AAPCS
)
7007 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
7008 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
7010 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
7014 /* Promotes small structs returned in a register to full-word size
7015 for big-endian AAPCS. */
7016 if (type
&& arm_return_in_msb (type
))
7018 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7019 if (size
% UNITS_PER_WORD
!= 0)
7021 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
7022 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
7026 return gen_rtx_REG (mode
, R0_REGNUM
);
7030 aapcs_libcall_value (machine_mode mode
)
7032 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
7033 && GET_MODE_SIZE (mode
) <= 4)
7036 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
7039 /* Lay out a function argument using the AAPCS rules. The rule
7040 numbers referred to here are those in the AAPCS. */
7042 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
7043 const_tree type
, bool named
)
7048 /* We only need to do this once per argument. */
7049 if (pcum
->aapcs_arg_processed
)
7052 pcum
->aapcs_arg_processed
= true;
7054 /* Special case: if named is false then we are handling an incoming
7055 anonymous argument which is on the stack. */
7059 /* Is this a potential co-processor register candidate? */
7060 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
7062 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
7063 pcum
->aapcs_cprc_slot
= slot
;
7065 /* We don't have to apply any of the rules from part B of the
7066 preparation phase, these are handled elsewhere in the
7071 /* A Co-processor register candidate goes either in its own
7072 class of registers or on the stack. */
7073 if (!pcum
->aapcs_cprc_failed
[slot
])
7075 /* C1.cp - Try to allocate the argument to co-processor
7077 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
7080 /* C2.cp - Put the argument on the stack and note that we
7081 can't assign any more candidates in this slot. We also
7082 need to note that we have allocated stack space, so that
7083 we won't later try to split a non-cprc candidate between
7084 core registers and the stack. */
7085 pcum
->aapcs_cprc_failed
[slot
] = true;
7086 pcum
->can_split
= false;
7089 /* We didn't get a register, so this argument goes on the
7091 gcc_assert (pcum
->can_split
== false);
7096 /* C3 - For double-word aligned arguments, round the NCRN up to the
7097 next even number. */
7098 ncrn
= pcum
->aapcs_ncrn
;
7101 int res
= arm_needs_doubleword_align (mode
, type
);
7102 /* Only warn during RTL expansion of call stmts, otherwise we would
7103 warn e.g. during gimplification even on functions that will be
7104 always inlined, and we'd warn multiple times. Don't warn when
7105 called in expand_function_start either, as we warn instead in
7106 arm_function_arg_boundary in that case. */
7107 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
7108 inform (input_location
, "parameter passing for argument of type "
7109 "%qT changed in GCC 7.1", type
);
7114 nregs
= ARM_NUM_REGS2(mode
, type
);
7116 /* Sigh, this test should really assert that nregs > 0, but a GCC
7117 extension allows empty structs and then gives them empty size; it
7118 then allows such a structure to be passed by value. For some of
7119 the code below we have to pretend that such an argument has
7120 non-zero size so that we 'locate' it correctly either in
7121 registers or on the stack. */
7122 gcc_assert (nregs
>= 0);
7124 nregs2
= nregs
? nregs
: 1;
7126 /* C4 - Argument fits entirely in core registers. */
7127 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
7129 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
7130 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
7134 /* C5 - Some core registers left and there are no arguments already
7135 on the stack: split this argument between the remaining core
7136 registers and the stack. */
7137 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
7139 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
7140 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
7141 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
7145 /* C6 - NCRN is set to 4. */
7146 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
/* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
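/* Editorial note: illustrative sketch only, not part of the original
   source; the helper name is hypothetical.  It shows rule C3 above in
   isolation: doubleword-aligned arguments round the next core register
   number up to an even value.  For f(int a, long long b), "a" takes r0,
   then "b" rounds NCRN from 1 to 2 and takes the r2/r3 pair.  */
static inline int
example_round_ncrn (int ncrn, int needs_dword_align)
{
  return needs_dword_align ? (ncrn + 1) & ~1 : ncrn;    /* 1 -> 2, 2 -> 2 */
}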
7152 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7153 for a call to a function whose data type is FNTYPE.
7154 For a library call, FNTYPE is NULL. */
7156 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
7158 tree fndecl ATTRIBUTE_UNUSED
)
7160 /* Long call handling. */
7162 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
7164 pcum
->pcs_variant
= arm_pcs_default
;
7166 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7168 if (arm_libcall_uses_aapcs_base (libname
))
7169 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
7171 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
7172 pcum
->aapcs_reg
= NULL_RTX
;
7173 pcum
->aapcs_partial
= 0;
7174 pcum
->aapcs_arg_processed
= false;
7175 pcum
->aapcs_cprc_slot
= -1;
7176 pcum
->can_split
= true;
7178 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
7182 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
7184 pcum
->aapcs_cprc_failed
[i
] = false;
7185 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
7193 /* On the ARM, the offset starts at 0. */
7195 pcum
->iwmmxt_nregs
= 0;
7196 pcum
->can_split
= true;
7198 /* Varargs vectors are treated the same as long long.
7199 named_count avoids having to change the way arm handles 'named' */
7200 pcum
->named_count
= 0;
7203 if (TARGET_REALLY_IWMMXT
&& fntype
)
7207 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
7209 fn_arg
= TREE_CHAIN (fn_arg
))
7210 pcum
->named_count
+= 1;
7212 if (! pcum
->named_count
)
7213 pcum
->named_count
= INT_MAX
;
7217 /* Return 2 if double word alignment is required for argument passing,
7218 but wasn't required before the fix for PR88469.
7219 Return 1 if double word alignment is required for argument passing.
7220 Return -1 if double word alignment used to be required for argument
7221 passing before PR77728 ABI fix, but is not required anymore.
Return 0 if double word alignment is not required and wasn't required
7225 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
7228 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
7230 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7231 if (!AGGREGATE_TYPE_P (type
))
7232 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
7234 /* Array types: Use member alignment of element type. */
7235 if (TREE_CODE (type
) == ARRAY_TYPE
)
7236 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
7240 /* Record/aggregate types: Use greatest member alignment of any member.
7242 Note that we explicitly consider zero-sized fields here, even though
7243 they don't map to AAPCS machine types. For example, in:
7245 struct __attribute__((aligned(8))) empty {};
7248 [[no_unique_address]] empty e;
7252 "s" contains only one Fundamental Data Type (the int field)
7253 but gains 8-byte alignment and size thanks to "e". */
7254 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7255 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
7257 if (TREE_CODE (field
) == FIELD_DECL
)
7260 /* Before PR77728 fix, we were incorrectly considering also
7261 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7262 Make sure we can warn about that with -Wpsabi. */
7265 else if (TREE_CODE (field
) == FIELD_DECL
7266 && DECL_BIT_FIELD_TYPE (field
)
7267 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field
)) > PARM_BOUNDARY
)
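/* Editorial note: the declarations below are illustrative only and are not
   part of the original source.  They are hypothetical types showing the
   two simple outcomes of the check above (PARM_BOUNDARY is 32 bits).  */
struct example_word_aligned  { int i; };        /* member alignment 32:
                                                   no doubleword alignment */
struct example_dword_aligned { long long ll; }; /* member alignment 64:
                                                   doubleword alignment    */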
7277 /* Determine where to put an argument to a function.
7278 Value is zero to push the argument on the stack,
7279 or a hard register in which to store the argument.
7281 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7282 the preceding args and about the function being called.
7283 ARG is a description of the argument.
7285 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7286 other arguments are passed on the stack. If (NAMED == 0) (which happens
7287 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7288 defined), say it is passed in the stack (function_prologue will
7289 indeed make it pass in the stack if necessary). */
7292 arm_function_arg (cumulative_args_t pcum_v
, const function_arg_info
&arg
)
7294 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
7297 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7298 a call insn (op3 of a call_value insn). */
7299 if (arg
.end_marker_p ())
7302 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7304 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
7305 return pcum
->aapcs_reg
;
7308 /* Varargs vectors are treated the same as long long.
7309 named_count avoids having to change the way arm handles 'named' */
7310 if (TARGET_IWMMXT_ABI
7311 && arm_vector_mode_supported_p (arg
.mode
)
7312 && pcum
->named_count
> pcum
->nargs
+ 1)
7314 if (pcum
->iwmmxt_nregs
<= 9)
7315 return gen_rtx_REG (arg
.mode
,
7316 pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
7319 pcum
->can_split
= false;
7324 /* Put doubleword aligned quantities in even register pairs. */
7325 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
7327 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
7328 if (res
< 0 && warn_psabi
)
7329 inform (input_location
, "parameter passing for argument of type "
7330 "%qT changed in GCC 7.1", arg
.type
);
7334 if (res
> 1 && warn_psabi
)
7335 inform (input_location
, "parameter passing for argument of type "
7336 "%qT changed in GCC 9.1", arg
.type
);
7340 /* Only allow splitting an arg between regs and memory if all preceding
7341 args were allocated to regs. For args passed by reference we only count
7342 the reference pointer. */
7343 if (pcum
->can_split
)
7346 nregs
= ARM_NUM_REGS2 (arg
.mode
, arg
.type
);
7348 if (!arg
.named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
7351 return gen_rtx_REG (arg
.mode
, pcum
->nregs
);
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
            "changed in GCC 7.1", type);
  if (res > 1 && warn_psabi)
    inform (input_location, "parameter passing for argument of type "
            "%qT changed in GCC 9.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over argument ARG.  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v,
                          const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);

      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
                                                              arg.type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (arg.mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
  LONG,         /* #pragma long_calls is in effect.  */
  SHORT         /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
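#if 0
/* Editorial note: illustrative usage only, not part of the original
   source.  This is how the pragmas handled above appear in user code
   (as documented for the ARM target).  */
#pragma long_calls
extern void far_away_function (void);   /* calls use a 32-bit long call */
#pragma long_calls_off
#endif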
7468 /* Handle an attribute requiring a FUNCTION_DECL;
7469 arguments as in struct attribute_spec.handler. */
7471 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
7472 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7474 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7476 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7478 *no_add_attrs
= true;
7484 /* Handle an "interrupt" or "isr" attribute;
7485 arguments as in struct attribute_spec.handler. */
7487 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
7492 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7494 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7496 *no_add_attrs
= true;
7498 else if (TARGET_VFP_BASE
)
7500 warning (OPT_Wattributes
, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7503 /* FIXME: the argument if any is checked for type attributes;
7504 should it be checked for decl ones? */
7508 if (FUNC_OR_METHOD_TYPE_P (*node
))
7510 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
7512 warning (OPT_Wattributes
, "%qE attribute ignored",
7514 *no_add_attrs
= true;
7517 else if (TREE_CODE (*node
) == POINTER_TYPE
7518 && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node
))
7519 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
7521 *node
= build_variant_type_copy (*node
);
7522 TREE_TYPE (*node
) = build_type_attribute_variant
7524 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
7525 *no_add_attrs
= true;
7529 /* Possibly pass this attribute on from the type to a decl. */
7530 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
7531 | (int) ATTR_FLAG_FUNCTION_NEXT
7532 | (int) ATTR_FLAG_ARRAY_NEXT
))
7534 *no_add_attrs
= true;
7535 return tree_cons (name
, args
, NULL_TREE
);
7539 warning (OPT_Wattributes
, "%qE attribute ignored",
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */

static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
                          int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
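#if 0
/* Editorial note: illustrative usage only, not part of the original
   source.  This is the user-level form of the "pcs" attribute validated
   above; "aapcs" and "aapcs-vfp" are the accepted argument strings.  */
double __attribute__ ((pcs ("aapcs"))) example_soft_fp_callee (double x);
#endif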
7562 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7563 /* Handle the "notshared" attribute. This attribute is another way of
7564 requesting hidden visibility. ARM's compiler supports
7565 "__declspec(notshared)"; we support the same thing via an
7569 arm_handle_notshared_attribute (tree
*node
,
7570 tree name ATTRIBUTE_UNUSED
,
7571 tree args ATTRIBUTE_UNUSED
,
7572 int flags ATTRIBUTE_UNUSED
,
7575 tree decl
= TYPE_NAME (*node
);
7579 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
7580 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
7581 *no_add_attrs
= false;
7587 /* This function returns true if a function with declaration FNDECL and type
7588 FNTYPE uses the stack to pass arguments or return variables and false
7589 otherwise. This is used for functions with the attributes
7590 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7591 diagnostic messages if the stack is used. NAME is the name of the attribute
7595 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
7597 function_args_iterator args_iter
;
7598 CUMULATIVE_ARGS args_so_far_v
;
7599 cumulative_args_t args_so_far
;
7600 bool first_param
= true;
7601 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
7603 /* Error out if any argument is passed on the stack. */
7604 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
7605 args_so_far
= pack_cumulative_args (&args_so_far_v
);
7606 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
7610 prev_arg_type
= arg_type
;
7611 if (VOID_TYPE_P (arg_type
))
7614 function_arg_info
arg (arg_type
, /*named=*/true);
7616 /* ??? We should advance after processing the argument and pass
7617 the argument we're advancing past. */
7618 arm_function_arg_advance (args_so_far
, arg
);
7619 arg_rtx
= arm_function_arg (args_so_far
, arg
);
7620 if (!arg_rtx
|| arm_arg_partial_bytes (args_so_far
, arg
))
7622 error ("%qE attribute not available to functions with arguments "
7623 "passed on the stack", name
);
7626 first_param
= false;
7629 /* Error out for variadic functions since we cannot control how many
7630 arguments will be passed and thus stack could be used. stdarg_p () is not
7631 used for the checking to avoid browsing arguments twice. */
7632 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
7634 error ("%qE attribute not available to functions with variable number "
7635 "of arguments", name
);
7639 /* Error out if return value is passed on the stack. */
7640 ret_type
= TREE_TYPE (fntype
);
7641 if (arm_return_in_memory (ret_type
, fntype
))
7643 error ("%qE attribute not available to functions that return value on "
7650 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7651 function will check whether the attribute is allowed here and will add the
7652 attribute to the function declaration tree or otherwise issue a warning. */
7655 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
7664 *no_add_attrs
= true;
7665 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7670 /* Ignore attribute for function types. */
7671 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7673 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7675 *no_add_attrs
= true;
7681 /* Warn for static linkage functions. */
7682 if (!TREE_PUBLIC (fndecl
))
7684 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
7685 "with static linkage", name
);
7686 *no_add_attrs
= true;
7690 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
7691 TREE_TYPE (fndecl
));
7696 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7697 function will check whether the attribute is allowed here and will add the
7698 attribute to the function type tree or otherwise issue a diagnostic. The
7699 reason we check this at declaration time is to only allow the use of the
7700 attribute with declarations of function pointers and not function
7701 declarations. This function checks NODE is of the expected type and issues
7702 diagnostics otherwise using NAME. If it is not of the expected type
7703 *NO_ADD_ATTRS will be set to true. */
7706 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
7711 tree decl
= NULL_TREE
;
7716 *no_add_attrs
= true;
7717 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7724 fntype
= TREE_TYPE (*node
);
7726 if (VAR_P (*node
) || TREE_CODE (*node
) == TYPE_DECL
)
7732 while (fntype
&& TREE_CODE (fntype
) == POINTER_TYPE
)
7733 fntype
= TREE_TYPE (fntype
);
7735 if ((DECL_P (*node
) && !decl
) || TREE_CODE (fntype
) != FUNCTION_TYPE
)
7737 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
7738 "function pointer", name
);
7739 *no_add_attrs
= true;
7743 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
7748 /* Prevent trees being shared among function types with and without
7749 cmse_nonsecure_call attribute. */
7752 type
= build_distinct_type_copy (TREE_TYPE (decl
));
7753 TREE_TYPE (decl
) = type
;
7757 type
= build_distinct_type_copy (*node
);
7763 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
7766 fntype
= TREE_TYPE (fntype
);
7767 fntype
= build_distinct_type_copy (fntype
);
7768 TREE_TYPE (type
) = fntype
;
7771 /* Construct a type attribute and add it to the function type. */
7772 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
7773 TYPE_ATTRIBUTES (fntype
));
7774 TYPE_ATTRIBUTES (fntype
) = attrs
;
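#if 0
/* Editorial note: illustrative usage only, not part of the original
   source, and the declarations are hypothetical.  These are the CMSE
   attributes checked by the two handlers above: the first marks a
   secure-world entry function, the second the base type of a non-secure
   function pointer.  */
int __attribute__ ((cmse_nonsecure_entry)) example_entry (int x);
int __attribute__ ((cmse_nonsecure_call)) (*example_ns_call) (int);
#endif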
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  tree attrs1 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type1));
  tree attrs2 = lookup_attribute ("Advanced SIMD type",
				  TYPE_ATTRIBUTES (type2));
  if (bool (attrs1) != bool (attrs2))
    return 0;
  if (attrs1 && !attribute_value_equal (attrs1, attrs2))
    return 0;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */

static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (FUNC_OR_METHOD_TYPE_P (type))
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
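
/* Illustrative note (not from the original source): the pragma scopes
   mentioned above look like

     #pragma long_calls
     void far_helper (void);     -- receives the long_call attribute
     #pragma long_calls_off
     void near_helper (void);    -- back to the command-line default

   GCC also accepts #pragma no_long_calls, which applies short_call
   instead.  */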
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
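
/* Illustrative note (not from the original source): rules a and d above
   correspond to per-function declarations such as

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));

   while rule c corresponds to building the whole unit with -mlong-calls.  */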
/* Return nonzero if it is ok to make a tail-call to DECL.  */

static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* In FDPIC, never tailcall something for which we have no decl:
     the target function could be in a different module, requiring
     a different FDPIC register value.  */
  if (TARGET_FDPIC && decl == NULL_TREE)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers.  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Indirect tailcalls need a call-clobbered register to hold the function
     address.  But we only have r0-r3 and ip in that class.  If r0-r3 all hold
     function arguments, then we can only use IP.  But IP may be needed in the
     epilogue (for PAC validation), or for passing the static chain.  We have
     to disable the tail call if nothing is available.  */
  if (!decl
      && ((CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
	  || arm_current_function_pac_enabled_p()))
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      tree arg;
      call_expr_arg_iterator iter;
      unsigned used_regs = 0;

      /* Layout each actual argument in turn.  If it is allocated to
	 core regs, note which regs have been allocated.  */
      FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
	{
	  tree type = TREE_TYPE (arg);
	  function_arg_info arg_info (type, /*named=*/true);
	  rtx reg = arm_function_arg (cum_v, arg_info);
	  if (reg && REG_P (reg)
	      && REGNO (reg) <= LAST_ARG_REGNUM)
	    {
	      /* Avoid any chance of UB here.  We don't care if TYPE
		 is very large since it will use up all the argument regs.  */
	      unsigned nregs = MIN (ARM_NUM_REGS2 (GET_MODE (reg), type),
				    LAST_ARG_REGNUM + 1);
	      used_regs |= ((1 << nregs) - 1) << REGNO (reg);
	    }
	  arm_function_arg_advance (cum_v, arg_info);
	}

      /* We've used all the argument regs, and we know IP is live during the
	 epilogue for some reason, so we can't tailcall.  */
      if ((used_regs & ((1 << (LAST_ARG_REGNUM + 1)) - 1))
	  == ((1 << (LAST_ARG_REGNUM + 1)) - 1))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}
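
/* Worked example for the used_regs check above (illustrative, not from the
   original source): with LAST_ARG_REGNUM == 3, an indirect call whose
   arguments fill r0-r3 gives used_regs == 0xf == (1 << 4) - 1, so the tail
   call is rejected; a call using only r0-r1 leaves r2/r3 free to hold the
   target address and the sibcall is allowed.  */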
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (SYMBOL_REF_P (x)
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  If PIC_REG is null,
   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
   both cases cfun->machine->pic_reg is initialized if we have not already done
   so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
   the PIC register is reloaded in the current position of the instruction
   stream regardless of whether it was loaded before.  Otherwise, it is only
   loaded if not already done so (crtl->uses_pic_offset_table is null).  Note
   that a nonnull PIC_REG is only supported iff COMPUTE_NOW is true and a null
   PIC_REG is only supported iff COMPUTE_NOW is false.  */

static void
require_pic_register (rtx pic_reg, bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table || compute_now)
    {
      gcc_assert (can_create_pseudo_p ()
		  || (pic_reg != NULL_RTX
		      && REG_P (pic_reg)
		      && GET_MODE (pic_reg) == Pmode));
      if (arm_pic_register != INVALID_REGNUM
	  && !compute_now
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (pic_reg == NULL_RTX)
	    pic_reg = gen_reg_rtx (Pmode);
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = pic_reg;

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM
		  && !compute_now)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL, pic_reg);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      if (currently_expanding_to_rtl)
		insert_insn_on_edge (seq,
				     single_succ_edge
				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      else
		emit_insn (seq);
	    }
	}
    }
}
/* Generate insns to calculate the address of ORIG in pic mode.  */

static rtx_insn *
calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
{
  rtx pat;
  rtx mem;

  pat = gen_calculate_pic_address (reg, pic_reg, orig);

  /* Make the MEM as close to a constant as possible.  */
  mem = SET_SRC (pat);
  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
  MEM_READONLY_P (mem) = 1;
  MEM_NOTRAP_P (mem) = 1;

  return emit_insn (pat);
}
8188 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8189 created to hold the result of the load. If not NULL, PIC_REG indicates
8190 which register to use as PIC register, otherwise it is decided by register
8191 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8192 location in the instruction stream, irregardless of whether it was loaded
8193 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8194 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8196 Returns the register REG into which the PIC load is performed. */
8199 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
, rtx pic_reg
,
8202 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
8204 if (SYMBOL_REF_P (orig
)
8205 || LABEL_REF_P (orig
))
8209 gcc_assert (can_create_pseudo_p ());
8210 reg
= gen_reg_rtx (Pmode
);
8213 /* VxWorks does not impose a fixed gap between segments; the run-time
8214 gap can be different from the object-file gap. We therefore can't
8215 use GOTOFF unless we are absolutely sure that the symbol is in the
8216 same segment as the GOT. Unfortunately, the flexibility of linker
8217 scripts means that we can't be sure of that in general, so assume
8218 that GOTOFF is never valid on VxWorks. */
8219 /* References to weak symbols cannot be resolved locally: they
8220 may be overridden by a non-weak definition at link time. */
8222 if ((LABEL_REF_P (orig
)
8223 || (SYMBOL_REF_P (orig
)
8224 && SYMBOL_REF_LOCAL_P (orig
)
8225 && (SYMBOL_REF_DECL (orig
)
8226 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)
8227 && (!SYMBOL_REF_FUNCTION_P (orig
)
8228 || arm_fdpic_local_funcdesc_p (orig
))))
8230 && arm_pic_data_is_text_relative
)
8231 insn
= arm_pic_static_addr (orig
, reg
);
8234 /* If this function doesn't have a pic register, create one now. */
8235 require_pic_register (pic_reg
, compute_now
);
8237 if (pic_reg
== NULL_RTX
)
8238 pic_reg
= cfun
->machine
->pic_reg
;
8240 insn
= calculate_pic_address_constant (reg
, pic_reg
, orig
);
8243 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8245 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
8249 else if (GET_CODE (orig
) == CONST
)
8253 if (GET_CODE (XEXP (orig
, 0)) == PLUS
8254 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
8257 /* Handle the case where we have: const (UNSPEC_TLS). */
8258 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
8259 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
8262 /* Handle the case where we have:
8263 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8265 if (GET_CODE (XEXP (orig
, 0)) == PLUS
8266 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
8267 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
8269 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
8275 gcc_assert (can_create_pseudo_p ());
8276 reg
= gen_reg_rtx (Pmode
);
8279 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
8281 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
,
8282 pic_reg
, compute_now
);
8283 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
8284 base
== reg
? 0 : reg
, pic_reg
,
8287 if (CONST_INT_P (offset
))
8289 /* The base register doesn't really matter, we only want to
8290 test the index for the appropriate mode. */
8291 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
8293 gcc_assert (can_create_pseudo_p ());
8294 offset
= force_reg (Pmode
, offset
);
8297 if (CONST_INT_P (offset
))
8298 return plus_constant (Pmode
, base
, INTVAL (offset
));
8301 if (GET_MODE_SIZE (mode
) > 4
8302 && (GET_MODE_CLASS (mode
) == MODE_INT
8303 || TARGET_SOFT_FLOAT
))
8305 emit_insn (gen_addsi3 (reg
, base
, offset
));
8309 return gen_rtx_PLUS (Pmode
, base
, offset
);
8316 /* Generate insns that produce the address of the stack canary */
8318 arm_stack_protect_tls_canary_mem (bool reload
)
8320 rtx tp
= gen_reg_rtx (SImode
);
8322 emit_insn (gen_reload_tp_hard (tp
));
8324 emit_insn (gen_load_tp_hard (tp
));
8326 rtx reg
= gen_reg_rtx (SImode
);
8327 rtx offset
= GEN_INT (arm_stack_protector_guard_offset
);
8328 emit_set_insn (reg
, gen_rtx_PLUS (SImode
, tp
, offset
));
8329 return gen_rtx_MEM (SImode
, reg
);
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))

/* Return a mask for the call-clobbered low registers that are unused
   at the end of the prologue.  */
static unsigned long
thumb1_prologue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}

/* Similarly for the start of the epilogue.  */
static unsigned long
thumb1_epilogue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
8368 /* Find a spare register to use during the prolog of a function. */
8371 thumb_find_work_register (unsigned long pushed_regs_mask
)
8375 unsigned long unused_regs
8376 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8378 /* Check the argument registers first as these are call-used. The
8379 register allocation order means that sometimes r3 might be used
8380 but earlier argument registers might not, so check them all. */
8381 for (reg
= LAST_LO_REGNUM
; reg
>= FIRST_LO_REGNUM
; reg
--)
8382 if (unused_regs
& (1 << (reg
- FIRST_LO_REGNUM
)))
8385 /* Otherwise look for a call-saved register that is going to be pushed. */
8386 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
8387 if (pushed_regs_mask
& (1 << reg
))
8392 /* Thumb-2 can use high regs. */
8393 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
8394 if (pushed_regs_mask
& (1 << reg
))
8397 /* Something went wrong - thumb_compute_save_reg_mask()
8398 should have arranged for a suitable register to be pushed. */
8402 static GTY(()) int pic_labelno
;
8404 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8408 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
, rtx pic_reg
)
8410 rtx l1
, labelno
, pic_tmp
, pic_rtx
;
8412 if (crtl
->uses_pic_offset_table
== 0
8413 || TARGET_SINGLE_PIC_BASE
8417 gcc_assert (flag_pic
);
8419 if (pic_reg
== NULL_RTX
)
8420 pic_reg
= cfun
->machine
->pic_reg
;
8421 if (TARGET_VXWORKS_RTP
)
8423 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
8424 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
8425 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
8427 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
8429 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8430 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
8434 /* We use an UNSPEC rather than a LABEL_REF because this label
8435 never appears in the code stream. */
8437 labelno
= GEN_INT (pic_labelno
++);
8438 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8439 l1
= gen_rtx_CONST (VOIDmode
, l1
);
8441 /* On the ARM the PC register contains 'dot + 8' at the time of the
8442 addition, on the Thumb it is 'dot + 4'. */
8443 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
8444 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
8446 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
8450 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8452 else /* TARGET_THUMB1 */
8454 if (arm_pic_register
!= INVALID_REGNUM
8455 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
8457 /* We will have pushed the pic register, so we should always be
8458 able to find a work register. */
8459 pic_tmp
= gen_rtx_REG (SImode
,
8460 thumb_find_work_register (saved_regs
));
8461 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
8462 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
8463 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
8465 else if (arm_pic_register
!= INVALID_REGNUM
8466 && arm_pic_register
> LAST_LO_REGNUM
8467 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
8469 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8470 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
8471 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
8474 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8478 /* Need to emit this whether or not we obey regdecls,
8479 since setjmp/longjmp can cause life info to screw up. */
8483 /* Try to determine whether an object, referenced via ORIG, will be
8484 placed in the text or data segment. This is used in FDPIC mode, to
8485 decide which relocations to use when accessing ORIG. *IS_READONLY
8486 is set to true if ORIG is a read-only location, false otherwise.
8487 Return true if we could determine the location of ORIG, false
8488 otherwise. *IS_READONLY is valid only when we return true. */
8490 arm_is_segment_info_known (rtx orig
, bool *is_readonly
)
8492 *is_readonly
= false;
8494 if (LABEL_REF_P (orig
))
8496 *is_readonly
= true;
8500 if (SYMBOL_REF_P (orig
))
8502 if (CONSTANT_POOL_ADDRESS_P (orig
))
8504 *is_readonly
= true;
8507 if (SYMBOL_REF_LOCAL_P (orig
)
8508 && !SYMBOL_REF_EXTERNAL_P (orig
)
8509 && SYMBOL_REF_DECL (orig
)
8510 && (!DECL_P (SYMBOL_REF_DECL (orig
))
8511 || !DECL_COMMON (SYMBOL_REF_DECL (orig
))))
8513 tree decl
= SYMBOL_REF_DECL (orig
);
8514 tree init
= VAR_P (decl
)
8515 ? DECL_INITIAL (decl
) : (TREE_CODE (decl
) == CONSTRUCTOR
)
8518 bool named_section
, readonly
;
8520 if (init
&& init
!= error_mark_node
)
8521 reloc
= compute_reloc_for_constant (init
);
8523 named_section
= VAR_P (decl
)
8524 && lookup_attribute ("section", DECL_ATTRIBUTES (decl
));
8525 readonly
= decl_readonly_section (decl
, reloc
);
8527 /* We don't know where the link script will put a named
8528 section, so return false in such a case. */
8532 *is_readonly
= readonly
;
8536 /* We don't know. */
8543 /* Generate code to load the address of a static var when flag_pic is set. */
8545 arm_pic_static_addr (rtx orig
, rtx reg
)
8547 rtx l1
, labelno
, offset_rtx
;
8550 gcc_assert (flag_pic
);
8552 bool is_readonly
= false;
8553 bool info_known
= false;
8556 && SYMBOL_REF_P (orig
)
8557 && !SYMBOL_REF_FUNCTION_P (orig
))
8558 info_known
= arm_is_segment_info_known (orig
, &is_readonly
);
8561 && SYMBOL_REF_P (orig
)
8562 && !SYMBOL_REF_FUNCTION_P (orig
)
8565 /* We don't know where orig is stored, so we have be
8566 pessimistic and use a GOT relocation. */
8567 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
8569 insn
= calculate_pic_address_constant (reg
, pic_reg
, orig
);
8571 else if (TARGET_FDPIC
8572 && SYMBOL_REF_P (orig
)
8573 && (SYMBOL_REF_FUNCTION_P (orig
)
8576 /* We use the GOTOFF relocation. */
8577 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
8579 rtx l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, orig
), UNSPEC_PIC_SYM
);
8580 emit_insn (gen_movsi (reg
, l1
));
8581 insn
= emit_insn (gen_addsi3 (reg
, reg
, pic_reg
));
8585 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8586 PC-relative access. */
8587 /* We use an UNSPEC rather than a LABEL_REF because this label
8588 never appears in the code stream. */
8589 labelno
= GEN_INT (pic_labelno
++);
8590 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8591 l1
= gen_rtx_CONST (VOIDmode
, l1
);
8593 /* On the ARM the PC register contains 'dot + 8' at the time of the
8594 addition, on the Thumb it is 'dot + 4'. */
8595 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
8596 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
8597 UNSPEC_SYMBOL_OFFSET
);
8598 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
8600 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
,
8607 /* Return nonzero if X is valid as an ARM state addressing register. */
8609 arm_address_register_rtx_p (rtx x
, int strict_p
)
8619 return ARM_REGNO_OK_FOR_BASE_P (regno
);
8621 return (regno
<= LAST_ARM_REGNUM
8622 || regno
>= FIRST_PSEUDO_REGISTER
8623 || regno
== FRAME_POINTER_REGNUM
8624 || regno
== ARG_POINTER_REGNUM
);
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
8649 /* Return nonzero if X is a valid ARM state address operand. */
8651 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
8655 enum rtx_code code
= GET_CODE (x
);
8657 if (arm_address_register_rtx_p (x
, strict_p
))
8660 use_ldrd
= (TARGET_LDRD
8661 && (mode
== DImode
|| mode
== DFmode
));
8663 if (code
== POST_INC
|| code
== PRE_DEC
8664 || ((code
== PRE_INC
|| code
== POST_DEC
)
8665 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
8666 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
8668 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
8669 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
8670 && GET_CODE (XEXP (x
, 1)) == PLUS
8671 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8673 rtx addend
= XEXP (XEXP (x
, 1), 1);
8675 /* Don't allow ldrd post increment by register because it's hard
8676 to fixup invalid register choices. */
8678 && GET_CODE (x
) == POST_MODIFY
8682 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
8683 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
8686 /* After reload constants split into minipools will have addresses
8687 from a LABEL_REF. */
8688 else if (reload_completed
8689 && (code
== LABEL_REF
8691 && GET_CODE (XEXP (x
, 0)) == PLUS
8692 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8693 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8696 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
8699 else if (code
== PLUS
)
8701 rtx xop0
= XEXP (x
, 0);
8702 rtx xop1
= XEXP (x
, 1);
8704 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8705 && ((CONST_INT_P (xop1
)
8706 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
8707 || (!strict_p
&& will_be_in_index_register (xop1
))))
8708 || (arm_address_register_rtx_p (xop1
, strict_p
)
8709 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
8713 /* Reload currently can't handle MINUS, so disable this for now */
8714 else if (GET_CODE (x
) == MINUS
)
8716 rtx xop0
= XEXP (x
, 0);
8717 rtx xop1
= XEXP (x
, 1);
8719 return (arm_address_register_rtx_p (xop0
, strict_p
)
8720 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
8724 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8725 && code
== SYMBOL_REF
8726 && CONSTANT_POOL_ADDRESS_P (x
)
8728 && symbol_mentioned_p (get_pool_constant (x
))
8729 && ! pcrel_constant_p (get_pool_constant (x
))))
8735 /* Return true if we can avoid creating a constant pool entry for x. */
8737 can_avoid_literal_pool_for_label_p (rtx x
)
8739 /* Normally we can assign constant values to target registers without
8740 the help of constant pool. But there are cases we have to use constant
8742 1) assign a label to register.
8743 2) sign-extend a 8bit value to 32bit and then assign to register.
8745 Constant pool access in format:
8746 (set (reg r0) (mem (symbol_ref (".LC0"))))
8747 will cause the use of literal pool (later in function arm_reorg).
8748 So here we mark such format as an invalid format, then the compiler
8749 will adjust it into:
8750 (set (reg r0) (symbol_ref (".LC0")))
8751 (set (reg r0) (mem (reg r0))).
8752 No extra register is required, and (mem (reg r0)) won't cause the use
8753 of literal pools. */
8754 if (arm_disable_literal_pool
&& SYMBOL_REF_P (x
)
8755 && CONSTANT_POOL_ADDRESS_P (x
))
8761 /* Return nonzero if X is a valid Thumb-2 address operand. */
8763 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8766 enum rtx_code code
= GET_CODE (x
);
8768 /* If we are dealing with a MVE predicate mode, then treat it as a HImode as
8769 can store and load it like any other 16-bit value. */
8770 if (TARGET_HAVE_MVE
&& VALID_MVE_PRED_MODE (mode
))
8773 if (TARGET_HAVE_MVE
&& VALID_MVE_MODE (mode
))
8774 return mve_vector_mem_operand (mode
, x
, strict_p
);
8776 if (arm_address_register_rtx_p (x
, strict_p
))
8779 use_ldrd
= (TARGET_LDRD
8780 && (mode
== DImode
|| mode
== DFmode
));
8782 if (code
== POST_INC
|| code
== PRE_DEC
8783 || ((code
== PRE_INC
|| code
== POST_DEC
)
8784 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
8785 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
8787 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
8788 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
8789 && GET_CODE (XEXP (x
, 1)) == PLUS
8790 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8792 /* Thumb-2 only has autoincrement by constant. */
8793 rtx addend
= XEXP (XEXP (x
, 1), 1);
8794 HOST_WIDE_INT offset
;
8796 if (!CONST_INT_P (addend
))
8799 offset
= INTVAL(addend
);
8800 if (GET_MODE_SIZE (mode
) <= 4)
8801 return (offset
> -256 && offset
< 256);
8803 return (use_ldrd
&& offset
> -1024 && offset
< 1024
8804 && (offset
& 3) == 0);
8807 /* After reload constants split into minipools will have addresses
8808 from a LABEL_REF. */
8809 else if (reload_completed
8810 && (code
== LABEL_REF
8812 && GET_CODE (XEXP (x
, 0)) == PLUS
8813 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8814 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8817 else if (mode
== TImode
8818 || (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
8819 || (TARGET_HAVE_MVE
&& VALID_MVE_STRUCT_MODE (mode
)))
8822 else if (code
== PLUS
)
8824 rtx xop0
= XEXP (x
, 0);
8825 rtx xop1
= XEXP (x
, 1);
8827 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8828 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
8829 || (!strict_p
&& will_be_in_index_register (xop1
))))
8830 || (arm_address_register_rtx_p (xop1
, strict_p
)
8831 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
8834 else if (can_avoid_literal_pool_for_label_p (x
))
8837 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8838 && code
== SYMBOL_REF
8839 && CONSTANT_POOL_ADDRESS_P (x
)
8841 && symbol_mentioned_p (get_pool_constant (x
))
8842 && ! pcrel_constant_p (get_pool_constant (x
))))
8848 /* Return nonzero if INDEX is valid for an address index operand in
8851 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
8854 HOST_WIDE_INT range
;
8855 enum rtx_code code
= GET_CODE (index
);
8857 /* Standard coprocessor addressing modes. */
8858 if (TARGET_HARD_FLOAT
8859 && (mode
== SFmode
|| mode
== DFmode
))
8860 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8861 && INTVAL (index
) > -1024
8862 && (INTVAL (index
) & 3) == 0);
8864 if (arm_address_register_rtx_p (index
, strict_p
)
8865 && (GET_MODE_SIZE (mode
) <= 4))
8868 /* This handles DFmode only if !TARGET_HARD_FLOAT. */
8869 if (mode
== DImode
|| mode
== DFmode
)
8871 if (code
== CONST_INT
)
8873 HOST_WIDE_INT val
= INTVAL (index
);
8875 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8876 If vldr is selected it uses arm_coproc_mem_operand. */
8878 return val
> -256 && val
< 256;
8880 return val
> -4096 && val
< 4092;
8883 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
8886 /* For quad modes, we restrict the constant offset to be slightly less
8887 than what the instruction format permits. We do this because for
8888 quad mode moves, we will actually decompose them into two separate
8889 double-mode reads or writes. INDEX must therefore be a valid
8890 (double-mode) offset and so should INDEX+8. */
8891 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8892 return (code
== CONST_INT
8893 && INTVAL (index
) < 1016
8894 && INTVAL (index
) > -1024
8895 && (INTVAL (index
) & 3) == 0);
8897 /* We have no such constraint on double mode offsets, so we permit the
8898 full range of the instruction format. Note DImode is included here. */
8899 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8900 return (code
== CONST_INT
8901 && INTVAL (index
) < 1024
8902 && INTVAL (index
) > -1024
8903 && (INTVAL (index
) & 3) == 0);
8905 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8906 return (code
== CONST_INT
8907 && INTVAL (index
) < 1024
8908 && INTVAL (index
) > -1024
8909 && (INTVAL (index
) & 3) == 0);
8911 if (GET_MODE_SIZE (mode
) <= 4
8915 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
8919 rtx xiop0
= XEXP (index
, 0);
8920 rtx xiop1
= XEXP (index
, 1);
8922 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8923 && power_of_two_operand (xiop1
, SImode
))
8924 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8925 && power_of_two_operand (xiop0
, SImode
)));
8927 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
8928 || code
== ASHIFT
|| code
== ROTATERT
)
8930 rtx op
= XEXP (index
, 1);
8932 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8935 && INTVAL (op
) <= 31);
8939 /* For ARM v4 we may be doing a sign-extend operation during the
8945 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
8951 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
8953 return (code
== CONST_INT
8954 && INTVAL (index
) < range
8955 && INTVAL (index
) > -range
);
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
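
/* Illustrative note (not from the original source): a scaled Thumb-2 address
   such as [r1, r2, lsl #2] reaches the address legitimizer as

     (plus (reg r1) (mult (reg r2) (const_int 4)))

   and the const_int 4 is exactly what thumb2_index_mul_operand accepts.  */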
8972 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8974 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
8976 enum rtx_code code
= GET_CODE (index
);
8978 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8979 /* Standard coprocessor addressing modes. */
8981 && (mode
== SFmode
|| mode
== DFmode
))
8982 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8983 /* Thumb-2 allows only > -256 index range for it's core register
8984 load/stores. Since we allow SF/DF in core registers, we have
8985 to use the intersection between -256~4096 (core) and -1024~1024
8987 && INTVAL (index
) > -256
8988 && (INTVAL (index
) & 3) == 0);
8990 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8992 /* For DImode assume values will usually live in core regs
8993 and only allow LDRD addressing modes. */
8994 if (!TARGET_LDRD
|| mode
!= DImode
)
8995 return (code
== CONST_INT
8996 && INTVAL (index
) < 1024
8997 && INTVAL (index
) > -1024
8998 && (INTVAL (index
) & 3) == 0);
9001 /* For quad modes, we restrict the constant offset to be slightly less
9002 than what the instruction format permits. We do this because for
9003 quad mode moves, we will actually decompose them into two separate
9004 double-mode reads or writes. INDEX must therefore be a valid
9005 (double-mode) offset and so should INDEX+8. */
9006 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
9007 return (code
== CONST_INT
9008 && INTVAL (index
) < 1016
9009 && INTVAL (index
) > -1024
9010 && (INTVAL (index
) & 3) == 0);
9012 /* We have no such constraint on double mode offsets, so we permit the
9013 full range of the instruction format. Note DImode is included here. */
9014 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
9015 return (code
== CONST_INT
9016 && INTVAL (index
) < 1024
9017 && INTVAL (index
) > -1024
9018 && (INTVAL (index
) & 3) == 0);
9020 if (arm_address_register_rtx_p (index
, strict_p
)
9021 && (GET_MODE_SIZE (mode
) <= 4))
9024 /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE. */
9025 if (mode
== DImode
|| mode
== DFmode
)
9027 if (code
== CONST_INT
)
9029 HOST_WIDE_INT val
= INTVAL (index
);
9030 /* Thumb-2 ldrd only has reg+const addressing modes.
9031 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
9032 If vldr is selected it uses arm_coproc_mem_operand. */
9034 return IN_RANGE (val
, -1020, 1020) && (val
& 3) == 0;
9036 return IN_RANGE (val
, -255, 4095 - 4);
9044 rtx xiop0
= XEXP (index
, 0);
9045 rtx xiop1
= XEXP (index
, 1);
9047 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
9048 && thumb2_index_mul_operand (xiop1
))
9049 || (arm_address_register_rtx_p (xiop1
, strict_p
)
9050 && thumb2_index_mul_operand (xiop0
)));
9052 else if (code
== ASHIFT
)
9054 rtx op
= XEXP (index
, 1);
9056 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
9059 && INTVAL (op
) <= 3);
9062 return (code
== CONST_INT
9063 && INTVAL (index
) < 4096
9064 && INTVAL (index
) > -256);
9067 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
9069 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
9079 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
9081 return (regno
<= LAST_LO_REGNUM
9082 || regno
> LAST_VIRTUAL_REGISTER
9083 || regno
== FRAME_POINTER_REGNUM
9084 || (GET_MODE_SIZE (mode
) >= 4
9085 && (regno
== STACK_POINTER_REGNUM
9086 || regno
>= FIRST_PSEUDO_REGISTER
9087 || x
== hard_frame_pointer_rtx
9088 || x
== arg_pointer_rtx
)));
9091 /* Return nonzero if x is a legitimate index register. This is the case
9092 for any base register that can access a QImode object. */
9094 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
9096 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
9099 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9101 The AP may be eliminated to either the SP or the FP, so we use the
9102 least common denominator, e.g. SImode, and offsets from 0 to 64.
9104 ??? Verify whether the above is the right approach.
9106 ??? Also, the FP may be eliminated to the SP, so perhaps that
9107 needs special handling also.
9109 ??? Look at how the mips16 port solves this problem. It probably uses
9110 better ways to solve some of these problems.
9112 Although it is not incorrect, we don't accept QImode and HImode
9113 addresses based on the frame pointer or arg pointer until the
9114 reload pass starts. This is so that eliminating such addresses
9115 into stack based ones won't produce impossible code. */
9117 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
9119 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
9122 /* ??? Not clear if this is right. Experiment. */
9123 if (GET_MODE_SIZE (mode
) < 4
9124 && !(reload_in_progress
|| reload_completed
)
9125 && (reg_mentioned_p (frame_pointer_rtx
, x
)
9126 || reg_mentioned_p (arg_pointer_rtx
, x
)
9127 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
9128 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
9129 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
9130 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
9133 /* Accept any base register. SP only in SImode or larger. */
9134 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
9137 /* This is PC relative data before arm_reorg runs. */
9138 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
9140 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
9141 && !arm_disable_literal_pool
)
9144 /* This is PC relative data after arm_reorg runs. */
9145 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
9148 || (GET_CODE (x
) == CONST
9149 && GET_CODE (XEXP (x
, 0)) == PLUS
9150 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
9151 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
9154 /* Post-inc indexing only supported for SImode and larger. */
9155 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
9156 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
9159 else if (GET_CODE (x
) == PLUS
)
9161 /* REG+REG address can be any two index registers. */
9162 /* We disallow FRAME+REG addressing since we know that FRAME
9163 will be replaced with STACK, and SP relative addressing only
9164 permits SP+OFFSET. */
9165 if (GET_MODE_SIZE (mode
) <= 4
9166 && XEXP (x
, 0) != frame_pointer_rtx
9167 && XEXP (x
, 1) != frame_pointer_rtx
9168 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
9169 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
9170 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
9173 /* REG+const has 5-7 bit offset for non-SP registers. */
9174 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
9175 || XEXP (x
, 0) == arg_pointer_rtx
)
9176 && CONST_INT_P (XEXP (x
, 1))
9177 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
9180 /* REG+const has 10-bit offset for SP, but only SImode and
9181 larger is supported. */
9182 /* ??? Should probably check for DI/DFmode overflow here
9183 just like GO_IF_LEGITIMATE_OFFSET does. */
9184 else if (REG_P (XEXP (x
, 0))
9185 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
9186 && GET_MODE_SIZE (mode
) >= 4
9187 && CONST_INT_P (XEXP (x
, 1))
9188 && INTVAL (XEXP (x
, 1)) >= 0
9189 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
9190 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
9193 else if (REG_P (XEXP (x
, 0))
9194 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
9195 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
9196 || VIRTUAL_REGISTER_P (XEXP (x
, 0)))
9197 && GET_MODE_SIZE (mode
) >= 4
9198 && CONST_INT_P (XEXP (x
, 1))
9199 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
9203 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
9204 && GET_MODE_SIZE (mode
) == 4
9206 && CONSTANT_POOL_ADDRESS_P (x
)
9207 && !arm_disable_literal_pool
9209 && symbol_mentioned_p (get_pool_constant (x
))
9210 && ! pcrel_constant_p (get_pool_constant (x
))))
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
bool
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}

/* Worker function for TARGET_LEGITIMATE_ADDRESS_P.  */
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, code_helper)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
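
/* Illustrative summary (not from the original source) of the ranges accepted
   by thumb_legitimate_offset_p: byte accesses take offsets 0-31, halfword
   accesses 0-62 in steps of 2, and word accesses 0-124 in steps of 4
   (larger modes correspondingly less), matching the scaled 5-bit immediates
   of the 16-bit ldr/str encodings.  */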
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
9281 arm_load_tp (rtx target
)
9284 target
= gen_reg_rtx (SImode
);
9288 /* Can return in any reg. */
9289 emit_insn (gen_load_tp_hard (target
));
9293 /* Always returned in r0. Immediately copy the result into a pseudo,
9294 otherwise other uses of r0 (e.g. setting up function arguments) may
9295 clobber the value. */
9301 rtx fdpic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
9302 rtx initial_fdpic_reg
= get_hard_reg_initial_val (Pmode
, FDPIC_REGNUM
);
9304 emit_insn (gen_load_tp_soft_fdpic ());
9307 emit_insn (gen_restore_pic_register_after_call(fdpic_reg
, initial_fdpic_reg
));
9310 emit_insn (gen_load_tp_soft ());
9312 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
9313 emit_move_insn (target
, tmp
);
9319 load_tls_operand (rtx x
, rtx reg
)
9323 if (reg
== NULL_RTX
)
9324 reg
= gen_reg_rtx (SImode
);
9326 tmp
= gen_rtx_CONST (SImode
, x
);
9328 emit_move_insn (reg
, tmp
);
9334 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
9336 rtx label
, labelno
= NULL_RTX
, sum
;
9338 gcc_assert (reloc
!= TLS_DESCSEQ
);
9343 sum
= gen_rtx_UNSPEC (Pmode
,
9344 gen_rtvec (2, x
, GEN_INT (reloc
)),
9349 labelno
= GEN_INT (pic_labelno
++);
9350 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9351 label
= gen_rtx_CONST (VOIDmode
, label
);
9353 sum
= gen_rtx_UNSPEC (Pmode
,
9354 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
9355 GEN_INT (TARGET_ARM
? 8 : 4)),
9358 reg
= load_tls_operand (sum
, reg
);
9361 emit_insn (gen_addsi3 (reg
, reg
, gen_rtx_REG (Pmode
, FDPIC_REGNUM
)));
9362 else if (TARGET_ARM
)
9363 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
9365 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
9367 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
9368 LCT_PURE
, /* LCT_CONST? */
9371 rtx_insn
*insns
= get_insns ();
9378 arm_tls_descseq_addr (rtx x
, rtx reg
)
9380 rtx labelno
= GEN_INT (pic_labelno
++);
9381 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9382 rtx sum
= gen_rtx_UNSPEC (Pmode
,
9383 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
9384 gen_rtx_CONST (VOIDmode
, label
),
9385 GEN_INT (!TARGET_ARM
)),
9387 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
9389 emit_insn (gen_tlscall (x
, labelno
));
9391 reg
= gen_reg_rtx (SImode
);
9393 gcc_assert (REGNO (reg
) != R0_REGNUM
);
9395 emit_move_insn (reg
, reg0
);
9402 legitimize_tls_address (rtx x
, rtx reg
)
9404 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
9406 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
9410 case TLS_MODEL_GLOBAL_DYNAMIC
:
9411 if (TARGET_GNU2_TLS
)
9413 gcc_assert (!TARGET_FDPIC
);
9415 reg
= arm_tls_descseq_addr (x
, reg
);
9417 tp
= arm_load_tp (NULL_RTX
);
9419 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
9423 /* Original scheme */
9425 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32_FDPIC
);
9427 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
9428 dest
= gen_reg_rtx (Pmode
);
9429 emit_libcall_block (insns
, dest
, ret
, x
);
9433 case TLS_MODEL_LOCAL_DYNAMIC
:
9434 if (TARGET_GNU2_TLS
)
9436 gcc_assert (!TARGET_FDPIC
);
9438 reg
= arm_tls_descseq_addr (x
, reg
);
9440 tp
= arm_load_tp (NULL_RTX
);
9442 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
9447 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32_FDPIC
);
9449 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
9451 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9452 share the LDM result with other LD model accesses. */
9453 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
9455 dest
= gen_reg_rtx (Pmode
);
9456 emit_libcall_block (insns
, dest
, ret
, eqv
);
9458 /* Load the addend. */
9459 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
9460 GEN_INT (TLS_LDO32
)),
9462 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
9463 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
9467 case TLS_MODEL_INITIAL_EXEC
:
9470 sum
= gen_rtx_UNSPEC (Pmode
,
9471 gen_rtvec (2, x
, GEN_INT (TLS_IE32_FDPIC
)),
9473 reg
= load_tls_operand (sum
, reg
);
9474 emit_insn (gen_addsi3 (reg
, reg
, gen_rtx_REG (Pmode
, FDPIC_REGNUM
)));
9475 emit_move_insn (reg
, gen_rtx_MEM (Pmode
, reg
));
9479 labelno
= GEN_INT (pic_labelno
++);
9480 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9481 label
= gen_rtx_CONST (VOIDmode
, label
);
9482 sum
= gen_rtx_UNSPEC (Pmode
,
9483 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
9484 GEN_INT (TARGET_ARM
? 8 : 4)),
9486 reg
= load_tls_operand (sum
, reg
);
9489 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
9490 else if (TARGET_THUMB2
)
9491 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
9494 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
9495 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
9499 tp
= arm_load_tp (NULL_RTX
);
9501 return gen_rtx_PLUS (Pmode
, tp
, reg
);
9503 case TLS_MODEL_LOCAL_EXEC
:
9504 tp
= arm_load_tp (NULL_RTX
);
9506 reg
= gen_rtx_UNSPEC (Pmode
,
9507 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
9509 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
9511 return gen_rtx_PLUS (Pmode
, tp
, reg
);
9518 /* Try machine-dependent ways of modifying an illegitimate address
9519 to be legitimate. If we find one, return the new, valid address. */
9521 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
9523 if (arm_tls_referenced_p (x
))
9527 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
9529 addend
= XEXP (XEXP (x
, 0), 1);
9530 x
= XEXP (XEXP (x
, 0), 0);
9533 if (!SYMBOL_REF_P (x
))
9536 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
9538 x
= legitimize_tls_address (x
, NULL_RTX
);
9542 x
= gen_rtx_PLUS (SImode
, x
, addend
);
9550 return thumb_legitimize_address (x
, orig_x
, mode
);
9552 if (GET_CODE (x
) == PLUS
)
9554 rtx xop0
= XEXP (x
, 0);
9555 rtx xop1
= XEXP (x
, 1);
9557 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
9558 xop0
= force_reg (SImode
, xop0
);
9560 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
9561 && !symbol_mentioned_p (xop1
))
9562 xop1
= force_reg (SImode
, xop1
);
9564 if (ARM_BASE_REGISTER_RTX_P (xop0
)
9565 && CONST_INT_P (xop1
))
9567 HOST_WIDE_INT n
, low_n
;
9571 /* VFP addressing modes actually allow greater offsets, but for
9572 now we just stick with the lowest common denominator. */
9573 if (mode
== DImode
|| mode
== DFmode
)
9585 low_n
= ((mode
) == TImode
? 0
9586 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
9590 base_reg
= gen_reg_rtx (SImode
);
9591 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
9592 emit_move_insn (base_reg
, val
);
9593 x
= plus_constant (Pmode
, base_reg
, low_n
);
9595 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
9596 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
9599 /* XXX We don't allow MINUS any more -- see comment in
9600 arm_legitimate_address_outer_p (). */
9601 else if (GET_CODE (x
) == MINUS
)
9603 rtx xop0
= XEXP (x
, 0);
9604 rtx xop1
= XEXP (x
, 1);
9606 if (CONSTANT_P (xop0
))
9607 xop0
= force_reg (SImode
, xop0
);
9609 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
9610 xop1
= force_reg (SImode
, xop1
);
9612 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
9613 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
9616 /* Make sure to take full advantage of the pre-indexed addressing mode
9617 with absolute addresses which often allows for the base register to
9618 be factorized for multiple adjacent memory references, and it might
9619 even allows for the mini pool to be avoided entirely. */
9620 else if (CONST_INT_P (x
) && optimize
> 0)
9623 HOST_WIDE_INT mask
, base
, index
;
9626 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9627 only use a 8-bit index. So let's use a 12-bit index for
9628 SImode only and hope that arm_gen_constant will enable LDRB
9629 to use more bits. */
9630 bits
= (mode
== SImode
) ? 12 : 8;
9631 mask
= (1 << bits
) - 1;
9632 base
= INTVAL (x
) & ~mask
;
9633 index
= INTVAL (x
) & mask
;
9634 if (TARGET_ARM
&& bit_count (base
& 0xffffffff) > (32 - bits
)/2)
9636 /* It'll most probably be more efficient to generate the
9637 base with more bits set and use a negative index instead.
9638 Don't do this for Thumb as negative offsets are much more
9643 base_reg
= force_reg (SImode
, GEN_INT (base
));
9644 x
= plus_constant (Pmode
, base_reg
, index
);
9649 /* We need to find and carefully transform any SYMBOL and LABEL
9650 references; so go back to the original address expression. */
9651 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
9652 false /*compute_now*/);
9654 if (new_x
!= orig_x
)
9662 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9663 to be legitimate. If we find one, return the new, valid address. */
9665 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
9667 if (GET_CODE (x
) == PLUS
9668 && CONST_INT_P (XEXP (x
, 1))
9669 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
9670 || INTVAL (XEXP (x
, 1)) < 0))
9672 rtx xop0
= XEXP (x
, 0);
9673 rtx xop1
= XEXP (x
, 1);
9674 HOST_WIDE_INT offset
= INTVAL (xop1
);
9676 /* Try and fold the offset into a biasing of the base register and
9677 then offsetting that. Don't do this when optimizing for space
9678 since it can cause too many CSEs. */
9679 if (optimize_size
&& offset
>= 0
9680 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
9682 HOST_WIDE_INT delta
;
9685 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
9686 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
9687 delta
= 31 * GET_MODE_SIZE (mode
);
9689 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
9691 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
9693 x
= plus_constant (Pmode
, xop0
, delta
);
9695 else if (offset
< 0 && offset
> -256)
9696 /* Small negative offsets are best done with a subtract before the
9697 dereference, forcing these into a register normally takes two
9699 x
= force_operand (x
, NULL_RTX
);
9702 /* For the remaining cases, force the constant into a register. */
9703 xop1
= force_reg (SImode
, xop1
);
9704 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
9707 else if (GET_CODE (x
) == PLUS
9708 && s_register_operand (XEXP (x
, 1), SImode
)
9709 && !s_register_operand (XEXP (x
, 0), SImode
))
9711 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
9713 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
9718 /* We need to find and carefully transform any SYMBOL and LABEL
9719 references; so go back to the original address expression. */
9720 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
9721 false /*compute_now*/);
9723 if (new_x
!= orig_x
)
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with %<-mpure-code%> or %<-mslow-flash-data%>");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
9762 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9764 On the ARM, allow any integer (invalid ones are removed later by insn
9765 patterns), nice doubles and symbol_refs which refer to the function's
9768 When generating pic allow anything. */
9771 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
9773 if (GET_CODE (x
) == CONST_VECTOR
&& !neon_make_constant (x
, false))
9776 return flag_pic
|| !label_mentioned_p (x
);
9780 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9782 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9783 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9784 for ARMv8-M Baseline or later the result is valid. */
9785 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
9788 return (CONST_INT_P (x
)
9789 || CONST_DOUBLE_P (x
)
9790 || CONSTANT_ADDRESS_P (x
)
9791 || (TARGET_HAVE_MOVT
&& SYMBOL_REF_P (x
))
9792 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9793 we build the symbol address with upper/lower
9796 && !label_mentioned_p (x
)
9797 && arm_valid_symbolic_address_p (x
)
9798 && arm_disable_literal_pool
)
9803 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
9805 return (!arm_cannot_force_const_mem (mode
, x
)
9807 ? arm_legitimate_constant_p_1 (mode
, x
)
9808 : thumb_legitimate_constant_p (mode
, x
)));
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  split_const (x, &base, &offset);
  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
	  && INTVAL (offset) != 0)
	return true;

      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}

#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);

      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

      return COSTS_N_INSNS (1);

      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

      if (UINTVAL (x) < 256
	  /* 16-bit constant.  */
	  || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))

      if (thumb_shiftable_const (INTVAL (x)))
	return COSTS_N_INSNS (2);
      return arm_disable_literal_pool
	? COSTS_N_INSNS (8)
	: COSTS_N_INSNS (3);

      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)

      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	/* This duplicates the tests in the andsi3 expander.  */
	for (i = 9; i <= 31; i++)
	  if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
	      || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	    return COSTS_N_INSNS (2);
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)

      return COSTS_N_INSNS (2);

      return COSTS_N_INSNS (3);

      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)

      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);

      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through.  */

      return COSTS_N_INSNS (1);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);

	  return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	  /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	  /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);

      if (UINTVAL (x) < 256)
	return COSTS_N_INSNS (1);
      /* movw is 4byte long.  */
      if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	return COSTS_N_INSNS (2);
      /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
      if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	return COSTS_N_INSNS (2);
      /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
      if (thumb_shiftable_const (INTVAL (x)))
	return COSTS_N_INSNS (2);
      return arm_disable_literal_pool
	? COSTS_N_INSNS (8)
	: COSTS_N_INSNS (3);

      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)

      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	/* This duplicates the tests in the andsi3 expander.  */
	for (i = 9; i <= 31; i++)
	  if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
	      || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	    return COSTS_N_INSNS (2);
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)

      return COSTS_N_INSNS (2);

      return COSTS_N_INSNS (3);

      return COSTS_N_INSNS (1);

      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)

      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.  If
   neither is a carry, return OP unchanged.  */
static rtx
strip_carry_operation (rtx op)
{
  gcc_assert (GET_CODE (op) == PLUS);
  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
    return XEXP (op, 1);
  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
    return XEXP (op, 0);
  return op;
}

/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}

arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

    default:
      *cost = COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
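
/* Illustrative sketch (not part of GCC): LIBCALL_COST above expands to
   COSTS_N_INSNS (N + 18) when tuning for speed and COSTS_N_INSNS (N + 2)
   at -Os.  With the usual definition of COSTS_N_INSNS (N) as N * 4, a
   libcall with NARGS arguments is costed as below.  The helper name and the
   stand-alone recomputation are hypothetical, for illustration only.  */
#if 0
static int
libcall_cost_sketch (int nargs, bool speed_p)
{
  /* One insn per argument, plus 18 insns for the call when tuning for
     speed (2 when tuning for size), mirroring LIBCALL_COST.  */
  int insns = nargs + (speed_p ? 18 : 2);
  return insns * 4;	/* COSTS_N_INSNS (insns).  */
}
#endif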
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	  shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);		\
	  if (shift_op != NULL						\
	      && arm_rtx_shift_left_p (XEXP (x, IDX)))			\
		  *cost += extra_cost->alu.arith_shift_reg;		\
		  *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				     ASHIFT, 1, speed_p);		\
	      else if (speed_p)						\
		*cost += extra_cost->alu.arith_shift;			\
	      *cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				  ASHIFT, 0, speed_p)			\
			+ rtx_cost (XEXP (x, 1 - IDX),			\
				    GET_MODE (shift_op),		\
				    OP, 1, speed_p));			\
/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
	       int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);
  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
	{
	  op_type = AMO_DEFAULT;

	  /* MINUS does not appear in RTL, but the architecture supports it,
	     so handle this case defensively.  */
	  op_type = AMO_NO_WB;

      if (VECTOR_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->fp[op_type];
      else
	*cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
	{
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.loadd;
	  else
	    *cost += extra_cost->ldst.loadf;
	}
      else if (VECTOR_MODE_P (mode))
	*cost += extra_cost->ldst.loadv;
      else
	{
	  /* Integer modes */
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.ldrd;
	  else
	    *cost += extra_cost->ldst.load;
	}
    }
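
/* Illustrative sketch (not part of GCC): arm_mem_costs above builds the cost
   of a MEM from three pieces -- a base of one insn, a tuning-table weight for
   the addressing mode, and a load/store cost for the access width.  The
   helper below only restates that structure with hypothetical names; the real
   weights come from current_tune->addr_mode_costs and extra_cost->ldst.  */
#if 0
static int
mem_cost_sketch (int addr_mode_weight, int access_cost)
{
  int cost = COSTS_N_INSNS (1);	/* Every memory access is at least one insn.  */
  cost += addr_mode_weight;	/* E.g. addr_mode_costs->integer[op_type].  */
  cost += access_cost;		/* E.g. extra_cost->ldst.load or .ldrd.  */
  return cost;
}
#endif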
/* Helper for arm_bfi_p.  */
static bool
arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
{
  unsigned HOST_WIDE_INT const1;
  unsigned HOST_WIDE_INT const2 = 0;

  if (!CONST_INT_P (XEXP (op0, 1)))
    return false;

  const1 = UINTVAL (XEXP (op0, 1));
  if (!CONST_INT_P (XEXP (op1, 1))
      || ~UINTVAL (XEXP (op1, 1)) != const1)
    return false;

  if (GET_CODE (XEXP (op0, 0)) == ASHIFT
      && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
    {
      const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
      *sub0 = XEXP (XEXP (op0, 0), 0);
    }
  else
    *sub0 = XEXP (op0, 0);

  if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return false;

  *sub1 = XEXP (op1, 0);
  return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
}

/* Recognize a BFI idiom.  Helper for arm_rtx_costs_internal.  The
   format looks something like:

   (IOR (AND (reg1) (~const1))
	(AND (ASHIFT (reg2) (const2))
	     (const1)))

   where const1 is a consecutive sequence of 1-bits with the
   least-significant non-zero bit starting at bit position const2.  If
   const2 is zero, then the shift will not appear at all, due to
   canonicalization.  The two arms of the IOR expression may be
   swapped.  */
static bool
arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
{
  if (GET_CODE (x) != IOR)
    return false;
  if (GET_CODE (XEXP (x, 0)) != AND
      || GET_CODE (XEXP (x, 1)) != AND)
    return false;
  return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
	  || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
}
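
/* Illustrative sketch (not part of GCC): the idiom recognized by arm_bfi_p
   above is a bit-field insert.  At the C level, inserting VAL into the
   WIDTH-bit field of DST that starts at bit POS is the operation below.
   The helper name and parameters are hypothetical.  */
#if 0
static unsigned int
bfi_semantics_sketch (unsigned int dst, unsigned int val,
		      unsigned int pos, unsigned int width)
{
  /* Mask of WIDTH consecutive 1-bits starting at bit POS; this plays the
     role of const1 in the comment above, with POS playing const2.  */
  unsigned int mask = ((width < 32 ? (1u << width) : 0u) - 1u) << pos;

  /* Keep the bits of DST outside the field, then OR in the shifted value
     restricted to the field -- the (IOR (AND ...) (AND (ASHIFT ...) ...))
     shape that arm_bfi_p looks for.  */
  return (dst & ~mask) | ((val << pos) & mask);
}
#endif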
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
			const struct cpu_cost_table *extra_cost,
			int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
				   && GET_MODE_SIZE (mode) > 4)

	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,

	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)

	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)

	  goto const_int_cost;
	}

      return arm_mem_costs (x, extra_cost, cost, speed_p);
      /* Calculations of LDM costs are complex.  We assume an initial cost
	 (ldm_1st) which will load the number of registers mentioned in
	 ldm_regs_per_insn_1st registers; then each additional
	 ldm_regs_per_insn_subsequent registers cost one more insn.  The
	 formula for N regs is thus:

	 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				   + ldm_regs_per_insn_subsequent - 1)
				  / ldm_regs_per_insn_subsequent).

	 Additional costs may also be added for addressing.  A similar
	 formula is used for STM.  */

      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      if (is_ldm || is_stm)
	{
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
		? extra_cost->ldst.ldm_regs_per_insn_1st
		: extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
		? extra_cost->ldst.ldm_regs_per_insn_subsequent
		: extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		+ COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
				  + regs_per_insn_sub - 1)
				 / regs_per_insn_sub);
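
/* Illustrative sketch (not part of GCC): the LDM/STM formula in the comment
   above, written out as a stand-alone calculation.  The function name and
   parameters are hypothetical; regs_1st and regs_sub correspond to
   ldm_regs_per_insn_1st and ldm_regs_per_insn_subsequent.  */
#if 0
static int
ldm_stm_cost_sketch (int nregs, int first_cost, int regs_1st, int regs_sub)
{
  /* The first insn covers up to REGS_1ST registers; every further group of
     REGS_SUB registers (rounded up) costs one more insn.  */
  int extra = nregs > regs_1st ? nregs - regs_1st : 0;
  int extra_insns = (extra + regs_sub - 1) / regs_sub;
  return first_cost + COSTS_N_INSNS (extra_insns);
}
#endif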
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
				? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */

      /* MOD by a power of 2 can be expanded as:
	 and	r0, r0, #(n - 1)
	 and	r1, r1, #(n - 1)
	 rsbpl	r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0

	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;

      /* Fall-through.  */
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
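
/* Illustrative sketch (not part of GCC): the sequence in the MOD comment
   above implements a signed modulo by a power of two.  At the C level the
   value it must produce is the one computed below; the helper name is
   hypothetical and N is assumed to be a power of two.  */
#if 0
static int
mod_pow2_sketch (int x, int n)
{
  int r = x & (n - 1);		/* Magnitude bits of the remainder.  */
  /* C division truncates towards zero, so a negative X with a non-zero
     remainder must be biased back by N.  */
  return (x < 0 && r != 0) ? r - n : r;
}
#endif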
10557 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
10559 *cost
+= (COSTS_N_INSNS (1)
10560 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
10562 *cost
+= extra_cost
->alu
.shift_reg
;
10570 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
10572 *cost
+= (COSTS_N_INSNS (2)
10573 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
10575 *cost
+= 2 * extra_cost
->alu
.shift
;
10576 /* Slightly disparage left shift by 1 at so we prefer adddi3. */
10577 if (code
== ASHIFT
&& XEXP (x
, 1) == CONST1_RTX (SImode
))
10581 else if (mode
== SImode
)
10583 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10584 /* Slightly disparage register shifts at -Os, but not by much. */
10585 if (!CONST_INT_P (XEXP (x
, 1)))
10586 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
10587 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10590 else if (GET_MODE_CLASS (mode
) == MODE_INT
10591 && GET_MODE_SIZE (mode
) < 4)
10593 if (code
== ASHIFT
)
10595 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10596 /* Slightly disparage register shifts at -Os, but not by
10598 if (!CONST_INT_P (XEXP (x
, 1)))
10599 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
10600 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10602 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
10604 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
10606 /* Can use SBFX/UBFX. */
10608 *cost
+= extra_cost
->alu
.bfx
;
10609 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10613 *cost
+= COSTS_N_INSNS (1);
10614 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10617 if (CONST_INT_P (XEXP (x
, 1)))
10618 *cost
+= 2 * extra_cost
->alu
.shift
;
10620 *cost
+= (extra_cost
->alu
.shift
10621 + extra_cost
->alu
.shift_reg
);
10624 /* Slightly disparage register shifts. */
10625 *cost
+= !CONST_INT_P (XEXP (x
, 1));
10628 else /* Rotates. */
10630 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
10631 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10634 if (CONST_INT_P (XEXP (x
, 1)))
10635 *cost
+= (2 * extra_cost
->alu
.shift
10636 + extra_cost
->alu
.log_shift
);
10638 *cost
+= (extra_cost
->alu
.shift
10639 + extra_cost
->alu
.shift_reg
10640 + extra_cost
->alu
.log_shift_reg
);
10646 *cost
= LIBCALL_COST (2);
10652 if (mode
== SImode
)
10655 *cost
+= extra_cost
->alu
.rev
;
10662 /* No rev instruction available. Look at arm_legacy_rev
10663 and thumb_legacy_rev for the form of RTL used then. */
10666 *cost
+= COSTS_N_INSNS (9);
10670 *cost
+= 6 * extra_cost
->alu
.shift
;
10671 *cost
+= 3 * extra_cost
->alu
.logical
;
10676 *cost
+= COSTS_N_INSNS (4);
10680 *cost
+= 2 * extra_cost
->alu
.shift
;
10681 *cost
+= extra_cost
->alu
.arith_shift
;
10682 *cost
+= 2 * extra_cost
->alu
.logical
;
10690 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10691 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10693 if (GET_CODE (XEXP (x
, 0)) == MULT
10694 || GET_CODE (XEXP (x
, 1)) == MULT
)
10696 rtx mul_op0
, mul_op1
, sub_op
;
10699 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10701 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10703 mul_op0
= XEXP (XEXP (x
, 0), 0);
10704 mul_op1
= XEXP (XEXP (x
, 0), 1);
10705 sub_op
= XEXP (x
, 1);
10709 mul_op0
= XEXP (XEXP (x
, 1), 0);
10710 mul_op1
= XEXP (XEXP (x
, 1), 1);
10711 sub_op
= XEXP (x
, 0);
10714 /* The first operand of the multiply may be optionally
10716 if (GET_CODE (mul_op0
) == NEG
)
10717 mul_op0
= XEXP (mul_op0
, 0);
10719 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10720 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10721 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
10727 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10731 if (mode
== SImode
)
10733 rtx shift_by_reg
= NULL
;
10736 rtx op0
= XEXP (x
, 0);
10737 rtx op1
= XEXP (x
, 1);
10739 /* Factor out any borrow operation. There's more than one way
10740 of expressing this; try to recognize them all. */
10741 if (GET_CODE (op0
) == MINUS
)
10743 if (arm_borrow_operation (op1
, SImode
))
10745 op1
= XEXP (op0
, 1);
10746 op0
= XEXP (op0
, 0);
10748 else if (arm_borrow_operation (XEXP (op0
, 1), SImode
))
10749 op0
= XEXP (op0
, 0);
10751 else if (GET_CODE (op1
) == PLUS
10752 && arm_borrow_operation (XEXP (op1
, 0), SImode
))
10753 op1
= XEXP (op1
, 0);
10754 else if (GET_CODE (op0
) == NEG
10755 && arm_borrow_operation (op1
, SImode
))
10757 /* Negate with carry-in. For Thumb2 this is done with
10758 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10759 RSC instruction that exists in Arm mode. */
10761 *cost
+= (TARGET_THUMB2
10762 ? extra_cost
->alu
.arith_shift
10763 : extra_cost
->alu
.arith
);
10764 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, MINUS
, 0, speed_p
);
10767 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10768 Note we do mean ~borrow here. */
10769 else if (TARGET_ARM
&& arm_carry_operation (op0
, SImode
))
10771 *cost
+= rtx_cost (op1
, mode
, code
, 1, speed_p
);
10775 shift_op
= shifter_op_p (op0
, &shift_by_reg
);
10776 if (shift_op
== NULL
)
10778 shift_op
= shifter_op_p (op1
, &shift_by_reg
);
10779 non_shift_op
= op0
;
10782 non_shift_op
= op1
;
10784 if (shift_op
!= NULL
)
10786 if (shift_by_reg
!= NULL
)
10789 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10790 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
10793 *cost
+= extra_cost
->alu
.arith_shift
;
10795 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
10796 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
10800 if (arm_arch_thumb2
10801 && GET_CODE (XEXP (x
, 1)) == MULT
)
10805 *cost
+= extra_cost
->mult
[0].add
;
10806 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
10807 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
10808 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
10812 if (CONST_INT_P (op0
))
10814 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
10815 INTVAL (op0
), NULL_RTX
,
10817 *cost
= COSTS_N_INSNS (insns
);
10819 *cost
+= insns
* extra_cost
->alu
.arith
;
10820 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10824 *cost
+= extra_cost
->alu
.arith
;
10826 /* Don't recurse as we don't want to cost any borrow that
10828 *cost
+= rtx_cost (op0
, mode
, MINUS
, 0, speed_p
);
10829 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10833 if (GET_MODE_CLASS (mode
) == MODE_INT
10834 && GET_MODE_SIZE (mode
) < 4)
10836 rtx shift_op
, shift_reg
;
10839 /* We check both sides of the MINUS for shifter operands since,
10840 unlike PLUS, it's not commutative. */
10842 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0);
10843 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1);
10845 /* Slightly disparage, as we might need to widen the result. */
10848 *cost
+= extra_cost
->alu
.arith
;
10850 if (CONST_INT_P (XEXP (x
, 0)))
10852 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10859 if (mode
== DImode
)
10861 *cost
+= COSTS_N_INSNS (1);
10863 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
10865 rtx op1
= XEXP (x
, 1);
10868 *cost
+= 2 * extra_cost
->alu
.arith
;
10870 if (GET_CODE (op1
) == ZERO_EXTEND
)
10871 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
10874 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10875 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10879 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10882 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
10883 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
10885 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
10888 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10889 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
10892 *cost
+= (extra_cost
->alu
.arith
10893 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10894 ? extra_cost
->alu
.arith
10895 : extra_cost
->alu
.arith_shift
));
10896 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
10897 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10898 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
10903 *cost
+= 2 * extra_cost
->alu
.arith
;
10909 *cost
= LIBCALL_COST (2);
10913 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10914 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10916 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10918 rtx mul_op0
, mul_op1
, add_op
;
10921 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10923 mul_op0
= XEXP (XEXP (x
, 0), 0);
10924 mul_op1
= XEXP (XEXP (x
, 0), 1);
10925 add_op
= XEXP (x
, 1);
10927 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10928 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10929 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
10935 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10938 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10940 *cost
= LIBCALL_COST (2);
10944 /* Narrow modes can be synthesized in SImode, but the range
10945 of useful sub-operations is limited. Check for shift operations
10946 on one of the operands. Only left shifts can be used in the
10948 if (GET_MODE_CLASS (mode
) == MODE_INT
10949 && GET_MODE_SIZE (mode
) < 4)
10951 rtx shift_op
, shift_reg
;
10954 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0);
10956 if (CONST_INT_P (XEXP (x
, 1)))
10958 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10959 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10961 *cost
= COSTS_N_INSNS (insns
);
10963 *cost
+= insns
* extra_cost
->alu
.arith
;
10964 /* Slightly penalize a narrow operation as the result may
10966 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10970 /* Slightly penalize a narrow operation as the result may
10974 *cost
+= extra_cost
->alu
.arith
;
10979 if (mode
== SImode
)
10981 rtx shift_op
, shift_reg
;
10983 if (TARGET_INT_SIMD
10984 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10985 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10987 /* UXTA[BH] or SXTA[BH]. */
10989 *cost
+= extra_cost
->alu
.extend_arith
;
10990 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10992 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
10996 rtx op0
= XEXP (x
, 0);
10997 rtx op1
= XEXP (x
, 1);
10999 /* Handle a side effect of adding in the carry to an addition. */
11000 if (GET_CODE (op0
) == PLUS
11001 && arm_carry_operation (op1
, mode
))
11003 op1
= XEXP (op0
, 1);
11004 op0
= XEXP (op0
, 0);
11006 else if (GET_CODE (op1
) == PLUS
11007 && arm_carry_operation (op0
, mode
))
11009 op0
= XEXP (op1
, 0);
11010 op1
= XEXP (op1
, 1);
11012 else if (GET_CODE (op0
) == PLUS
)
11014 op0
= strip_carry_operation (op0
);
11015 if (swap_commutative_operands_p (op0
, op1
))
11016 std::swap (op0
, op1
);
11019 if (arm_carry_operation (op0
, mode
))
11021 /* Adding the carry to a register is a canonicalization of
11022 adding 0 to the register plus the carry. */
11024 *cost
+= extra_cost
->alu
.arith
;
11025 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed_p
);
11030 shift_op
= shifter_op_p (op0
, &shift_reg
);
11031 if (shift_op
!= NULL
)
11036 *cost
+= extra_cost
->alu
.arith_shift_reg
;
11037 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
11040 *cost
+= extra_cost
->alu
.arith_shift
;
11042 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
11043 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
11047 if (GET_CODE (op0
) == MULT
)
11051 if (TARGET_DSP_MULTIPLY
11052 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
11053 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
11054 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
11055 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
11056 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
11057 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
11058 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
11059 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
11060 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
11061 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
11062 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
11063 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
11066 /* SMLA[BT][BT]. */
11068 *cost
+= extra_cost
->mult
[0].extend_add
;
11069 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
11070 SIGN_EXTEND
, 0, speed_p
)
11071 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
11072 SIGN_EXTEND
, 0, speed_p
)
11073 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
11078 *cost
+= extra_cost
->mult
[0].add
;
11079 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
11080 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
11081 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
11085 if (CONST_INT_P (op1
))
11087 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
11088 INTVAL (op1
), NULL_RTX
,
11090 *cost
= COSTS_N_INSNS (insns
);
11092 *cost
+= insns
* extra_cost
->alu
.arith
;
11093 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed_p
);
11098 *cost
+= extra_cost
->alu
.arith
;
11100 /* Don't recurse here because we want to test the operands
11101 without any carry operation. */
11102 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed_p
);
11103 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed_p
);
11107 if (mode
== DImode
)
11109 if (GET_CODE (XEXP (x
, 0)) == MULT
11110 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
11111 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
11112 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
11113 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
11116 *cost
+= extra_cost
->mult
[1].extend_add
;
11117 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
11118 ZERO_EXTEND
, 0, speed_p
)
11119 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
11120 ZERO_EXTEND
, 0, speed_p
)
11121 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
11125 *cost
+= COSTS_N_INSNS (1);
11127 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11128 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
11131 *cost
+= (extra_cost
->alu
.arith
11132 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11133 ? extra_cost
->alu
.arith
11134 : extra_cost
->alu
.arith_shift
));
11136 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
11138 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
11143 *cost
+= 2 * extra_cost
->alu
.arith
;
11148 *cost
= LIBCALL_COST (2);
11153 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
11156 *cost
+= extra_cost
->alu
.rev
;
11160 else if (mode
== SImode
&& arm_arch_thumb2
11161 && arm_bfi_p (x
, &sub0
, &sub1
))
11163 *cost
+= rtx_cost (sub0
, mode
, ZERO_EXTRACT
, 1, speed_p
);
11164 *cost
+= rtx_cost (sub1
, mode
, ZERO_EXTRACT
, 0, speed_p
);
11166 *cost
+= extra_cost
->alu
.bfi
;
11172 /* Fall through. */
11173 case AND
: case XOR
:
11174 if (mode
== SImode
)
11176 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
11177 rtx op0
= XEXP (x
, 0);
11178 rtx shift_op
, shift_reg
;
11182 || (code
== IOR
&& TARGET_THUMB2
)))
11183 op0
= XEXP (op0
, 0);
11186 shift_op
= shifter_op_p (op0
, &shift_reg
);
11187 if (shift_op
!= NULL
)
11192 *cost
+= extra_cost
->alu
.log_shift_reg
;
11193 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
11196 *cost
+= extra_cost
->alu
.log_shift
;
11198 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
11199 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
11203 if (CONST_INT_P (XEXP (x
, 1)))
11205 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
11206 INTVAL (XEXP (x
, 1)), NULL_RTX
,
11209 *cost
= COSTS_N_INSNS (insns
);
11211 *cost
+= insns
* extra_cost
->alu
.logical
;
11212 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
11217 *cost
+= extra_cost
->alu
.logical
;
11218 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
11219 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
11223 if (mode
== DImode
)
11225 rtx op0
= XEXP (x
, 0);
11226 enum rtx_code subcode
= GET_CODE (op0
);
11228 *cost
+= COSTS_N_INSNS (1);
11232 || (code
== IOR
&& TARGET_THUMB2
)))
11233 op0
= XEXP (op0
, 0);
11235 if (GET_CODE (op0
) == ZERO_EXTEND
)
11238 *cost
+= 2 * extra_cost
->alu
.logical
;
11240 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
11242 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
11245 else if (GET_CODE (op0
) == SIGN_EXTEND
)
11248 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
11250 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
11252 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
11257 *cost
+= 2 * extra_cost
->alu
.logical
;
11263 *cost
= LIBCALL_COST (2);
11267 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11268 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11270 rtx op0
= XEXP (x
, 0);
11272 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
11273 op0
= XEXP (op0
, 0);
11276 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
11278 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
11279 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
11282 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11284 *cost
= LIBCALL_COST (2);
11288 if (mode
== SImode
)
11290 if (TARGET_DSP_MULTIPLY
11291 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
11292 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
11293 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
11294 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11295 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
11296 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
11297 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11298 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
11299 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
11300 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
11301 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11302 && (INTVAL (XEXP (XEXP (x
, 1), 1))
11305 /* SMUL[TB][TB]. */
11307 *cost
+= extra_cost
->mult
[0].extend
;
11308 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
11309 SIGN_EXTEND
, 0, speed_p
);
11310 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
11311 SIGN_EXTEND
, 1, speed_p
);
11315 *cost
+= extra_cost
->mult
[0].simple
;
11319 if (mode
== DImode
)
11321 if ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11322 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
11323 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
11324 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
))
11327 *cost
+= extra_cost
->mult
[1].extend
;
11328 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
11329 ZERO_EXTEND
, 0, speed_p
)
11330 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
11331 ZERO_EXTEND
, 0, speed_p
));
11335 *cost
= LIBCALL_COST (2);
11340 *cost
= LIBCALL_COST (2);
11344 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11345 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11347 if (GET_CODE (XEXP (x
, 0)) == MULT
)
11350 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
11355 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
11359 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11361 *cost
= LIBCALL_COST (1);
11365 if (mode
== SImode
)
11367 if (GET_CODE (XEXP (x
, 0)) == ABS
)
11369 *cost
+= COSTS_N_INSNS (1);
11370 /* Assume the non-flag-changing variant. */
11372 *cost
+= (extra_cost
->alu
.log_shift
11373 + extra_cost
->alu
.arith_shift
);
11374 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
11378 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
11379 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
11381 *cost
+= COSTS_N_INSNS (1);
11382 /* No extra cost for MOV imm and MVN imm. */
11383 /* If the comparison op is using the flags, there's no further
11384 cost, otherwise we need to add the cost of the comparison. */
11385 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
11386 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
11387 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
11389 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
11390 *cost
+= (COSTS_N_INSNS (1)
11391 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
11393 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
11396 *cost
+= extra_cost
->alu
.arith
;
11402 *cost
+= extra_cost
->alu
.arith
;
11406 if (GET_MODE_CLASS (mode
) == MODE_INT
11407 && GET_MODE_SIZE (mode
) < 4)
11409 /* Slightly disparage, as we might need an extend operation. */
11412 *cost
+= extra_cost
->alu
.arith
;
11416 if (mode
== DImode
)
11418 *cost
+= COSTS_N_INSNS (1);
11420 *cost
+= 2 * extra_cost
->alu
.arith
;
11425 *cost
= LIBCALL_COST (1);
11429 if (mode
== SImode
)
11432 rtx shift_reg
= NULL
;
11434 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
11438 if (shift_reg
!= NULL
)
11441 *cost
+= extra_cost
->alu
.log_shift_reg
;
11442 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
11445 *cost
+= extra_cost
->alu
.log_shift
;
11446 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
11451 *cost
+= extra_cost
->alu
.logical
;
11454 if (mode
== DImode
)
11456 *cost
+= COSTS_N_INSNS (1);
11462 *cost
+= LIBCALL_COST (1);
11467 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
11469 *cost
+= COSTS_N_INSNS (3);
11472 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
11473 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
11475 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
11476 /* Assume that if one arm of the if_then_else is a register,
11477 that it will be tied with the result and eliminate the
11478 conditional insn. */
11479 if (REG_P (XEXP (x
, 1)))
11481 else if (REG_P (XEXP (x
, 2)))
11487 if (extra_cost
->alu
.non_exec_costs_exec
)
11488 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
11490 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
11493 *cost
+= op1cost
+ op2cost
;
11499 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
11503 machine_mode op0mode
;
11504 /* We'll mostly assume that the cost of a compare is the cost of the
11505 LHS. However, there are some notable exceptions. */
11507 /* Floating point compares are never done as side-effects. */
11508 op0mode
= GET_MODE (XEXP (x
, 0));
11509 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
11510 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11513 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
11515 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
11517 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
11523 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
11525 *cost
= LIBCALL_COST (2);
11529 /* DImode compares normally take two insns. */
11530 if (op0mode
== DImode
)
11532 *cost
+= COSTS_N_INSNS (1);
11534 *cost
+= 2 * extra_cost
->alu
.arith
;
11538 if (op0mode
== SImode
)
11543 if (XEXP (x
, 1) == const0_rtx
11544 && !(REG_P (XEXP (x
, 0))
11545 || (GET_CODE (XEXP (x
, 0)) == SUBREG
11546 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
11548 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
11550 /* Multiply operations that set the flags are often
11551 significantly more expensive. */
11553 && GET_CODE (XEXP (x
, 0)) == MULT
11554 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
11555 *cost
+= extra_cost
->mult
[0].flag_setting
;
11558 && GET_CODE (XEXP (x
, 0)) == PLUS
11559 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11560 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
11562 *cost
+= extra_cost
->mult
[0].flag_setting
;
11567 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
11568 if (shift_op
!= NULL
)
11570 if (shift_reg
!= NULL
)
11572 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
11575 *cost
+= extra_cost
->alu
.arith_shift_reg
;
11578 *cost
+= extra_cost
->alu
.arith_shift
;
11579 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
11580 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
11585 *cost
+= extra_cost
->alu
.arith
;
11586 if (CONST_INT_P (XEXP (x
, 1))
11587 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
11589 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
11597 *cost
= LIBCALL_COST (2);
11607 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11610 && TARGET_HARD_FLOAT
11611 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
11612 && (XEXP (x
, 1) == CONST0_RTX (mode
)))
11618 /* Fall through. */
11632 if (outer_code
== SET
)
11634 /* Is it a store-flag operation? */
11635 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
11636 && XEXP (x
, 1) == const0_rtx
)
11638 /* Thumb also needs an IT insn. */
11639 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
11642 if (XEXP (x
, 1) == const0_rtx
)
11647 /* LSR Rd, Rn, #31. */
11649 *cost
+= extra_cost
->alu
.shift
;
11659 *cost
+= COSTS_N_INSNS (1);
11663 /* RSBS T1, Rn, Rn, LSR #31
11665 *cost
+= COSTS_N_INSNS (1);
11667 *cost
+= extra_cost
->alu
.arith_shift
;
11671 /* RSB Rd, Rn, Rn, ASR #1
11672 LSR Rd, Rd, #31. */
11673 *cost
+= COSTS_N_INSNS (1);
11675 *cost
+= (extra_cost
->alu
.arith_shift
11676 + extra_cost
->alu
.shift
);
11682 *cost
+= COSTS_N_INSNS (1);
11684 *cost
+= extra_cost
->alu
.shift
;
11688 /* Remaining cases are either meaningless or would take
11689 three insns anyway. */
11690 *cost
= COSTS_N_INSNS (3);
11693 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11698 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
11699 if (CONST_INT_P (XEXP (x
, 1))
11700 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
11702 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11709 /* Not directly inside a set. If it involves the condition code
11710 register it must be the condition for a branch, cond_exec or
11711 I_T_E operation. Since the comparison is performed elsewhere
11712 this is just the control part which has no additional
11714 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
11715 && XEXP (x
, 1) == const0_rtx
)
11723 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11724 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11727 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
11731 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11733 *cost
= LIBCALL_COST (1);
11737 if (mode
== SImode
)
11740 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
11744 *cost
= LIBCALL_COST (1);
11748 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
11749 && MEM_P (XEXP (x
, 0)))
11751 if (mode
== DImode
)
11752 *cost
+= COSTS_N_INSNS (1);
11757 if (GET_MODE (XEXP (x
, 0)) == SImode
)
11758 *cost
+= extra_cost
->ldst
.load
;
11760 *cost
+= extra_cost
->ldst
.load_sign_extend
;
11762 if (mode
== DImode
)
11763 *cost
+= extra_cost
->alu
.shift
;
11768 /* Widening from less than 32-bits requires an extend operation. */
11769 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
11771 /* We have SXTB/SXTH. */
11772 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11774 *cost
+= extra_cost
->alu
.extend
;
11776 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
11778 /* Needs two shifts. */
11779 *cost
+= COSTS_N_INSNS (1);
11780 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11782 *cost
+= 2 * extra_cost
->alu
.shift
;
11785 /* Widening beyond 32-bits requires one more insn. */
11786 if (mode
== DImode
)
11788 *cost
+= COSTS_N_INSNS (1);
11790 *cost
+= extra_cost
->alu
.shift
;
11797 || GET_MODE (XEXP (x
, 0)) == SImode
11798 || GET_MODE (XEXP (x
, 0)) == QImode
)
11799 && MEM_P (XEXP (x
, 0)))
11801 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11803 if (mode
== DImode
)
11804 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
11809 /* Widening from less than 32-bits requires an extend operation. */
11810 if (GET_MODE (XEXP (x
, 0)) == QImode
)
11812 /* UXTB can be a shorter instruction in Thumb2, but it might
11813 be slower than the AND Rd, Rn, #255 alternative. When
11814 optimizing for speed it should never be slower to use
11815 AND, and we don't really model 16-bit vs 32-bit insns
11818 *cost
+= extra_cost
->alu
.logical
;
11820 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
11822 /* We have UXTB/UXTH. */
11823 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11825 *cost
+= extra_cost
->alu
.extend
;
11827 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
11829 /* Needs two shifts. It's marginally preferable to use
11830 shifts rather than two BIC instructions as the second
11831 shift may merge with a subsequent insn as a shifter
11833 *cost
= COSTS_N_INSNS (2);
11834 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11836 *cost
+= 2 * extra_cost
->alu
.shift
;
11839 /* Widening beyond 32-bits requires one more insn. */
11840 if (mode
== DImode
)
11842 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
11849 /* CONST_INT has no mode, so we cannot tell for sure how many
11850 insns are really going to be needed. The best we can do is
11851 look at the value passed. If it fits in SImode, then assume
11852 that's the mode it will be used for. Otherwise assume it
11853 will be used in DImode. */
11854 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
11859 /* Avoid blowing up in arm_gen_constant (). */
11860 if (!(outer_code
== PLUS
11861 || outer_code
== AND
11862 || outer_code
== IOR
11863 || outer_code
== XOR
11864 || outer_code
== MINUS
))
11868 if (mode
== SImode
)
11870 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
11871 INTVAL (x
), NULL
, NULL
,
11877 *cost
+= COSTS_N_INSNS (arm_gen_constant
11878 (outer_code
, SImode
, NULL
,
11879 trunc_int_for_mode (INTVAL (x
), SImode
),
11881 + arm_gen_constant (outer_code
, SImode
, NULL
,
11882 INTVAL (x
) >> 32, NULL
,
11894 if (arm_arch_thumb2
&& !flag_pic
)
11895 *cost
+= COSTS_N_INSNS (1);
11897 *cost
+= extra_cost
->ldst
.load
;
11900 *cost
+= COSTS_N_INSNS (1);
11904 *cost
+= COSTS_N_INSNS (1);
11906 *cost
+= extra_cost
->alu
.arith
;
11912 *cost
= COSTS_N_INSNS (4);
11917 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11918 && (mode
== SFmode
|| mode
== HFmode
|| !TARGET_VFP_SINGLE
))
11920 if (vfp3_const_double_rtx (x
))
11923 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
11929 if (mode
== DFmode
)
11930 *cost
+= extra_cost
->ldst
.loadd
;
11932 *cost
+= extra_cost
->ldst
.loadf
;
11935 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
11939 *cost
= COSTS_N_INSNS (4);
11943 if (((TARGET_NEON
&& TARGET_HARD_FLOAT
11944 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
11945 || TARGET_HAVE_MVE
)
11946 && simd_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
11947 *cost
= COSTS_N_INSNS (1);
11948 else if (TARGET_HAVE_MVE
)
11950 /* 128-bit vector requires two vldr.64 on MVE. */
11951 *cost
= COSTS_N_INSNS (2);
11953 *cost
+= extra_cost
->ldst
.loadd
* 2;
11956 *cost
= COSTS_N_INSNS (4);
11961 /* When optimizing for size, we prefer constant pool entries to
11962 MOVW/MOVT pairs, so bump the cost of these slightly. */
11969 *cost
+= extra_cost
->alu
.clz
;
11973 if (XEXP (x
, 1) == const0_rtx
)
11976 *cost
+= extra_cost
->alu
.log_shift
;
11977 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11980 /* Fall through. */
11984 *cost
+= COSTS_N_INSNS (1);
11988 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
11989 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11990 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
11991 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11992 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
11993 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
11994 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
11995 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11999 *cost
+= extra_cost
->mult
[1].extend
;
12000 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
12001 ZERO_EXTEND
, 0, speed_p
)
12002 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
12003 ZERO_EXTEND
, 0, speed_p
));
12006 *cost
= LIBCALL_COST (1);
12009 case UNSPEC_VOLATILE
:
12011 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
12014 /* Reading the PC is like reading any other register. Writing it
12015 is more expensive, but we take that into account elsewhere. */
12020 /* TODO: Simple zero_extract of bottom bits using AND. */
12021 /* Fall through. */
12025 && CONST_INT_P (XEXP (x
, 1))
12026 && CONST_INT_P (XEXP (x
, 2)))
12029 *cost
+= extra_cost
->alu
.bfx
;
12030 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
12033 /* Without UBFX/SBFX, need to resort to shift operations. */
12034 *cost
+= COSTS_N_INSNS (1);
12036 *cost
+= 2 * extra_cost
->alu
.shift
;
12037 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
12041 if (TARGET_HARD_FLOAT
)
12044 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
12046 && GET_MODE (XEXP (x
, 0)) == HFmode
)
12048 /* Pre v8, widening HF->DF is a two-step process, first
12049 widening to SFmode. */
12050 *cost
+= COSTS_N_INSNS (1);
12052 *cost
+= extra_cost
->fp
[0].widen
;
12054 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
12058 *cost
= LIBCALL_COST (1);
12061 case FLOAT_TRUNCATE
:
12062 if (TARGET_HARD_FLOAT
)
12065 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
12066 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
12068 /* Vector modes? */
12070 *cost
= LIBCALL_COST (1);
12074 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
12076 rtx op0
= XEXP (x
, 0);
12077 rtx op1
= XEXP (x
, 1);
12078 rtx op2
= XEXP (x
, 2);
12081 /* vfms or vfnma. */
12082 if (GET_CODE (op0
) == NEG
)
12083 op0
= XEXP (op0
, 0);
12085 /* vfnms or vfnma. */
12086 if (GET_CODE (op2
) == NEG
)
12087 op2
= XEXP (op2
, 0);
12089 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
12090 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
12091 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
12094 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
12099 *cost
= LIBCALL_COST (3);
12104 if (TARGET_HARD_FLOAT
)
12106 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12107 a vcvt fixed-point conversion. */
12108 if (code
== FIX
&& mode
== SImode
12109 && GET_CODE (XEXP (x
, 0)) == FIX
12110 && GET_MODE (XEXP (x
, 0)) == SFmode
12111 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
12112 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
12116 *cost
+= extra_cost
->fp
[0].toint
;
12118 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
12123 if (GET_MODE_CLASS (mode
) == MODE_INT
)
12125 mode
= GET_MODE (XEXP (x
, 0));
12127 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
12128 /* Strip of the 'cost' of rounding towards zero. */
12129 if (GET_CODE (XEXP (x
, 0)) == FIX
)
12130 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
12133 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
12134 /* ??? Increase the cost to deal with transferring from
12135 FP -> CORE registers? */
12138 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
12142 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
12145 /* Vector costs? */
12147 *cost
= LIBCALL_COST (1);
12151 case UNSIGNED_FLOAT
:
12152 if (TARGET_HARD_FLOAT
)
12154 /* ??? Increase the cost to deal with transferring from CORE
12155 -> FP registers? */
12157 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
12160 *cost
= LIBCALL_COST (1);
12168 /* Just a guess. Guess number of instructions in the asm
12169 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12170 though (see PR60663). */
12171 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
12172 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
12174 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
12178 if (mode
!= VOIDmode
)
12179 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
12181 *cost
= COSTS_N_INSNS (4); /* Who knows? */
#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,
				   total, speed);

  if (dump_file && arm_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}

static int
arm_insn_cost (rtx_insn *insn, bool speed)
{
  int cost;

  /* Don't cost a simple reg-reg move at a full insn cost: such moves
     will likely disappear during register allocation.  */
  if (!reload_completed
      && GET_CODE (PATTERN (insn)) == SET
      && REG_P (SET_DEST (PATTERN (insn)))
      && REG_P (SET_SRC (PATTERN (insn))))

  cost = pattern_cost (PATTERN (insn), speed);
  /* If the cost is zero, then it's likely a complex insn.  We don't want the
     cost of these to be less than something we know about.  */
  return cost ? cost : COSTS_N_INSNS (2);
}

/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)

  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)

      if (CONST_INT_P (XEXP (x, 1)))

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))

static int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int *cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
	      || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
12343 /* Adjust cost hook for Cortex A9. */
12345 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12355 case REG_DEP_OUTPUT
:
12356 if (recog_memoized (insn
) >= 0
12357 && recog_memoized (dep
) >= 0)
12359 if (GET_CODE (PATTERN (insn
)) == SET
)
12362 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
12364 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
12366 enum attr_type attr_type_insn
= get_attr_type (insn
);
12367 enum attr_type attr_type_dep
= get_attr_type (dep
);
		    /* By default all dependencies of the form
		       have an extra latency of 1 cycle because
		       of the input and output dependency in this
		       case.  However this gets modeled as a true
		       dependency and hence all these checks.  */
12376 if (REG_P (SET_DEST (PATTERN (insn
)))
12377 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
			/* FMACS is a special case where the dependent
			   instruction can be issued 3 cycles before
			   the normal latency in case of an output
			   dependency.  */
12383 if ((attr_type_insn
== TYPE_FMACS
12384 || attr_type_insn
== TYPE_FMACD
)
12385 && (attr_type_dep
== TYPE_FMACS
12386 || attr_type_dep
== TYPE_FMACD
))
12388 if (dep_type
== REG_DEP_OUTPUT
)
12389 *cost
= insn_default_latency (dep
) - 3;
12391 *cost
= insn_default_latency (dep
);
12396 if (dep_type
== REG_DEP_OUTPUT
)
12397 *cost
= insn_default_latency (dep
) + 1;
12399 *cost
= insn_default_latency (dep
);
12409 gcc_unreachable ();
12415 /* Adjust cost hook for FA726TE. */
12417 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
12422 if (dep_type
== REG_DEP_TRUE
12423 && recog_memoized (insn
) >= 0
12424 && recog_memoized (dep
) >= 0
12425 && get_attr_conds (dep
) == CONDS_SET
)
12427 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12428 if (get_attr_conds (insn
) == CONDS_USE
12429 && get_attr_type (insn
) != TYPE_BRANCH
)
12435 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
12436 || get_attr_conds (insn
) == CONDS_USE
)
12446 /* Implement TARGET_REGISTER_MOVE_COST.
12448 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12449 it is typically more expensive than a single memory access. We set
12450 the cost to less than two memory accesses so that floating
12451 point to integer conversion does not go through memory. */
12454 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
12455 reg_class_t from
, reg_class_t to
)
12459 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
12460 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
12462 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
12463 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
12465 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
12472 if (from
== HI_REGS
|| to
== HI_REGS
)
12479 /* Implement TARGET_MEMORY_MOVE_COST. */
12482 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
12483 bool in ATTRIBUTE_UNUSED
)
12489 if (GET_MODE_SIZE (mode
) < 4)
12492 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned int elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
12559 /* Return true if and only if this insn can dual-issue only as older. */
12561 cortexa7_older_only (rtx_insn
*insn
)
12563 if (recog_memoized (insn
) < 0)
12566 switch (get_attr_type (insn
))
12568 case TYPE_ALU_DSP_REG
:
12569 case TYPE_ALU_SREG
:
12570 case TYPE_ALUS_SREG
:
12571 case TYPE_LOGIC_REG
:
12572 case TYPE_LOGICS_REG
:
12574 case TYPE_ADCS_REG
:
12579 case TYPE_SHIFT_IMM
:
12580 case TYPE_SHIFT_REG
:
12581 case TYPE_LOAD_BYTE
:
12584 case TYPE_FFARITHS
:
12586 case TYPE_FFARITHD
:
12604 case TYPE_F_STORES
:
12611 /* Return true if and only if this insn can dual-issue as younger. */
12613 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
12615 if (recog_memoized (insn
) < 0)
12618 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
12622 switch (get_attr_type (insn
))
12625 case TYPE_ALUS_IMM
:
12626 case TYPE_LOGIC_IMM
:
12627 case TYPE_LOGICS_IMM
:
12632 case TYPE_MOV_SHIFT
:
12633 case TYPE_MOV_SHIFT_REG
:
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
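/* Illustrative example (register choices arbitrary): if the ready list is,
   from the head, { and r0, r1, #1  (logic_imm, can issue as younger),
   add r2, r3, r4  (alu_sreg, older-only) }, the older-only ADD is moved in
   front of the AND so that the more flexible AND stays available to fill
   the younger slot of a dual-issue pair in a later cycle.  */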
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 0)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 0)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 0)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 0)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 0)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    {
      ready[i] = ready[i + 1];
    }

  ready[i] = first_older_only_insn;
}
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
12732 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12733 It corrects the value of COST based on the relationship between
12734 INSN and DEP through the dependence LINK. It returns the new
12735 value. There is a per-core adjust_cost hook to adjust scheduler costs
12736 and the per-core hook can choose to completely override the generic
12737 adjust_cost function. Only put bits of code into arm_adjust_cost that
12738 are common across all cores. */
12740 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
12745 /* When generating Thumb-1 code, we want to place flag-setting operations
12746 close to a conditional branch which depends on them, so that we can
12747 omit the comparison. */
12750 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
12751 && recog_memoized (dep
) >= 0
12752 && get_attr_conds (dep
) == CONDS_SET
)
12755 if (current_tune
->sched_adjust_cost
!= NULL
)
12757 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
12761 /* XXX Is this strictly true? */
12762 if (dep_type
== REG_DEP_ANTI
12763 || dep_type
== REG_DEP_OUTPUT
)
12766 /* Call insns don't incur a stall, even if they follow a load. */
12771 if ((i_pat
= single_set (insn
)) != NULL
12772 && MEM_P (SET_SRC (i_pat
))
12773 && (d_pat
= single_set (dep
)) != NULL
12774 && MEM_P (SET_DEST (d_pat
)))
12776 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
12777 /* This is a load after a store, there is no conflict if the load reads
12778 from a cached area. Assume that loads from the stack, and from the
12779 constant pool are cached, and that others will miss. This is a
12782 if ((SYMBOL_REF_P (src_mem
)
12783 && CONSTANT_POOL_ADDRESS_P (src_mem
))
12784 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
12785 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
12786 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
12794 arm_max_conditional_execute (void)
12796 return max_insns_skipped
;
static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  return (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT
	  && x == CONST0_RTX (GET_MODE (x)));
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     (-1)^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

    - A (most-significant) is the sign bit.
    - BCD are the exponent (encoded as r XOR 3).
    - EFGH are the mantissa (encoded as n - 16).  */
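/* Worked example of the encoding above: 1.0 = 16 * 2^-4, so s = 0, n = 16,
   r = 4, giving A = 0, BCD = 4 XOR 3 = 0b111 and EFGH = 0b0000, i.e. the
   8-bit pattern 0b01110000 (0x70).  Similarly 31.0 (n = 31, r = 0) encodes
   as 0b00111111 (0x3f).  The representable magnitudes therefore range from
   16 * 2^-7 = 0.125 up to 31 * 2^0 = 31.  */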
12858 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12859 fconst[sd] instruction, or -1 if X isn't suitable. */
12861 vfp3_const_double_index (rtx x
)
12863 REAL_VALUE_TYPE r
, m
;
12864 int sign
, exponent
;
12865 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12866 unsigned HOST_WIDE_INT mask
;
12867 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12870 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12873 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12875 /* We can't represent these things, so detect them first. */
12876 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12879 /* Extract sign, exponent and mantissa. */
12880 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12881 r
= real_value_abs (&r
);
12882 exponent
= REAL_EXP (&r
);
12883 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12884 highest (sign) bit, with a fixed binary point at bit point_pos.
12885 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12886 bits for the mantissa, this may fail (low bits would be lost). */
12887 real_ldexp (&m
, &r
, point_pos
- exponent
);
12888 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12889 mantissa
= w
.elt (0);
12890 mant_hi
= w
.elt (1);
12892 /* If there are bits set in the low part of the mantissa, we can't
12893 represent this value. */
12897 /* Now make it so that mantissa contains the most-significant bits, and move
12898 the point_pos to indicate that the least-significant bits have been
12900 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12901 mantissa
= mant_hi
;
12903 /* We can permit four significant bits of mantissa only, plus a high bit
12904 which is always 1. */
12905 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
12906 if ((mantissa
& mask
) != 0)
12909 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12910 mantissa
>>= point_pos
- 5;
12912 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12913 floating-point immediate zero with Neon using an integer-zero load, but
12914 that case is handled elsewhere.) */
12918 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12920 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12921 normalized significands are in the range [1, 2). (Our mantissa is shifted
12922 left 4 places at this point relative to normalized IEEE754 values). GCC
12923 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12924 REAL_EXP must be altered. */
12925 exponent
= 5 - exponent
;
12927 if (exponent
< 0 || exponent
> 7)
12930 /* Sign, mantissa and exponent are now in the correct form to plug into the
12931 formula described in the comment above. */
12932 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12935 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12937 vfp3_const_double_rtx (rtx x
)
12942 return vfp3_const_double_index (x
) != -1;
12945 /* Recognize immediates which can be used in various Neon and MVE instructions.
12946 Legal immediates are described by the following table (for VMVN variants, the
12947 bitwise inverse of the constant shown is recognized. In either case, VMOV
12948 is output and the correct instruction to use for a given constant is chosen
12949 by the assembler). The constant shown is replicated across all elements of
12950 the destination vector.
12952 insn elems variant constant (binary)
12953 ---- ----- ------- -----------------
12954 vmov i32 0 00000000 00000000 00000000 abcdefgh
12955 vmov i32 1 00000000 00000000 abcdefgh 00000000
12956 vmov i32 2 00000000 abcdefgh 00000000 00000000
12957 vmov i32 3 abcdefgh 00000000 00000000 00000000
12958 vmov i16 4 00000000 abcdefgh
12959 vmov i16 5 abcdefgh 00000000
12960 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12961 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12962 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12963 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12964 vmvn i16 10 00000000 abcdefgh
12965 vmvn i16 11 abcdefgh 00000000
12966 vmov i32 12 00000000 00000000 abcdefgh 11111111
12967 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12968 vmov i32 14 00000000 abcdefgh 11111111 11111111
12969 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12970 vmov i8 16 abcdefgh
12971 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12972 eeeeeeee ffffffff gggggggg hhhhhhhh
12973 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12974 vmov f32 19 00000000 00000000 00000000 00000000
12976 For case 18, B = !b. Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than integers.
12980 For case 19, we will change it to vmov.i32 when assembling.
12982 Variants 0-5 (inclusive) may also be used as immediates for the second
12983 operand of VORR/VBIC instructions.
12985 The INVERSE argument causes the bitwise inverse of the given operand to be
12986 recognized instead (used for recognizing legal immediates for the VAND/VORN
12987 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12988 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12989 output, rather than the real insns vbic/vorr).
12991 INVERSE makes no difference to the recognition of float vectors.
12993 The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
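/* Worked example, using the table above: the V4SI constant with every
   element equal to 0x0000ab00 replicates the byte pattern
   00000000 00000000 abcdefgh 00000000 and is recognized as variant 1
   (vmov.i32), while the constant whose bitwise inverse has that form
   (0xffff54ff in every element) is recognized as variant 7 (vmvn.i32).  */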
12997 simd_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12998 rtx
*modconst
, int *elementwidth
)
13000 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
13002 for (i = 0; i < idx; i += (STRIDE)) \
13007 immtype = (CLASS); \
13008 elsize = (ELSIZE); \
13012 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
13013 unsigned int innersize
;
13014 unsigned char bytes
[16] = {};
13015 int immtype
= -1, matches
;
13016 unsigned int invmask
= inverse
? 0xff : 0;
13017 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
13020 n_elts
= CONST_VECTOR_NUNITS (op
);
13024 gcc_assert (mode
!= VOIDmode
);
13027 innersize
= GET_MODE_UNIT_SIZE (mode
);
13029 /* Only support 128-bit vectors for MVE. */
13030 if (TARGET_HAVE_MVE
13032 || VALID_MVE_PRED_MODE (mode
)
13033 || n_elts
* innersize
!= 16))
13036 if (!TARGET_HAVE_MVE
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_BOOL
)
13039 /* Vectors of float constants. */
13040 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
13042 rtx el0
= CONST_VECTOR_ELT (op
, 0);
13044 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
13047 /* FP16 vectors cannot be represented. */
13048 if (GET_MODE_INNER (mode
) == HFmode
)
13051 /* All elements in the vector must be the same. Note that 0.0 and -0.0
13052 are distinct in this context. */
13053 if (!const_vec_duplicate_p (op
))
13057 *modconst
= CONST_VECTOR_ELT (op
, 0);
13062 if (el0
== CONST0_RTX (GET_MODE (el0
)))
13068 /* The tricks done in the code below apply for little-endian vector layout.
13069 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13070 FIXME: Implement logic for big-endian vectors. */
13071 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
13074 /* Splat vector constant out into a byte vector. */
13075 for (i
= 0; i
< n_elts
; i
++)
13077 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
13078 unsigned HOST_WIDE_INT elpart
;
13080 gcc_assert (CONST_INT_P (el
));
13081 elpart
= INTVAL (el
);
13083 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
13085 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
13086 elpart
>>= BITS_PER_UNIT
;
13090 /* Sanity check. */
13091 gcc_assert (idx
== GET_MODE_SIZE (mode
));
13095 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
13096 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
13098 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
13099 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
13101 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
13102 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
13104 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
13105 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
13107 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
13109 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
13111 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
13112 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
13114 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
13115 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
13117 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13118 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
13120 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13121 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
13123 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
13125 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
13127 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
13128 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
13130 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
13131 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
13133 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13134 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
13136 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
13137 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
13139 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
13141 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
13142 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
13150 *elementwidth
= elsize
;
13154 unsigned HOST_WIDE_INT imm
= 0;
13156 /* Un-invert bytes of recognized vector, if necessary. */
13158 for (i
= 0; i
< idx
; i
++)
13159 bytes
[i
] ^= invmask
;
13163 /* FIXME: Broken on 32-bit H_W_I hosts. */
13164 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
13166 for (i
= 0; i
< 8; i
++)
13167 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
13168 << (i
* BITS_PER_UNIT
);
13170 *modconst
= GEN_INT (imm
);
13174 unsigned HOST_WIDE_INT imm
= 0;
13176 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
13177 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
13179 *modconst
= GEN_INT (imm
);
13187 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13188 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13189 (or zero for float elements), and a modified constant (whatever should be
   output for a VMOV) in *MODCONST.  The function was renamed from
   "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move"
   because it is used by both Neon and MVE.  */
13194 simd_immediate_valid_for_move (rtx op
, machine_mode mode
,
13195 rtx
*modconst
, int *elementwidth
)
13199 int retval
= simd_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
13205 *modconst
= tmpconst
;
13208 *elementwidth
= tmpwidth
;
13213 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13214 the immediate is valid, write a constant suitable for using as an operand
13215 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13216 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13219 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
13220 rtx
*modconst
, int *elementwidth
)
13224 int retval
= simd_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
13226 if (retval
< 0 || retval
> 5)
13230 *modconst
= tmpconst
;
13233 *elementwidth
= tmpwidth
;
13238 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13239 the immediate is valid, write a constant suitable for using as an operand
13240 to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts, which
   have different immediate limitations.  */
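/* For example, with 16-bit elements (V8HI) the encoded left-shift count may
   be 0..15 while the right-shift count may be 1..16, which is what the
   checks below enforce.  */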
13245 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
13246 rtx
*modconst
, int *elementwidth
,
13249 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
13250 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
13251 unsigned HOST_WIDE_INT last_elt
= 0;
13252 unsigned HOST_WIDE_INT maxshift
;
13254 /* Split vector constant out into a byte vector. */
13255 for (i
= 0; i
< n_elts
; i
++)
13257 rtx el
= CONST_VECTOR_ELT (op
, i
);
13258 unsigned HOST_WIDE_INT elpart
;
13260 if (CONST_INT_P (el
))
13261 elpart
= INTVAL (el
);
13262 else if (CONST_DOUBLE_P (el
))
13265 gcc_unreachable ();
13267 if (i
!= 0 && elpart
!= last_elt
)
13273 /* Shift less than element size. */
13274 maxshift
= innersize
* 8;
13278 /* Left shift immediate value can be from 0 to <size>-1. */
13279 if (last_elt
>= maxshift
)
13284 /* Right shift immediate value can be from 1 to <size>. */
13285 if (last_elt
== 0 || last_elt
> maxshift
)
13290 *elementwidth
= innersize
* 8;
13293 *modconst
= CONST_VECTOR_ELT (op
, 0);
13298 /* Return a string suitable for output of Neon immediate logic operation
13302 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
13303 int inverse
, int quad
)
13305 int width
, is_valid
;
13306 static char templ
[40];
13308 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
13310 gcc_assert (is_valid
!= 0);
13313 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
13315 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
13320 /* Return a string suitable for output of Neon immediate shift operation
13321 (VSHR or VSHL) MNEM. */
13324 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
13325 machine_mode mode
, int quad
,
13328 int width
, is_valid
;
13329 static char templ
[40];
13331 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
13332 gcc_assert (is_valid
!= 0);
13335 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
13337 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
13342 /* Output a sequence of pairwise operations to implement a reduction.
13343 NOTE: We do "too much work" here, because pairwise operations work on two
13344 registers-worth of operands in one go. Unfortunately we can't exploit those
13345 extra calculations to do the full operation in fewer steps, I don't think.
13346 Although all vector elements of the result but the first are ignored, we
13347 actually calculate the same result in each of the elements. An alternative
13348 such as initially loading a vector with zero to use as each of the second
13349 operands would use up an additional register and take an extra instruction,
13350 for no particular gain. */
13353 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
13354 rtx (*reduc
) (rtx
, rtx
, rtx
))
13356 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
13359 for (i
= parts
/ 2; i
>= 1; i
/= 2)
13361 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
13362 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
13367 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13368 loaded into a register using VDUP.
13370 If this is the case, and GENERATE is set, we also generate
13371 instructions to do this and return an RTX to assign to the register. */
13374 neon_vdup_constant (rtx vals
, bool generate
)
13376 machine_mode mode
= GET_MODE (vals
);
13377 machine_mode inner_mode
= GET_MODE_INNER (mode
);
13380 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
13383 if (!const_vec_duplicate_p (vals
, &x
))
13384 /* The elements are not all the same. We could handle repeating
13385 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13386 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13393 /* We can load this constant by using VDUP and a constant in a
13394 single ARM register. This will be cheaper than a vector
13397 x
= copy_to_mode_reg (inner_mode
, x
);
13398 return gen_vec_duplicate (mode
, x
);
13401 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13403 mve_bool_vec_to_const (rtx const_vec
)
13405 machine_mode mode
= GET_MODE (const_vec
);
13407 if (!VECTOR_MODE_P (mode
))
13410 unsigned n_elts
= GET_MODE_NUNITS (mode
);
13411 unsigned el_prec
= GET_MODE_PRECISION (GET_MODE_INNER (mode
));
13412 unsigned shift_c
= 16 / n_elts
;
13416 for (i
= 0; i
< n_elts
; i
++)
13418 rtx el
= CONST_VECTOR_ELT (const_vec
, i
);
13419 unsigned HOST_WIDE_INT elpart
;
13421 gcc_assert (CONST_INT_P (el
));
13422 elpart
= INTVAL (el
) & ((1U << el_prec
) - 1);
13424 unsigned index
= BYTES_BIG_ENDIAN
? n_elts
- i
- 1 : i
;
13426 hi_val
|= elpart
<< (index
* shift_c
);
13428 /* We are using mov immediate to encode this constant which writes 32-bits
13429 so we need to make sure the top 16-bits are all 0, otherwise we can't
13430 guarantee we can actually write this immediate. */
13431 return gen_int_mode (hi_val
, SImode
);
13434 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13438 If this is the case, and GENERATE is set, we also generate code to do
13439 this and return an RTX to copy into the register. */
13442 neon_make_constant (rtx vals
, bool generate
)
13444 machine_mode mode
= GET_MODE (vals
);
13446 rtx const_vec
= NULL_RTX
;
13447 int n_elts
= GET_MODE_NUNITS (mode
);
13451 if (GET_CODE (vals
) == CONST_VECTOR
)
13453 else if (GET_CODE (vals
) == PARALLEL
)
13455 /* A CONST_VECTOR must contain only CONST_INTs and
13456 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13457 Only store valid constants in a CONST_VECTOR. */
13458 for (i
= 0; i
< n_elts
; ++i
)
13460 rtx x
= XVECEXP (vals
, 0, i
);
13461 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
13464 if (n_const
== n_elts
)
13465 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
13468 gcc_unreachable ();
13470 if (const_vec
!= NULL
13471 && simd_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
13472 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13474 else if (TARGET_HAVE_MVE
&& VALID_MVE_PRED_MODE(mode
))
13475 return mve_bool_vec_to_const (const_vec
);
13476 else if ((target
= neon_vdup_constant (vals
, generate
)) != NULL_RTX
)
13477 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13478 pipeline cycle; creating the constant takes one or two ARM
13479 pipeline cycles. */
13481 else if (const_vec
!= NULL_RTX
)
13482 /* Load from constant pool. On Cortex-A8 this takes two cycles
13483 (for either double or quad vectors). We cannot take advantage
13484 of single-cycle VLD1 because we need a PC-relative addressing
13486 return arm_disable_literal_pool
? NULL_RTX
: const_vec
;
13488 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13489 We cannot construct an initializer. */
13493 /* Initialize vector TARGET to VALS. */
13496 neon_expand_vector_init (rtx target
, rtx vals
)
13498 machine_mode mode
= GET_MODE (target
);
13499 machine_mode inner_mode
= GET_MODE_INNER (mode
);
13500 int n_elts
= GET_MODE_NUNITS (mode
);
13501 int n_var
= 0, one_var
= -1;
13502 bool all_same
= true;
13506 for (i
= 0; i
< n_elts
; ++i
)
13508 x
= XVECEXP (vals
, 0, i
);
13509 if (!CONSTANT_P (x
))
13510 ++n_var
, one_var
= i
;
13512 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
13518 rtx constant
= neon_make_constant (vals
);
13519 if (constant
!= NULL_RTX
)
13521 emit_move_insn (target
, constant
);
13526 /* Splat a single non-constant element if we can. */
13527 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
13529 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
13530 emit_insn (gen_rtx_SET (target
, gen_vec_duplicate (mode
, x
)));
13534 /* One field is non-constant. Load constant then overwrite varying
13535 field. This is more efficient than using the stack. */
13538 rtx copy
= copy_rtx (vals
);
13539 rtx merge_mask
= GEN_INT (1 << one_var
);
13541 /* Load constant part of vector, substitute neighboring value for
13542 varying element. */
13543 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
13544 neon_expand_vector_init (target
, copy
);
13546 /* Insert variable. */
13547 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
13548 emit_insn (gen_vec_set_internal (mode
, target
, x
, merge_mask
, target
));
13552 /* Construct the vector in memory one field at a time
13553 and load the whole vector. */
13554 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
13555 for (i
= 0; i
< n_elts
; i
++)
13556 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
13557 i
* GET_MODE_SIZE (inner_mode
)),
13558 XVECEXP (vals
, 0, i
));
13559 emit_move_insn (target
, mem
);
13562 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13563 ERR if it doesn't. EXP indicates the source location, which includes the
13564 inlining history for intrinsics. */
13567 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
13568 const_tree exp
, const char *desc
)
13570 HOST_WIDE_INT lane
;
13572 gcc_assert (CONST_INT_P (operand
));
13574 lane
= INTVAL (operand
);
13576 if (lane
< low
|| lane
>= high
)
13579 error_at (EXPR_LOCATION (exp
),
13580 "%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
13582 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
13586 /* Bounds-check lanes. */
13589 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
13592 bounds_check (operand
, low
, high
, exp
, "lane");
13595 /* Bounds-check constants. */
13598 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
13600 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
13604 neon_element_bits (machine_mode mode
)
13606 return GET_MODE_UNIT_BITSIZE (mode
);
13610 /* Predicates for `match_operand' and `match_operator'. */
13612 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13613 WB level is 2 if full writeback address modes are allowed, 1
13614 if limited writeback address modes (POST_INC and PRE_DEC) are
13615 allowed and 0 if no writeback at all is supported. */
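/* For example: with WB_LEVEL == 2 an address such as [r2], #4 (POST_INC),
   [r2, #-4]! (PRE_DEC), [r2, #4]! (PRE_INC) or [r2], #-4 (POST_DEC) is
   accepted; with WB_LEVEL == 1 only the first two forms are; with
   WB_LEVEL == 0 none of the auto-modify forms are.  */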
13618 arm_coproc_mem_operand_wb (rtx op
, int wb_level
)
13620 gcc_assert (wb_level
== 0 || wb_level
== 1 || wb_level
== 2);
13623 /* Reject eliminable registers. */
13624 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
13625 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13626 || reg_mentioned_p (arg_pointer_rtx
, op
)
13627 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13628 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13629 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13630 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13633 /* Constants are converted into offsets from labels. */
13637 ind
= XEXP (op
, 0);
13639 if (reload_completed
13640 && (LABEL_REF_P (ind
)
13641 || (GET_CODE (ind
) == CONST
13642 && GET_CODE (XEXP (ind
, 0)) == PLUS
13643 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13644 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13647 /* Match: (mem (reg)). */
13649 return arm_address_register_rtx_p (ind
, 0);
  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are acceptable
     with at least restricted writeback (subject to verification by
     arm_address_register_rtx_p); PRE_INC and POST_DEC additionally require
     full writeback.  */
13657 && (GET_CODE (ind
) == POST_INC
13658 || GET_CODE (ind
) == PRE_DEC
13660 && (GET_CODE (ind
) == PRE_INC
13661 || GET_CODE (ind
) == POST_DEC
))))
13662 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13665 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
13666 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
13667 && GET_CODE (XEXP (ind
, 1)) == PLUS
13668 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
13669 ind
= XEXP (ind
, 1);
13675 The encoded immediate for 16-bit modes is multiplied by 2,
13676 while the encoded immediate for 32-bit and 64-bit modes is
13677 multiplied by 4. */
13678 int factor
= MIN (GET_MODE_SIZE (GET_MODE (op
)), 4);
13679 if (GET_CODE (ind
) == PLUS
13680 && REG_P (XEXP (ind
, 0))
13681 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13682 && CONST_INT_P (XEXP (ind
, 1))
13683 && IN_RANGE (INTVAL (XEXP (ind
, 1)), -255 * factor
, 255 * factor
)
13684 && (INTVAL (XEXP (ind
, 1)) & (factor
- 1)) == 0)
13690 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13691 WB is true if full writeback address modes are allowed and is false
13692 if limited writeback address modes (POST_INC and PRE_DEC) are
13695 int arm_coproc_mem_operand (rtx op
, bool wb
)
13697 return arm_coproc_mem_operand_wb (op
, wb
? 2 : 1);
13700 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13701 context in which no writeback address modes are allowed. */
13704 arm_coproc_mem_operand_no_writeback (rtx op
)
13706 return arm_coproc_mem_operand_wb (op
, 0);
13709 /* In non-STRICT mode, return the register number; in STRICT mode return
13710 the hard regno or the replacement if it won't be a mem. Otherwise, return
13711 the original pseudo number. */
13713 arm_effective_regno (rtx op
, bool strict
)
13715 gcc_assert (REG_P (op
));
13716 if (!strict
|| REGNO (op
) < FIRST_PSEUDO_REGISTER
13717 || !reg_renumber
|| reg_renumber
[REGNO (op
)] < 0)
13719 return reg_renumber
[REGNO (op
)];
13722 /* This function returns TRUE on matching mode and op.
13723 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
   2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13).  */
13726 mve_vector_mem_operand (machine_mode mode
, rtx op
, bool strict
)
13728 enum rtx_code code
;
13731 /* Match: (mem (reg)). */
13734 reg_no
= arm_effective_regno (op
, strict
);
13735 return (((mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
)
13736 ? reg_no
<= LAST_LO_REGNUM
13737 : reg_no
< LAST_ARM_REGNUM
)
13738 || (!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
));
13740 code
= GET_CODE (op
);
13742 if ((code
== POST_INC
13745 || code
== POST_DEC
)
13746 && REG_P (XEXP (op
, 0)))
13748 reg_no
= arm_effective_regno (XEXP (op
, 0), strict
);
13749 return (((mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
)
13750 ? reg_no
<= LAST_LO_REGNUM
13751 :(reg_no
< LAST_ARM_REGNUM
&& reg_no
!= SP_REGNUM
))
13752 || (!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
));
13754 else if (((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
13755 && GET_CODE (XEXP (op
, 1)) == PLUS
13756 && XEXP (op
, 0) == XEXP (XEXP (op
, 1), 0)
13757 && REG_P (XEXP (op
, 0))
13758 && GET_CODE (XEXP (XEXP (op
, 1), 1)) == CONST_INT
)
13759 /* Make sure to only accept PLUS after reload_completed, otherwise
13760 this will interfere with auto_inc's pattern detection. */
13761 || (reload_completed
&& code
== PLUS
&& REG_P (XEXP (op
, 0))
13762 && GET_CODE (XEXP (op
, 1)) == CONST_INT
))
13764 reg_no
= arm_effective_regno (XEXP (op
, 0), strict
);
13766 val
= INTVAL (XEXP (op
, 1));
13768 val
= INTVAL (XEXP(XEXP (op
, 1), 1));
13775 if (abs (val
) > 127)
13782 if (val
% 2 != 0 || abs (val
) > 254)
13787 if (val
% 4 != 0 || abs (val
) > 508)
13793 return ((!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
)
13794 || (MVE_STN_LDW_MODE (mode
)
13795 ? reg_no
<= LAST_LO_REGNUM
13796 : (reg_no
< LAST_ARM_REGNUM
13797 && (code
== PLUS
|| reg_no
!= SP_REGNUM
))));
13802 /* Return TRUE if OP is a memory operand which we can load or store a vector
13803 to/from. TYPE is one of the following values:
   0 - Vector load/store (vldr)
13805 1 - Core registers (ldm)
13806 2 - Element/structure loads (vld1)
13809 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
13813 /* Reject eliminable registers. */
13814 if (strict
&& ! (reload_in_progress
|| reload_completed
)
13815 && (reg_mentioned_p (frame_pointer_rtx
, op
)
13816 || reg_mentioned_p (arg_pointer_rtx
, op
)
13817 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13818 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13819 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13820 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13823 /* Constants are converted into offsets from labels. */
13827 ind
= XEXP (op
, 0);
13829 if (reload_completed
13830 && (LABEL_REF_P (ind
)
13831 || (GET_CODE (ind
) == CONST
13832 && GET_CODE (XEXP (ind
, 0)) == PLUS
13833 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13834 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13837 /* Match: (mem (reg)). */
13839 return arm_address_register_rtx_p (ind
, 0);
13841 /* Allow post-increment with Neon registers. */
13842 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
13843 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
13844 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13846 /* Allow post-increment by register for VLDn */
13847 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
13848 && GET_CODE (XEXP (ind
, 1)) == PLUS
13849 && REG_P (XEXP (XEXP (ind
, 1), 1))
13850 && REG_P (XEXP (ind
, 0))
13851 && rtx_equal_p (XEXP (ind
, 0), XEXP (XEXP (ind
, 1), 0)))
13858 && GET_CODE (ind
) == PLUS
13859 && REG_P (XEXP (ind
, 0))
13860 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13861 && CONST_INT_P (XEXP (ind
, 1))
13862 && INTVAL (XEXP (ind
, 1)) > -1024
13863 /* For quad modes, we restrict the constant offset to be slightly less
13864 than what the instruction format permits. We have no such constraint
13865 on double mode offsets. (This must match arm_legitimate_index_p.) */
13866 && (INTVAL (XEXP (ind
, 1))
13867 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
13868 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13874 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13877 mve_struct_mem_operand (rtx op
)
13879 rtx ind
= XEXP (op
, 0);
13881 /* Match: (mem (reg)). */
13883 return arm_address_register_rtx_p (ind
, 0);
13885 /* Allow only post-increment by the mode size. */
13886 if (GET_CODE (ind
) == POST_INC
)
13887 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13892 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13895 neon_struct_mem_operand (rtx op
)
13899 /* Reject eliminable registers. */
13900 if (! (reload_in_progress
|| reload_completed
)
13901 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13902 || reg_mentioned_p (arg_pointer_rtx
, op
)
13903 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13904 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13905 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13906 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13909 /* Constants are converted into offsets from labels. */
13913 ind
= XEXP (op
, 0);
13915 if (reload_completed
13916 && (LABEL_REF_P (ind
)
13917 || (GET_CODE (ind
) == CONST
13918 && GET_CODE (XEXP (ind
, 0)) == PLUS
13919 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13920 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13923 /* Match: (mem (reg)). */
13925 return arm_address_register_rtx_p (ind
, 0);
13927 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13928 if (GET_CODE (ind
) == POST_INC
13929 || GET_CODE (ind
) == PRE_DEC
)
13930 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13935 /* Prepares the operands for the VCMLA by lane instruction such that the right
13936 register number is selected. This instruction is special in that it always
13937 requires a D register, however there is a choice to be made between Dn[0],
13938 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13940 The VCMLA by lane function always selects two values. For instance given D0
13941 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13942 used by the instruction. However given V4SF then index 0 and 1 are valid as
13943 D0[0] or D1[0] are both valid.
13945 This function centralizes that information based on OPERANDS, OPERANDS[3]
13946 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13947 updated to contain the right index. */
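/* Illustrative case (assuming the usual S/D register numbering): for a
   V4SF operand living in q1 (d2/d3) with lane index 1, GET_MODE_NUNITS / 4
   is 1, so the code below picks the odd D register (d3) and rewrites the
   lane to 0; lane index 0 keeps d2 and stays 0.  */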
13950 neon_vcmla_lane_prepare_operands (rtx
*operands
)
13952 int lane
= INTVAL (operands
[4]);
13953 machine_mode constmode
= SImode
;
13954 machine_mode mode
= GET_MODE (operands
[3]);
13955 int regno
= REGNO (operands
[3]);
13956 regno
= ((regno
- FIRST_VFP_REGNUM
) >> 1);
13957 if (lane
> 0 && lane
>= GET_MODE_NUNITS (mode
) / 4)
13959 operands
[3] = gen_int_mode (regno
+ 1, constmode
);
13961 = gen_int_mode (lane
- GET_MODE_NUNITS (mode
) / 4, constmode
);
13965 operands
[3] = gen_int_mode (regno
, constmode
);
13966 operands
[4] = gen_int_mode (lane
, constmode
);
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || VIRTUAL_REGISTER_P (x));
}
13981 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13982 coprocessor registers. Otherwise return NO_REGS. */
13985 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13987 if (mode
== HFmode
)
13989 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
13990 return GENERAL_REGS
;
13991 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13993 return GENERAL_REGS
;
13996 /* The neon move patterns handle all legitimate vector and struct
13999 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
14000 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
14001 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
14002 || VALID_NEON_STRUCT_MODE (mode
)))
14005 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
14008 return GENERAL_REGS
;
/* Values which must be returned in the most-significant end of the return
   register.  */
static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
14024 /* Return TRUE if X references a SYMBOL_REF. */
14026 symbol_mentioned_p (rtx x
)
14031 if (SYMBOL_REF_P (x
))
14034 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
14035 are constant offsets, not symbols. */
14036 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
14039 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
14041 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
14047 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
14048 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
14051 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
14058 /* Return TRUE if X references a LABEL_REF. */
14060 label_mentioned_p (rtx x
)
14065 if (LABEL_REF_P (x
))
14068 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14069 instruction, but they are constant offsets, not symbols. */
14070 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
14073 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
14074 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
14080 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
14081 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
14084 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
14092 tls_mentioned_p (rtx x
)
14094 switch (GET_CODE (x
))
14097 return tls_mentioned_p (XEXP (x
, 0));
14100 if (XINT (x
, 1) == UNSPEC_TLS
)
14103 /* Fall through. */
14109 /* Must not copy any rtx that uses a pc-relative address.
14110 Also, disallow copying of load-exclusive instructions that
14111 may appear after splitting of compare-and-swap-style operations
14112 so as to prevent those loops from being transformed away from their
14113 canonical forms (see PR 69904). */
14116 arm_cannot_copy_insn_p (rtx_insn
*insn
)
14118 /* The tls call insn cannot be copied, as it is paired with a data
14120 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
14123 subrtx_iterator::array_type array
;
14124 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
14126 const_rtx x
= *iter
;
14127 if (GET_CODE (x
) == UNSPEC
14128 && (XINT (x
, 1) == UNSPEC_PIC_BASE
14129 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
14133 rtx set
= single_set (insn
);
14136 rtx src
= SET_SRC (set
);
14137 if (GET_CODE (src
) == ZERO_EXTEND
)
14138 src
= XEXP (src
, 0);
14140 /* Catch the load-exclusive and load-acquire operations. */
14141 if (GET_CODE (src
) == UNSPEC_VOLATILE
14142 && (XINT (src
, 1) == VUNSPEC_LL
14143 || XINT (src
, 1) == VUNSPEC_LAX
))
14150 minmax_code (rtx x
)
14152 enum rtx_code code
= GET_CODE (x
);
14165 gcc_unreachable ();
14169 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14172 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
14173 int *mask
, bool *signed_sat
)
14175 /* The high bound must be a power of two minus one. */
14176 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
14180 /* The low bound is either zero (for usat) or one less than the
14181 negation of the high bound (for ssat). */
14182 if (INTVAL (lo_bound
) == 0)
14187 *signed_sat
= false;
14192 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
14197 *signed_sat
= true;
14205 /* Return 1 if memory locations are adjacent. */
14207 adjacent_mem_locations (rtx a
, rtx b
)
14209 /* We don't guarantee to preserve the order of these memory refs. */
14210 if (volatile_refs_p (a
) || volatile_refs_p (b
))
14213 if ((REG_P (XEXP (a
, 0))
14214 || (GET_CODE (XEXP (a
, 0)) == PLUS
14215 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
14216 && (REG_P (XEXP (b
, 0))
14217 || (GET_CODE (XEXP (b
, 0)) == PLUS
14218 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
14220 HOST_WIDE_INT val0
= 0, val1
= 0;
14224 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
14226 reg0
= XEXP (XEXP (a
, 0), 0);
14227 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
14230 reg0
= XEXP (a
, 0);
14232 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
14234 reg1
= XEXP (XEXP (b
, 0), 0);
14235 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
14238 reg1
= XEXP (b
, 0);
14240 /* Don't accept any offset that will require multiple
14241 instructions to handle, since this would cause the
14242 arith_adjacentmem pattern to output an overlong sequence. */
14243 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
14246 /* Don't allow an eliminable register: register elimination can make
14247 the offset too large. */
14248 if (arm_eliminable_register (reg0
))
14251 val_diff
= val1
- val0
;
14255 /* If the target has load delay slots, then there's no benefit
14256 to using an ldm instruction unless the offset is zero and
14257 we are optimizing for size. */
14258 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
14259 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
14260 && (val_diff
== 4 || val_diff
== -4));
14263 return ((REGNO (reg0
) == REGNO (reg1
))
14264 && (val_diff
== 4 || val_diff
== -4));
14270 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14271 for load operations, false for store operations. CONSECUTIVE is true
14272 if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if the value is to be loaded into PC.
14274 The pattern we are trying to match for load is:
14275 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14276 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14279 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14282 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14283 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14284 3. If consecutive is TRUE, then for kth register being loaded,
14285 REGNO (R_dk) = REGNO (R_d0) + k.
14286 The pattern for store is similar. */
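/* Concrete instance (register numbers arbitrary): a two-register load
   multiple with write-back, "ldmia r0!, {r4, r5}", is matched as

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
		(set (reg:SI r4) (mem:SI (reg:SI r0)))
		(set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0)
						  (const_int 4))))])

   satisfying conditions 1 and 2 above: offset 0 for the first load and
   strictly increasing destination register numbers.  */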
14288 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
14289 bool consecutive
, bool return_pc
)
14291 HOST_WIDE_INT count
= XVECLEN (op
, 0);
14292 rtx reg
, mem
, addr
;
14294 unsigned first_regno
;
14295 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
14297 bool addr_reg_in_reglist
= false;
14298 bool update
= false;
14303 /* If not in SImode, then registers must be consecutive
14304 (e.g., VLDM instructions for DFmode). */
14305 gcc_assert ((mode
== SImode
) || consecutive
);
14306 /* Setting return_pc for stores is illegal. */
14307 gcc_assert (!return_pc
|| load
);
14309 /* Set up the increments and the regs per val based on the mode. */
14310 reg_increment
= GET_MODE_SIZE (mode
);
14311 regs_per_val
= reg_increment
/ 4;
14312 offset_adj
= return_pc
? 1 : 0;
14315 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
14316 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
14319 /* Check if this is a write-back. */
14320 elt
= XVECEXP (op
, 0, offset_adj
);
14321 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
14327 /* The offset adjustment must be the number of registers being
14328 popped times the size of a single register. */
14329 if (!REG_P (SET_DEST (elt
))
14330 || !REG_P (XEXP (SET_SRC (elt
), 0))
14331 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
14332 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
14333 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
14334 ((count
- 1 - offset_adj
) * reg_increment
))
14338 i
= i
+ offset_adj
;
14339 base
= base
+ offset_adj
;
14340 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14341 success depends on the type: VLDM can do just one reg,
14342 LDM must do at least two. */
14343 if ((count
<= i
) && (mode
== SImode
))
14346 elt
= XVECEXP (op
, 0, i
- 1);
14347 if (GET_CODE (elt
) != SET
)
14352 reg
= SET_DEST (elt
);
14353 mem
= SET_SRC (elt
);
14357 reg
= SET_SRC (elt
);
14358 mem
= SET_DEST (elt
);
14361 if (!REG_P (reg
) || !MEM_P (mem
))
14364 regno
= REGNO (reg
);
14365 first_regno
= regno
;
14366 addr
= XEXP (mem
, 0);
14367 if (GET_CODE (addr
) == PLUS
)
14369 if (!CONST_INT_P (XEXP (addr
, 1)))
14372 offset
= INTVAL (XEXP (addr
, 1));
14373 addr
= XEXP (addr
, 0);
14379 /* Don't allow SP to be loaded unless it is also the base register. It
14380 guarantees that SP is reset correctly when an LDM instruction
14381 is interrupted. Otherwise, we might end up with a corrupt stack. */
14382 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
14385 if (regno
== REGNO (addr
))
14386 addr_reg_in_reglist
= true;
14388 for (; i
< count
; i
++)
14390 elt
= XVECEXP (op
, 0, i
);
14391 if (GET_CODE (elt
) != SET
)
14396 reg
= SET_DEST (elt
);
14397 mem
= SET_SRC (elt
);
14401 reg
= SET_SRC (elt
);
14402 mem
= SET_DEST (elt
);
14406 || GET_MODE (reg
) != mode
14407 || REGNO (reg
) <= regno
14410 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
14411 /* Don't allow SP to be loaded unless it is also the base register. It
14412 guarantees that SP is reset correctly when an LDM instruction
14413 is interrupted. Otherwise, we might end up with a corrupt stack. */
14414 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
14416 || GET_MODE (mem
) != mode
14417 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
14418 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
14419 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
14420 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
14421 offset
+ (i
- base
) * reg_increment
))
14422 && (!REG_P (XEXP (mem
, 0))
14423 || offset
+ (i
- base
) * reg_increment
!= 0)))
14426 regno
= REGNO (reg
);
14427 if (regno
== REGNO (addr
))
14428 addr_reg_in_reglist
= true;
14433 if (update
&& addr_reg_in_reglist
)
14436 /* For Thumb-1, address register is always modified - either by write-back
14437 or by explicit load. If the pattern does not describe an update,
14438 then the address register must be in the list of loaded registers. */
14440 return update
|| addr_reg_in_reglist
;
14446 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14447 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14450 [(set (reg:SI <N>) (const_int 0))
14451 (set (reg:SI <M>) (const_int 0))
14453 (unspec_volatile [(const_int 0)]
14455 (clobber (reg:CC CC_REGNUM))
14458 Any number (including 0) of set expressions is valid, the volatile unspec is
14459 optional. All registers but SP and PC are allowed and registers must be in
14460 strict increasing order.
14462 To be a valid VSCCLRM pattern, OP must have the following form:
14464 [(unspec_volatile [(const_int 0)]
14465 VUNSPEC_VSCCLRM_VPR)
14466 (set (reg:SF <N>) (const_int 0))
14467 (set (reg:SF <M>) (const_int 0))
14471 As with CLRM, any number (including 0) of set expressions is valid, however
14472 the volatile unspec is mandatory here. Any VFP single-precision register is
14473 accepted but all registers must be consecutive and in increasing order. */
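/* For instance (register numbers arbitrary): clearing r1, r2 and APSR with
   CLRM would be represented as

     (parallel [(set (reg:SI 1) (const_int 0))
		(set (reg:SI 2) (const_int 0))
		(unspec_volatile [(const_int 0)] VUNSPEC_CLRM_APSR)
		(clobber (reg:CC CC_REGNUM))])

   which satisfies the rules above: strictly increasing register numbers,
   with neither SP nor PC among them.  */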
14476 clear_operation_p (rtx op, bool vfp)
14479   unsigned last_regno = INVALID_REGNUM;
14480   rtx elt, reg, zero;
14481   int count = XVECLEN (op, 0);
14482   int first_set = vfp ? 1 : 0;
14483   machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14485   for (int i = first_set; i < count; i++)
14487     elt = XVECEXP (op, 0, i);
14489     if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14491       if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14492           || XVECLEN (elt, 0) != 1
14493           || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14500     if (GET_CODE (elt) == CLOBBER)
14503     if (GET_CODE (elt) != SET)
14506     reg = SET_DEST (elt);
14507     zero = SET_SRC (elt);
14510         || GET_MODE (reg) != expected_mode
14511         || zero != CONST0_RTX (SImode))
14514     regno = REGNO (reg);
14518     if (i != first_set && regno != last_regno + 1)
14523     if (regno == SP_REGNUM || regno == PC_REGNUM)
14525     if (i != first_set && regno <= last_regno)
14529     last_regno = regno;
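/* Illustrative sketch only (not used by the compiler): build a PARALLEL of
   the shape documented above for CLRM, clearing r1 and r2, zeroing APSR via
   the volatile unspec and clobbering the condition codes.  The real CLRM
   patterns are produced by the machine description; this merely mirrors the
   form that clear_operation_p accepts, with made-up register numbers.  */
static rtx ATTRIBUTE_UNUSED
clrm_pattern_sketch (void)
{
  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (4));
  XVECEXP (par, 0, 0) = gen_rtx_SET (gen_rtx_REG (SImode, 1), const0_rtx);
  XVECEXP (par, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, 2), const0_rtx);
  XVECEXP (par, 0, 2)
    = gen_rtx_UNSPEC_VOLATILE (SImode, gen_rtvec (1, const0_rtx),
                               VUNSPEC_CLRM_APSR);
  XVECEXP (par, 0, 3)
    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
  return par;
}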
14535 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14536 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14537 instruction. ADD_OFFSET is nonzero if the base address register needs
14538 to be modified with an add instruction before we can use it. */
14541 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14542                                  int nops, HOST_WIDE_INT add_offset)
14544 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14545 if the offset isn't small enough. The reason 2 ldrs are faster
14546 is because these ARMs are able to do more than one cache access
14547 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14548 whilst the ARM8 has a double bandwidth cache. This means that
14549 these cores can do both an instruction fetch and a data fetch in
14550 a single cycle, so the trick of calculating the address into a
14551 scratch register (one of the result regs) and then doing a load
14552 multiple actually becomes slower (and no smaller in code size).
14553 That is the transformation
14555 ldr rd1, [rbase + offset]
14556 ldr rd2, [rbase + offset + 4]
14560 add rd1, rbase, offset
14561 ldmia rd1, {rd1, rd2}
14563 produces worse code -- '3 cycles + any stalls on rd2' instead of
14564 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14565 access per cycle, the first sequence could never complete in less
14566 than 6 cycles, whereas the ldm sequence would only take 5 and
14567 would make better use of sequential accesses if not hitting the
14570 We cheat here and test 'arm_ld_sched' which we currently know to
14571 only be true for the ARM8, ARM9 and StrongARM. If this ever
14572 changes, then the test below needs to be reworked. */
14573   if (nops == 2 && arm_ld_sched && add_offset != 0)
14576 /* XScale has load-store double instructions, but they have stricter
14577 alignment requirements than load-store multiple, so we cannot
14580 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14581 the pipeline until completion.
14589 An ldr instruction takes 1-3 cycles, but does not block the
14598 Best case ldr will always win. However, the more ldr instructions
14599 we issue, the less likely we are to be able to schedule them well.
14600 Using ldr instructions also increases code size.
14602 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14603 for counts of 3 or 4 regs. */
14604   if (nops <= 2 && arm_tune_xscale && !optimize_size)
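/* Illustrative sketch only: how the check above reads for a concrete case.
   Two loads whose shared base first needs an "add rX, rbase, #8" are
   rejected on an ld_sched core (ARM8/ARM9/StrongARM), since two LDRs are at
   least as fast as ADD followed by LDM; the numbers here are invented.  */
static bool ATTRIBUTE_UNUSED
multiple_operation_profitable_example (void)
{
  return multiple_operation_profitable_p (false, 2, 8);
}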
14609 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14610 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14611 an array ORDER which describes the sequence to use when accessing the
14612 offsets that produces an ascending order. In this sequence, each
14613 offset must be larger by exactly 4 than the previous one. ORDER[0]
14614 must have been filled in with the lowest offset by the caller.
14615 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14616 we use to verify that ORDER produces an ascending order of registers.
14617 Return true if it was possible to construct such an order, false if
14621 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14622                       int *unsorted_regs)
14625   for (i = 1; i < nops; i++)
14629     order[i] = order[i - 1];
14630     for (j = 0; j < nops; j++)
14631       if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14633         /* We must find exactly one offset that is higher than the
14634            previous one by 4. */
14635         if (order[i] != order[i - 1])
14639     if (order[i] == order[i - 1])
14641     /* The register numbers must be ascending. */
14642     if (unsorted_regs != NULL
14643         && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
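/* Worked example, illustrative only: offsets {8, 4, 0, 12} with ORDER[0]
   pre-set to 2 (the index of the lowest offset, as the callers below do)
   are sorted into ORDER = {2, 1, 0, 3}, i.e. accesses at 0, 4, 8 and 12.
   Passing NULL skips the register-ordering check.  */
static bool ATTRIBUTE_UNUSED
compute_offset_order_example (void)
{
  HOST_WIDE_INT offsets[4] = { 8, 4, 0, 12 };
  int order[4] = { 2, 0, 0, 0 };
  return compute_offset_order (4, offsets, order, NULL);
}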
14649 /* Used to determine in a peephole whether a sequence of load
14650 instructions can be changed into a load-multiple instruction.
14651 NOPS is the number of separate load instructions we are examining. The
14652 first NOPS entries in OPERANDS are the destination registers, the
14653 next NOPS entries are memory operands. If this function is
14654 successful, *BASE is set to the common base register of the memory
14655 accesses; *LOAD_OFFSET is set to the first memory location's offset
14656 from that base register.
14657 REGS is an array filled in with the destination register numbers.
14658 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
14659 insn numbers to an ascending order of stores. If CHECK_REGS is true,
14660 the sequence of registers in REGS matches the loads from ascending memory
14661 locations, and the function verifies that the register numbers are
14662 themselves ascending. If CHECK_REGS is false, the register numbers
14663 are stored in the order they are found in the operands. */
14665 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14666                         int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14668   int unsorted_regs[MAX_LDM_STM_OPS];
14669   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14670   int order[MAX_LDM_STM_OPS];
14674   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14675      easily extended if required. */
14676   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14678   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14680   /* Loop over the operands and check that the memory references are
14681      suitable (i.e. immediate offsets from the same base register). At
14682      the same time, extract the target register, and the memory
14684   for (i = 0; i < nops; i++)
14689     /* Convert a subreg of a mem into the mem itself. */
14690     if (GET_CODE (operands[nops + i]) == SUBREG)
14691       operands[nops + i] = alter_subreg (operands + (nops + i), true);
14693     gcc_assert (MEM_P (operands[nops + i]));
14695     /* Don't reorder volatile memory references; it doesn't seem worth
14696        looking for the case where the order is ok anyway. */
14697     if (MEM_VOLATILE_P (operands[nops + i]))
14700     offset = const0_rtx;
14702     if ((REG_P (reg = XEXP (operands[nops + i], 0))
14704          && REG_P (reg = SUBREG_REG (reg))))
14705         || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14706             && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14708                 && REG_P (reg = SUBREG_REG (reg))))
14709             && (CONST_INT_P (offset
14710                              = XEXP (XEXP (operands[nops + i], 0), 1)))))
14714       base_reg = REGNO (reg);
14715       if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14718       else if (base_reg != (int) REGNO (reg))
14719         /* Not addressed from the same base register. */
14722       unsorted_regs[i] = (REG_P (operands[i])
14723                           ? REGNO (operands[i])
14724                           : REGNO (SUBREG_REG (operands[i])));
14726       /* If it isn't an integer register, or if it overwrites the
14727          base register but isn't the last insn in the list, then
14728          we can't do this. */
14729       if (unsorted_regs[i] < 0
14730           || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14731           || unsorted_regs[i] > 14
14732           || (i != nops - 1 && unsorted_regs[i] == base_reg))
14735       /* Don't allow SP to be loaded unless it is also the base
14736          register. It guarantees that SP is reset correctly when
14737          an LDM instruction is interrupted. Otherwise, we might
14738          end up with a corrupt stack. */
14739       if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14742       unsorted_offsets[i] = INTVAL (offset);
14743       if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14747       /* Not a suitable memory address. */
14751   /* All the useful information has now been extracted from the
14752      operands into unsorted_regs and unsorted_offsets; additionally,
14753      order[0] has been set to the lowest offset in the list. Sort
14754      the offsets into order, verifying that they are adjacent, and
14755      check that the register numbers are ascending. */
14756   if (!compute_offset_order (nops, unsorted_offsets, order,
14757                              check_regs ? unsorted_regs : NULL))
14761   memcpy (saved_order, order, sizeof order);
14767   for (i = 0; i < nops; i++)
14768     regs[i] = unsorted_regs[check_regs ? order[i] : i];
14770   *load_offset = unsorted_offsets[order[0]];
14773   if (unsorted_offsets[order[0]] == 0)
14774     ldm_case = 1; /* ldmia */
14775   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14776     ldm_case = 2; /* ldmib */
14777   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14778     ldm_case = 3; /* ldmda */
14779   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14780     ldm_case = 4; /* ldmdb */
14781   else if (const_ok_for_arm (unsorted_offsets[order[0]])
14782            || const_ok_for_arm (-unsorted_offsets[order[0]]))
14787   if (!multiple_operation_profitable_p (false, nops,
14789                                         ? unsorted_offsets[order[0]] : 0))
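/* Illustrative sketch only (register numbers and operand layout invented for
   the example): the peephole passes the NOPS destination registers first and
   the NOPS memory operands second.  Loading r1 from [rbase] and r2 from
   [rbase + 4] is normally recognised as ldm_case 1 (ldmia), subject to the
   profitability check above; RBASE must be a register rtx.  */
static bool ATTRIBUTE_UNUSED
load_multiple_sequence_example (rtx rbase)
{
  rtx operands[4];
  int regs[MAX_LDM_STM_OPS];
  int base_regno;
  HOST_WIDE_INT load_offset;

  operands[0] = gen_rtx_REG (SImode, 1);
  operands[1] = gen_rtx_REG (SImode, 2);
  operands[2] = gen_rtx_MEM (SImode, rbase);
  operands[3] = gen_rtx_MEM (SImode, plus_constant (Pmode, rbase, 4));

  return load_multiple_sequence (operands, 2, regs, NULL,
                                 &base_regno, &load_offset, true) != 0;
}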
14795 /* Used to determine in a peephole whether a sequence of store instructions can
14796 be changed into a store-multiple instruction.
14797 NOPS is the number of separate store instructions we are examining.
14798 NOPS_TOTAL is the total number of instructions recognized by the peephole
14800 The first NOPS entries in OPERANDS are the source registers, the next
14801 NOPS entries are memory operands. If this function is successful, *BASE is
14802 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14803 to the first memory location's offset from that base register. REGS is an
14804 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14805 likewise filled with the corresponding rtx's.
14806 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
14807 numbers to an ascending order of stores.
14808 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14809 from ascending memory locations, and the function verifies that the register
14810 numbers are themselves ascending. If CHECK_REGS is false, the register
14811 numbers are stored in the order they are found in the operands. */
14813 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
14814 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
14815 HOST_WIDE_INT
*load_offset
, bool check_regs
)
14817 int unsorted_regs
[MAX_LDM_STM_OPS
];
14818 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
14819 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
14820 int order
[MAX_LDM_STM_OPS
];
14822 rtx base_reg_rtx
= NULL
;
14825 /* Write back of base register is currently only supported for Thumb 1. */
14826 int base_writeback
= TARGET_THUMB1
;
14828 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14829 easily extended if required. */
14830 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
14832 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
14834 /* Loop over the operands and check that the memory references are
14835 suitable (i.e. immediate offsets from the same base register). At
14836 the same time, extract the target register, and the memory
14838 for (i
= 0; i
< nops
; i
++)
14843 /* Convert a subreg of a mem into the mem itself. */
14844 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
14845 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
14847 gcc_assert (MEM_P (operands
[nops
+ i
]));
14849 /* Don't reorder volatile memory references; it doesn't seem worth
14850 looking for the case where the order is ok anyway. */
14851 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
14854 offset
= const0_rtx
;
14856 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
14858 && REG_P (reg
= SUBREG_REG (reg
))))
14859 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
14860 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
14862 && REG_P (reg
= SUBREG_REG (reg
))))
14863 && (CONST_INT_P (offset
14864 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
14866 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
14867 ? operands
[i
] : SUBREG_REG (operands
[i
]));
14868 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
14872 base_reg
= REGNO (reg
);
14873 base_reg_rtx
= reg
;
14874 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
14877 else if (base_reg
!= (int) REGNO (reg
))
14878 /* Not addressed from the same base register. */
14881 /* If it isn't an integer register, then we can't do this. */
14882 if (unsorted_regs
[i
] < 0
14883 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
14884 /* The effects are unpredictable if the base register is
14885 both updated and stored. */
14886 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
14887 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
14888 || unsorted_regs
[i
] > 14)
14891 unsorted_offsets
[i
] = INTVAL (offset
);
14892 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
14896 /* Not a suitable memory address. */
14900 /* All the useful information has now been extracted from the
14901 operands into unsorted_regs and unsorted_offsets; additionally,
14902 order[0] has been set to the lowest offset in the list. Sort
14903 the offsets into order, verifying that they are adjacent, and
14904 check that the register numbers are ascending. */
14905 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
14906 check_regs
? unsorted_regs
: NULL
))
14910 memcpy (saved_order
, order
, sizeof order
);
14916 for (i
= 0; i
< nops
; i
++)
14918 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
14920 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
14923 *load_offset
= unsorted_offsets
[order
[0]];
14927 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
14930 if (unsorted_offsets
[order
[0]] == 0)
14931 stm_case
= 1; /* stmia */
14932 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
14933 stm_case
= 2; /* stmib */
14934 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
14935 stm_case
= 3; /* stmda */
14936 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
14937 stm_case
= 4; /* stmdb */
14941 if (!multiple_operation_profitable_p (false, nops
, 0))
14947 /* Routines for use in generating RTL. */
14949 /* Generate a load-multiple instruction. COUNT is the number of loads in
14950 the instruction; REGS and MEMS are arrays containing the operands.
14951 BASEREG is the base register to be used in addressing the memory operands.
14952 WBACK_OFFSET is nonzero if the instruction should update the base
14956 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14957                          HOST_WIDE_INT wback_offset)
14962   if (!multiple_operation_profitable_p (false, count, 0))
14968       for (i = 0; i < count; i++)
14969         emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14971       if (wback_offset != 0)
14972         emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14974       seq = get_insns ();
14980   result = gen_rtx_PARALLEL (VOIDmode,
14981                              rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14982   if (wback_offset != 0)
14984       XVECEXP (result, 0, 0)
14985         = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14990   for (j = 0; i < count; i++, j++)
14991     XVECEXP (result, 0, i)
14992       = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14997 /* Generate a store-multiple instruction. COUNT is the number of stores in
14998 the instruction; REGS and MEMS are arrays containing the operands.
14999 BASEREG is the base register to be used in addressing the memory operands.
15000 WBACK_OFFSET is nonzero if the instruction should update the base
15004 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
15005 HOST_WIDE_INT wback_offset
)
15010 if (GET_CODE (basereg
) == PLUS
)
15011 basereg
= XEXP (basereg
, 0);
15013 if (!multiple_operation_profitable_p (false, count
, 0))
15019 for (i
= 0; i
< count
; i
++)
15020 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
15022 if (wback_offset
!= 0)
15023 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
15025 seq
= get_insns ();
15031 result
= gen_rtx_PARALLEL (VOIDmode
,
15032 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
15033 if (wback_offset
!= 0)
15035 XVECEXP (result
, 0, 0)
15036 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
15041 for (j
= 0; i
< count
; i
++, j
++)
15042 XVECEXP (result
, 0, i
)
15043 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
15048 /* Generate either a load-multiple or a store-multiple instruction. This
15049 function can be used in situations where we can start with a single MEM
15050 rtx and adjust its address upwards.
15051 COUNT is the number of operations in the instruction, not counting a
15052 possible update of the base register. REGS is an array containing the
15054 BASEREG is the base register to be used in addressing the memory operands,
15055 which are constructed from BASEMEM.
15056 WRITE_BACK specifies whether the generated instruction should include an
15057 update of the base register.
15058 OFFSETP is used to pass an offset to and from this function; this offset
15059 is not used when constructing the address (instead BASEMEM should have an
15060 appropriate offset in its address), it is used only for setting
15061 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
15064 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
15065                      bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
15067   rtx mems[MAX_LDM_STM_OPS];
15068   HOST_WIDE_INT offset = *offsetp;
15071   gcc_assert (count <= MAX_LDM_STM_OPS);
15073   if (GET_CODE (basereg) == PLUS)
15074     basereg = XEXP (basereg, 0);
15076   for (i = 0; i < count; i++)
15078       rtx addr = plus_constant (Pmode, basereg, i * 4);
15079       mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
15087     return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15088                                     write_back ? 4 * count : 0);
15090     return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15091                                      write_back ? 4 * count : 0);
15095 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15096                        rtx basemem, HOST_WIDE_INT *offsetp)
15098   return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15103 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15104                         rtx basemem, HOST_WIDE_INT *offsetp)
15106   return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
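/* Illustrative sketch only: emit a two-word load-multiple through the
   wrapper above.  BASEMEM describes the block being read; the register
   numbers and the use of write-back are invented for the example.  */
static void ATTRIBUTE_UNUSED
emit_ldm_example (rtx basemem)
{
  int regnos[2] = { 4, 5 };
  HOST_WIDE_INT offset = 0;
  rtx base = copy_addr_to_reg (XEXP (basemem, 0));

  /* Load r4 from [base] and r5 from [base + 4], then write the
     incremented address back to the base register.  */
  emit_insn (arm_gen_load_multiple (regnos, 2, base, TRUE, basemem, &offset));
}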
15110 /* Called from a peephole2 expander to turn a sequence of loads into an
15111 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15112 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15113 is true if we can reorder the registers because they are used commutatively
15115 Returns true iff we could generate a new instruction. */
15118 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
15120 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
15121 rtx mems
[MAX_LDM_STM_OPS
];
15122 int i
, j
, base_reg
;
15124 HOST_WIDE_INT offset
;
15125 int write_back
= FALSE
;
15129 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
15130 &base_reg
, &offset
, !sort_regs
);
15136 for (i
= 0; i
< nops
- 1; i
++)
15137 for (j
= i
+ 1; j
< nops
; j
++)
15138 if (regs
[i
] > regs
[j
])
15144 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
15148 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
15150 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15152 for (i
= 0; i
< nops
; i
++)
15153 if (base_reg
== regs
[i
])
15154 write_back
= false;
15156 /* Ensure the base is dead if it is updated. */
15157 if (write_back
&& !peep2_reg_dead_p (nops
, base_reg_rtx
))
15163 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
15164 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
15166 base_reg_rtx
= newbase
;
15169 for (i
= 0; i
< nops
; i
++)
15171 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
15172 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
15175 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
15176 write_back
? offset
+ i
* 4 : 0));
15180 /* Called from a peephole2 expander to turn a sequence of stores into an
15181 STM instruction. OPERANDS are the operands found by the peephole matcher;
15182 NOPS indicates how many separate stores we are trying to combine.
15183 Returns true iff we could generate a new instruction. */
15186 gen_stm_seq (rtx
*operands
, int nops
)
15189 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
15190 rtx mems
[MAX_LDM_STM_OPS
];
15193 HOST_WIDE_INT offset
;
15194 int write_back
= FALSE
;
15197 bool base_reg_dies
;
15199 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
15200 mem_order
, &base_reg
, &offset
, true);
15205 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
15207 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
15210 gcc_assert (base_reg_dies
);
15216 gcc_assert (base_reg_dies
);
15217 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
15221 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
15223 for (i
= 0; i
< nops
; i
++)
15225 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
15226 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
15229 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
15230 write_back
? offset
+ i
* 4 : 0));
15234 /* Called from a peephole2 expander to turn a sequence of stores that are
15235 preceded by constant loads into an STM instruction. OPERANDS are the
15236 operands found by the peephole matcher; NOPS indicates how many
15237 separate stores we are trying to combine; there are 2 * NOPS
15238 instructions in the peephole.
15239 Returns true iff we could generate a new instruction. */
15242 gen_const_stm_seq (rtx
*operands
, int nops
)
15244 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
15245 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
15246 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
15247 rtx mems
[MAX_LDM_STM_OPS
];
15250 HOST_WIDE_INT offset
;
15251 int write_back
= FALSE
;
15254 bool base_reg_dies
;
15256 HARD_REG_SET allocated
;
15258 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
15259 mem_order
, &base_reg
, &offset
, false);
15264 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
15266 /* If the same register is used more than once, try to find a free
15268 CLEAR_HARD_REG_SET (allocated
);
15269 for (i
= 0; i
< nops
; i
++)
15271 for (j
= i
+ 1; j
< nops
; j
++)
15272 if (regs
[i
] == regs
[j
])
15274 rtx t
= peep2_find_free_register (0, nops
* 2,
15275 TARGET_THUMB1
? "l" : "r",
15276 SImode
, &allocated
);
15280 regs
[i
] = REGNO (t
);
15284 /* Compute an ordering that maps the register numbers to an ascending
15287 for (i
= 0; i
< nops
; i
++)
15288 if (regs
[i
] < regs
[reg_order
[0]])
15291 for (i
= 1; i
< nops
; i
++)
15293 int this_order
= reg_order
[i
- 1];
15294 for (j
= 0; j
< nops
; j
++)
15295 if (regs
[j
] > regs
[reg_order
[i
- 1]]
15296 && (this_order
== reg_order
[i
- 1]
15297 || regs
[j
] < regs
[this_order
]))
15299 reg_order
[i
] = this_order
;
15302 /* Ensure that registers that must be live after the instruction end
15303 up with the correct value. */
15304 for (i
= 0; i
< nops
; i
++)
15306 int this_order
= reg_order
[i
];
15307 if ((this_order
!= mem_order
[i
]
15308 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
15309 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
15313 /* Load the constants. */
15314 for (i
= 0; i
< nops
; i
++)
15316 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
15317 sorted_regs
[i
] = regs
[reg_order
[i
]];
15318 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
15321 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
15323 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
15326 gcc_assert (base_reg_dies
);
15332 gcc_assert (base_reg_dies
);
15333 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
15337 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
15339 for (i
= 0; i
< nops
; i
++)
15341 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
15342 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
15345 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
15346 write_back
? offset
+ i
* 4 : 0));
15350 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15351 unaligned copies on processors which support unaligned semantics for those
15352 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15353 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15354 An interleave factor of 1 (the minimum) will perform no interleaving.
15355 Load/store multiple are used for aligned addresses where possible. */
15358 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
15359 HOST_WIDE_INT length
,
15360 unsigned int interleave_factor
)
15362 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
15363 int *regnos
= XALLOCAVEC (int, interleave_factor
);
15364 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
15365 HOST_WIDE_INT i
, j
;
15366 HOST_WIDE_INT remaining
= length
, words
;
15367 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
15369 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
15370 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
15371 HOST_WIDE_INT srcoffset
, dstoffset
;
15372 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
15375 gcc_assert (interleave_factor
>= 1 && interleave_factor
<= 4);
15377 /* Use hard registers if we have aligned source or destination so we can use
15378 load/store multiple with contiguous registers. */
15379 if (dst_aligned
|| src_aligned
)
15380 for (i
= 0; i
< interleave_factor
; i
++)
15381 regs
[i
] = gen_rtx_REG (SImode
, i
);
15383 for (i
= 0; i
< interleave_factor
; i
++)
15384 regs
[i
] = gen_reg_rtx (SImode
);
15386 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
15387 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
15389 srcoffset
= dstoffset
= 0;
15391 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15392 For copying the last bytes we want to subtract this offset again. */
15393 src_autoinc
= dst_autoinc
= 0;
15395 for (i
= 0; i
< interleave_factor
; i
++)
15398 /* Copy BLOCK_SIZE_BYTES chunks. */
15400 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
15403 if (src_aligned
&& interleave_factor
> 1)
15405 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
15406 TRUE
, srcbase
, &srcoffset
));
15407 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
15411 for (j
= 0; j
< interleave_factor
; j
++)
15413 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
15415 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
15416 srcoffset
+ j
* UNITS_PER_WORD
);
15417 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
15419 srcoffset
+= block_size_bytes
;
15423 if (dst_aligned
&& interleave_factor
> 1)
15425 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
15426 TRUE
, dstbase
, &dstoffset
));
15427 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
15431 for (j
= 0; j
< interleave_factor
; j
++)
15433 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
15435 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
15436 dstoffset
+ j
* UNITS_PER_WORD
);
15437 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
15439 dstoffset
+= block_size_bytes
;
15442 remaining
-= block_size_bytes
;
15445 /* Copy any whole words left (note these aren't interleaved with any
15446 subsequent halfword/byte load/stores in the interests of simplicity). */
15448 words
= remaining
/ UNITS_PER_WORD
;
15450 gcc_assert (words
< interleave_factor
);
15452 if (src_aligned
&& words
> 1)
15454 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
15456 src_autoinc
+= UNITS_PER_WORD
* words
;
15460 for (j
= 0; j
< words
; j
++)
15462 addr
= plus_constant (Pmode
, src
,
15463 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
15464 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
15465 srcoffset
+ j
* UNITS_PER_WORD
);
15467 emit_move_insn (regs
[j
], mem
);
15469 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
15471 srcoffset
+= words
* UNITS_PER_WORD
;
15474 if (dst_aligned
&& words
> 1)
15476 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
15478 dst_autoinc
+= words
* UNITS_PER_WORD
;
15482 for (j
= 0; j
< words
; j
++)
15484 addr
= plus_constant (Pmode
, dst
,
15485 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
15486 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
15487 dstoffset
+ j
* UNITS_PER_WORD
);
15489 emit_move_insn (mem
, regs
[j
]);
15491 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
15493 dstoffset
+= words
* UNITS_PER_WORD
;
15496 remaining
-= words
* UNITS_PER_WORD
;
15498 gcc_assert (remaining
< 4);
15500 /* Copy a halfword if necessary. */
15502 if (remaining
>= 2)
15504 halfword_tmp
= gen_reg_rtx (SImode
);
15506 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
15507 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
15508 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
15510 /* Either write out immediately, or delay until we've loaded the last
15511 byte, depending on interleave factor. */
15512 if (interleave_factor
== 1)
15514 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15515 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
15516 emit_insn (gen_unaligned_storehi (mem
,
15517 gen_lowpart (HImode
, halfword_tmp
)));
15518 halfword_tmp
= NULL
;
15526 gcc_assert (remaining
< 2);
15528 /* Copy last byte. */
15530 if ((remaining
& 1) != 0)
15532 byte_tmp
= gen_reg_rtx (SImode
);
15534 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
15535 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
15536 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
15538 if (interleave_factor
== 1)
15540 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15541 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
15542 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
15551 /* Store last halfword if we haven't done so already. */
15555 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15556 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
15557 emit_insn (gen_unaligned_storehi (mem
,
15558 gen_lowpart (HImode
, halfword_tmp
)));
15562 /* Likewise for last byte. */
15566 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15567 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
15568 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
15572 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
15575 /* From mips_adjust_block_mem:
15577 Helper function for doing a loop-based block operation on memory
15578 reference MEM. Each iteration of the loop will operate on LENGTH
15581 Create a new base register for use within the loop and point it to
15582 the start of MEM. Create a new memory reference that uses this
15583 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15586 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15589   *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15591   /* Although the new mem does not refer to a known location,
15592      it does keep up to LENGTH bytes of alignment. */
15593   *loop_mem = change_address (mem, BLKmode, *loop_reg);
15594   set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15597 /* From mips_block_move_loop:
15599 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15600 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15601 the memory regions do not overlap. */
15604 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15605                                unsigned int interleave_factor,
15606                                HOST_WIDE_INT bytes_per_iter)
15608   rtx src_reg, dest_reg, final_src, test;
15609   HOST_WIDE_INT leftover;
15611   leftover = length % bytes_per_iter;
15612   length -= leftover;
15614   /* Create registers and memory references for use within the loop. */
15615   arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15616   arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15618   /* Calculate the value that SRC_REG should have after the last iteration of
15620   final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15621                                    0, 0, OPTAB_WIDEN);
15623   /* Emit the start of the loop. */
15624   rtx_code_label *label = gen_label_rtx ();
15625   emit_label (label);
15627   /* Emit the loop body. */
15628   arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15629                                      interleave_factor);
15631   /* Move on to the next block. */
15632   emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15633   emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15635   /* Emit the loop condition. */
15636   test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15637   emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15639   /* Mop up any left-over bytes. */
15641   arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15644 /* Emit a block move when either the source or destination is unaligned (not
15645 aligned to a four-byte boundary). This may need further tuning depending on
15646 core type, optimize_size setting, etc. */
15649 arm_cpymemqi_unaligned (rtx *operands)
15651   HOST_WIDE_INT length = INTVAL (operands[2]);
15655   bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15656   bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15657   /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15658      size of code if optimizing for size. We'll use ldm/stm if src_aligned
15659      or dst_aligned though: allow more interleaving in those cases since the
15660      resulting code can be smaller. */
15661   unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15662   HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15665     arm_block_move_unaligned_loop (operands[0], operands[1], length,
15666                                    interleave_factor, bytes_per_iter);
15668     arm_block_move_unaligned_straight (operands[0], operands[1], length,
15669                                        interleave_factor);
15673   /* Note that the loop created by arm_block_move_unaligned_loop may be
15674      subject to loop unrolling, which makes tuning this condition a little
15677     arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15679     arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15686 arm_gen_cpymemqi (rtx
*operands
)
15688 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
15689 HOST_WIDE_INT srcoffset
, dstoffset
;
15690 rtx src
, dst
, srcbase
, dstbase
;
15691 rtx part_bytes_reg
= NULL
;
15694 if (!CONST_INT_P (operands
[2])
15695 || !CONST_INT_P (operands
[3])
15696 || INTVAL (operands
[2]) > 64)
15699 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
15700 return arm_cpymemqi_unaligned (operands
);
15702 if (INTVAL (operands
[3]) & 3)
15705 dstbase
= operands
[0];
15706 srcbase
= operands
[1];
15708 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
15709 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
15711 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
15712 out_words_to_go
= INTVAL (operands
[2]) / 4;
15713 last_bytes
= INTVAL (operands
[2]) & 3;
15714 dstoffset
= srcoffset
= 0;
15716 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
15717 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
15719 while (in_words_to_go
>= 2)
15721 if (in_words_to_go
> 4)
15722 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
15723 TRUE
, srcbase
, &srcoffset
));
15725 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
15726 src
, FALSE
, srcbase
,
15729 if (out_words_to_go
)
15731 if (out_words_to_go
> 4)
15732 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
15733 TRUE
, dstbase
, &dstoffset
));
15734 else if (out_words_to_go
!= 1)
15735 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
15736 out_words_to_go
, dst
,
15739 dstbase
, &dstoffset
));
15742 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
15743 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
15744 if (last_bytes
!= 0)
15746 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
15752 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
15753 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
15756 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15757 if (out_words_to_go
)
15761 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
15762 sreg
= copy_to_reg (mem
);
15764 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
15765 emit_move_insn (mem
, sreg
);
15768 gcc_assert (!in_words_to_go
); /* Sanity check */
15771 if (in_words_to_go
)
15773 gcc_assert (in_words_to_go
> 0);
15775 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
15776 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
15779 gcc_assert (!last_bytes
|| part_bytes_reg
);
15781 if (BYTES_BIG_ENDIAN
&& last_bytes
)
15783 rtx tmp
= gen_reg_rtx (SImode
);
15785 /* The bytes we want are in the top end of the word. */
15786 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
15787 GEN_INT (8 * (4 - last_bytes
))));
15788 part_bytes_reg
= tmp
;
15792 mem
= adjust_automodify_address (dstbase
, QImode
,
15793 plus_constant (Pmode
, dst
,
15795 dstoffset
+ last_bytes
- 1);
15796 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
15800 tmp
= gen_reg_rtx (SImode
);
15801 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
15802 part_bytes_reg
= tmp
;
15809 if (last_bytes
> 1)
15811 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
15812 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
15816 rtx tmp
= gen_reg_rtx (SImode
);
15817 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
15818 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
15819 part_bytes_reg
= tmp
;
15826 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
15827 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
15834 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15837 next_consecutive_mem (rtx mem)
15839   machine_mode mode = GET_MODE (mem);
15840   HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15841   rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15843   return adjust_automodify_address (mem, mode, addr, offset);
15846 /* Copy using LDRD/STRD instructions whenever possible.
15847 Returns true upon success. */
15849 gen_cpymem_ldrd_strd (rtx
*operands
)
15851 unsigned HOST_WIDE_INT len
;
15852 HOST_WIDE_INT align
;
15853 rtx src
, dst
, base
;
15855 bool src_aligned
, dst_aligned
;
15856 bool src_volatile
, dst_volatile
;
15858 gcc_assert (CONST_INT_P (operands
[2]));
15859 gcc_assert (CONST_INT_P (operands
[3]));
15861 len
= UINTVAL (operands
[2]);
15865 /* Maximum alignment we can assume for both src and dst buffers. */
15866 align
= INTVAL (operands
[3]);
15868 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
15871 /* Place src and dst addresses in registers
15872 and update the corresponding mem rtx. */
15874 dst_volatile
= MEM_VOLATILE_P (dst
);
15875 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
15876 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
15877 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
15880 src_volatile
= MEM_VOLATILE_P (src
);
15881 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
15882 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
15883 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
15885 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
15888 if (src_volatile
|| dst_volatile
)
15891 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15892 if (!(dst_aligned
|| src_aligned
))
15893 return arm_gen_cpymemqi (operands
);
15895 /* If the either src or dst is unaligned we'll be accessing it as pairs
15896 of unaligned SImode accesses. Otherwise we can generate DImode
15897 ldrd/strd instructions. */
15898 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
15899 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
15904 reg0
= gen_reg_rtx (DImode
);
15905 rtx first_reg
= NULL_RTX
;
15906 rtx second_reg
= NULL_RTX
;
15908 if (!src_aligned
|| !dst_aligned
)
15910 if (BYTES_BIG_ENDIAN
)
15912 second_reg
= gen_lowpart (SImode
, reg0
);
15913 first_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
15917 first_reg
= gen_lowpart (SImode
, reg0
);
15918 second_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
15921 if (MEM_ALIGN (src
) >= 2 * BITS_PER_WORD
)
15922 emit_move_insn (reg0
, src
);
15923 else if (src_aligned
)
15924 emit_insn (gen_unaligned_loaddi (reg0
, src
));
15927 emit_insn (gen_unaligned_loadsi (first_reg
, src
));
15928 src
= next_consecutive_mem (src
);
15929 emit_insn (gen_unaligned_loadsi (second_reg
, src
));
15932 if (MEM_ALIGN (dst
) >= 2 * BITS_PER_WORD
)
15933 emit_move_insn (dst
, reg0
);
15934 else if (dst_aligned
)
15935 emit_insn (gen_unaligned_storedi (dst
, reg0
));
15938 emit_insn (gen_unaligned_storesi (dst
, first_reg
));
15939 dst
= next_consecutive_mem (dst
);
15940 emit_insn (gen_unaligned_storesi (dst
, second_reg
));
15943 src
= next_consecutive_mem (src
);
15944 dst
= next_consecutive_mem (dst
);
15947 gcc_assert (len
< 8);
15950 /* More than a word but less than a double-word to copy. Copy a word. */
15951 reg0
= gen_reg_rtx (SImode
);
15952 src
= adjust_address (src
, SImode
, 0);
15953 dst
= adjust_address (dst
, SImode
, 0);
15955 emit_move_insn (reg0
, src
);
15957 emit_insn (gen_unaligned_loadsi (reg0
, src
));
15960 emit_move_insn (dst
, reg0
);
15962 emit_insn (gen_unaligned_storesi (dst
, reg0
));
15964 src
= next_consecutive_mem (src
);
15965 dst
= next_consecutive_mem (dst
);
15972 /* Copy the remaining bytes. */
15975 dst
= adjust_address (dst
, HImode
, 0);
15976 src
= adjust_address (src
, HImode
, 0);
15977 reg0
= gen_reg_rtx (SImode
);
15979 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
15981 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
15984 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
15986 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
15988 src
= next_consecutive_mem (src
);
15989 dst
= next_consecutive_mem (dst
);
15994 dst
= adjust_address (dst
, QImode
, 0);
15995 src
= adjust_address (src
, QImode
, 0);
15996 reg0
= gen_reg_rtx (QImode
);
15997 emit_move_insn (reg0
, src
);
15998 emit_move_insn (dst
, reg0
);
16002 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
16003 into its component 32-bit subregs. OP2 may be an immediate
16004 constant and we want to simplify it in that case. */
16006 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
16007                         rtx *lo_op2, rtx *hi_op2)
16009   *lo_op1 = gen_lowpart (SImode, op1);
16010   *hi_op1 = gen_highpart (SImode, op1);
16011   *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
16012                                  subreg_lowpart_offset (SImode, DImode));
16013   *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
16014                                  subreg_highpart_offset (SImode, DImode));
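/* Illustrative sketch only: decompose a 64-bit register and a 64-bit
   immediate into their SImode halves, much as a 64-bit add expander would
   before emitting an ADDS/ADC pair.  The constant 0x100000002 splits into a
   low word of 2 and a high word of 1.  */
static void ATTRIBUTE_UNUSED
decompose_di_binop_example (void)
{
  rtx op1 = gen_reg_rtx (DImode);
  rtx op2 = gen_int_mode (HOST_WIDE_INT_C (0x100000002), DImode);
  rtx lo1, hi1, lo2, hi2;

  arm_decompose_di_binop (op1, op2, &lo1, &hi1, &lo2, &hi2);
  /* lo2 is (const_int 2), hi2 is (const_int 1); lo1 and hi1 are the SImode
     subwords of the 64-bit pseudo.  */
}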
16017 /* Select a dominance comparison mode if possible for a test of the general
16018 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
16019 COND_OR == DOM_CC_X_AND_Y => (X && Y)
16020 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
16021 COND_OR == DOM_CC_X_OR_Y => (X || Y)
16022 In all cases OP will be either EQ or NE, but we don't need to know which
16023 here. If we are unable to support a dominance comparison we return
16024 CC mode. This will then fail to match for the RTL expressions that
16025 generate this call. */
16027 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
16029 enum rtx_code cond1
, cond2
;
16032 /* Currently we will probably get the wrong result if the individual
16033 comparisons are not simple. This also ensures that it is safe to
16034 reverse a comparison if necessary. */
16035 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
16037 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
16041 /* The if_then_else variant of this tests the second condition if the
16042 first passes, but is true if the first fails. Reverse the first
16043 condition to get a true "inclusive-or" expression. */
16044 if (cond_or
== DOM_CC_NX_OR_Y
)
16045 cond1
= reverse_condition (cond1
);
16047 /* If the comparisons are not equal, and one doesn't dominate the other,
16048 then we can't do this. */
16050 && !comparison_dominates_p (cond1
, cond2
)
16051 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
16055 std::swap (cond1
, cond2
);
16060 if (cond_or
== DOM_CC_X_AND_Y
)
16065 case EQ
: return CC_DEQmode
;
16066 case LE
: return CC_DLEmode
;
16067 case LEU
: return CC_DLEUmode
;
16068 case GE
: return CC_DGEmode
;
16069 case GEU
: return CC_DGEUmode
;
16070 default: gcc_unreachable ();
16074 if (cond_or
== DOM_CC_X_AND_Y
)
16086 gcc_unreachable ();
16090 if (cond_or
== DOM_CC_X_AND_Y
)
16102 gcc_unreachable ();
16106 if (cond_or
== DOM_CC_X_AND_Y
)
16107 return CC_DLTUmode
;
16112 return CC_DLTUmode
;
16114 return CC_DLEUmode
;
16118 gcc_unreachable ();
16122 if (cond_or
== DOM_CC_X_AND_Y
)
16123 return CC_DGTUmode
;
16128 return CC_DGTUmode
;
16130 return CC_DGEUmode
;
16134 gcc_unreachable ();
16137 /* The remaining cases only occur when both comparisons are the
16140 gcc_assert (cond1
== cond2
);
16144 gcc_assert (cond1
== cond2
);
16148 gcc_assert (cond1
== cond2
);
16152 gcc_assert (cond1
== cond2
);
16153 return CC_DLEUmode
;
16156 gcc_assert (cond1
== cond2
);
16157 return CC_DGEUmode
;
16160 gcc_unreachable ();
16165 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
16167 /* All floating point compares return CCFP if it is an equality
16168 comparison, and CCFPE otherwise. */
16169 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
16192 gcc_unreachable ();
16196 /* A compare with a shifted operand. Because of canonicalization, the
16197 comparison will have to be swapped when we emit the assembler. */
16198 if (GET_MODE (y
) == SImode
16199 && (REG_P (y
) || (SUBREG_P (y
)))
16200 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
16201 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
16202 || GET_CODE (x
) == ROTATERT
))
16205 /* A widened compare of the sum of a value plus a carry against a
16206 constant. This is a representation of RSC. We want to swap the
16207 result of the comparison at output. Not valid if the Z bit is
16209 if (GET_MODE (x
) == DImode
16210 && GET_CODE (x
) == PLUS
16211 && arm_borrow_operation (XEXP (x
, 1), DImode
)
16213 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
16214 && (op
== LE
|| op
== GT
))
16215 || (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
16216 && (op
== LEU
|| op
== GTU
))))
16219 /* If X is a constant we want to use CC_RSBmode. This is
16220 non-canonical, but arm_gen_compare_reg uses this to generate the
16221 correct canonical form. */
16222 if (GET_MODE (y
) == SImode
16223 && (REG_P (y
) || SUBREG_P (y
))
16224 && CONST_INT_P (x
))
16227 /* This operation is performed swapped, but since we only rely on the Z
16228 flag we don't need an additional mode. */
16229 if (GET_MODE (y
) == SImode
16230 && (REG_P (y
) || (SUBREG_P (y
)))
16231 && GET_CODE (x
) == NEG
16232 && (op
== EQ
|| op
== NE
))
16235 /* This is a special case that is used by combine to allow a
16236 comparison of a shifted byte load to be split into a zero-extend
16237 followed by a comparison of the shifted integer (only valid for
16238 equalities and unsigned inequalities). */
16239 if (GET_MODE (x
) == SImode
16240 && GET_CODE (x
) == ASHIFT
16241 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
16242 && GET_CODE (XEXP (x
, 0)) == SUBREG
16243 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
16244 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
16245 && (op
== EQ
|| op
== NE
16246 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
16247 && CONST_INT_P (y
))
16250 /* A construct for a conditional compare, if the false arm contains
16251 0, then both conditions must be true, otherwise either condition
16252 must be true. Not all conditions are possible, so CCmode is
16253 returned if it can't be done. */
16254 if (GET_CODE (x
) == IF_THEN_ELSE
16255 && (XEXP (x
, 2) == const0_rtx
16256 || XEXP (x
, 2) == const1_rtx
)
16257 && COMPARISON_P (XEXP (x
, 0))
16258 && COMPARISON_P (XEXP (x
, 1)))
16259 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16260 INTVAL (XEXP (x
, 2)));
16262 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16263 if (GET_CODE (x
) == AND
16264 && (op
== EQ
|| op
== NE
)
16265 && COMPARISON_P (XEXP (x
, 0))
16266 && COMPARISON_P (XEXP (x
, 1)))
16267 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16270 if (GET_CODE (x
) == IOR
16271 && (op
== EQ
|| op
== NE
)
16272 && COMPARISON_P (XEXP (x
, 0))
16273 && COMPARISON_P (XEXP (x
, 1)))
16274 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16277 /* An operation (on Thumb) where we want to test for a single bit.
16278 This is done by shifting that bit up into the top bit of a
16279 scratch register; we can then branch on the sign bit. */
16281 && GET_MODE (x
) == SImode
16282 && (op
== EQ
|| op
== NE
)
16283 && GET_CODE (x
) == ZERO_EXTRACT
16284 && XEXP (x
, 1) == const1_rtx
)
16287 /* An operation that sets the condition codes as a side-effect, the
16288 V flag is not set correctly, so we can only use comparisons where
16289 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16291 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16292 if (GET_MODE (x
) == SImode
16294 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
16295 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
16296 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
16297 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
16298 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
16299 || GET_CODE (x
) == LSHIFTRT
16300 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
16301 || GET_CODE (x
) == ROTATERT
16302 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
16305 /* A comparison of ~reg with a const is really a special
16306 canoncialization of compare (~const, reg), which is a reverse
16307 subtract operation. We may not get here if CONST is 0, but that
16308 doesn't matter because ~0 isn't a valid immediate for RSB. */
16309 if (GET_MODE (x
) == SImode
16310 && GET_CODE (x
) == NOT
16311 && CONST_INT_P (y
))
16314 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
16317 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
16318 && GET_CODE (x
) == PLUS
16319 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
16322 if (GET_MODE (x
) == DImode
16323 && GET_CODE (x
) == PLUS
16324 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
16326 && UINTVAL (y
) == 0x800000000
16327 && (op
== GEU
|| op
== LTU
))
16330 if (GET_MODE (x
) == DImode
16331 && (op
== GE
|| op
== LT
)
16332 && GET_CODE (x
) == SIGN_EXTEND
16333 && ((GET_CODE (y
) == PLUS
16334 && arm_borrow_operation (XEXP (y
, 0), DImode
))
16335 || arm_borrow_operation (y
, DImode
)))
16338 if (GET_MODE (x
) == DImode
16339 && (op
== GEU
|| op
== LTU
)
16340 && GET_CODE (x
) == ZERO_EXTEND
16341 && ((GET_CODE (y
) == PLUS
16342 && arm_borrow_operation (XEXP (y
, 0), DImode
))
16343 || arm_borrow_operation (y
, DImode
)))
16346 if (GET_MODE (x
) == DImode
16347 && (op
== EQ
|| op
== NE
)
16348 && (GET_CODE (x
) == PLUS
16349 || GET_CODE (x
) == MINUS
)
16350 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
16351 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
16352 && GET_CODE (y
) == SIGN_EXTEND
16353 && GET_CODE (XEXP (y
, 0)) == GET_CODE (x
))
16356 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
16357 return GET_MODE (x
);
16362 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16363 the sequence of instructions needed to generate a suitable condition
16364 code register. Return the CC register result. */
16366 arm_gen_dicompare_reg (rtx_code code
, rtx x
, rtx y
, rtx scratch
)
16371 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16372 gcc_assert (TARGET_32BIT
);
16373 gcc_assert (!CONST_INT_P (x
));
16375 rtx x_lo
= simplify_gen_subreg (SImode
, x
, DImode
,
16376 subreg_lowpart_offset (SImode
, DImode
));
16377 rtx x_hi
= simplify_gen_subreg (SImode
, x
, DImode
,
16378 subreg_highpart_offset (SImode
, DImode
));
16379 rtx y_lo
= simplify_gen_subreg (SImode
, y
, DImode
,
16380 subreg_lowpart_offset (SImode
, DImode
));
16381 rtx y_hi
= simplify_gen_subreg (SImode
, y
, DImode
,
16382 subreg_highpart_offset (SImode
, DImode
));
16388 if (y_lo
== const0_rtx
|| y_hi
== const0_rtx
)
16390 if (y_lo
!= const0_rtx
)
16392 rtx scratch2
= scratch
? scratch
: gen_reg_rtx (SImode
);
16394 gcc_assert (y_hi
== const0_rtx
);
16395 y_lo
= gen_int_mode (-INTVAL (y_lo
), SImode
);
16396 if (!arm_add_operand (y_lo
, SImode
))
16397 y_lo
= force_reg (SImode
, y_lo
);
16398 emit_insn (gen_addsi3 (scratch2
, x_lo
, y_lo
));
16401 else if (y_hi
!= const0_rtx
)
16403 rtx scratch2
= scratch
? scratch
: gen_reg_rtx (SImode
);
16405 y_hi
= gen_int_mode (-INTVAL (y_hi
), SImode
);
16406 if (!arm_add_operand (y_hi
, SImode
))
16407 y_hi
= force_reg (SImode
, y_hi
);
16408 emit_insn (gen_addsi3 (scratch2
, x_hi
, y_hi
));
16414 gcc_assert (!reload_completed
);
16415 scratch
= gen_rtx_SCRATCH (SImode
);
16418 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
16419 cc_reg
= gen_rtx_REG (CC_NZmode
, CC_REGNUM
);
16422 = gen_rtx_SET (cc_reg
,
16423 gen_rtx_COMPARE (CC_NZmode
,
16424 gen_rtx_IOR (SImode
, x_lo
, x_hi
),
16426 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
,
16431 if (!arm_add_operand (y_lo
, SImode
))
16432 y_lo
= force_reg (SImode
, y_lo
);
16434 if (!arm_add_operand (y_hi
, SImode
))
16435 y_hi
= force_reg (SImode
, y_hi
);
16437 rtx cmp1
= gen_rtx_NE (SImode
, x_lo
, y_lo
);
16438 rtx cmp2
= gen_rtx_NE (SImode
, x_hi
, y_hi
);
16439 rtx conjunction
= gen_rtx_IOR (SImode
, cmp1
, cmp2
);
16440 mode
= SELECT_CC_MODE (code
, conjunction
, const0_rtx
);
16441 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
16443 emit_insn (gen_rtx_SET (cc_reg
,
16444 gen_rtx_COMPARE (mode
, conjunction
,
16452 if (y_lo
== const0_rtx
)
16454 /* If the low word of y is 0, then this is simply a normal
16455 compare of the upper words. */
16456 if (!arm_add_operand (y_hi
, SImode
))
16457 y_hi
= force_reg (SImode
, y_hi
);
16459 return arm_gen_compare_reg (code
, x_hi
, y_hi
, NULL_RTX
);
16462 if (!arm_add_operand (y_lo
, SImode
))
16463 y_lo
= force_reg (SImode
, y_lo
);
16466 = gen_rtx_LTU (DImode
,
16467 arm_gen_compare_reg (LTU
, x_lo
, y_lo
, NULL_RTX
),
16471 scratch
= gen_rtx_SCRATCH (SImode
);
16473 if (!arm_not_operand (y_hi
, SImode
))
16474 y_hi
= force_reg (SImode
, y_hi
);
16477 if (y_hi
== const0_rtx
)
16478 insn
= emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch
, x_hi
,
16480 else if (CONST_INT_P (y_hi
))
16481 insn
= emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch
, x_hi
,
16484 insn
= emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch
, x_hi
, y_hi
,
16486 return SET_DEST (single_set (insn
));
    case LE:
    case GT:
      {
        /* During expansion, we only expect to get here if y is a
           constant that we want to handle, otherwise we should have
           swapped the operands already.  */
        gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

        if (!const_ok_for_arm (INTVAL (y_lo)))
          y_lo = force_reg (SImode, y_lo);

        /* Perform a reverse subtract and compare.  */
        rtx cmp1
          = gen_rtx_LTU (DImode,
                         arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
                         const0_rtx);
        rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
                                                                 x_hi, cmp1));
        return SET_DEST (single_set (insn));
      }
    case LTU:
    case GEU:
      {
        if (y_lo == const0_rtx)
          {
            /* If the low word of y is 0, then this is simply a normal
               compare of the upper words.  */
            if (!arm_add_operand (y_hi, SImode))
              y_hi = force_reg (SImode, y_hi);

            return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
          }

        if (!arm_add_operand (y_lo, SImode))
          y_lo = force_reg (SImode, y_lo);

        rtx cmp1
          = gen_rtx_LTU (DImode,
                         arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
                         const0_rtx);

        if (!scratch)
          scratch = gen_rtx_SCRATCH (SImode);
        if (!arm_not_operand (y_hi, SImode))
          y_hi = force_reg (SImode, y_hi);

        rtx_insn *insn;
        if (y_hi == const0_rtx)
          insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
                                                          cmp1));
        else if (CONST_INT_P (y_hi))
          {
            /* Constant is viewed as unsigned when zero-extended.  */
            y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
            insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
                                                              y_hi, cmp1));
          }
        else
          insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
                                                        cmp1));
        return SET_DEST (single_set (insn));
      }

    case LEU:
    case GTU:
      {
        /* During expansion, we only expect to get here if y is a
           constant that we want to handle, otherwise we should have
           swapped the operands already.  */
        gcc_assert (arm_const_double_prefer_rsbs_rsc (y));

        if (!const_ok_for_arm (INTVAL (y_lo)))
          y_lo = force_reg (SImode, y_lo);

        /* Perform a reverse subtract and compare.  */
        rtx cmp1
          = gen_rtx_LTU (DImode,
                         arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
                         const0_rtx);
        y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
        rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
                                                                x_hi, cmp1));
        return SET_DEST (single_set (insn));
      }

    default:
      gcc_unreachable ();
    }
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
{
  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    return arm_gen_dicompare_reg (code, x, y, scratch);

  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
  if (mode == CC_RSBmode)
    {
      if (!scratch)
        scratch = gen_rtx_SCRATCH (SImode);
      emit_insn (gen_rsb_imm_compare_scratch (scratch,
                                              GEN_INT (~UINTVAL (x)), y));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (SUBREG_P (ref))
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
         the value as an SImode register move.  This happens when the target
         architecture variant does not have an HImode register move.  */
      if (base == NULL)
        {
          gcc_assert (REG_P (operands[0]));
          emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
                                gen_rtx_SUBREG (SImode, ref, 0)));
          return;
        }
    }
  else
    base = find_replacement (&XEXP (ref, 0));
  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);
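      /* Worked example (added for illustration, not from the original
         sources): for offset = 4102 the split gives lo = 4102 & 0xfff = 6
         and hi = 4096, so hi + lo == offset and both lo and lo + 1 stay
         within the +/-4095 byte-load offset range.  For the corner case
         offset = 4095, lo would be 4095 and lo + 1 out of range, so the
         code above clamps lo to 0x7ff (2047) and moves the remaining 2048
         into hi, which is added to the base register up front.  */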
16694 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16696 /* Get the base address; addsi3 knows how to handle constants
16697 that require more than one insn. */
16698 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
16704 /* Operands[2] may overlap operands[0] (though it won't overlap
16705 operands[1]), that's why we asked for a DImode reg -- so we can
16706 use the bit that does not overlap. */
16707 if (REGNO (operands
[2]) == REGNO (operands
[0]))
16708 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16710 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
16712 emit_insn (gen_zero_extendqisi2 (scratch
,
16713 gen_rtx_MEM (QImode
,
16714 plus_constant (Pmode
, base
,
16716 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16717 gen_rtx_MEM (QImode
,
16718 plus_constant (Pmode
, base
,
16720 if (!BYTES_BIG_ENDIAN
)
16721 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16722 gen_rtx_IOR (SImode
,
16725 gen_rtx_SUBREG (SImode
, operands
[0], 0),
16729 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16730 gen_rtx_IOR (SImode
,
16731 gen_rtx_ASHIFT (SImode
, scratch
,
16733 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
16748 HOST_WIDE_INT offset
= 0;
16750 if (SUBREG_P (ref
))
16752 offset
= SUBREG_BYTE (ref
);
16753 ref
= SUBREG_REG (ref
);
16758 /* We have a pseudo which has been spilt onto the stack; there
16759 are two cases here: the first where there is a simple
16760 stack-slot replacement and a second where the stack-slot is
16761 out of range, or is used as a subreg. */
16762 if (reg_equiv_mem (REGNO (ref
)))
16764 ref
= reg_equiv_mem (REGNO (ref
));
16765 base
= find_replacement (&XEXP (ref
, 0));
16768 /* The slot is out of range, or was dressed up in a SUBREG. */
16769 base
= reg_equiv_address (REGNO (ref
));
16771 /* PR 62254: If there is no equivalent memory location then just move
16772 the value as an SImode register move. This happens when the target
16773 architecture variant does not have an HImode register move. */
16776 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
16778 if (REG_P (outval
))
16780 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
16781 gen_rtx_SUBREG (SImode
, outval
, 0)));
16783 else /* SUBREG_P (outval) */
16785 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
16786 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
16787 SUBREG_REG (outval
)));
16789 /* FIXME: Handle other cases ? */
16790 gcc_unreachable ();
16796 base
= find_replacement (&XEXP (ref
, 0));
16798 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
16800 /* Handle the case where the address is too complex to be offset by 1. */
16801 if (GET_CODE (base
) == MINUS
16802 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
16804 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16806 /* Be careful not to destroy OUTVAL. */
16807 if (reg_overlap_mentioned_p (base_plus
, outval
))
16809 /* Updating base_plus might destroy outval, see if we can
16810 swap the scratch and base_plus. */
16811 if (!reg_overlap_mentioned_p (scratch
, outval
))
16812 std::swap (scratch
, base_plus
);
16815 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
16817 /* Be conservative and copy OUTVAL into the scratch now,
16818 this should only be necessary if outval is a subreg
16819 of something larger than a word. */
16820 /* XXX Might this clobber base? I can't see how it can,
16821 since scratch is known to overlap with OUTVAL, and
16822 must be wider than a word. */
16823 emit_insn (gen_movhi (scratch_hi
, outval
));
16824 outval
= scratch_hi
;
16828 emit_set_insn (base_plus
, base
);
16831 else if (GET_CODE (base
) == PLUS
)
16833 /* The addend must be CONST_INT, or we would have dealt with it above. */
16834 HOST_WIDE_INT hi
, lo
;
16836 offset
+= INTVAL (XEXP (base
, 1));
16837 base
= XEXP (base
, 0);
16839 /* Rework the address into a legal sequence of insns. */
16840 /* Valid range for lo is -4095 -> 4095 */
16843 : -((-offset
) & 0xfff));
16845 /* Corner case, if lo is the max offset then we would be out of range
16846 once we have added the additional 1 below, so bump the msb into the
16847 pre-loading insn(s). */
16851 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
16852 ^ (HOST_WIDE_INT
) 0x80000000)
16853 - (HOST_WIDE_INT
) 0x80000000);
16855 gcc_assert (hi
+ lo
== offset
);
16859 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16861 /* Be careful not to destroy OUTVAL. */
16862 if (reg_overlap_mentioned_p (base_plus
, outval
))
16864 /* Updating base_plus might destroy outval, see if we
16865 can swap the scratch and base_plus. */
16866 if (!reg_overlap_mentioned_p (scratch
, outval
))
16867 std::swap (scratch
, base_plus
);
16870 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
16872 /* Be conservative and copy outval into scratch now,
16873 this should only be necessary if outval is a
16874 subreg of something larger than a word. */
16875 /* XXX Might this clobber base? I can't see how it
16876 can, since scratch is known to overlap with
16878 emit_insn (gen_movhi (scratch_hi
, outval
));
16879 outval
= scratch_hi
;
16883 /* Get the base address; addsi3 knows how to handle constants
16884 that require more than one insn. */
16885 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, scratch)));
    }
}
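/* Illustrative note (added, not part of the original sources): for a
   halfword value 0xABCD stored at BASE + OFFSET, the little-endian path
   above stores byte 0xCD at OFFSET and then 0xAB at OFFSET + 1, while the
   big-endian path stores 0xCD at OFFSET + 1 and 0xAB at OFFSET, so the two
   single-byte stores always reproduce the target's memory layout for a
   16-bit store.  */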
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (const function_arg_info &arg)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (arg);
  else
    return must_pass_in_stack_var_size_or_pad (arg);
}
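/* Example (added for illustration, not from the original sources): under
   the AAPCS a 'struct { char c[3]; }' argument has a fixed 3-byte size, so
   must_pass_in_stack_var_size returns false and the word-padded aggregate
   can be passed in a core register; only variably sized types are forced
   onto the stack by this hook.  */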
/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */

static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
arm_pad_reg_upward (machine_mode mode,
                    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
         and small complex types are always padded upwards.  */
      if (type)
        {
          if ((AGGREGATE_TYPE_P (type)
               || TREE_CODE (type) == COMPLEX_TYPE
               || FIXED_POINT_TYPE_P (type))
              && int_size_in_bytes (type) <= 4)
            return true;
        }
      else
        {
          if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
              && GET_MODE_SIZE (mode) <= 4)
            return true;
        }
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
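/* Examples (added for illustration, not from the original sources): in ARM
   state an offset of 255 or -255 is accepted while 256 is rejected; in
   Thumb-2 state 1020 is accepted, 1022 is rejected because it is not a
   multiple of 4, and 1024 is rejected because it exceeds the maximum
   offset.  */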
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  The
   pattern guarantees that both memory accesses use the same base register,
   that the offsets are constants within the valid range, and that the gap
   between the offsets is 4.  If reload is complete, also check that the
   registers are legal.  WBACK indicates whether the address is updated.
   LOAD indicates whether the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
                       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
          || (t == SP_REGNUM)
          || (t == PC_REGNUM)
          || (t2 == SP_REGNUM)
          || (t2 == PC_REGNUM)
          || (!load && (n == PC_REGNUM))
          || (load && (t == t2))
          /* Triggers Cortex-M3 LDRD errata.  */
          || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if (TARGET_ARM
      && ((wback && (n == t || n == t2))
          || (t2 == PC_REGNUM)
          || (t % 2 != 0)   /* First destination register is not even.  */
          || (t2 != t + 1)
          /* PC can be used as base register (for offset addressing only),
             but it is deprecated.  */
          || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Return true if a 64-bit access with alignment ALIGN and with a
   constant offset OFFSET from the base pointer is permitted on this
   target.  */
static bool
align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
{
  return (unaligned_access
          ? (align >= BITS_PER_WORD && (offset & 3) == 0)
          : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
}
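/* Illustrative note (added, not from the original sources): when unaligned
   access is enabled the pair only needs word alignment and a 4-byte-aligned
   offset, so align = 32 bits with offset = 4 is accepted; without unaligned
   access the data must be doubleword aligned and the offset a multiple of 8,
   so that same case is rejected while align = 64 bits with offset = 8 is
   accepted.  */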
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE,
   OFFSET and ALIGN accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
{
  rtx addr;
17072 gcc_assert (base
!= NULL
&& offset
!= NULL
);
17074 /* TODO: Handle more general memory operand patterns, such as
17075 PRE_DEC and PRE_INC. */
17077 if (side_effects_p (mem
))
17080 /* Can't deal with subregs. */
17081 if (SUBREG_P (mem
))
17084 gcc_assert (MEM_P (mem
));
17086 *offset
= const0_rtx
;
17087 *align
= MEM_ALIGN (mem
);
17089 addr
= XEXP (mem
, 0);
17091 /* If addr isn't valid for DImode, then we can't handle it. */
17092 if (!arm_legitimate_address_p (DImode
, addr
,
17093 reload_in_progress
|| reload_completed
))
17101 else if (GET_CODE (addr
) == PLUS
)
17103 *base
= XEXP (addr
, 0);
17104 *offset
= XEXP (addr
, 1);
17105 return (REG_P (*base
) && CONST_INT_P (*offset
));
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
                        bool const_store, bool commute)
{
17128 HOST_WIDE_INT offsets
[2], offset
, align
[2];
17129 rtx base
= NULL_RTX
;
17130 rtx cur_base
, cur_offset
, tmp
;
17132 HARD_REG_SET regset
;
17134 gcc_assert (!const_store
|| !load
);
17135 /* Check that the memory references are immediate offsets from the
17136 same base register. Extract the base register, the destination
17137 registers, and the corresponding memory offsets. */
17138 for (i
= 0; i
< nops
; i
++)
17140 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
17146 else if (REGNO (base
) != REGNO (cur_base
))
17149 offsets
[i
] = INTVAL (cur_offset
);
17150 if (GET_CODE (operands
[i
]) == SUBREG
)
17152 tmp
= SUBREG_REG (operands
[i
]);
17153 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
17158 /* Make sure there is no dependency between the individual loads. */
17159 if (load
&& REGNO (operands
[0]) == REGNO (base
))
17160 return false; /* RAW */
17162 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
17163 return false; /* WAW */
17165 /* If the same input register is used in both stores
17166 when storing different constants, try to find a free register.
17167 For example, the code
17172 can be transformed into
17176 in Thumb mode assuming that r1 is free.
17177 For ARM mode do the same but only if the starting register
17178 can be made to be even. */
17180 && REGNO (operands
[0]) == REGNO (operands
[1])
17181 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
17185 CLEAR_HARD_REG_SET (regset
);
17186 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17187 if (tmp
== NULL_RTX
)
17190 /* Use the new register in the first load to ensure that
17191 if the original input register is not dead after peephole,
17192 then it will have the correct constant value. */
17195 else if (TARGET_ARM
)
17197 int regno
= REGNO (operands
[0]);
17198 if (!peep2_reg_dead_p (4, operands
[0]))
17200 /* When the input register is even and is not dead after the
17201 pattern, it has to hold the second constant but we cannot
17202 form a legal STRD in ARM mode with this register as the second
17204 if (regno
% 2 == 0)
17207 /* Is regno-1 free? */
17208 SET_HARD_REG_SET (regset
);
17209 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
17210 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17211 if (tmp
== NULL_RTX
)
17218 /* Find a DImode register. */
17219 CLEAR_HARD_REG_SET (regset
);
17220 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
17221 if (tmp
!= NULL_RTX
)
17223 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
17224 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
17228 /* Can we use the input register to form a DI register? */
17229 SET_HARD_REG_SET (regset
);
17230 CLEAR_HARD_REG_BIT(regset
,
17231 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
17232 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17233 if (tmp
== NULL_RTX
)
17235 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
17239 gcc_assert (operands
[0] != NULL_RTX
);
17240 gcc_assert (operands
[1] != NULL_RTX
);
17241 gcc_assert (REGNO (operands
[0]) % 2 == 0);
17242 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
17246 /* Make sure the instructions are ordered with lower memory access first. */
17247 if (offsets
[0] > offsets
[1])
17249 gap
= offsets
[0] - offsets
[1];
17250 offset
= offsets
[1];
17252 /* Swap the instructions such that lower memory is accessed first. */
17253 std::swap (operands
[0], operands
[1]);
17254 std::swap (operands
[2], operands
[3]);
17255 std::swap (align
[0], align
[1]);
17257 std::swap (operands
[4], operands
[5]);
17261 gap
= offsets
[1] - offsets
[0];
17262 offset
= offsets
[0];
17265 /* Make sure accesses are to consecutive memory locations. */
17266 if (gap
!= GET_MODE_SIZE (SImode
))
17269 if (!align_ok_ldrd_strd (align
[0], offset
))
17272 /* Make sure we generate legal instructions. */
17273 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17277 /* In Thumb state, where registers are almost unconstrained, there
17278 is little hope to fix it. */
17282 if (load
&& commute
)
17284 /* Try reordering registers. */
17285 std::swap (operands
[0], operands
[1]);
17286 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17293 /* If input registers are dead after this pattern, they can be
17294 reordered or replaced by other registers that are free in the
17295 current pattern. */
17296 if (!peep2_reg_dead_p (4, operands
[0])
17297 || !peep2_reg_dead_p (4, operands
[1]))
17300 /* Try to reorder the input registers. */
17301 /* For example, the code
17306 can be transformed into
17311 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
17314 std::swap (operands
[0], operands
[1]);
17318 /* Try to find a free DI register. */
17319 CLEAR_HARD_REG_SET (regset
);
17320 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
17321 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
17324 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
17325 if (tmp
== NULL_RTX
)
17328 /* DREG must be an even-numbered register in DImode.
17329 Split it into SI registers. */
17330 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
17331 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
17332 gcc_assert (operands
[0] != NULL_RTX
);
17333 gcc_assert (operands
[1] != NULL_RTX
);
17334 gcc_assert (REGNO (operands
[0]) % 2 == 0);
17335 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
17337 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
17347 /* Return true if parallel execution of the two word-size accesses provided
17348 could be satisfied with a single LDRD/STRD instruction. Two word-size
17349 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17350 register operands and OPERANDS[2,3] are the corresponding memory operands.
17353 valid_operands_ldrd_strd (rtx
*operands
, bool load
)
17356 HOST_WIDE_INT offsets
[2], offset
, align
[2];
17357 rtx base
= NULL_RTX
;
17358 rtx cur_base
, cur_offset
;
17361 /* Check that the memory references are immediate offsets from the
17362 same base register. Extract the base register, the destination
17363 registers, and the corresponding memory offsets. */
17364 for (i
= 0; i
< nops
; i
++)
17366 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
17372 else if (REGNO (base
) != REGNO (cur_base
))
17375 offsets
[i
] = INTVAL (cur_offset
);
17376 if (GET_CODE (operands
[i
]) == SUBREG
)
17380 if (offsets
[0] > offsets
[1])
17383 gap
= offsets
[1] - offsets
[0];
17384 offset
= offsets
[0];
17386 /* Make sure accesses are to consecutive memory locations. */
17387 if (gap
!= GET_MODE_SIZE (SImode
))
17390 if (!align_ok_ldrd_strd (align
[0], offset
))
17393 return operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17398 /* Print a symbolic form of X to the debug file, F. */
17400 arm_print_value (FILE *f
, rtx x
)
17402 switch (GET_CODE (x
))
17405 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
17411 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
17412 sizeof (fpstr
), 0, 1);
17422 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
17424 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
17425 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
17433 fprintf (f
, "\"%s\"", XSTR (x
, 0));
17437 fprintf (f
, "`%s'", XSTR (x
, 0));
17441 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
17445 arm_print_value (f
, XEXP (x
, 0));
17449 arm_print_value (f
, XEXP (x
, 0));
17451 arm_print_value (f
, XEXP (x
, 1));
17459 fprintf (f
, "????");
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
17531 struct minipool_node
17533 /* Doubly linked chain of entries. */
17536 /* The maximum offset into the code that this entry can be placed. While
17537 pushing fixes for forward references, all entries are sorted in order
17538 of increasing max_address. */
17539 HOST_WIDE_INT max_address
;
17540 /* Similarly for an entry inserted for a backwards ref. */
17541 HOST_WIDE_INT min_address
;
17542 /* The number of fixes referencing this entry. This can become zero
17543 if we "unpush" an entry. In this case we ignore the entry when we
17544 come to emit the code. */
17546 /* The offset from the start of the minipool. */
17547 HOST_WIDE_INT offset
;
17548 /* The value in table. */
17550 /* The mode of value. */
17552 /* The size of the value. With iWMMXt enabled
17553 sizes > 4 also imply an alignment of 8-bytes. */
17557 struct minipool_fixup
17561 HOST_WIDE_INT address
;
17567 HOST_WIDE_INT forwards
;
17568 HOST_WIDE_INT backwards
;
/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
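/* Example (added for illustration): MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) both evaluate to 4, since entries smaller
   than a word are padded out to a full word, while MINIPOOL_FIX_SIZE
   (SImode) is 4 and MINIPOOL_FIX_SIZE (DImode) is 8.  */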
17575 static Mnode
* minipool_vector_head
;
17576 static Mnode
* minipool_vector_tail
;
17577 static rtx_code_label
*minipool_vector_label
;
17578 static int minipool_pad
;
17580 /* The linked list of all minipool fixes required for this function. */
17581 Mfix
* minipool_fix_head
;
17582 Mfix
* minipool_fix_tail
;
17583 /* The fix entry for the current minipool, once it has been placed. */
17584 Mfix
* minipool_barrier
;
17586 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17587 #define JUMP_TABLES_IN_TEXT_SECTION 0
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
        {
        case 1:
          /* Round up size of TBB table to a halfword boundary.  */
          size = (size + 1) & ~HOST_WIDE_INT_1;
          break;
        case 2:
          /* No padding necessary for TBH.  */
          break;
        case 4:
          /* Add two bytes for alignment on Thumb.  */
          if (TARGET_THUMB)
            size += 2;
          break;
        default:
          gcc_unreachable ();
        }
      return size;
    }

  return 0;
}
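/* Example (added for illustration, not from the original sources): a TBB
   dispatch table with 7 entries occupies 7 bytes (modesize 1), which is
   rounded up to 8 so that the following instruction stays halfword
   aligned; a TBH table with 7 entries needs 14 bytes and no padding.  */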
/* Emit insns to load the function address from FUNCDESC (an FDPIC
   function descriptor) into a register and the GOT address into the
   FDPIC register, returning an rtx for the register holding the
   function address.  */
rtx
arm_load_function_descriptor (rtx funcdesc)
{
  rtx fnaddr_reg = gen_reg_rtx (Pmode);
  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));

  emit_move_insn (fnaddr_reg, fnaddr);

  /* The ABI requires the entry point address to be loaded first, but
     since we cannot support lazy binding for lack of atomic load of
     two 32-bits values, we do not need to bother to prevent the
     previous load from being moved after that of the GOT address.  */
  emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));

  return fnaddr_reg;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label).levels[0].log;
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
17663 /* Move a minipool fix MP from its current location to before MAX_MP.
17664 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17665 constraints may need updating. */
17667 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
17668 HOST_WIDE_INT max_address
)
17670 /* The code below assumes these are different. */
17671 gcc_assert (mp
!= max_mp
);
17673 if (max_mp
== NULL
)
17675 if (max_address
< mp
->max_address
)
17676 mp
->max_address
= max_address
;
17680 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
17681 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
17683 mp
->max_address
= max_address
;
17685 /* Unlink MP from its current position. Since max_mp is non-null,
17686 mp->prev must be non-null. */
17687 mp
->prev
->next
= mp
->next
;
17688 if (mp
->next
!= NULL
)
17689 mp
->next
->prev
= mp
->prev
;
17691 minipool_vector_tail
= mp
->prev
;
17693 /* Re-insert it before MAX_MP. */
17695 mp
->prev
= max_mp
->prev
;
17698 if (mp
->prev
!= NULL
)
17699 mp
->prev
->next
= mp
;
17701 minipool_vector_head
= mp
;
17704 /* Save the new entry. */
17707 /* Scan over the preceding entries and adjust their addresses as
17709 while (mp
->prev
!= NULL
17710 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
17712 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
17719 /* Add a constant to the minipool for a forward reference. Returns the
17720 node added or NULL if the constant will not fit in this pool. */
17722 add_minipool_forward_ref (Mfix
*fix
)
17724 /* If set, max_mp is the first pool_entry that has a lower
17725 constraint than the one we are trying to add. */
17726 Mnode
* max_mp
= NULL
;
17727 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
17730 /* If the minipool starts before the end of FIX->INSN then this FIX
17731 cannot be placed into the current pool. Furthermore, adding the
17732 new constant pool entry may cause the pool to start FIX_SIZE bytes
17734 if (minipool_vector_head
&&
17735 (fix
->address
+ get_attr_length (fix
->insn
)
17736 >= minipool_vector_head
->max_address
- fix
->fix_size
))
17739 /* Scan the pool to see if a constant with the same value has
17740 already been added. While we are doing this, also note the
17741 location where we must insert the constant if it doesn't already
17743 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17745 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
17746 && fix
->mode
== mp
->mode
17747 && (!LABEL_P (fix
->value
)
17748 || (CODE_LABEL_NUMBER (fix
->value
)
17749 == CODE_LABEL_NUMBER (mp
->value
)))
17750 && rtx_equal_p (fix
->value
, mp
->value
))
17752 /* More than one fix references this entry. */
17754 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
17757 /* Note the insertion point if necessary. */
17759 && mp
->max_address
> max_address
)
17762 /* If we are inserting an 8-bytes aligned quantity and
17763 we have not already found an insertion point, then
17764 make sure that all such 8-byte aligned quantities are
17765 placed at the start of the pool. */
17766 if (ARM_DOUBLEWORD_ALIGN
17768 && fix
->fix_size
>= 8
17769 && mp
->fix_size
< 8)
17772 max_address
= mp
->max_address
;
17776 /* The value is not currently in the minipool, so we need to create
17777 a new entry for it. If MAX_MP is NULL, the entry will be put on
17778 the end of the list since the placement is less constrained than
17779 any existing entry. Otherwise, we insert the new fix before
17780 MAX_MP and, if necessary, adjust the constraints on the other
17783 mp
->fix_size
= fix
->fix_size
;
17784 mp
->mode
= fix
->mode
;
17785 mp
->value
= fix
->value
;
17787 /* Not yet required for a backwards ref. */
17788 mp
->min_address
= -65536;
17790 if (max_mp
== NULL
)
17792 mp
->max_address
= max_address
;
17794 mp
->prev
= minipool_vector_tail
;
17796 if (mp
->prev
== NULL
)
17798 minipool_vector_head
= mp
;
17799 minipool_vector_label
= gen_label_rtx ();
17802 mp
->prev
->next
= mp
;
17804 minipool_vector_tail
= mp
;
17808 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
17809 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
17811 mp
->max_address
= max_address
;
17814 mp
->prev
= max_mp
->prev
;
17816 if (mp
->prev
!= NULL
)
17817 mp
->prev
->next
= mp
;
17819 minipool_vector_head
= mp
;
17822 /* Save the new entry. */
17825 /* Scan over the preceding entries and adjust their addresses as
17827 while (mp
->prev
!= NULL
17828 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
17830 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
17838 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
17839 HOST_WIDE_INT min_address
)
17841 HOST_WIDE_INT offset
;
17843 /* The code below assumes these are different. */
17844 gcc_assert (mp
!= min_mp
);
17846 if (min_mp
== NULL
)
17848 if (min_address
> mp
->min_address
)
17849 mp
->min_address
= min_address
;
17853 /* We will adjust this below if it is too loose. */
17854 mp
->min_address
= min_address
;
17856 /* Unlink MP from its current position. Since min_mp is non-null,
17857 mp->next must be non-null. */
17858 mp
->next
->prev
= mp
->prev
;
17859 if (mp
->prev
!= NULL
)
17860 mp
->prev
->next
= mp
->next
;
17862 minipool_vector_head
= mp
->next
;
17864 /* Reinsert it after MIN_MP. */
17866 mp
->next
= min_mp
->next
;
17868 if (mp
->next
!= NULL
)
17869 mp
->next
->prev
= mp
;
17871 minipool_vector_tail
= mp
;
17877 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17879 mp
->offset
= offset
;
17880 if (mp
->refcount
> 0)
17881 offset
+= mp
->fix_size
;
17883 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
17884 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
17890 /* Add a constant to the minipool for a backward reference. Returns the
17891 node added or NULL if the constant will not fit in this pool.
17893 Note that the code for insertion for a backwards reference can be
17894 somewhat confusing because the calculated offsets for each fix do
17895 not take into account the size of the pool (which is still under
17898 add_minipool_backward_ref (Mfix
*fix
)
17900 /* If set, min_mp is the last pool_entry that has a lower constraint
17901 than the one we are trying to add. */
17902 Mnode
*min_mp
= NULL
;
17903 /* This can be negative, since it is only a constraint. */
17904 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
17907 /* If we can't reach the current pool from this insn, or if we can't
17908 insert this entry at the end of the pool without pushing other
17909 fixes out of range, then we don't try. This ensures that we
17910 can't fail later on. */
17911 if (min_address
>= minipool_barrier
->address
17912 || (minipool_vector_tail
->min_address
+ fix
->fix_size
17913 >= minipool_barrier
->address
))
17916 /* Scan the pool to see if a constant with the same value has
17917 already been added. While we are doing this, also note the
17918 location where we must insert the constant if it doesn't already
17920 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
17922 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
17923 && fix
->mode
== mp
->mode
17924 && (!LABEL_P (fix
->value
)
17925 || (CODE_LABEL_NUMBER (fix
->value
)
17926 == CODE_LABEL_NUMBER (mp
->value
)))
17927 && rtx_equal_p (fix
->value
, mp
->value
)
17928 /* Check that there is enough slack to move this entry to the
17929 end of the table (this is conservative). */
17930 && (mp
->max_address
17931 > (minipool_barrier
->address
17932 + minipool_vector_tail
->offset
17933 + minipool_vector_tail
->fix_size
)))
17936 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
17939 if (min_mp
!= NULL
)
17940 mp
->min_address
+= fix
->fix_size
;
17943 /* Note the insertion point if necessary. */
17944 if (mp
->min_address
< min_address
)
17946 /* For now, we do not allow the insertion of 8-byte alignment
17947 requiring nodes anywhere but at the start of the pool. */
17948 if (ARM_DOUBLEWORD_ALIGN
17949 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
17954 else if (mp
->max_address
17955 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
17957 /* Inserting before this entry would push the fix beyond
17958 its maximum address (which can happen if we have
17959 re-located a forwards fix); force the new fix to come
17961 if (ARM_DOUBLEWORD_ALIGN
17962 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
17967 min_address
= mp
->min_address
+ fix
->fix_size
;
17970 /* Do not insert a non-8-byte aligned quantity before 8-byte
17971 aligned quantities. */
17972 else if (ARM_DOUBLEWORD_ALIGN
17973 && fix
->fix_size
< 8
17974 && mp
->fix_size
>= 8)
17977 min_address
= mp
->min_address
+ fix
->fix_size
;
17982 /* We need to create a new entry. */
17984 mp
->fix_size
= fix
->fix_size
;
17985 mp
->mode
= fix
->mode
;
17986 mp
->value
= fix
->value
;
17988 mp
->max_address
= minipool_barrier
->address
+ 65536;
17990 mp
->min_address
= min_address
;
17992 if (min_mp
== NULL
)
17995 mp
->next
= minipool_vector_head
;
17997 if (mp
->next
== NULL
)
17999 minipool_vector_tail
= mp
;
18000 minipool_vector_label
= gen_label_rtx ();
18003 mp
->next
->prev
= mp
;
18005 minipool_vector_head
= mp
;
18009 mp
->next
= min_mp
->next
;
18013 if (mp
->next
!= NULL
)
18014 mp
->next
->prev
= mp
;
18016 minipool_vector_tail
= mp
;
18019 /* Save the new entry. */
18027 /* Scan over the following entries and adjust their offsets. */
18028 while (mp
->next
!= NULL
)
18030 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
18031 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
18034 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
18036 mp
->next
->offset
= mp
->offset
;
18045 assign_minipool_offsets (Mfix
*barrier
)
18047 HOST_WIDE_INT offset
= 0;
18050 minipool_barrier
= barrier
;
18052 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
18054 mp
->offset
= offset
;
18056 if (mp
->refcount
> 0)
18057 offset
+= mp
->fix_size
;
18061 /* Output the literal table */
18063 dump_minipool (rtx_insn
*scan
)
18069 if (ARM_DOUBLEWORD_ALIGN
)
18070 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
18071 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
18078 fprintf (dump_file
,
18079 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18080 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
18082 scan
= emit_label_after (gen_label_rtx (), scan
);
18083 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
18084 scan
= emit_label_after (minipool_vector_label
, scan
);
18086 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
18088 if (mp
->refcount
> 0)
18092 fprintf (dump_file
,
18093 ";; Offset %u, min %ld, max %ld ",
18094 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
18095 (unsigned long) mp
->max_address
);
18096 arm_print_value (dump_file
, mp
->value
);
18097 fputc ('\n', dump_file
);
18100 rtx val
= copy_rtx (mp
->value
);
18102 switch (GET_MODE_SIZE (mp
->mode
))
18104 #ifdef HAVE_consttable_1
18106 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
18110 #ifdef HAVE_consttable_2
18112 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
18116 #ifdef HAVE_consttable_4
18118 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
18122 #ifdef HAVE_consttable_8
18124 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
18128 #ifdef HAVE_consttable_16
18130 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
18135 gcc_unreachable ();
18143 minipool_vector_head
= minipool_vector_tail
= NULL
;
18144 scan
= emit_insn_after (gen_consttable_end (), scan
);
18145 scan
= emit_barrier_after (scan
);
18148 /* Return the cost of forcibly inserting a barrier after INSN. */
18150 arm_barrier_cost (rtx_insn
*insn
)
18152 /* Basing the location of the pool on the loop depth is preferable,
18153 but at the moment, the basic block information seems to be
18154 corrupt by this stage of the compilation. */
18155 int base_cost
= 50;
18156 rtx_insn
*next
= next_nonnote_insn (insn
);
18158 if (next
!= NULL
&& LABEL_P (next
))
18161 switch (GET_CODE (insn
))
18164 /* It will always be better to place the table before the label, rather
18173 return base_cost
- 10;
18176 return base_cost
+ 10;
18180 /* Find the best place in the insn stream in the range
18181 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18182 Create the barrier by inserting a jump and add a new fix entry for
18185 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
18187 HOST_WIDE_INT count
= 0;
18188 rtx_barrier
*barrier
;
18189 rtx_insn
*from
= fix
->insn
;
18190 /* The instruction after which we will insert the jump. */
18191 rtx_insn
*selected
= NULL
;
18193 /* The address at which the jump instruction will be placed. */
18194 HOST_WIDE_INT selected_address
;
18196 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
18197 rtx_code_label
*label
= gen_label_rtx ();
18199 selected_cost
= arm_barrier_cost (from
);
18200 selected_address
= fix
->address
;
18202 while (from
&& count
< max_count
)
18204 rtx_jump_table_data
*tmp
;
18207 /* This code shouldn't have been called if there was a natural barrier
18209 gcc_assert (!BARRIER_P (from
));
18211 /* Count the length of this insn. This must stay in sync with the
18212 code that pushes minipool fixes. */
18213 if (LABEL_P (from
))
18214 count
+= get_label_padding (from
);
18216 count
+= get_attr_length (from
);
18218 /* If there is a jump table, add its length. */
18219 if (tablejump_p (from
, NULL
, &tmp
))
18221 count
+= get_jump_table_size (tmp
);
18223 /* Jump tables aren't in a basic block, so base the cost on
18224 the dispatch insn. If we select this location, we will
18225 still put the pool after the table. */
18226 new_cost
= arm_barrier_cost (from
);
18228 if (count
< max_count
18229 && (!selected
|| new_cost
<= selected_cost
))
18232 selected_cost
= new_cost
;
18233 selected_address
= fix
->address
+ count
;
18236 /* Continue after the dispatch table. */
18237 from
= NEXT_INSN (tmp
);
18241 new_cost
= arm_barrier_cost (from
);
18243 if (count
< max_count
18244 && (!selected
|| new_cost
<= selected_cost
))
18247 selected_cost
= new_cost
;
18248 selected_address
= fix
->address
+ count
;
18251 from
= NEXT_INSN (from
);
18254 /* Make sure that we found a place to insert the jump. */
18255 gcc_assert (selected
);
18257 /* Create a new JUMP_INSN that branches around a barrier. */
18258 from
= emit_jump_insn_after (gen_jump (label
), selected
);
18259 JUMP_LABEL (from
) = label
;
18260 barrier
= emit_barrier_after (from
);
18261 emit_label_after (label
, barrier
);
18263 /* Create a minipool barrier entry for the new barrier. */
18264 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
18265 new_fix
->insn
= barrier
;
18266 new_fix
->address
= selected_address
;
18267 new_fix
->next
= fix
->next
;
18268 fix
->next
= new_fix
;
18273 /* Record that there is a natural barrier in the insn stream at
18276 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
18278 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
18281 fix
->address
= address
;
18284 if (minipool_fix_head
!= NULL
)
18285 minipool_fix_tail
->next
= fix
;
18287 minipool_fix_head
= fix
;
18289 minipool_fix_tail
= fix
;
18292 /* Record INSN, which will need fixing up to load a value from the
18293 minipool. ADDRESS is the offset of the insn since the start of the
18294 function; LOC is a pointer to the part of the insn which requires
18295 fixing; VALUE is the constant that must be loaded, which is of type
18298 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
18299 machine_mode mode
, rtx value
)
18301 gcc_assert (!arm_disable_literal_pool
);
18302 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
18305 fix
->address
= address
;
18308 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
18309 fix
->value
= value
;
18310 fix
->forwards
= get_attr_pool_range (insn
);
18311 fix
->backwards
= get_attr_neg_pool_range (insn
);
18312 fix
->minipool
= NULL
;
18314 /* If an insn doesn't have a range defined for it, then it isn't
18315 expecting to be reworked by this code. Better to stop now than
18316 to generate duff assembly code. */
18317 gcc_assert (fix
->forwards
|| fix
->backwards
);
18319 /* If an entry requires 8-byte alignment then assume all constant pools
18320 require 4 bytes of padding. Trying to do this later on a per-pool
18321 basis is awkward because existing pool entries have to be modified. */
18322 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
18327 fprintf (dump_file
,
18328 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18329 GET_MODE_NAME (mode
),
18330 INSN_UID (insn
), (unsigned long) address
,
18331 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
18332 arm_print_value (dump_file
, fix
->value
);
18333 fprintf (dump_file
, "\n");
18336 /* Add it to the chain of fixes. */
18339 if (minipool_fix_head
!= NULL
)
18340 minipool_fix_tail
->next
= fix
;
18342 minipool_fix_head
= fix
;
18344 minipool_fix_tail
= fix
;
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value in memory.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
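/* Note (added for illustration): when optimizing for size, or when the
   core has a load delay slot (arm_ld_sched), a 64-bit constant is only
   synthesized inline if it takes at most 3 insns; otherwise up to 4 insns
   are allowed before falling back to a literal-pool load.  */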
18356 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18357 Returns the number of insns needed, or 99 if we don't know how to
18360 arm_const_double_inline_cost (rtx val
)
18362 rtx lowpart
, highpart
;
18365 mode
= GET_MODE (val
);
18367 if (mode
== VOIDmode
)
18370 gcc_assert (GET_MODE_SIZE (mode
) == 8);
18372 lowpart
= gen_lowpart (SImode
, val
);
18373 highpart
= gen_highpart_mode (SImode
, mode
, val
);
18375 gcc_assert (CONST_INT_P (lowpart
));
18376 gcc_assert (CONST_INT_P (highpart
));
18378 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
18379 NULL_RTX
, NULL_RTX
, 0, 0)
18380 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
18381 NULL_RTX
, NULL_RTX
, 0, 0));
18384 /* Cost of loading a SImode constant. */
18386 arm_const_inline_cost (enum rtx_code code
, rtx val
)
18388 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
18389 NULL_RTX
, NULL_RTX
, 1, 0);
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
static bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    return false;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
static bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    return false;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
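/* Example (added for illustration, not from the original sources): the
   64-bit constant 0x000000ff000000ff can be built with two immediate
   moves, since each 32-bit half (0xff) is a valid ARM immediate, whereas
   0x0000123400005678 cannot, because neither half is encodable as an
   8-bit value rotated by an even amount.  */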
18455 /* Scan INSN and note any of its operands that need fixing.
18456 If DO_PUSHES is false we do not actually push any of the fixups
18459 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
18463 extract_constrain_insn (insn
);
18465 if (recog_data
.n_alternatives
== 0)
18468 /* Fill in recog_op_alt with information about the constraints of
18470 preprocess_constraints (insn
);
18472 const operand_alternative
*op_alt
= which_op_alt ();
18473 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
18475 /* Things we need to fix can only occur in inputs. */
18476 if (recog_data
.operand_type
[opno
] != OP_IN
)
18479 /* If this alternative is a memory reference, then any mention
18480 of constants in this alternative is really to fool reload
18481 into allowing us to accept one there. We need to fix them up
18482 now so that we output the right code. */
18483 if (op_alt
[opno
].memory_ok
)
18485 rtx op
= recog_data
.operand
[opno
];
18487 if (CONSTANT_P (op
))
18490 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
18491 recog_data
.operand_mode
[opno
], op
);
18493 else if (MEM_P (op
)
18494 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
18495 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
18499 rtx cop
= avoid_constant_pool_reference (op
);
	  /* Casting the address of something to a mode narrower
	     than a word can cause avoid_constant_pool_reference ()
	     to return the pool reference itself.  That's no good to
	     us here.  Let's just hope that we can use the
	     constant pool value directly.  */
18507 cop
= get_pool_constant (XEXP (op
, 0));
18509 push_minipool_fix (insn
, address
,
18510 recog_data
.operand_loc
[opno
],
18511 recog_data
.operand_mode
[opno
], cop
);
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.  */
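/* Worked example (added for illustration, not part of the original
   sources): for an argument of type 'struct { uint8_t a; uint16_t b; }'
   starting at bit 0 of r0, field A occupies bits 0-7 and field B bits
   16-31, so bits 8-15 are padding; the mask recorded for r0 marks those
   bits as needing to be cleared before a non-secure transition, while the
   returned register mask marks r0 itself as carrying live data.  */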
18533 static unsigned HOST_WIDE_INT
18534 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
18535 uint32_t * padding_bits_to_clear
,
18536 unsigned starting_bit
, int * last_used_bit
)
18539 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
18541 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
18543 unsigned current_bit
= starting_bit
;
18545 long int offset
, size
;
18548 field
= TYPE_FIELDS (arg_type
);
18551 /* The offset within a structure is always an offset from
18552 the start of that structure. Make sure we take that into the
18553 calculation of the register based offset that we use here. */
18554 offset
= starting_bit
;
18555 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
18558 /* This is the actual size of the field, for bitfields this is the
18559 bitfield width and not the container size. */
18560 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
18562 if (*last_used_bit
!= offset
)
18564 if (offset
< *last_used_bit
)
18566 /* This field's offset is before the 'last_used_bit', that
18567 means this field goes on the next register. So we need to
18568 pad the rest of the current register and increase the
18569 register number. */
18571 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
18574 padding_bits_to_clear
[*regno
] |= mask
;
18575 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18580 /* Otherwise we pad the bits between the last field's end and
18581 the start of the new field. */
18584 mask
= ((uint32_t)-1) >> (32 - offset
);
18585 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
18586 padding_bits_to_clear
[*regno
] |= mask
;
18588 current_bit
= offset
;
18591 /* Calculate further padding bits for inner structs/unions too. */
18592 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
18594 *last_used_bit
= current_bit
;
18595 not_to_clear_reg_mask
18596 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
18597 padding_bits_to_clear
, offset
,
18602 /* Update 'current_bit' with this field's size. If the
18603 'current_bit' lies in a subsequent register, update 'regno' and
18604 reset 'current_bit' to point to the current bit in that new
18606 current_bit
+= size
;
18607 while (current_bit
>= 32)
18610 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18613 *last_used_bit
= current_bit
;
18616 field
= TREE_CHAIN (field
);
18618 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18620 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
18622 tree field
, field_t
;
18623 int i
, regno_t
, field_size
;
18627 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
18628 = {-1, -1, -1, -1};
18630 /* To compute the padding bits in a union we only consider bits as
18631 padding bits if they are always either a padding bit or fall outside a
18632 fields size for all fields in the union. */
18633 field
= TYPE_FIELDS (arg_type
);
18636 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
18637 = {0U, 0U, 0U, 0U};
18638 int last_used_bit_t
= *last_used_bit
;
18640 field_t
= TREE_TYPE (field
);
18642 /* If the field's type is either a record or a union make sure to
18643 compute their padding bits too. */
18644 if (RECORD_OR_UNION_TYPE_P (field_t))
18645 not_to_clear_reg_mask
18646 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18647 &padding_bits_to_clear_t[0],
18648 starting_bit, &last_used_bit_t);
18651 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18652 regno_t = (field_size / 32) + *regno;
18653 last_used_bit_t = (starting_bit + field_size) % 32;
18656 for (i = *regno; i < regno_t; i++)
18658 /* For all but the last register used by this field only keep the
18659 padding bits that were padding bits in this field. */
18660 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18663 /* For the last register, keep all padding bits that were padding
18664 bits in this field and any padding bits that are still valid
18665 as padding bits but fall outside of this field's size. */
18666 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18667 padding_bits_to_clear_res[regno_t]
18668 &= padding_bits_to_clear_t[regno_t] | mask;
18670 /* Update the maximum size of the fields in terms of registers used
18671 ('max_reg') and the 'last_used_bit' in said register. */
18672 if (max_reg < regno_t)
18675 max_bit = last_used_bit_t;
18677 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18678 max_bit = last_used_bit_t;
18680 field = TREE_CHAIN (field);
18683 /* Update the current padding_bits_to_clear using the intersection of the
18684 padding bits of all the fields. */
18685 for (i = *regno; i < max_reg; i++)
18686 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18688 /* Do not keep trailing padding bits, we do not know yet whether this
18689 is the end of the argument. */
18690 mask = ((uint32_t) 1 << max_bit) - 1;
18691 padding_bits_to_clear[max_reg]
18692 |= padding_bits_to_clear_res[max_reg] & mask;
18695 *last_used_bit = max_bit;
18698 /* This function should only be used for structs and unions. */
18699 gcc_unreachable ();
18701 return not_to_clear_reg_mask;
18704 /* In the context of ARMv8-M Security Extensions, this function is used for both
18705 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18706 registers are used when returning or passing arguments, which is then
18707 returned as a mask. It will also compute a mask to indicate padding/unused
18708 bits for each of these registers, and passes this through the
18709 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18710 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18711 the starting register used to pass this argument or return value is passed
18712 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18713 for struct and union types. */
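/* Illustrative example (assuming the usual AAPCS layout): for an argument of
   type struct { uint8_t c; uint32_t i; } passed in r0-r1, the returned mask
   would have bits 0 and 1 set, padding_bits_to_clear[0] would become
   0xffffff00 (bits 8-31 of r0 only ever hold padding), and r1 would have no
   padding bits.  */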
18715 static unsigned HOST_WIDE_INT
18716 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18717 uint32_t * padding_bits_to_clear)
18720 int last_used_bit = 0;
18721 unsigned HOST_WIDE_INT not_to_clear_mask;
18723 if (RECORD_OR_UNION_TYPE_P (arg_type))
18726 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18727 padding_bits_to_clear, 0,
18731 /* If the 'last_used_bit' is not zero, that means we are still using a
18732 part of the last 'regno'. In such cases we must clear the trailing
18733 bits. Otherwise we are not using regno and we should mark it as to
18734 be cleared. */
18735 if (last_used_bit != 0)
18736 padding_bits_to_clear[regno]
18737 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18739 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18743 not_to_clear_mask = 0;
18744 /* We are not dealing with structs nor unions. So these arguments may be
18745 passed in floating point registers too. In some cases a BLKmode is
18746 used when returning or passing arguments in multiple VFP registers. */
18747 if (GET_MODE (arg_rtx) == BLKmode)
18752 /* This should really only occur when dealing with the hard-float
18753 ABI. */
18754 gcc_assert (TARGET_HARD_FLOAT_ABI);
18756 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18758 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18759 gcc_assert (REG_P (reg));
18761 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18763 /* If we are dealing with DF mode, make sure we don't
18764 clear either of the registers it addresses. */
18765 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18768 unsigned HOST_WIDE_INT mask;
18769 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18770 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18771 not_to_clear_mask |= mask;
18777 /* Otherwise we can rely on the MODE to determine how many registers
18778 are being used by this argument. */
18779 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18780 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18783 unsigned HOST_WIDE_INT
18784 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18785 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18786 not_to_clear_mask |= mask;
18791 return not_to_clear_mask;
18794 /* Clear secret registers before doing a cmse_nonsecure_call or returning from
18795 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18796 are to be fully cleared, using the value in register CLEARING_REG if more
18797 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
18798 the bits that need to be cleared in caller-saved core registers, with
18799 SCRATCH_REG used as a scratch register for that clearing.
18801 NOTE: one of the three following assertions must hold:
18802 - SCRATCH_REG is a low register
18803 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
18804 in TO_CLEAR_BITMAP)
18805 - CLEARING_REG is a low register. */
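/* A typical use, taken from cmse_nonsecure_call_inline_register_clear below,
   is roughly:

     cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
                           NUM_ARG_REGS, ip_reg, clearing_reg);

   where ip_reg (IP_REGNUM) acts as the scratch register and clearing_reg is
   the register holding the (already LSB-cleared, non-secret) call address,
   whose value can be reused to overwrite the registers being cleared.  */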
18808 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18809 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18811 bool saved_clearing = false;
18812 rtx saved_clearing_reg = NULL_RTX;
18813 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18815 gcc_assert (arm_arch_cmse);
18817 if (!bitmap_empty_p (to_clear_bitmap))
18819 minregno = bitmap_first_set_bit (to_clear_bitmap);
18820 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18822 clearing_regno = REGNO (clearing_reg);
18824 /* Clear padding bits. */
18825 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18826 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18829 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18831 if (padding_bits_to_clear[i] == 0)
18834 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18835 CLEARING_REG as scratch. */
18836 if (TARGET_THUMB1
18837 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18839 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18840 such that we can use clearing_reg to clear the unused bits in the
18841 arguments. */
18842 if ((clearing_regno > maxregno
18843 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18844 && !saved_clearing)
18846 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18847 emit_move_insn (scratch_reg, clearing_reg);
18848 saved_clearing = true;
18849 saved_clearing_reg = scratch_reg;
18851 scratch_reg = clearing_reg;
18854 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18855 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18856 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18858 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18859 mask = (~padding_bits_to_clear[i]) >> 16;
18860 rtx16 = gen_int_mode (16, SImode);
18861 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18863 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18865 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18867 if (saved_clearing)
18868 emit_move_insn (clearing_reg, saved_clearing_reg);
18871 /* Clear full registers. */
18873 if (TARGET_HAVE_FPCXT_CMSE
)
18876 int i
, j
, k
, nb_regs
;
18877 rtx use_seq
, par
, reg
, set
, vunspec
;
18878 int to_clear_bitmap_size
= SBITMAP_SIZE (to_clear_bitmap
);
18879 auto_sbitmap
core_regs_bitmap (to_clear_bitmap_size
);
18880 auto_sbitmap
to_clear_core_bitmap (to_clear_bitmap_size
);
18882 for (i
= FIRST_VFP_REGNUM
; i
<= maxregno
; i
+= nb_regs
)
18884 /* Find next register to clear and exit if none. */
18885 for (; i
<= maxregno
&& !bitmap_bit_p (to_clear_bitmap
, i
); i
++);
18889 /* Compute number of consecutive registers to clear. */
18890 for (j
= i
; j
<= maxregno
&& bitmap_bit_p (to_clear_bitmap
, j
);
18894 /* Create VSCCLRM RTX pattern. */
18895 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nb_regs
+ 1));
18896 vunspec_vec
= gen_rtvec (1, gen_int_mode (0, SImode
));
18897 vunspec
= gen_rtx_UNSPEC_VOLATILE (SImode
, vunspec_vec
,
18898 VUNSPEC_VSCCLRM_VPR
);
18899 XVECEXP (par
, 0, 0) = vunspec
;
18901 /* Insert VFP register clearing RTX in the pattern. */
18903 for (k
= 1, j
= i
; j
<= maxregno
&& k
< nb_regs
+ 1; j
++)
18905 if (!bitmap_bit_p (to_clear_bitmap
, j
))
18908 reg
= gen_rtx_REG (SFmode
, j
);
18909 set
= gen_rtx_SET (reg
, const0_rtx
);
18910 XVECEXP (par
, 0, k
++) = set
;
18913 use_seq
= get_insns ();
18916 emit_insn_after (use_seq
, emit_insn (par
));
18919 /* Get set of core registers to clear. */
18920 bitmap_clear (core_regs_bitmap
);
18921 bitmap_set_range (core_regs_bitmap
, R0_REGNUM
,
18922 IP_REGNUM
- R0_REGNUM
+ 1);
18923 bitmap_and (to_clear_core_bitmap
, to_clear_bitmap
,
18925 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap
));
18927 if (bitmap_empty_p (to_clear_core_bitmap
))
18930 /* Create clrm RTX pattern. */
18931 nb_regs
= bitmap_count_bits (to_clear_core_bitmap
);
18932 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nb_regs
+ 2));
18934 /* Insert core register clearing RTX in the pattern. */
18936 for (j
= 0, i
= minregno
; j
< nb_regs
; i
++)
18938 if (!bitmap_bit_p (to_clear_core_bitmap
, i
))
18941 reg
= gen_rtx_REG (SImode
, i
);
18942 set
= gen_rtx_SET (reg
, const0_rtx
);
18943 XVECEXP (par
, 0, j
++) = set
;
18947 /* Insert APSR register clearing RTX in the pattern
18948 * along with clobbering CC. */
18949 vunspec_vec
= gen_rtvec (1, gen_int_mode (0, SImode
));
18950 vunspec
= gen_rtx_UNSPEC_VOLATILE (SImode
, vunspec_vec
,
18951 VUNSPEC_CLRM_APSR
);
18953 XVECEXP (par
, 0, j
++) = vunspec
;
18955 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
18956 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
18957 XVECEXP (par
, 0, j
) = clobber
;
18959 use_seq
= get_insns ();
18962 emit_insn_after (use_seq
, emit_insn (par
));
18966 /* If not marked for clearing, clearing_reg already does not contain
18968 if (clearing_regno
<= maxregno
18969 && bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
18971 emit_move_insn (clearing_reg
, const0_rtx
);
18972 emit_use (clearing_reg
);
18973 bitmap_clear_bit (to_clear_bitmap
, clearing_regno
);
18976 for (regno = minregno; regno <= maxregno; regno++)
18978 if (!bitmap_bit_p (to_clear_bitmap, regno))
18981 if (IS_VFP_REGNUM (regno))
18983 /* If regno is an even vfp register and its successor is also to
18984 be cleared, use vmov. */
18985 if (TARGET_VFP_DOUBLE
18986 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
18987 && bitmap_bit_p (to_clear_bitmap
, regno
+ 1))
18989 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
18990 CONST1_RTX (DFmode
));
18991 emit_use (gen_rtx_REG (DFmode
, regno
));
18996 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
18997 CONST1_RTX (SFmode
));
18998 emit_use (gen_rtx_REG (SFmode
, regno
));
19003 emit_move_insn (gen_rtx_REG (SImode
, regno
), clearing_reg
);
19004 emit_use (gen_rtx_REG (SImode
, regno
));
19010 /* Clear core and caller-saved VFP registers not used to pass arguments before
19011 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
19012 registers is done in the __gnu_cmse_nonsecure_call libcall. See
19013 libgcc/config/arm/cmse_nonsecure_call.S. */
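/* Illustrative sketch: for a cmse_nonsecure_call to a function taking a
   single 'int' argument, only r0 carries a parameter, so the remaining
   argument registers r1-r3 (and more, depending on the target and float
   ABI; softfp targets handle the caller-saved VFP state via the lazy
   store/load sequence) are cleared before the transfer, and the LSB of the
   target address is cleared with a logical shift right/left pair rather
   than a BIC, as done by the code below.  */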
19016 cmse_nonsecure_call_inline_register_clear (void)
19020 FOR_EACH_BB_FN (bb
, cfun
)
19024 FOR_BB_INSNS (bb
, insn
)
19026 bool clear_callee_saved
= TARGET_HAVE_FPCXT_CMSE
;
19027 /* frame = VFP regs + FPSCR + VPR. */
19028 unsigned lazy_store_stack_frame_size
19029 = (LAST_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1 + 2) * UNITS_PER_WORD
;
19030 unsigned long callee_saved_mask
19031 = ((1 << (LAST_HI_REGNUM
+ 1)) - 1)
19032 & ~((1 << (LAST_ARG_REGNUM
+ 1)) - 1);
19033 unsigned address_regnum
, regno
;
19034 unsigned max_int_regno
19035 = clear_callee_saved
? IP_REGNUM
: LAST_ARG_REGNUM
;
19036 unsigned max_fp_regno
19037 = TARGET_HAVE_FPCXT_CMSE
? LAST_VFP_REGNUM
: D7_VFP_REGNUM
;
19039 = TARGET_HARD_FLOAT_ABI
? max_fp_regno
: max_int_regno
;
19040 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
19042 rtx pat
, call
, unspec
, clearing_reg
, ip_reg
, shift
;
19044 CUMULATIVE_ARGS args_so_far_v
;
19045 cumulative_args_t args_so_far
;
19046 tree arg_type
, fntype
;
19047 bool first_param
= true, lazy_fpclear
= !TARGET_HARD_FLOAT_ABI
;
19048 function_args_iterator args_iter
;
19049 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
19051 if (!NONDEBUG_INSN_P (insn
))
19054 if (!CALL_P (insn
))
19057 pat
= PATTERN (insn
);
19058 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
19059 call
= XVECEXP (pat
, 0, 0);
19061 /* Get the real call RTX if the insn sets a value, ie. returns. */
19062 if (GET_CODE (call
) == SET
)
19063 call
= SET_SRC (call
);
19065 /* Check if it is a cmse_nonsecure_call. */
19066 unspec
= XEXP (call
, 0);
19067 if (GET_CODE (unspec
) != UNSPEC
19068 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
19071 /* Mark registers that need to be cleared. Those that hold a
19072 parameter are removed from the set further below. */
19073 bitmap_clear (to_clear_bitmap
);
19074 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
,
19075 max_int_regno
- R0_REGNUM
+ 1);
19077 /* Only look at the caller-saved floating point registers in case of
19078 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19079 lazy store and loads which clear both caller- and callee-saved
19080 registers. */
19083 auto_sbitmap
float_bitmap (maxregno
+ 1);
19085 bitmap_clear (float_bitmap
);
19086 bitmap_set_range (float_bitmap
, FIRST_VFP_REGNUM
,
19087 max_fp_regno
- FIRST_VFP_REGNUM
+ 1);
19088 bitmap_ior (to_clear_bitmap
, to_clear_bitmap
, float_bitmap
);
19091 /* Make sure the register used to hold the function address is not
19092 cleared. */
19093 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19094 gcc_assert (MEM_P (address
));
19095 gcc_assert (REG_P (XEXP (address
, 0)));
19096 address_regnum
= REGNO (XEXP (address
, 0));
19097 if (address_regnum
<= max_int_regno
)
19098 bitmap_clear_bit (to_clear_bitmap
, address_regnum
);
19100 /* Set basic block of call insn so that df rescan is performed on
19101 insns inserted here. */
19102 set_block_for_insn (insn
, bb
);
19103 df_set_flags (DF_DEFER_INSN_RESCAN
);
19106 /* Make sure the scheduler doesn't schedule other insns beyond
19108 emit_insn (gen_blockage ());
19110 /* Walk through all arguments and clear registers appropriately.
19111 */
19112 fntype = TREE_TYPE (MEM_EXPR (address));
19113 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19115 args_so_far
= pack_cumulative_args (&args_so_far_v
);
19116 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
19119 uint64_t to_clear_args_mask
;
19121 if (VOID_TYPE_P (arg_type
))
19124 function_arg_info
arg (arg_type
, /*named=*/true);
19126 /* ??? We should advance after processing the argument and pass
19127 the argument we're advancing past. */
19128 arm_function_arg_advance (args_so_far
, arg
);
19130 arg_rtx
= arm_function_arg (args_so_far
, arg
);
19131 gcc_assert (REG_P (arg_rtx
));
19133 = compute_not_to_clear_mask (arg_type
, arg_rtx
,
19135 &padding_bits_to_clear
[0]);
19136 if (to_clear_args_mask
)
19138 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
19140 if (to_clear_args_mask
& (1ULL << regno
))
19141 bitmap_clear_bit (to_clear_bitmap
, regno
);
19145 first_param
= false;
19148 /* We use right shift and left shift to clear the LSB of the address
19149 we jump to instead of using bic, to avoid having to use an extra
19150 register on Thumb-1. */
19151 clearing_reg
= XEXP (address
, 0);
19152 shift
= gen_rtx_LSHIFTRT (SImode
, clearing_reg
, const1_rtx
);
19153 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
19154 shift
= gen_rtx_ASHIFT (SImode
, clearing_reg
, const1_rtx
);
19155 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
19157 if (clear_callee_saved
)
19160 emit_multi_reg_push (callee_saved_mask
, callee_saved_mask
);
19161 /* Disable frame debug info in push because it needs to be
19162 disabled for pop (see below). */
19163 RTX_FRAME_RELATED_P (push_insn
) = 0;
19165 /* Lazy store multiple. */
19169 rtx_insn
*add_insn
;
19171 imm
= gen_int_mode (- lazy_store_stack_frame_size
, SImode
);
19172 add_insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
19173 stack_pointer_rtx
, imm
));
19174 /* If we have the frame pointer, then it will be the
19175 CFA reg. Otherwise, the stack pointer is the CFA
19176 reg, so we need to emit a CFA adjust. */
19177 if (!frame_pointer_needed
)
19178 arm_add_cfa_adjust_cfa_note (add_insn
,
19179 - lazy_store_stack_frame_size
,
19181 stack_pointer_rtx
);
19182 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx
));
19184 /* Save VFP callee-saved registers. */
19187 vfp_emit_fstmd (D7_VFP_REGNUM
+ 1,
19188 (max_fp_regno
- D7_VFP_REGNUM
) / 2);
19189 /* Disable frame debug info in push because it needs to be
19190 disabled for vpop (see below). */
19191 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19195 /* Clear caller-saved registers that leak before doing a non-secure
19197 ip_reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
19198 cmse_clear_registers (to_clear_bitmap
, padding_bits_to_clear
,
19199 NUM_ARG_REGS
, ip_reg
, clearing_reg
);
19201 seq
= get_insns ();
19203 emit_insn_before (seq
, insn
);
19205 /* The AAPCS requires the callee to widen integral types narrower
19206 than 32 bits to the full width of the register; but when handling
19207 calls to non-secure space, we cannot trust the callee to have
19208 correctly done so. So forcibly re-widen the result here. */
19209 tree ret_type
= TREE_TYPE (fntype
);
19210 if ((TREE_CODE (ret_type
) == INTEGER_TYPE
19211 || TREE_CODE (ret_type
) == ENUMERAL_TYPE
19212 || TREE_CODE (ret_type
) == BOOLEAN_TYPE
)
19213 && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type
)), 4))
19215 rtx ret_reg
= gen_rtx_REG (TYPE_MODE (ret_type
), R0_REGNUM
);
19216 rtx si_reg
= gen_rtx_REG (SImode
, R0_REGNUM
);
19218 if (TYPE_UNSIGNED (ret_type
))
19219 extend
= gen_rtx_SET (si_reg
, gen_rtx_ZERO_EXTEND (SImode
,
19223 /* Signed-extension is a special case because of
19224 thumb1_extendhisi2. */
19226 && known_eq (GET_MODE_SIZE (TYPE_MODE (ret_type
)), 2))
19227 extend
= gen_thumb1_extendhisi2 (si_reg
, ret_reg
);
19229 extend
= gen_rtx_SET (si_reg
,
19230 gen_rtx_SIGN_EXTEND (SImode
,
19233 emit_insn_after (extend
, insn
);
19237 if (TARGET_HAVE_FPCXT_CMSE
)
19239 rtx_insn
*last
, *pop_insn
, *after
= insn
;
19243 /* Lazy load multiple done as part of libcall in Armv8-M. */
19246 rtx imm
= gen_int_mode (lazy_store_stack_frame_size
, SImode
);
19247 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx
));
19248 rtx_insn
*add_insn
=
19249 emit_insn (gen_addsi3 (stack_pointer_rtx
,
19250 stack_pointer_rtx
, imm
));
19251 if (!frame_pointer_needed
)
19252 arm_add_cfa_adjust_cfa_note (add_insn
,
19253 lazy_store_stack_frame_size
,
19255 stack_pointer_rtx
);
19257 /* Restore VFP callee-saved registers. */
19260 int nb_callee_saved_vfp_regs
=
19261 (max_fp_regno
- D7_VFP_REGNUM
) / 2;
19262 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM
+ 1,
19263 nb_callee_saved_vfp_regs
,
19264 stack_pointer_rtx
);
19265 /* Disable frame debug info in vpop because the SP adjustment
19266 is made using a CFA adjustment note while CFA used is
19267 sometimes R7. This then causes an assert failure in the
19268 CFI note creation code. */
19269 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19272 arm_emit_multi_reg_pop (callee_saved_mask
);
19273 pop_insn
= get_last_insn ();
19275 /* Disable frame debug info in pop because they reset the state
19276 of popped registers to what it was at the beginning of the
19277 function, before the prologue. This leads to incorrect state
19278 when doing the pop after the nonsecure call for registers that
19279 are pushed both in prologue and before the nonsecure call.
19281 It also occasionally triggers an assert failure in CFI note
19282 creation code when there are two codepaths to the epilogue,
19283 one of which does not go through the nonsecure call.
19284 Obviously this means that debugging between the push and pop is
19286 RTX_FRAME_RELATED_P (pop_insn) = 0;
19288 seq
= get_insns ();
19289 last
= get_last_insn ();
19292 emit_insn_after (seq
, after
);
19294 /* Skip the pop we have just inserted after the nonsecure call; we know
19295 it does not contain a nonsecure call. */
19302 /* Rewrite a move insn into a subtract of 0 if the condition codes will
19303 be useful in the next conditional jump insn. */
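/* For example (illustrative): when "mov r1, r0" is followed in the same
   block by a cbranchsi4_insn comparing r1 (or r0) against zero, the move is
   rewritten as "subs r1, r0, #0" and the branch is redirected to test r1,
   so the conditional branch can rely on the flags the subtract sets.  */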
19306 thumb1_reorg (void)
19310 FOR_EACH_BB_FN (bb
, cfun
)
19313 rtx cmp
, op0
, op1
, set
= NULL
;
19314 rtx_insn
*prev
, *insn
= BB_END (bb
);
19315 bool insn_clobbered
= false;
19317 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
19318 insn
= PREV_INSN (insn
);
19320 /* Find the last cbranchsi4_insn in basic block BB. */
19321 if (insn
== BB_HEAD (bb
)
19322 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
19325 /* Get the register with which we are comparing. */
19326 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
19327 op0
= XEXP (cmp
, 0);
19328 op1
= XEXP (cmp
, 1);
19330 /* Check that comparison is against ZERO. */
19331 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
19334 /* Find the first flag setting insn before INSN in basic block BB. */
19335 gcc_assert (insn
!= BB_HEAD (bb
));
19336 for (prev
= PREV_INSN (insn
);
19338 && prev
!= BB_HEAD (bb
)
19340 || DEBUG_INSN_P (prev
)
19341 || ((set
= single_set (prev
)) != NULL
19342 && get_attr_conds (prev
) == CONDS_NOCOND
)));
19343 prev
= PREV_INSN (prev
))
19345 if (reg_set_p (op0
, prev
))
19346 insn_clobbered
= true;
19349 /* Skip if op0 is clobbered by insn other than prev. */
19350 if (insn_clobbered
)
19356 dest
= SET_DEST (set
);
19357 src
= SET_SRC (set
);
19358 if (!low_register_operand (dest
, SImode
)
19359 || !low_register_operand (src
, SImode
))
19362 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19363 in INSN. Both src and dest of the move insn are checked. */
19364 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
19366 dest
= copy_rtx (dest
);
19367 src
= copy_rtx (src
);
19368 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
19369 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
19370 INSN_CODE (prev
) = -1;
19371 /* Set test register in INSN to dest. */
19372 XEXP (cmp
, 0) = copy_rtx (dest
);
19373 INSN_CODE (insn
) = -1;
19378 /* Convert instructions to their cc-clobbering variant if possible, since
19379 that allows us to use smaller encodings. */
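/* For example (illustrative): a plain "add r0, r0, r1" needs a 32-bit
   Thumb-2 encoding, but when the condition codes are dead at that point it
   can be converted to the flag-setting "adds r0, r0, r1", which has a
   16-bit encoding.  */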
19382 thumb2_reorg (void)
19387 INIT_REG_SET (&live
);
19389 /* We are freeing block_for_insn in the toplev to keep compatibility
19390 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19391 compute_bb_for_insn ();
19394 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
19396 FOR_EACH_BB_FN (bb
, cfun
)
19398 if ((current_tune
->disparage_flag_setting_t16_encodings
19399 == tune_params::DISPARAGE_FLAGS_ALL
)
19400 && optimize_bb_for_speed_p (bb
))
19404 Convert_Action action
= SKIP
;
19405 Convert_Action action_for_partial_flag_setting
19406 = ((current_tune
->disparage_flag_setting_t16_encodings
19407 != tune_params::DISPARAGE_FLAGS_NEITHER
)
19408 && optimize_bb_for_speed_p (bb
))
19411 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
19412 df_simulate_initialize_backwards (bb
, &live
);
19413 FOR_BB_INSNS_REVERSE (bb
, insn
)
19415 if (NONJUMP_INSN_P (insn
)
19416 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
19417 && GET_CODE (PATTERN (insn
)) == SET
)
19420 rtx pat
= PATTERN (insn
);
19421 rtx dst
= XEXP (pat
, 0);
19422 rtx src
= XEXP (pat
, 1);
19423 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
19425 if (UNARY_P (src
) || BINARY_P (src
))
19426 op0
= XEXP (src
, 0);
19428 if (BINARY_P (src
))
19429 op1
= XEXP (src
, 1);
19431 if (low_register_operand (dst
, SImode
))
19433 switch (GET_CODE (src
))
19436 /* Adding two registers and storing the result
19437 in the first source is already a 16-bit
19439 if (rtx_equal_p (dst
, op0
)
19440 && register_operand (op1
, SImode
))
19443 if (low_register_operand (op0
, SImode
))
19445 /* ADDS <Rd>,<Rn>,<Rm> */
19446 if (low_register_operand (op1
, SImode
))
19448 /* ADDS <Rdn>,#<imm8> */
19449 /* SUBS <Rdn>,#<imm8> */
19450 else if (rtx_equal_p (dst
, op0
)
19451 && CONST_INT_P (op1
)
19452 && IN_RANGE (INTVAL (op1
), -255, 255))
19454 /* ADDS <Rd>,<Rn>,#<imm3> */
19455 /* SUBS <Rd>,<Rn>,#<imm3> */
19456 else if (CONST_INT_P (op1
)
19457 && IN_RANGE (INTVAL (op1
), -7, 7))
19460 /* ADCS <Rd>, <Rn> */
19461 else if (GET_CODE (XEXP (src
, 0)) == PLUS
19462 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
19463 && low_register_operand (XEXP (XEXP (src
, 0), 1),
19465 && COMPARISON_P (op1
)
19466 && cc_register (XEXP (op1
, 0), VOIDmode
)
19467 && maybe_get_arm_condition_code (op1
) == ARM_CS
19468 && XEXP (op1
, 1) == const0_rtx
)
19473 /* RSBS <Rd>,<Rn>,#0
19474 Not handled here: see NEG below. */
19475 /* SUBS <Rd>,<Rn>,#<imm3>
19477 Not handled here: see PLUS above. */
19478 /* SUBS <Rd>,<Rn>,<Rm> */
19479 if (low_register_operand (op0
, SImode
)
19480 && low_register_operand (op1
, SImode
))
19485 /* MULS <Rdm>,<Rn>,<Rdm>
19486 As an exception to the rule, this is only used
19487 when optimizing for size since MULS is slow on all
19488 known implementations. We do not even want to use
19489 MULS in cold code, if optimizing for speed, so we
19490 test the global flag here. */
19491 if (!optimize_size
)
19493 /* Fall through. */
19497 /* ANDS <Rdn>,<Rm> */
19498 if (rtx_equal_p (dst
, op0
)
19499 && low_register_operand (op1
, SImode
))
19500 action
= action_for_partial_flag_setting
;
19501 else if (rtx_equal_p (dst
, op1
)
19502 && low_register_operand (op0
, SImode
))
19503 action
= action_for_partial_flag_setting
== SKIP
19504 ? SKIP
: SWAP_CONV
;
19510 /* ASRS <Rdn>,<Rm> */
19511 /* LSRS <Rdn>,<Rm> */
19512 /* LSLS <Rdn>,<Rm> */
19513 if (rtx_equal_p (dst
, op0
)
19514 && low_register_operand (op1
, SImode
))
19515 action
= action_for_partial_flag_setting
;
19516 /* ASRS <Rd>,<Rm>,#<imm5> */
19517 /* LSRS <Rd>,<Rm>,#<imm5> */
19518 /* LSLS <Rd>,<Rm>,#<imm5> */
19519 else if (low_register_operand (op0
, SImode
)
19520 && CONST_INT_P (op1
)
19521 && IN_RANGE (INTVAL (op1
), 0, 31))
19522 action
= action_for_partial_flag_setting
;
19526 /* RORS <Rdn>,<Rm> */
19527 if (rtx_equal_p (dst
, op0
)
19528 && low_register_operand (op1
, SImode
))
19529 action
= action_for_partial_flag_setting
;
19533 /* MVNS <Rd>,<Rm> */
19534 if (low_register_operand (op0
, SImode
))
19535 action
= action_for_partial_flag_setting
;
19539 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19540 if (low_register_operand (op0
, SImode
))
19545 /* MOVS <Rd>,#<imm8> */
19546 if (CONST_INT_P (src
)
19547 && IN_RANGE (INTVAL (src
), 0, 255))
19548 action
= action_for_partial_flag_setting
;
19552 /* MOVS and MOV<c> with registers have different
19553 encodings, so are not relevant here. */
19561 if (action
!= SKIP
)
19563 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
19564 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
19567 if (action
== SWAP_CONV
)
19569 src
= copy_rtx (src
);
19570 XEXP (src
, 0) = op1
;
19571 XEXP (src
, 1) = op0
;
19572 pat
= gen_rtx_SET (dst
, src
);
19573 vec
= gen_rtvec (2, pat
, clobber
);
19575 else /* action == CONV */
19576 vec
= gen_rtvec (2, pat
, clobber
);
19578 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
19579 INSN_CODE (insn
) = -1;
19583 if (NONDEBUG_INSN_P (insn
))
19584 df_simulate_one_insn_backwards (bb
, insn
, &live
);
19588 CLEAR_REG_SET (&live
);
19591 /* GCC puts the pool in the wrong place for ARM, since we can only
19592 load addresses a limited distance around the pc. We do some
19593 special munging to move the constant pool values to the correct
19594 point in the code. */
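/* Illustrative sketch (label names are made up): a pc-relative load such as

       ldr     r0, .LPOOL          @ load a 32-bit constant
       ...
       b       .LSKIP              @ branch around the pool
   .LPOOL:
       .word   0x12345678          @ minipool entry
   .LSKIP:

   can only reach a limited offset from the pc, so the code below records
   every such reference and places the pools (and the barriers/jumps around
   them) close enough to each referencing insn.  */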
19599 HOST_WIDE_INT address
= 0;
19603 cmse_nonsecure_call_inline_register_clear ();
19605 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19606 if (cfun
->is_thunk
)
19608 else if (TARGET_THUMB1
)
19610 else if (TARGET_THUMB2
)
19613 /* Ensure all insns that must be split have been split at this point.
19614 Otherwise, the pool placement code below may compute incorrect
19615 insn lengths. Note that when optimizing, all insns have already
19616 been split at this point. */
19618 split_all_insns_noflow ();
19620 /* Make sure we do not attempt to create a literal pool even though it should
19621 no longer be necessary to create any. */
19622 if (arm_disable_literal_pool
)
19625 minipool_fix_head
= minipool_fix_tail
= NULL
;
19627 /* The first insn must always be a note, or the code below won't
19628 scan it properly. */
19629 insn
= get_insns ();
19630 gcc_assert (NOTE_P (insn
));
19633 /* Scan all the insns and record the operands that will need fixing. */
19634 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
19636 if (BARRIER_P (insn
))
19637 push_minipool_barrier (insn
, address
);
19638 else if (INSN_P (insn
))
19640 rtx_jump_table_data
*table
;
19642 note_invalid_constants (insn
, address
, true);
19643 address
+= get_attr_length (insn
);
19645 /* If the insn is a vector jump, add the size of the table
19646 and skip the table. */
19647 if (tablejump_p (insn
, NULL
, &table
))
19649 address
+= get_jump_table_size (table
);
19653 else if (LABEL_P (insn
))
19654 /* Add the worst-case padding due to alignment. We don't add
19655 the _current_ padding because the minipool insertions
19656 themselves might change it. */
19657 address
+= get_label_padding (insn
);
19660 fix
= minipool_fix_head
;
19662 /* Now scan the fixups and perform the required changes. */
19667 Mfix
* last_added_fix
;
19668 Mfix
* last_barrier
= NULL
;
19671 /* Skip any further barriers before the next fix. */
19672 while (fix
&& BARRIER_P (fix
->insn
))
19675 /* No more fixes. */
19679 last_added_fix
= NULL
;
19681 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
19683 if (BARRIER_P (ftmp
->insn
))
19685 if (ftmp
->address
>= minipool_vector_head
->max_address
)
19688 last_barrier
= ftmp
;
19690 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
19693 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
19696 /* If we found a barrier, drop back to that; any fixes that we
19697 could have reached but come after the barrier will now go in
19698 the next mini-pool. */
19699 if (last_barrier
!= NULL
)
19701 /* Reduce the refcount for those fixes that won't go into this
19703 for (fdel
= last_barrier
->next
;
19704 fdel
&& fdel
!= ftmp
;
19707 fdel
->minipool
->refcount
--;
19708 fdel
->minipool
= NULL
;
19711 ftmp
= last_barrier
;
19715 /* ftmp is the first fix that we can't fit into this pool and
19716 there are no natural barriers that we could use. Insert a
19717 new barrier in the code somewhere between the previous
19718 fix and this one, and arrange to jump around it. */
19719 HOST_WIDE_INT max_address
;
19721 /* The last item on the list of fixes must be a barrier, so
19722 we can never run off the end of the list of fixes without
19723 last_barrier being set. */
19726 max_address
= minipool_vector_head
->max_address
;
19727 /* Check that there isn't another fix that is in range that
19728 we couldn't fit into this pool because the pool was
19729 already too large: we need to put the pool before such an
19730 instruction. The pool itself may come just after the
19731 fix because create_fix_barrier also allows space for a
19732 jump instruction. */
19733 if (ftmp
->address
< max_address
)
19734 max_address
= ftmp
->address
+ 1;
19736 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
19739 assign_minipool_offsets (last_barrier
);
19743 if (!BARRIER_P (ftmp
->insn
)
19744 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
19751 /* Scan over the fixes we have identified for this pool, fixing them
19752 up and adding the constants to the pool itself. */
19753 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
19754 this_fix
= this_fix
->next
)
19755 if (!BARRIER_P (this_fix
->insn
))
19758 = plus_constant (Pmode
,
19759 gen_rtx_LABEL_REF (VOIDmode
,
19760 minipool_vector_label
),
19761 this_fix
->minipool
->offset
);
19762 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
19765 dump_minipool (last_barrier
->insn
);
19769 /* From now on we must synthesize any constants that we can't handle
19770 directly. This can happen if the RTL gets split during final
19771 instruction generation. */
19772 cfun
->machine
->after_arm_reorg
= 1;
19774 /* Free the minipool memory. */
19775 obstack_free (&minipool_obstack
, minipool_startobj
);
19778 /* Routines to output assembly language. */
19780 /* OPERANDS[0] is the entire list of insns that constitute pop,
19781 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
19782 is in the list, UPDATE is true iff the list contains explicit
19783 update of base register. */
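/* For example (illustrative): with SP as the base register and an explicit
   update, a register list of {r4, r5, pc} is printed as "pop {r4, r5, pc}";
   with another base register such as r3 it comes out as an ldm-style
   instruction, e.g. "ldmia r3, {r4, r5, pc}".  */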
19785 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19791 const char *conditional
;
19792 int num_saves
= XVECLEN (operands
[0], 0);
19793 unsigned int regno
;
19794 unsigned int regno_base
= REGNO (operands
[1]);
19795 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
19798 offset
+= update
? 1 : 0;
19799 offset
+= return_pc
? 1 : 0;
19801 /* Is the base register in the list? */
19802 for (i
= offset
; i
< num_saves
; i
++)
19804 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
19805 /* If SP is in the list, then the base register must be SP. */
19806 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
19807 /* If base register is in the list, there must be no explicit update. */
19808 if (regno
== regno_base
)
19809 gcc_assert (!update
);
19812 conditional
= reverse
? "%?%D0" : "%?%d0";
19813 /* Can't use POP if returning from an interrupt. */
19814 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
19815 sprintf (pattern
, "pop%s\t{", conditional
);
19818 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19819 It's just a convention, their semantics are identical. */
19820 if (regno_base
== SP_REGNUM
)
19821 sprintf (pattern
, "ldmfd%s\t", conditional
);
19823 sprintf (pattern
, "ldmia%s\t", conditional
);
19825 sprintf (pattern
, "ldm%s\t", conditional
);
19827 strcat (pattern
, reg_names
[regno_base
]);
19829 strcat (pattern
, "!, {");
19831 strcat (pattern
, ", {");
19834 /* Output the first destination register. */
19836 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
19838 /* Output the rest of the destination registers. */
19839 for (i
= offset
+ 1; i
< num_saves
; i
++)
19841 strcat (pattern
, ", ");
19843 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
19846 strcat (pattern
, "}");
19848 if (interrupt_p
&& return_pc
)
19849 strcat (pattern
, "^");
19851 output_asm_insn (pattern
, &cond
);
19855 /* Output the assembly for a store multiple. */
19858 vfp_output_vstmd (rtx
* operands
)
19864 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
19865 ? XEXP (operands
[0], 0)
19866 : XEXP (XEXP (operands
[0], 0), 0);
19867 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
19870 strcpy (pattern
, "vpush%?.64\t{%P1");
19872 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
19874 p
= strlen (pattern
);
19876 gcc_assert (REG_P (operands
[1]));
19878 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
19879 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
19881 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
19883 strcpy (&pattern
[p
], "}");
19885 output_asm_insn (pattern
, operands
);
19890 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19891 number of bytes pushed. */
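/* Illustrative sketch: vfp_emit_fstmd (FIRST_VFP_REGNUM, 2) would push
   d0-d1 and return 16; on cores needing the ARM10 VFPr1 workaround below,
   an extra register pair is pushed, so two requested pairs become three and
   24 bytes are reported.  */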
19894 vfp_emit_fstmd (int base_reg, int count)
19901 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
19902 register pairs are stored by a store multiple insn. We avoid this
19903 by pushing an extra pair. */
19904 if (count
== 2 && !arm_arch6
)
19906 if (base_reg
== LAST_VFP_REGNUM
- 3)
19911 /* FSTMD may not store more than 16 doubleword registers at once. Split
19912 larger stores into multiple parts (up to a maximum of two, in
19917 /* NOTE: base_reg is an internal register number, so each D register
19919 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
19920 saved
+= vfp_emit_fstmd (base_reg
, 16);
19924 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
19925 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
19927 reg
= gen_rtx_REG (DFmode
, base_reg
);
19930 XVECEXP (par
, 0, 0)
19931 = gen_rtx_SET (gen_frame_mem
19933 gen_rtx_PRE_MODIFY (Pmode
,
19936 (Pmode
, stack_pointer_rtx
,
19939 gen_rtx_UNSPEC (BLKmode
,
19940 gen_rtvec (1, reg
),
19941 UNSPEC_PUSH_MULT
));
19943 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19944 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
19945 RTX_FRAME_RELATED_P (tmp
) = 1;
19946 XVECEXP (dwarf
, 0, 0) = tmp
;
19948 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
19949 RTX_FRAME_RELATED_P (tmp
) = 1;
19950 XVECEXP (dwarf
, 0, 1) = tmp
;
19952 for (i
= 1; i
< count
; i
++)
19954 reg
= gen_rtx_REG (DFmode
, base_reg
);
19956 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
19958 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
19959 plus_constant (Pmode
,
19963 RTX_FRAME_RELATED_P (tmp
) = 1;
19964 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
19967 par
= emit_insn (par
);
19968 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19969 RTX_FRAME_RELATED_P (par
) = 1;
19974 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
19975 has the cmse_nonsecure_call attribute and returns false otherwise. */
19978 detect_cmse_nonsecure_call (tree addr)
19983 tree fntype = TREE_TYPE (addr);
19984 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19985 TYPE_ATTRIBUTES (fntype)))
19991 /* Emit a call instruction with pattern PAT. ADDR is the address of
19992 the call target. */
19995 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19999 insn = emit_call_insn (pat);
20001 /* The PIC register is live on entry to VxWorks PIC PLT entries.
20002 If the call might use such an entry, add a use of the PIC register
20003 to the instruction's CALL_INSN_FUNCTION_USAGE. */
20004 if (TARGET_VXWORKS_RTP
20007 && SYMBOL_REF_P (addr
)
20008 && (SYMBOL_REF_DECL (addr
)
20009 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
20010 : !SYMBOL_REF_LOCAL_P (addr
)))
20012 require_pic_register (NULL_RTX
, false /*compute_now*/);
20013 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
20018 rtx fdpic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
20019 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), fdpic_reg
);
20022 if (TARGET_AAPCS_BASED
)
20024 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
20025 linker. We need to add an IP clobber to allow setting
20026 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
20027 is not needed since it's a fixed register. */
20028 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
20029 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
20033 /* Output a 'call' insn. */
20035 output_call (rtx *operands)
20037 gcc_assert (!arm_arch5t
); /* Patterns should call blx <reg> directly. */
20039 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
20040 if (REGNO (operands
[0]) == LR_REGNUM
)
20042 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
20043 output_asm_insn ("mov%?\t%0, %|lr", operands
);
20046 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
20048 if (TARGET_INTERWORK
|| arm_arch4t
)
20049 output_asm_insn ("bx%?\t%0", operands
);
20051 output_asm_insn ("mov%?\t%|pc, %0", operands
);
20056 /* Output a move from arm registers to arm registers of a long double
20057 OPERANDS[0] is the destination.
20058 OPERANDS[1] is the source. */
20060 output_mov_long_double_arm_from_arm (rtx *operands)
20062 /* We have to be careful here because the two might overlap. */
20063 int dest_start
= REGNO (operands
[0]);
20064 int src_start
= REGNO (operands
[1]);
20068 if (dest_start
< src_start
)
20070 for (i
= 0; i
< 3; i
++)
20072 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
20073 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
20074 output_asm_insn ("mov%?\t%0, %1", ops
);
20079 for (i
= 2; i
>= 0; i
--)
20081 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
20082 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
20083 output_asm_insn ("mov%?\t%0, %1", ops
);
20091 arm_emit_movpair (rtx dest, rtx src)
20093 /* If the src is an immediate, simplify it. */
20094 if (CONST_INT_P (src
))
20096 HOST_WIDE_INT val
= INTVAL (src
);
20097 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
20098 if ((val
>> 16) & 0x0000ffff)
20100 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
20102 GEN_INT ((val
>> 16) & 0x0000ffff));
20103 rtx_insn
*insn
= get_last_insn ();
20104 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
20108 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
20109 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
20110 rtx_insn
*insn
= get_last_insn ();
20111 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
20114 /* Output a move between double words. It must be REG<-MEM
20115 or MEM<-REG. */
20117 output_move_double (rtx *operands, bool emit, int *count)
20119 enum rtx_code code0
= GET_CODE (operands
[0]);
20120 enum rtx_code code1
= GET_CODE (operands
[1]);
20125 /* The only case when this might happen is when
20126 you are looking at the length of a DImode instruction
20127 that has an invalid constant in it. */
20128 if (code0
== REG
&& code1
!= MEM
)
20130 gcc_assert (!emit
);
20137 unsigned int reg0
= REGNO (operands
[0]);
20138 const bool can_ldrd
= TARGET_LDRD
&& (TARGET_THUMB2
|| (reg0
% 2 == 0));
20140 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
20142 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
20144 switch (GET_CODE (XEXP (operands
[1], 0)))
20151 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
20152 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
20154 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
20159 gcc_assert (can_ldrd
);
20161 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
20168 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
20170 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
20178 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
20180 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
20185 gcc_assert (can_ldrd
);
20187 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
20192 /* Autoincrement addressing modes should never have overlapping
20193 base and destination registers, and overlapping index registers
20194 are already prohibited, so this doesn't need to worry about
20196 otherops
[0] = operands
[0];
20197 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
20198 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
20200 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
20202 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
20204 /* Registers overlap so split out the increment. */
20207 gcc_assert (can_ldrd
);
20208 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
20209 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
20216 /* Use a single insn if we can.
20217 FIXME: IWMMXT allows offsets larger than ldrd can
20218 handle, fix these up with a pair of ldr. */
20221 || !CONST_INT_P (otherops
[2])
20222 || (INTVAL (otherops
[2]) > -256
20223 && INTVAL (otherops
[2]) < 256)))
20226 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
20232 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
20233 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
20243 /* Use a single insn if we can.
20244 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20245 fix these up with a pair of ldr. */
20248 || !CONST_INT_P (otherops
[2])
20249 || (INTVAL (otherops
[2]) > -256
20250 && INTVAL (otherops
[2]) < 256)))
20253 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
20259 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
20260 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
20270 /* We might be able to use ldrd %0, %1 here. However the range is
20271 different to ldr/adr, and it is broken on some ARMv7-M
20272 implementations. */
20273 /* Use the second register of the pair to avoid problematic
20275 otherops
[1] = operands
[1];
20277 output_asm_insn ("adr%?\t%0, %1", otherops
);
20278 operands
[1] = otherops
[0];
20282 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
20284 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
20291 /* ??? This needs checking for thumb2. */
20293 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
20294 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
20296 otherops
[0] = operands
[0];
20297 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
20298 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
20300 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
20302 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
20304 switch ((int) INTVAL (otherops
[2]))
20308 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
20314 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
20320 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
20324 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
20325 operands
[1] = otherops
[0];
20327 && (REG_P (otherops
[2])
20329 || (CONST_INT_P (otherops
[2])
20330 && INTVAL (otherops
[2]) > -256
20331 && INTVAL (otherops
[2]) < 256)))
20333 if (reg_overlap_mentioned_p (operands
[0],
20336 /* Swap base and index registers over to
20337 avoid a conflict. */
20338 std::swap (otherops
[1], otherops
[2]);
20340 /* If both registers conflict, it will usually
20341 have been fixed by a splitter. */
20342 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
20343 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
20347 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20348 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
20355 otherops
[0] = operands
[0];
20357 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
20362 if (CONST_INT_P (otherops
[2]))
20366 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
20367 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
20369 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20375 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20381 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
20388 return "ldrd%?\t%0, [%1]";
20390 return "ldmia%?\t%1, %M0";
20394 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
20395 /* Take care of overlapping base/data reg. */
20396 if (reg_mentioned_p (operands
[0], operands
[1]))
20400 output_asm_insn ("ldr%?\t%0, %1", otherops
);
20401 output_asm_insn ("ldr%?\t%0, %1", operands
);
20411 output_asm_insn ("ldr%?\t%0, %1", operands
);
20412 output_asm_insn ("ldr%?\t%0, %1", otherops
);
20422 /* Constraints should ensure this. */
20423 gcc_assert (code0
== MEM
&& code1
== REG
);
20424 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
20425 || (TARGET_ARM
&& TARGET_LDRD
));
20427 /* For TARGET_ARM the first source register of an STRD
20428 must be even. This is usually the case for double-word
20429 values but user assembly constraints can force an odd
20430 starting register. */
20431 bool allow_strd
= TARGET_LDRD
20432 && !(TARGET_ARM
&& (REGNO (operands
[1]) & 1) == 1);
20433 switch (GET_CODE (XEXP (operands
[0], 0)))
20439 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
20441 output_asm_insn ("stm%?\t%m0, %M1", operands
);
20446 gcc_assert (allow_strd
);
20448 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
20455 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
20457 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
20465 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
20467 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
20472 gcc_assert (allow_strd
);
20474 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
20479 otherops
[0] = operands
[1];
20480 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
20481 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
20483 /* IWMMXT allows offsets larger than strd can handle,
20484 fix these up with a pair of str. */
20486 && CONST_INT_P (otherops
[2])
20487 && (INTVAL(otherops
[2]) <= -256
20488 || INTVAL(otherops
[2]) >= 256))
20490 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
20494 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
20495 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
20504 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
20505 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
20511 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
20514 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
20519 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
20524 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
20525 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
20527 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
20531 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
20538 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
20545 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
20550 && (REG_P (otherops
[2])
20552 || (CONST_INT_P (otherops
[2])
20553 && INTVAL (otherops
[2]) > -256
20554 && INTVAL (otherops
[2]) < 256)))
20556 otherops
[0] = operands
[1];
20557 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
20559 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
20565 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
20566 otherops
[1] = operands
[1];
20569 output_asm_insn ("str%?\t%1, %0", operands
);
20570 output_asm_insn ("str%?\t%H1, %0", otherops
);
20580 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20581 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20584 output_move_quad (rtx *operands)
20586 if (REG_P (operands
[0]))
20588 /* Load, or reg->reg move. */
20590 if (MEM_P (operands
[1]))
20592 switch (GET_CODE (XEXP (operands
[1], 0)))
20595 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
20600 output_asm_insn ("adr%?\t%0, %1", operands
);
20601 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
20605 gcc_unreachable ();
20613 gcc_assert (REG_P (operands
[1]));
20615 dest
= REGNO (operands
[0]);
20616 src
= REGNO (operands
[1]);
20618 /* This seems pretty dumb, but hopefully GCC won't try to do it
20621 for (i
= 0; i
< 4; i
++)
20623 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
20624 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
20625 output_asm_insn ("mov%?\t%0, %1", ops
);
20628 for (i
= 3; i
>= 0; i
--)
20630 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
20631 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
20632 output_asm_insn ("mov%?\t%0, %1", ops
);
20638 gcc_assert (MEM_P (operands
[0]));
20639 gcc_assert (REG_P (operands
[1]));
20640 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
20642 switch (GET_CODE (XEXP (operands
[0], 0)))
20645 output_asm_insn ("stm%?\t%m0, %M1", operands
);
20649 gcc_unreachable ();
20656 /* Output a VFP load or store instruction. */
20659 output_move_vfp (rtx *operands)
20661 rtx reg
, mem
, addr
, ops
[2];
20662 int load
= REG_P (operands
[0]);
20663 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
20664 int sp
= (!TARGET_VFP_FP16INST
20665 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
20666 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
20671 reg
= operands
[!load
];
20672 mem
= operands
[load
];
20674 mode
= GET_MODE (reg
);
20676 gcc_assert (REG_P (reg
));
20677 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
20678 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
20684 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
20685 gcc_assert (MEM_P (mem
));
20687 addr
= XEXP (mem
, 0);
20689 switch (GET_CODE (addr
))
20692 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20693 ops
[0] = XEXP (addr
, 0);
20698 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20699 ops
[0] = XEXP (addr
, 0);
20704 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
20710 sprintf (buff
, templ
,
20711 load
? "ld" : "st",
20712 dp
? "64" : sp
? "32" : "16",
20714 integer_p
? "\t%@ int" : "");
20715 output_asm_insn (buff
, ops
);
20720 /* Output a Neon double-word or quad-word load or store, or a load
20721 or store for larger structure modes.
20723 WARNING: The ordering of elements is weird in big-endian mode,
20724 because the EABI requires that vectors stored in memory appear
20725 as though they were stored by a VSTM, as required by the EABI.
20726 GCC RTL defines element ordering based on in-memory order.
20727 This can be different from the architectural ordering of elements
20728 within a NEON register. The intrinsics defined in arm_neon.h use the
20729 NEON register element ordering, not the GCC RTL element ordering.
20731 For example, the in-memory ordering of a big-endian quadword
20732 vector with 16-bit elements when stored from register pair {d0,d1}
20733 will be (lowest address first, d0[N] is NEON register element N):
20735 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20737 When necessary, quadword registers (dN, dN+1) are moved to ARM
20738 registers from rN in the order:
20740 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20742 So that STM/LDM can be used on vectors in ARM registers, and the
20743 same memory layout will result as if VSTM/VLDM were used.
20745 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20746 possible, which allows use of appropriate alignment tags.
20747 Note that the choice of "64" is independent of the actual vector
20748 element size; this size simply ensures that the behavior is
20749 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20751 Due to limitations of those instructions, use of VST1.64/VLD1.64
20752 is not possible if:
20753 - the address contains PRE_DEC, or
20754 - the mode refers to more than 4 double-word registers
20756 In those cases, it would be possible to replace VSTM/VLDM by a
20757 sequence of instructions; this is not currently implemented since
20758 this is not certain to actually improve performance. */
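/* For instance (illustrative; the exact operand printing is done by the
   %h/%A/%m codes used below): a mode occupying four D registers loaded from
   a plain register address is expected to become "vld1.64 {d0-d3}, [r0]",
   while a mode needing more than four D registers (or any multi-register
   MVE mode) falls back to "vldmia r0, {d0-d5}".  */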
20761 output_move_neon (rtx *operands)
20763 rtx reg
, mem
, addr
, ops
[2];
20764 int regno
, nregs
, load
= REG_P (operands
[0]);
20769 reg
= operands
[!load
];
20770 mem
= operands
[load
];
20772 mode
= GET_MODE (reg
);
20774 gcc_assert (REG_P (reg
));
20775 regno
= REGNO (reg
);
20776 nregs
= REG_NREGS (reg
) / 2;
20777 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
20778 || NEON_REGNO_OK_FOR_QUAD (regno
));
20779 gcc_assert ((TARGET_NEON
20780 && (VALID_NEON_DREG_MODE (mode
)
20781 || VALID_NEON_QREG_MODE (mode
)
20782 || VALID_NEON_STRUCT_MODE (mode
)))
20783 || (TARGET_HAVE_MVE
20784 && (VALID_MVE_MODE (mode
)
20785 || VALID_MVE_STRUCT_MODE (mode
))));
20786 gcc_assert (MEM_P (mem
));
20788 addr
= XEXP (mem
, 0);
20790 /* Strip off const from addresses like (const (plus (...))). */
20791 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
20792 addr
= XEXP (addr
, 0);
20794 switch (GET_CODE (addr
))
20797 /* We have to use vldm / vstm for too-large modes. */
20798 if (nregs
> 4 || (TARGET_HAVE_MVE
&& nregs
>= 2))
20800 templ
= "v%smia%%?\t%%0!, %%h1";
20801 ops
[0] = XEXP (addr
, 0);
20805 templ
= "v%s1.64\t%%h1, %%A0";
20812 /* We have to use vldm / vstm in this case, since there is no
20813 pre-decrement form of the vld1 / vst1 instructions. */
20814 templ
= "v%smdb%%?\t%%0!, %%h1";
20815 ops
[0] = XEXP (addr
, 0);
20820 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20821 gcc_unreachable ();
20824 /* We have to use vldm / vstm for too-large modes. */
20827 if (nregs
> 4 || (TARGET_HAVE_MVE
&& nregs
>= 2))
20828 templ
= "v%smia%%?\t%%m0, %%h1";
20830 templ
= "v%s1.64\t%%h1, %%A0";
20836 /* Fall through. */
20838 if (GET_CODE (addr
) == PLUS
)
20839 addr
= XEXP (addr
, 0);
20840 /* Fall through. */
20845 for (i
= 0; i
< nregs
; i
++)
20847 /* Use DFmode for vldr/vstr. */
20848 ops
[0] = gen_rtx_REG (DFmode
, REGNO (reg
) + 2 * i
);
20849 ops
[1] = adjust_address_nv (mem
, DFmode
, 8 * i
);
20850 if (reg_overlap_mentioned_p (ops
[0], mem
))
20852 gcc_assert (overlap
== -1);
20857 if (TARGET_HAVE_MVE
&& LABEL_REF_P (addr
))
20858 sprintf (buff
, "v%sr.64\t%%P0, %%1", load
? "ld" : "st");
20860 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
20861 output_asm_insn (buff
, ops
);
20866 ops
[0] = gen_rtx_REG (DFmode
, REGNO (reg
) + 2 * overlap
);
20867 ops
[1] = adjust_address_nv (mem
, DFmode
, 8 * overlap
);
20868 if (TARGET_HAVE_MVE
&& LABEL_REF_P (addr
))
20869 sprintf (buff
, "v%sr.32\t%%P0, %%1", load
? "ld" : "st");
20871 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
20872 output_asm_insn (buff
, ops
);
20879 gcc_unreachable ();
20882 sprintf (buff
, templ
, load
? "ld" : "st");
20883 output_asm_insn (buff
, ops
);
20888 /* Compute and return the length of neon_mov<mode>, where <mode> is one of
20889 VSTRUCT modes: EI, OI, CI or XI for Neon, and V2x16QI, V2x8HI, V2x4SI,
20890 V2x8HF, V2x4SF, V4x16QI, V4x8HI, V4x4SI, V4x8HF, V4x4SF for MVE. */
20892 arm_attr_length_move_neon (rtx_insn *insn)
20894 rtx reg
, mem
, addr
;
20898 extract_insn_cached (insn
);
20900 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
20902 mode
= GET_MODE (recog_data
.operand
[0]);
20907 case E_V2x16QImode
:
20916 case E_V4x16QImode
:
20923 gcc_unreachable ();
20927 load
= REG_P (recog_data
.operand
[0]);
20928 reg
= recog_data
.operand
[!load
];
20929 mem
= recog_data
.operand
[load
];
20931 gcc_assert (MEM_P (mem
));
20933 addr
= XEXP (mem
, 0);
20935 /* Strip off const from addresses like (const (plus (...))). */
20936 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
20937 addr
= XEXP (addr
, 0);
20939 if (LABEL_REF_P (addr
) || GET_CODE (addr
) == PLUS
)
20941 int insns
= REG_NREGS (reg
) / 2;
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
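/* Worked example (approximate): adding the out-of-range constant 0x10004 to
   r1 is split into one instruction per non-zero 8-bit chunk, so the output
   would be roughly
	add	r0, r1, #4
	add	r0, r0, #65536
   using INSTR1 for the first chunk and INSTR2 for the rest.  */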
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;
    case ASHIFTRT:
      return "asr";
    case LSHIFTRT:
      return "lsr";
    case ROTATERT:
      return "ror";
    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
static const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";
    case MINUS:
      return shift_first_arg ? "rsb" : "sub";
    case IOR:
      return "orr";
    case XOR:
      return "eor";
    case AND:
      return "and";
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));
    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift amount otherwise.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
	*amountp = INTVAL (XEXP (op, 1));
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
	mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
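/* Illustrative mapping (approximate): a (mult x 8) operand is returned as
   the "lsl" mnemonic with *AMOUNTP set to exact_log2 (8) == 3, so the caller
   can print, e.g., "add r0, r1, r2, lsl #3".  A shift by a register instead
   leaves *AMOUNTP as -1 and only the mnemonic is used.  */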
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.cc.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
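/* Example of the effect (approximate): emitting the three bytes of "a\"b"
   produces
	.ascii	"a\"b"
   while a string longer than MAX_ASCII_LEN is split across several .ascii
   directives, with backslashes and quotes escaped and non-printable bytes
   printed as octal escapes such as "\012".  */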
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7, Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (reg_needs_saving_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
	  && crtl->uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
	      || crtl->uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* Once the value is updated from the init value of -1, do not
     re-compute.  */
  if (cfun->machine->static_chain_stack_bytes != -1)
    return cfun->machine->static_chain_stack_bytes;

  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
	  || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	       || flag_stack_clash_protection)
	      && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask =
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  if (arm_current_function_pac_enabled_p ())
    save_reg_mask |= 1 << IP_REGNUM;

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes ()))
	  % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 - r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_or_fixed_reg_p (reg))
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  bool call_clobbered_scratch
    = (thumb1_prologue_unused_call_clobbered_lo_regs ()
       && thumb1_epilogue_unused_call_clobbered_lo_regs ());

  /* Make sure we have a low work register if we need one.  We will
     need one if we are going to push a high register, but we are not
     currently intending to push a low register.  However if both the
     prologue and epilogue have a spare call-clobbered low register,
     then we won't need to find an additional work register.  It does
     not need to be the same register in the prologue and
     epilogue.  */
  if ((mask & 0xff) == 0
      && !call_clobbered_scratch
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	/* Make sure we have a register available for stack decrement.  */
	mask |= 1 << LAST_LO_REGNUM;
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_VFP_BASE)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If SIMPLE_RETURN is true,
   then do not output the epilogue, because it has already been emitted in RTL.

   Note: do not forget to update the length attribute of the corresponding
   insn pattern when changing assembly output (e.g. the length attribute of
   thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
   register clearing sequences).  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && !IS_CMSE_ENTRY (func_type)
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5t && TARGET_ARM)
		sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		}
	    }
	  /* For interrupt returns we have to use an LDM rather than
	     a POP so that we can use the exception return variant.  */
	  else if (IS_INTERRUPT (func_type))
	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
	  else
	    sprintf (instr, "pop%s\t{", conditional);

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  gcc_assert (arm_arch5t || arm_arch4t);
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  if (IS_CMSE_ENTRY (func_type))
	    {
	      /* For Armv8.1-M, this is cleared as part of the CLRM instruction
		 emitted by cmse_nonsecure_entry_clear_before_return () and the
		 VSTR/VLDR instructions in the prologue and epilogue.  */
	      if (!TARGET_HAVE_FPCXT_CMSE)
		{
		  /* Check if we have to clear the 'GE bits' which is only used if
		     parallel add and subtraction instructions are available.  */
		  if (TARGET_INT_SIMD)
		    snprintf (instr, sizeof (instr),
			      "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
		  else
		    snprintf (instr, sizeof (instr),
			      "msr%s\tAPSR_nzcvq, %%|lr", conditional);

		  output_asm_insn (instr, & operand);
		  /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
		     care of it.  */
		  if (TARGET_HARD_FLOAT)
		    {
		      /* Clear the cumulative exception-status bits (0-4,7) and
			 the condition code bits (28-31) of the FPSCR.  We need
			 to remember to clear the first scratch register used
			 (IP) and save and restore the second (r4).

			 Important note: the length of the
			 thumb2_cmse_entry_return insn pattern must account for
			 the size of the below instructions.  */
		      output_asm_insn ("push\t{%|r4}", & operand);
		      output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
		      output_asm_insn ("movw\t%|r4, #65376", & operand);
		      output_asm_insn ("movt\t%|r4, #4095", & operand);
		      output_asm_insn ("and\t%|ip, %|r4", & operand);
		      output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
		      output_asm_insn ("pop\t{%|r4}", & operand);
		      output_asm_insn ("mov\t%|ip, %|lr", & operand);
		    }
		}
	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
	    }
	  /* Use bx if it's available.  */
	  else if (arm_arch5t || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
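/* Typical emitted sequences (approximate, depending on the saved-register
   mask and function type):
	pop	{r4, r5, pc}		@ normal return via the register list
	ldmfd	sp!, {r4, r5, pc}^	@ interrupt return, also restoring CPSR
	bx	lr			@ nothing to pop, ARMv4T+ return
   CMSE entry functions instead clear state and return with "bxns lr".  */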
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
				    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_FUNCTION_LABEL (file, name, decl);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
	       (HOST_WIDE_INT) crtl->args.size,
	       crtl->args.pretend_args_size,
	       (HOST_WIDE_INT) get_frame_size ());

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  int regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	int regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  ;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (i == 0)
	  {
	    rtx insn;

	    /* The first pair also allocates the stack.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    /* Subsequent pairs use offset addressing.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);
	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;
}
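/* Illustrative result (approximate): pushing {r4, r5, r6, r7, r8} (an odd
   count) is emitted roughly as
	str	r4, [sp, #-20]!
	strd	r5, r6, [sp, #4]
	strd	r7, r8, [sp, #12]
   i.e. one initial STR that also allocates the stack space, followed by
   64-bit aligned STRDs; exact registers and offsets depend on the mask.  */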
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores give more scheduling
   freedom and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

       (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  /* NOTE: Dwarf code emitter handle reg-reg copies correctly and in the
	     following example reg-reg copy of SP to IP register is handled
	     through .cfi_def_cfa_register directive and the .cfi_offset
	     directive for IP register is skipped by dwarf code emitter.
	     Example:
		mov	ip, sp
		.cfi_def_cfa_register 12
		push	{fp, ip, lr, pc}
		.cfi_offset 11, -16
		.cfi_offset 13, -12

	     Whereas Arm-specific .save directive handling is different to that
	     of the dwarf code emitter and it doesn't consider reg-reg copies
	     while updating the register list.  When PACBTI is enabled we
	     manually update the .save directive register list to use
	     "ra_auth_code" (pseudo register 143) instead of the IP register,
	     as shown in the following pseudo code.
	     Example:
		.cfi_register 143, 12
		push	{r3, r7, ip, lr}
		.save {r3, r7, ra_auth_code, lr}  */
	  rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	  if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	    dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
				 dwarf_reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	  if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	    dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       dwarf_reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	  dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
					      NULL_RTX);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
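/* Illustrative note (approximate): for SAVED_REGS_MASK covering {r4, r5, lr}
   the parallel built above is matched by the pop_multi pattern and is
   typically printed as a single POP/LDM, e.g.
	pop	{r4, r5, lr}
   with one REG_CFA_RESTORE note per restored register and a single
   stack-pointer adjustment of 4 * num_regs.  */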
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as an LDRD pattern.  If
   an even number of registers is being popped, multiple LDRD patterns are
   created for all register pairs.  If an odd number of registers is popped,
   the last register is loaded by using an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
			       plus_constant (Pmode,
					      stack_pointer_rtx, 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
		 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the registers
	       to be loaded are generated in above given LDRD pattern, and the
	       pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using a load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   a peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped, and
	       we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      dwarf);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
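/* Illustrative result (approximate): popping {r4, r5, r6} in ARM mode is
   emitted roughly as
	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
   i.e. offset-addressed loads followed by one stack update (plus a final
   "ldr pc, [sp], #4" when the function returns via PC).  */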
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!crtl->is_leaf
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx_insn *call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
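/* Worked example (illustrative, hypothetical frame; assumes no caller
   interworking slot): with no pretend args, {r4, r5, r6, lr} saved
   (16 bytes) and 8 bytes of locals, the layout code below gives
   saved_args = 0, saved_regs = 16, soft_frame = 16 (already doubleword
   aligned) and locals_base = 24; the ARG_POINTER_REGNUM ->
   FRAME_POINTER_REGNUM elimination then returns
   soft_frame - saved_args = 16.  All values are positive in the direction
   of stack growth, as the comment above states.  */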
23058 /* Return cached stack offsets. */
23060 static arm_stack_offsets
*
23061 arm_get_frame_offsets (void)
23063 struct arm_stack_offsets
*offsets
;
23065 offsets
= &cfun
->machine
->stack_offsets
;
23071 /* Calculate stack offsets. These are used to calculate register elimination
23072 offsets and in prologue/epilogue code. Also calculates which registers
23073 should be saved. */
23076 arm_compute_frame_layout (void)
23078 struct arm_stack_offsets
*offsets
;
23079 unsigned long func_type
;
23082 HOST_WIDE_INT frame_size
;
23085 offsets
= &cfun
->machine
->stack_offsets
;
  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
23089 frame_size
= ROUND_UP_WORD (get_frame_size ());
23091 /* Space for variadic functions. */
23092 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
23094 /* In Thumb mode this is incorrect, but never used. */
23096 = (offsets
->saved_args
23097 + arm_compute_static_chain_stack_bytes ()
23098 + (frame_pointer_needed
? 4 : 0));
23102 unsigned int regno
;
23104 offsets
->saved_regs_mask
= arm_compute_save_core_reg_mask ();
23105 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
23106 saved
= core_saved
;
23108 /* We know that SP will be doubleword aligned on entry, and we must
23109 preserve that condition at any subroutine call. We also require the
23110 soft frame pointer to be doubleword aligned. */
23112 if (TARGET_REALLY_IWMMXT
)
23114 /* Check for the call-saved iWMMXt registers. */
23115 for (regno
= FIRST_IWMMXT_REGNUM
;
23116 regno
<= LAST_IWMMXT_REGNUM
;
23118 if (reg_needs_saving_p (regno
))
23122 func_type
= arm_current_func_type ();
23123 /* Space for saved VFP registers. */
23124 if (! IS_VOLATILE (func_type
)
23125 && TARGET_VFP_BASE
)
23126 saved
+= arm_get_vfp_saved_size ();
      /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
	 nonsecure entry functions with VSTR/VLDR.  */
23130 if (TARGET_HAVE_FPCXT_CMSE
&& IS_CMSE_ENTRY (func_type
))
23133 else /* TARGET_THUMB1 */
23135 offsets
->saved_regs_mask
= thumb1_compute_save_core_reg_mask ();
23136 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
23137 saved
= core_saved
;
23138 if (TARGET_BACKTRACE
)
23142 /* Saved registers include the stack frame. */
23143 offsets
->saved_regs
23144 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
23145 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
23149 if (crtl
->is_leaf
&& frame_size
== 0
23150 /* However if it calls alloca(), we have a dynamically allocated
23151 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23152 && ! cfun
->calls_alloca
)
23154 offsets
->outgoing_args
= offsets
->soft_frame
;
23155 offsets
->locals_base
= offsets
->soft_frame
;
23159 /* Ensure SFP has the correct alignment. */
23160 if (ARM_DOUBLEWORD_ALIGN
23161 && (offsets
->soft_frame
& 7))
23163 offsets
->soft_frame
+= 4;
23164 /* Try to align stack by pushing an extra reg. Don't bother doing this
23165 when there is a stack frame as the alignment will be rolled into
23166 the normal stack adjustment. */
23167 if (frame_size
+ crtl
->outgoing_args_size
== 0)
          /* Register r3 is caller-saved.  Normally it does not need to be
             saved on entry by the prologue.  However if we choose to save
             it for padding then we may confuse the compiler into thinking
             a prologue sequence is required when in fact it is not.  This
             will occur when shrink-wrapping if r3 is used as a scratch
             register and there are no other callee-saved writes.

             This situation can be avoided when other callee-saved registers
             are available: r3 is not mandatory if we can choose a
             callee-saved register for the padding instead.  */
23181 bool prefer_callee_reg_p
= false;
23183 /* If it is safe to use r3, then do so. This sometimes
23184 generates better code on Thumb-2 by avoiding the need to
23185 use 32-bit push/pop instructions. */
23186 if (! any_sibcall_could_use_r3 ()
23187 && arm_size_return_regs () <= 12
23188 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
23190 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
23193 if (!TARGET_THUMB2
)
23194 prefer_callee_reg_p
= true;
23197 || prefer_callee_reg_p
)
23199 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
23201 /* Avoid fixed registers; they may be changed at
23202 arbitrary times so it's unsafe to restore them
23203 during the epilogue. */
23205 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
23215 offsets
->saved_regs
+= 4;
23216 offsets
->saved_regs_mask
|= (1 << reg
);
23221 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
23222 offsets
->outgoing_args
= (offsets
->locals_base
23223 + crtl
->outgoing_args_size
);
23225 if (ARM_DOUBLEWORD_ALIGN
)
23227 /* Ensure SP remains doubleword aligned. */
23228 if (offsets
->outgoing_args
& 7)
23229 offsets
->outgoing_args
+= 4;
23230 gcc_assert (!(offsets
->outgoing_args
& 7));
23235 /* Calculate the relative offsets for the different stack pointers. Positive
23236 offsets are in the direction of stack growth. */
23239 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
23241 arm_stack_offsets
*offsets
;
23243 offsets
= arm_get_frame_offsets ();
23245 /* OK, now we have enough information to compute the distances.
23246 There must be an entry in these switch tables for each pair
23247 of registers in ELIMINABLE_REGS, even if some of the entries
23248 seem to be redundant or useless. */
23251 case ARG_POINTER_REGNUM
:
23254 case THUMB_HARD_FRAME_POINTER_REGNUM
:
23257 case FRAME_POINTER_REGNUM
:
23258 /* This is the reverse of the soft frame pointer
23259 to hard frame pointer elimination below. */
23260 return offsets
->soft_frame
- offsets
->saved_args
;
23262 case ARM_HARD_FRAME_POINTER_REGNUM
:
23263 /* This is only non-zero in the case where the static chain register
23264 is stored above the frame. */
23265 return offsets
->frame
- offsets
->saved_args
- 4;
23267 case STACK_POINTER_REGNUM
:
23268 /* If nothing has been pushed on the stack at all
23269 then this will return -4. This *is* correct! */
23270 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
23273 gcc_unreachable ();
23275 gcc_unreachable ();
23277 case FRAME_POINTER_REGNUM
:
23280 case THUMB_HARD_FRAME_POINTER_REGNUM
:
23283 case ARM_HARD_FRAME_POINTER_REGNUM
:
23284 /* The hard frame pointer points to the top entry in the
23285 stack frame. The soft frame pointer to the bottom entry
23286 in the stack frame. If there is no stack frame at all,
23287 then they are identical. */
23289 return offsets
->frame
- offsets
->soft_frame
;
23291 case STACK_POINTER_REGNUM
:
23292 return offsets
->outgoing_args
- offsets
->soft_frame
;
23295 gcc_unreachable ();
23297 gcc_unreachable ();
23300 /* You cannot eliminate from the stack pointer.
23301 In theory you could eliminate from the hard frame
23302 pointer to the stack pointer, but this will never
23303 happen, since if a stack frame is not needed the
23304 hard frame pointer will never be used. */
23305 gcc_unreachable ();
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
23328 /* Emit RTL to save coprocessor registers on function entry. Returns the
23329 number of bytes pushed. */
23332 arm_save_coproc_regs(void)
23334 int saved_size
= 0;
23336 unsigned start_reg
;
23339 if (TARGET_REALLY_IWMMXT
)
23340 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
23341 if (reg_needs_saving_p (reg
))
23343 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23344 insn
= gen_rtx_MEM (V2SImode
, insn
);
23345 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
23346 RTX_FRAME_RELATED_P (insn
) = 1;
23350 if (TARGET_VFP_BASE
)
23352 start_reg
= FIRST_VFP_REGNUM
;
23354 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
23356 if (!reg_needs_saving_p (reg
) && !reg_needs_saving_p (reg
+ 1))
23358 if (start_reg
!= reg
)
23359 saved_size
+= vfp_emit_fstmd (start_reg
,
23360 (reg
- start_reg
) / 2);
23361 start_reg
= reg
+ 2;
23364 if (start_reg
!= reg
)
23365 saved_size
+= vfp_emit_fstmd (start_reg
,
23366 (reg
- start_reg
) / 2);
23372 /* Set the Thumb frame pointer from the stack pointer. */
23375 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
23377 HOST_WIDE_INT amount
;
23380 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
23382 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
23383 stack_pointer_rtx
, GEN_INT (amount
)));
23386 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
23387 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23388 expects the first two operands to be the same. */
23391 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
23393 hard_frame_pointer_rtx
));
23397 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
23398 hard_frame_pointer_rtx
,
23399 stack_pointer_rtx
));
23401 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
23402 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
23403 RTX_FRAME_RELATED_P (dwarf
) = 1;
23404 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23407 RTX_FRAME_RELATED_P (insn
) = 1;
23410 struct scratch_reg
{
/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This
   register must be released by means of release_scratch_register_on_entry.
   IP is not considered since it is always used as the 1st scratch register if
   available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */
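/* Illustrative usage sketch only (the real callers are below, e.g. in
   arm_emit_probe_stack_range); the REGNO1/LIVE_REGS values here are made up:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr, IP_REGNUM, live_regs_mask);
     ...use sr.reg as a second scratch register...
     release_scratch_register_on_entry (&sr);

   If the chosen register is live, the get/release pair spills and reloads it
   around the use via a push/pop on the stack.  */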
23424 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
23425 unsigned long live_regs
)
23431 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
23437 for (i
= 4; i
< 11; i
++)
23438 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
23446 /* If IP is used as the 1st scratch register for a nested function,
23447 then either r3 wasn't available or is used to preserve IP. */
23448 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
23450 regno
= (regno1
== 3 ? 2 : 3);
23452 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
23457 sr
->reg
= gen_rtx_REG (SImode
, regno
);
23460 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23461 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
23462 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
23463 plus_constant (Pmode
, stack_pointer_rtx
, -4));
23464 RTX_FRAME_RELATED_P (insn
) = 1;
23465 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
23469 /* Release a scratch register obtained from the preceding function. */
23472 release_scratch_register_on_entry (struct scratch_reg
*sr
)
23476 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
23477 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
23478 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
23479 plus_constant (Pmode
, stack_pointer_rtx
, 4));
23480 RTX_FRAME_RELATED_P (insn
) = 1;
23481 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
23485 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23487 #if PROBE_INTERVAL > 4096
23488 #error Cannot use indexed addressing mode for stack probing
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */
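/* Worked example (illustrative only; assumes the default PROBE_INTERVAL of
   4096 and ARM state): for FIRST == 4096 and SIZE == 10000 the unrolled case
   below probes at SP - 8192, then SP - 12288, and finally at
   SP - 14096 == SP - (FIRST + SIZE) to cover the 1808-byte remainder.  */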
23497 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
23498 unsigned int regno1
, unsigned long live_regs
)
23500 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
23502 /* See if we have a constant small number of probes to generate. If so,
23503 that's the easy case. */
23504 if (size
<= PROBE_INTERVAL
)
23506 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
23507 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
23508 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
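  /* Illustration (hypothetical SIZE): for a SIZE spanning n == 5 intervals
     the unrolled form costs 4 + 2*(5-2) = 10 insns, the break-even point
     against the generic loop, hence the 5 * PROBE_INTERVAL cut-off used
     below.  */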
23513 else if (size
<= 5 * PROBE_INTERVAL
)
23515 HOST_WIDE_INT i
, rem
;
23517 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
23518 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
23519 emit_stack_probe (reg1
);
23521 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23522 it exceeds SIZE. If only two probes are needed, this will not
23523 generate any code. Then probe at FIRST + SIZE. */
23524 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
23526 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
23527 emit_stack_probe (reg1
);
23530 rem
= size
- (i
- PROBE_INTERVAL
);
23531 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
23533 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
23534 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
23537 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
23540 /* Otherwise, do the same as above, but in a loop. Note that we must be
23541 extra careful with variables wrapping around because we might be at
23542 the very top (or the very bottom) of the address space and we have
23543 to be able to handle this case properly; in particular, we use an
23544 equality test for the loop condition. */
23547 HOST_WIDE_INT rounded_size
;
23548 struct scratch_reg sr
;
23550 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
23552 emit_move_insn (reg1
, GEN_INT (first
));
23555 /* Step 1: round SIZE to the previous multiple of the interval. */
23557 rounded_size
= size
& -PROBE_INTERVAL
;
23558 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
23561 /* Step 2: compute initial and final value of the loop counter. */
23563 /* TEST_ADDR = SP + FIRST. */
23564 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
23566 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23567 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */
23582 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
23585 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23586 that SIZE is equal to ROUNDED_SIZE. */
23588 if (size
!= rounded_size
)
23590 HOST_WIDE_INT rem
= size
- rounded_size
;
23592 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
23594 emit_set_insn (sr
.reg
,
23595 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
23596 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
23597 PROBE_INTERVAL
- rem
));
23600 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
23603 release_scratch_register_on_entry (&sr
);
23606 /* Make sure nothing is scheduled before we are done. */
23607 emit_insn (gen_blockage ());
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */
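/* Shape of the emitted loop (illustrative; the label number and the operand
   registers are whatever %0/%1 resolve to at output time, and the default
   4 KiB probe interval is assumed):

	.LPSRL0:
		sub	r4, r4, #4096		@ TEST_ADDR -= PROBE_INTERVAL
		str	r0, [r4, #0]		@ probe at TEST_ADDR
		cmp	r4, r5			@ reached LAST_ADDR yet?
		bne	.LPSRL0
*/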
23614 output_probe_stack_range (rtx reg1
, rtx reg2
)
23616 static int labelno
= 0;
23620 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
23623 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
23625 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23627 xops
[1] = GEN_INT (PROBE_INTERVAL
);
23628 output_asm_insn ("sub\t%0, %0, %1", xops
);
23630 /* Probe at TEST_ADDR. */
23631 output_asm_insn ("str\tr0, [%0, #0]", xops
);
23633 /* Test if TEST_ADDR == LAST_ADDR. */
23635 output_asm_insn ("cmp\t%0, %1", xops
);
23638 fputs ("\tbne\t", asm_out_file
);
23639 assemble_name_raw (asm_out_file
, loop_lab
);
23640 fputc ('\n', asm_out_file
);
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
23653 unsigned long live_regs_mask
;
23654 unsigned long func_type
;
23656 int saved_pretend_args
= 0;
23657 int saved_regs
= 0;
23658 unsigned HOST_WIDE_INT args_to_push
;
23659 HOST_WIDE_INT size
;
23660 arm_stack_offsets
*offsets
;
23663 func_type
= arm_current_func_type ();
23665 /* Naked functions don't have prologues. */
23666 if (IS_NAKED (func_type
))
23668 if (flag_stack_usage_info
)
23669 current_function_static_stack_size
= 0;
23673 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23674 args_to_push
= crtl
->args
.pretend_args_size
;
23676 /* Compute which register we will have to save onto the stack. */
23677 offsets
= arm_get_frame_offsets ();
23678 live_regs_mask
= offsets
->saved_regs_mask
;
23680 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
23682 /* The AAPCS requires the callee to widen integral types narrower
23683 than 32 bits to the full width of the register; but when handling
23684 calls to non-secure space, we cannot trust the callee to have
23685 correctly done so. So forcibly re-widen the result here. */
23686 if (IS_CMSE_ENTRY (func_type
))
23688 function_args_iterator args_iter
;
23689 CUMULATIVE_ARGS args_so_far_v
;
23690 cumulative_args_t args_so_far
;
23691 bool first_param
= true;
23693 tree fndecl
= current_function_decl
;
23694 tree fntype
= TREE_TYPE (fndecl
);
23695 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
23696 args_so_far
= pack_cumulative_args (&args_so_far_v
);
23697 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
23701 if (VOID_TYPE_P (arg_type
))
23704 function_arg_info
arg (arg_type
, /*named=*/true);
23706 /* We should advance after processing the argument and pass
23707 the argument we're advancing past. */
23708 arm_function_arg_advance (args_so_far
, arg
);
23709 first_param
= false;
23710 arg_rtx
= arm_function_arg (args_so_far
, arg
);
23711 gcc_assert (REG_P (arg_rtx
));
23712 if ((TREE_CODE (arg_type
) == INTEGER_TYPE
23713 || TREE_CODE (arg_type
) == ENUMERAL_TYPE
23714 || TREE_CODE (arg_type
) == BOOLEAN_TYPE
)
23715 && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx
)), 4))
23717 if (TYPE_UNSIGNED (arg_type
))
23718 emit_set_insn (gen_rtx_REG (SImode
, REGNO (arg_rtx
)),
23719 gen_rtx_ZERO_EXTEND (SImode
, arg_rtx
));
23721 emit_set_insn (gen_rtx_REG (SImode
, REGNO (arg_rtx
)),
23722 gen_rtx_SIGN_EXTEND (SImode
, arg_rtx
));
23727 if (IS_STACKALIGN (func_type
))
      /* Handle a word-aligned stack pointer.  We generate the following:

	    <save and restore r0 in normal prologue/epilogue>

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
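      /* A sketch of the realignment this block expands to (illustrative;
	 r0/r1 are the registers used by the code just below):

	      mov	r0, sp
	      bic	r1, r0, #7	@ round SP down to an 8-byte boundary
	      mov	sp, r1
	      ...function body, with r0 saved and restored by the normal
		 prologue/epilogue...

	 and the epilogue restores the original SP from r0.  */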
23742 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
23744 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
23745 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
23747 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
23748 RTX_FRAME_RELATED_P (insn
) = 1;
23749 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
23751 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
23753 /* ??? The CFA changes here, which may cause GDB to conclude that it
23754 has entered a different function. That said, the unwind info is
23755 correct, individually, before and after this instruction because
23756 we've described the save of SP, which will override the default
23757 handling of SP as restoring from the CFA. */
23758 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
23761 /* Let's compute the static_chain_stack_bytes required and store it. Right
23762 now the value must be -1 as stored by arm_init_machine_status (). */
23763 cfun
->machine
->static_chain_stack_bytes
23764 = arm_compute_static_chain_stack_bytes ();
23766 /* The static chain register is the same as the IP register. If it is
23767 clobbered when creating the frame, we need to save and restore it. */
23768 clobber_ip
= (IS_NESTED (func_type
)
23769 && (((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
23770 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
23771 || flag_stack_clash_protection
)
23772 && !df_regs_ever_live_p (LR_REGNUM
)
23773 && arm_r3_live_at_start_p ()))
23774 || arm_current_function_pac_enabled_p ()));
  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
	  arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
	  onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
	  push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
23793 if (!arm_r3_live_at_start_p ())
23794 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
23795 else if (args_to_push
== 0)
23801 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23802 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
23805 /* Just tell the dwarf backend that we adjusted SP. */
23806 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23807 plus_constant (Pmode
, stack_pointer_rtx
,
23809 RTX_FRAME_RELATED_P (insn
) = 1;
23810 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23811 if (arm_current_function_pac_enabled_p ())
23812 cfun
->machine
->pacspval_needed
= 1;
23816 /* Store the args on the stack. */
23817 if (cfun
->machine
->uses_anonymous_args
)
23819 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
23820 (0xf0 >> (args_to_push
/ 4)) & 0xf);
23821 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
23822 saved_pretend_args
= 1;
23828 if (args_to_push
== 4)
23829 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23831 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
23832 plus_constant (Pmode
,
23836 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
23838 /* Just tell the dwarf backend that we adjusted SP. */
23839 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23840 plus_constant (Pmode
, stack_pointer_rtx
,
23842 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23845 RTX_FRAME_RELATED_P (insn
) = 1;
23846 fp_offset
= args_to_push
;
23848 if (arm_current_function_pac_enabled_p ())
23849 cfun
->machine
->pacspval_needed
= 1;
23853 if (arm_current_function_pac_enabled_p ())
23855 /* If IP was clobbered we only emit a PAC instruction as the BTI
23856 one will be added before the push of the clobbered IP (if
23857 necessary) by the bti pass. */
23858 if (aarch_bti_enabled () && !clobber_ip
)
23859 insn
= emit_insn (gen_pacbti_nop ());
23861 insn
= emit_insn (gen_pac_nop ());
23863 rtx dwarf
= gen_rtx_SET (ip_rtx
, gen_rtx_REG (SImode
, RA_AUTH_CODE
));
23864 RTX_FRAME_RELATED_P (insn
) = 1;
23865 add_reg_note (insn
, REG_CFA_REGISTER
, dwarf
);
23868 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
23870 if (IS_INTERRUPT (func_type
))
23872 /* Interrupt functions must not corrupt any registers.
23873 Creating a frame pointer however, corrupts the IP
23874 register, so we must push it first. */
23875 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
23877 /* Do not set RTX_FRAME_RELATED_P on this insn.
23878 The dwarf stack unwinding code only wants to see one
23879 stack decrement per function, and this is not it. If
23880 this instruction is labeled as being part of the frame
23881 creation sequence then dwarf2out_frame_debug_expr will
23882 die when it encounters the assignment of IP to FP
23883 later on, since the use of SP here establishes SP as
23884 the CFA register and not IP.
23886 Anyway this instruction is not really part of the stack
23887 frame creation although it is part of the prologue. */
23890 insn
= emit_set_insn (ip_rtx
,
23891 plus_constant (Pmode
, stack_pointer_rtx
,
23893 RTX_FRAME_RELATED_P (insn
) = 1;
23896 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23897 if (TARGET_HAVE_FPCXT_CMSE
&& IS_CMSE_ENTRY (func_type
))
23900 insn
= emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx
,
23901 GEN_INT (FPCXTNS_ENUM
)));
23902 rtx dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23903 plus_constant (Pmode
, stack_pointer_rtx
, -4));
23904 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23905 RTX_FRAME_RELATED_P (insn
) = 1;
23910 /* Push the argument registers, or reserve space for them. */
23911 if (cfun
->machine
->uses_anonymous_args
)
23912 insn
= emit_multi_reg_push
23913 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
23914 (0xf0 >> (args_to_push
/ 4)) & 0xf);
23917 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23918 GEN_INT (- args_to_push
)));
23919 RTX_FRAME_RELATED_P (insn
) = 1;
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout is APCS),
     then subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
23927 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
23928 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
23929 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
23932 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
23934 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
23937 if (live_regs_mask
)
23939 unsigned long dwarf_regs_mask
= live_regs_mask
;
23941 saved_regs
+= bit_count (live_regs_mask
) * 4;
23942 if (optimize_size
&& !frame_pointer_needed
23943 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
23945 /* If no coprocessor registers are being pushed and we don't have
23946 to worry about a frame pointer then push extra registers to
23947 create the stack frame. This is done in a way that does not
23948 alter the frame layout, so is independent of the epilogue. */
23952 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
23954 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
23955 if (frame
&& n
* 4 >= frame
)
23958 live_regs_mask
|= (1 << n
) - 1;
23959 saved_regs
+= frame
;
23964 && current_tune
->prefer_ldrd_strd
23965 && !optimize_function_for_size_p (cfun
))
23967 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
23969 thumb2_emit_strd_push (live_regs_mask
);
23970 else if (TARGET_ARM
23971 && !TARGET_APCS_FRAME
23972 && !IS_INTERRUPT (func_type
))
23973 arm_emit_strd_push (live_regs_mask
);
23976 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
23977 RTX_FRAME_RELATED_P (insn
) = 1;
23982 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
23983 RTX_FRAME_RELATED_P (insn
) = 1;
23987 if (! IS_VOLATILE (func_type
))
23988 saved_regs
+= arm_save_coproc_regs ();
23990 if (frame_pointer_needed
&& TARGET_ARM
)
23992 /* Create the new frame pointer. */
23993 if (TARGET_APCS_FRAME
)
23995 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
23996 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
23997 RTX_FRAME_RELATED_P (insn
) = 1;
24001 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
24002 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
24003 stack_pointer_rtx
, insn
));
24004 RTX_FRAME_RELATED_P (insn
) = 1;
24008 size
= offsets
->outgoing_args
- offsets
->saved_args
;
24009 if (flag_stack_usage_info
)
24010 current_function_static_stack_size
= size
;
24012 /* If this isn't an interrupt service routine and we have a frame, then do
24013 stack checking. We use IP as the first scratch register, except for the
24014 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
24015 if (!IS_INTERRUPT (func_type
)
24016 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
24017 || flag_stack_clash_protection
))
24019 unsigned int regno
;
24021 if (!IS_NESTED (func_type
) || clobber_ip
)
24023 else if (df_regs_ever_live_p (LR_REGNUM
))
24028 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
24030 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
24031 arm_emit_probe_stack_range (get_stack_check_protect (),
24032 size
- get_stack_check_protect (),
24033 regno
, live_regs_mask
);
24036 arm_emit_probe_stack_range (get_stack_check_protect (), size
,
24037 regno
, live_regs_mask
);
24040 /* Recover the static chain register. */
24043 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
24044 insn
= gen_rtx_REG (SImode
, 3);
24047 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
24048 insn
= gen_frame_mem (SImode
, insn
);
24050 emit_set_insn (ip_rtx
, insn
);
24051 emit_insn (gen_force_register_use (ip_rtx
));
24054 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
24056 /* This add can produce multiple insns for a large constant, so we
24057 need to get tricky. */
24058 rtx_insn
*last
= get_last_insn ();
24060 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
24061 - offsets
->outgoing_args
);
24063 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24067 last
= last
? NEXT_INSN (last
) : get_insns ();
24068 RTX_FRAME_RELATED_P (last
) = 1;
24070 while (last
!= insn
);
24072 /* If the frame pointer is needed, emit a special barrier that
24073 will prevent the scheduler from moving stores to the frame
24074 before the stack adjustment. */
24075 if (frame_pointer_needed
)
24076 emit_insn (gen_stack_tie (stack_pointer_rtx
,
24077 hard_frame_pointer_rtx
));
24081 if (frame_pointer_needed
&& TARGET_THUMB2
)
24082 thumb_set_frame_pointer (offsets
);
24084 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24086 unsigned long mask
;
24088 mask
= live_regs_mask
;
24089 mask
&= THUMB2_WORK_REGS
;
24090 if (!IS_NESTED (func_type
))
24091 mask
|= (1 << IP_REGNUM
);
24092 arm_load_pic_register (mask
, NULL_RTX
);
24095 /* If we are profiling, make sure no instructions are scheduled before
24096 the call to mcount. Similarly if the user has requested no
24097 scheduling in the prolog. Similarly if we want non-call exceptions
24098 using the EABI unwinder, to prevent faulting instructions from being
24099 swapped with a stack adjustment. */
24100 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
24101 || (arm_except_unwind_info (&global_options
) == UI_TARGET
24102 && cfun
->can_throw_non_call_exceptions
))
24103 emit_insn (gen_blockage ());
24105 /* If the link register is being kept alive, with the return address in it,
24106 then make sure that it does not get reused by the ce2 pass. */
24107 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
24108 cfun
->machine
->lr_save_eliminated
= 1;
24111 /* Print condition code to STREAM. Helper function for arm_print_operand. */
24113 arm_print_condition (FILE *stream
)
24115 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
24117 /* Branch conversion is not implemented for Thumb-2. */
24120 output_operand_lossage ("predicated Thumb instruction");
24123 if (current_insn_predicate
!= NULL
)
24125 output_operand_lossage
24126 ("predicated instruction in conditional sequence");
24130 fputs (arm_condition_codes
[arm_current_cc
], stream
);
24132 else if (current_insn_predicate
)
24134 enum arm_cond_code code
;
24138 output_operand_lossage ("predicated Thumb instruction");
24142 code
= get_arm_condition_code (current_insn_predicate
);
24143 fputs (arm_condition_codes
[code
], stream
);
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDEFGHIJKLMOPQRSTUV
   Letters previously used, but now deprecated/obsolete: sNWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
   If CODE is 'V', then the operand must be a CONST_INT representing
   the bits to preserve in the modified register (Rd) of a BFI or BFC
   instruction: print out both the width and lsb (shift) fields.  */
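/* A couple of illustrative expansions (made-up operands, for orientation
   only): "%B" applied to the constant 5 prints its bitwise inverse -6;
   "%V" applied to the mask 0xfffffff1 (bits 1-3 cleared) prints "#1, #3",
   the lsb and width operands of a BFC/BFI.  */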
24171 arm_print_operand (FILE *stream
, rtx x
, int code
)
24176 fputs (ASM_COMMENT_START
, stream
);
24180 fputs (user_label_prefix
, stream
);
24184 fputs (REGISTER_PREFIX
, stream
);
24188 arm_print_condition (stream
);
24192 /* The current condition code for a condition code setting instruction.
24193 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24194 fputc('s', stream
);
24195 arm_print_condition (stream
);
24199 /* If the instruction is conditionally executed then print
24200 the current condition code, otherwise print 's'. */
24201 gcc_assert (TARGET_THUMB2
);
24202 if (current_insn_predicate
)
24203 arm_print_condition (stream
);
24205 fputc('s', stream
);
24208 /* %# is a "break" sequence. It doesn't output anything, but is used to
24209 separate e.g. operand numbers from following text, if that text consists
24210 of further digits which we don't want to be part of the operand
24215 /* An integer or symbol address without a preceding # sign. */
24217 switch (GET_CODE (x
))
24220 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
24224 output_addr_const (stream
, x
);
24228 if (GET_CODE (XEXP (x
, 0)) == PLUS
24229 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
24231 output_addr_const (stream
, x
);
24234 /* Fall through. */
24237 output_operand_lossage ("Unsupported operand for code '%c'", code
);
24241 /* An integer that we want to print in HEX. */
24243 switch (GET_CODE (x
))
24246 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
24250 output_operand_lossage ("Unsupported operand for code '%c'", code
);
24255 if (CONST_INT_P (x
))
24258 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
24259 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
24263 putc ('~', stream
);
24264 output_addr_const (stream
, x
);
24269 /* Print the log2 of a CONST_INT. */
24273 if (!CONST_INT_P (x
)
24274 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
24275 output_operand_lossage ("Unsupported operand for code '%c'", code
);
24277 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
24282 /* The low 16 bits of an immediate constant. */
24283 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
24287 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
24291 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
24299 shift
= shift_op (x
, &val
);
24303 fprintf (stream
, ", %s ", shift
);
24305 arm_print_operand (stream
, XEXP (x
, 1), 0);
24307 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
24312 /* An explanation of the 'Q', 'R' and 'H' register operands:
24314 In a pair of registers containing a DI or DF value the 'Q'
24315 operand returns the register number of the register containing
24316 the least significant part of the value. The 'R' operand returns
24317 the register number of the register containing the most
24318 significant part of the value.
24320 The 'H' operand returns the higher of the two register numbers.
24321 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24322 same as the 'Q' operand, since the most significant part of the
24323 value is held in the lower number register. The reverse is true
24324 on systems where WORDS_BIG_ENDIAN is false.
24326 The purpose of these operands is to distinguish between cases
24327 where the endian-ness of the values is important (for example
24328 when they are added together), and cases where the endian-ness
24329 is irrelevant, but the order of register operations is important.
24330 For example when loading a value from memory into a register
24331 pair, the endian-ness does not matter. Provided that the value
24332 from the lower memory address is put into the lower numbered
24333 register, and the value from the higher address is put into the
24334 higher numbered register, the load will work regardless of whether
24335 the value being loaded is big-wordian or little-wordian. The
24336 order of the two register loads can matter however, if the address
24337 of the memory location is actually held in one of the registers
24338 being overwritten by the load.
24340 The 'Q' and 'R' constraints are also available for 64-bit
24343 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
24345 rtx part
= gen_lowpart (SImode
, x
);
24346 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
24350 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24352 output_operand_lossage ("invalid operand for code '%c'", code
);
24356 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
24360 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
24362 machine_mode mode
= GET_MODE (x
);
24365 if (mode
== VOIDmode
)
24367 part
= gen_highpart_mode (SImode
, mode
, x
);
24368 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
24372 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24374 output_operand_lossage ("invalid operand for code '%c'", code
);
24378 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
24382 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24384 output_operand_lossage ("invalid operand for code '%c'", code
);
24388 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
24392 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24394 output_operand_lossage ("invalid operand for code '%c'", code
);
24398 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
24402 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24404 output_operand_lossage ("invalid operand for code '%c'", code
);
24408 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
24412 asm_fprintf (stream
, "%r",
24413 REG_P (XEXP (x
, 0))
24414 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
24418 asm_fprintf (stream
, "{%r-%r}",
24420 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
24423 /* Like 'M', but writing doubleword vector registers, for use by Neon
24427 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
24428 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
24430 asm_fprintf (stream
, "{d%d}", regno
);
24432 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
24437 /* CONST_TRUE_RTX means always -- that's the default. */
24438 if (x
== const_true_rtx
)
24441 if (!COMPARISON_P (x
))
24443 output_operand_lossage ("invalid operand for code '%c'", code
);
24447 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
24452 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24453 want to do that. */
24454 if (x
== const_true_rtx
)
24456 output_operand_lossage ("instruction never executed");
24459 if (!COMPARISON_P (x
))
24461 output_operand_lossage ("invalid operand for code '%c'", code
);
24465 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
24466 (get_arm_condition_code (x
))],
24472 /* Output the LSB (shift) and width for a bitmask instruction
24473 based on a literal mask. The LSB is printed first,
24474 followed by the width.
24476 Eg. For 0b1...1110001, the result is #1, #3. */
24477 if (!CONST_INT_P (x
))
24479 output_operand_lossage ("invalid operand for code '%c'", code
);
24483 unsigned HOST_WIDE_INT val
24484 = ~UINTVAL (x
) & HOST_WIDE_INT_UC (0xffffffff);
24485 int lsb
= exact_log2 (val
& -val
);
24486 asm_fprintf (stream
, "#%d, #%d", lsb
,
24487 (exact_log2 (val
+ (val
& -val
)) - lsb
));
      /* Former FPA support, effectively unused after GCC 4.7, but not
	 removed until GCC 15.  */
24494 output_operand_lossage ("obsolete FPA format code '%c'", code
);
24502 /* Former Maverick support, removed after GCC-4.7. */
24503 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
24508 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
24509 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
24510 /* Bad value for wCG register number. */
24512 output_operand_lossage ("invalid operand for code '%c'", code
);
24517 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
24520 /* Print an iWMMXt control register name. */
24522 if (!CONST_INT_P (x
)
24524 || INTVAL (x
) >= 16)
24525 /* Bad value for wC register number. */
24527 output_operand_lossage ("invalid operand for code '%c'", code
);
24533 static const char * wc_reg_names
[16] =
24535 "wCID", "wCon", "wCSSF", "wCASF",
24536 "wC4", "wC5", "wC6", "wC7",
24537 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24538 "wC12", "wC13", "wC14", "wC15"
24541 fputs (wc_reg_names
[INTVAL (x
)], stream
);
24545 /* Print the high single-precision register of a VFP double-precision
24549 machine_mode mode
= GET_MODE (x
);
24552 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
24554 output_operand_lossage ("invalid operand for code '%c'", code
);
24559 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
24561 output_operand_lossage ("invalid operand for code '%c'", code
);
24565 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
24569 /* Print a VFP/Neon double precision or quad precision register name. */
24573 machine_mode mode
= GET_MODE (x
);
24574 int is_quad
= (code
== 'q');
24577 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
24579 output_operand_lossage ("invalid operand for code '%c'", code
);
24584 || !IS_VFP_REGNUM (REGNO (x
)))
24586 output_operand_lossage ("invalid operand for code '%c'", code
);
24591 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
24592 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
24594 output_operand_lossage ("invalid operand for code '%c'", code
);
24598 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
24599 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
24603 /* These two codes print the low/high doubleword register of a Neon quad
24604 register, respectively. For pair-structure types, can also print
24605 low/high quadword registers. */
24609 machine_mode mode
= GET_MODE (x
);
24612 if ((GET_MODE_SIZE (mode
) != 16
24613 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
24615 output_operand_lossage ("invalid operand for code '%c'", code
);
24620 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
24622 output_operand_lossage ("invalid operand for code '%c'", code
);
24626 if (GET_MODE_SIZE (mode
) == 16)
24627 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
24628 + (code
== 'f' ? 1 : 0));
24630 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
24631 + (code
== 'f' ? 1 : 0));
24635 /* Print a VFPv3 floating-point constant, represented as an integer
24639 int index
= vfp3_const_double_index (x
);
24640 gcc_assert (index
!= -1);
24641 fprintf (stream
, "%d", index
);
24645 /* Print bits representing opcode features for Neon.
24647 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24648 and polynomials as unsigned.
24650 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24652 Bit 2 is 1 for rounding functions, 0 otherwise. */
24654 /* Identify the type as 's', 'u', 'p' or 'f'. */
24657 HOST_WIDE_INT bits
= INTVAL (x
);
24658 fputc ("uspf"[bits
& 3], stream
);
24662 /* Likewise, but signed and unsigned integers are both 'i'. */
24665 HOST_WIDE_INT bits
= INTVAL (x
);
24666 fputc ("iipf"[bits
& 3], stream
);
24670 /* As for 'T', but emit 'u' instead of 'p'. */
24673 HOST_WIDE_INT bits
= INTVAL (x
);
24674 fputc ("usuf"[bits
& 3], stream
);
24678 /* Bit 2: rounding (vs none). */
24681 HOST_WIDE_INT bits
= INTVAL (x
);
24682 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
      /* Memory operand for vld1/vst1 instruction.  */
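      /* Illustrative output only (register and alignment are hypothetical):
	 a 16-byte access aligned to 16 bytes with post-increment prints as
	 "[r0:128]!", while an unaligned plain access prints as just
	 "[r0]".  */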
24690 bool postinc
= FALSE
;
24691 rtx postinc_reg
= NULL
;
24692 unsigned align
, memsize
, align_bits
;
24694 gcc_assert (MEM_P (x
));
24695 addr
= XEXP (x
, 0);
24696 if (GET_CODE (addr
) == POST_INC
)
24699 addr
= XEXP (addr
, 0);
24701 if (GET_CODE (addr
) == POST_MODIFY
)
24703 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
24704 addr
= XEXP (addr
, 0);
24706 asm_fprintf (stream
, "[%r", REGNO (addr
));
24708 /* We know the alignment of this access, so we can emit a hint in the
24709 instruction (for some alignments) as an aid to the memory subsystem
24711 align
= MEM_ALIGN (x
) >> 3;
24712 memsize
= MEM_SIZE (x
);
24714 /* Only certain alignment specifiers are supported by the hardware. */
24715 if (memsize
== 32 && (align
% 32) == 0)
24717 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
24719 else if (memsize
>= 8 && (align
% 8) == 0)
24724 if (align_bits
!= 0)
24725 asm_fprintf (stream
, ":%d", align_bits
);
24727 asm_fprintf (stream
, "]");
24730 fputs("!", stream
);
24732 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
    /* To print the memory operand with "Ux" or "Uj" constraint.  Based on the
       rtx_code the memory operand's output looks like the following:
	1. [Rn], #+/-<imm>
	2. [Rn, #+/-<imm>]!
	3. [Rn, #+/-<imm>]
	4. [Rn].  */
24745 rtx postinc_reg
= NULL
;
24746 unsigned inc_val
= 0;
24747 enum rtx_code code
;
24749 gcc_assert (MEM_P (x
));
24750 addr
= XEXP (x
, 0);
24751 code
= GET_CODE (addr
);
24752 if (code
== POST_INC
|| code
== POST_DEC
|| code
== PRE_INC
24753 || code
== PRE_DEC
)
24755 asm_fprintf (stream
, "[%r", REGNO (XEXP (addr
, 0)));
24756 inc_val
= GET_MODE_SIZE (GET_MODE (x
));
24757 if (code
== POST_INC
|| code
== POST_DEC
)
24758 asm_fprintf (stream
, "], #%s%d", (code
== POST_INC
)
24759 ? "" : "-", inc_val
);
24761 asm_fprintf (stream
, ", #%s%d]!", (code
== PRE_INC
)
24762 ? "" : "-", inc_val
);
24764 else if (code
== POST_MODIFY
|| code
== PRE_MODIFY
)
24766 asm_fprintf (stream
, "[%r", REGNO (XEXP (addr
, 0)));
24767 postinc_reg
= XEXP (XEXP (addr
, 1), 1);
24768 if (postinc_reg
&& CONST_INT_P (postinc_reg
))
24770 if (code
== POST_MODIFY
)
24771 asm_fprintf (stream
, "], #%wd", INTVAL (postinc_reg
));
24773 asm_fprintf (stream
, ", #%wd]!", INTVAL (postinc_reg
));
24776 else if (code
== PLUS
)
24778 rtx base
= XEXP (addr
, 0);
24779 rtx index
= XEXP (addr
, 1);
24781 gcc_assert (REG_P (base
) && CONST_INT_P (index
));
24783 HOST_WIDE_INT offset
= INTVAL (index
);
24784 asm_fprintf (stream
, "[%r, #%wd]", REGNO (base
), offset
);
24788 gcc_assert (REG_P (addr
));
24789 asm_fprintf (stream
, "[%r]",REGNO (addr
));
24798 gcc_assert (MEM_P (x
));
24799 addr
= XEXP (x
, 0);
24800 gcc_assert (REG_P (addr
));
24801 asm_fprintf (stream
, "[%r]", REGNO (addr
));
24805 /* Translate an S register number into a D register number and element index. */
24808 machine_mode mode
= GET_MODE (x
);
24811 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
24813 output_operand_lossage ("invalid operand for code '%c'", code
);
24818 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
24820 output_operand_lossage ("invalid operand for code '%c'", code
);
24824 regno
= regno
- FIRST_VFP_REGNUM
;
24825 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
24830 gcc_assert (CONST_DOUBLE_P (x
));
24832 result
= vfp3_const_double_for_fract_bits (x
);
24834 result
= vfp3_const_double_for_bits (x
);
24835 fprintf (stream
, "#%d", result
);
24838 /* Register specifier for vld1.16/vst1.16. Translate the S register
24839 number into a D register number and element index. */
24842 machine_mode mode
= GET_MODE (x
);
24845 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
24847 output_operand_lossage ("invalid operand for code '%c'", code
);
24852 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
24854 output_operand_lossage ("invalid operand for code '%c'", code
);
24858 regno
= regno
- FIRST_VFP_REGNUM
;
24859 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
24866 output_operand_lossage ("missing operand");
24870 switch (GET_CODE (x
))
24873 asm_fprintf (stream
, "%r", REGNO (x
));
24877 output_address (GET_MODE (x
), XEXP (x
, 0));
24883 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
24884 sizeof (fpstr
), 0, 1);
24885 fprintf (stream
, "#%s", fpstr
);
24890 gcc_assert (GET_CODE (x
) != NEG
);
24891 fputc ('#', stream
);
24892 if (GET_CODE (x
) == HIGH
)
24894 fputs (":lower16:", stream
);
24898 output_addr_const (stream
, x
);
24904 /* Target hook for printing a memory address. */
24906 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
24910 int is_minus
= GET_CODE (x
) == MINUS
;
24913 asm_fprintf (stream
, "[%r]", REGNO (x
));
24914 else if (GET_CODE (x
) == PLUS
|| is_minus
)
24916 rtx base
= XEXP (x
, 0);
24917 rtx index
= XEXP (x
, 1);
24918 HOST_WIDE_INT offset
= 0;
24920 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
	  /* Ensure that BASE is a register (one of them must be).
	     Also ensure that SP is not used as an index register.  */
24925 std::swap (base
, index
);
24927 switch (GET_CODE (index
))
24930 offset
= INTVAL (index
);
24933 asm_fprintf (stream
, "[%r, #%wd]",
24934 REGNO (base
), offset
);
24938 asm_fprintf (stream
, "[%r, %s%r]",
24939 REGNO (base
), is_minus
? "-" : "",
24949 asm_fprintf (stream
, "[%r, %s%r",
24950 REGNO (base
), is_minus
? "-" : "",
24951 REGNO (XEXP (index
, 0)));
24952 arm_print_operand (stream
, index
, 'S');
24953 fputs ("]", stream
);
24958 gcc_unreachable ();
24961 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
24962 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
24964 gcc_assert (REG_P (XEXP (x
, 0)));
24966 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
24967 asm_fprintf (stream
, "[%r, #%s%d]!",
24968 REGNO (XEXP (x
, 0)),
24969 GET_CODE (x
) == PRE_DEC
? "-" : "",
24970 GET_MODE_SIZE (mode
));
24971 else if (TARGET_HAVE_MVE
24972 && VALID_MVE_STRUCT_MODE (mode
))
24973 asm_fprintf (stream
, "[%r]!", REGNO (XEXP (x
,0)));
24975 asm_fprintf (stream
, "[%r], #%s%d", REGNO (XEXP (x
, 0)),
24976 GET_CODE (x
) == POST_DEC
? "-" : "",
24977 GET_MODE_SIZE (mode
));
24979 else if (GET_CODE (x
) == PRE_MODIFY
)
24981 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
24982 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
24983 asm_fprintf (stream
, "#%wd]!",
24984 INTVAL (XEXP (XEXP (x
, 1), 1)));
24986 asm_fprintf (stream
, "%r]!",
24987 REGNO (XEXP (XEXP (x
, 1), 1)));
24989 else if (GET_CODE (x
) == POST_MODIFY
)
24991 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
24992 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
24993 asm_fprintf (stream
, "#%wd",
24994 INTVAL (XEXP (XEXP (x
, 1), 1)));
24996 asm_fprintf (stream
, "%r",
24997 REGNO (XEXP (XEXP (x
, 1), 1)));
24999 else output_addr_const (stream
, x
);
25004 asm_fprintf (stream
, "[%r]", REGNO (x
));
25005 else if (GET_CODE (x
) == POST_INC
)
25006 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
25007 else if (GET_CODE (x
) == PLUS
)
25009 gcc_assert (REG_P (XEXP (x
, 0)));
25010 if (CONST_INT_P (XEXP (x
, 1)))
25011 asm_fprintf (stream
, "[%r, #%wd]",
25012 REGNO (XEXP (x
, 0)),
25013 INTVAL (XEXP (x
, 1)));
25015 asm_fprintf (stream
, "[%r, %r]",
25016 REGNO (XEXP (x
, 0)),
25017 REGNO (XEXP (x
, 1)));
25020 output_addr_const (stream
, x
);
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
25036 /* Target hook for assembling integer objects. The ARM version needs to
25037 handle word-sized values specially. */
25039 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
25043 if (size
== UNITS_PER_WORD
&& aligned_p
)
25045 fputs ("\t.word\t", asm_out_file
);
25046 output_addr_const (asm_out_file
, x
);
25048 /* Mark symbols as position independent. We only do this in the
25049 .text segment, not in the .data segment. */
25050 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
25051 (SYMBOL_REF_P (x
) || LABEL_REF_P (x
)))
25053 /* See legitimize_pic_address for an explanation of the
25054 TARGET_VXWORKS_RTP check. */
25055 /* References to weak symbols cannot be resolved locally:
25056 they may be overridden by a non-weak definition at link
25058 if (!arm_pic_data_is_text_relative
25059 || (SYMBOL_REF_P (x
)
25060 && (!SYMBOL_REF_LOCAL_P (x
)
25061 || (SYMBOL_REF_DECL (x
)
25062 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0)
25063 || (SYMBOL_REF_FUNCTION_P (x
)
25064 && !arm_fdpic_local_funcdesc_p (x
)))))
25066 if (TARGET_FDPIC
&& SYMBOL_REF_FUNCTION_P (x
))
25067 fputs ("(GOTFUNCDESC)", asm_out_file
);
25069 fputs ("(GOT)", asm_out_file
);
25073 if (TARGET_FDPIC
&& SYMBOL_REF_FUNCTION_P (x
))
25074 fputs ("(GOTOFFFUNCDESC)", asm_out_file
);
25080 || arm_is_segment_info_known (x
, &is_readonly
))
25081 fputs ("(GOTOFF)", asm_out_file
);
25083 fputs ("(GOT)", asm_out_file
);
25088 /* For FDPIC we also have to mark symbol for .data section. */
25090 && !making_const_table
25091 && SYMBOL_REF_P (x
)
25092 && SYMBOL_REF_FUNCTION_P (x
))
25093 fputs ("(FUNCDESC)", asm_out_file
);
25095 fputc ('\n', asm_out_file
);
25099 mode
= GET_MODE (x
);
25101 if (arm_vector_mode_supported_p (mode
))
25105 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
25107 units
= CONST_VECTOR_NUNITS (x
);
25108 size
= GET_MODE_UNIT_SIZE (mode
);
25110 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
25111 for (i
= 0; i
< units
; i
++)
25113 rtx elt
= CONST_VECTOR_ELT (x
, i
);
25115 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
25118 for (i
= 0; i
< units
; i
++)
25120 rtx elt
= CONST_VECTOR_ELT (x
, i
);
25122 (*CONST_DOUBLE_REAL_VALUE (elt
),
25123 as_a
<scalar_float_mode
> (GET_MODE_INNER (mode
)),
25124 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
25130 return default_assemble_integer (x
, size
, aligned_p
);
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
	  (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
	  (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
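
/* Purely illustrative example (not taken from the sources): with the fsm
   above, a branch over a single instruction such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can instead be emitted as

	cmp	r0, #0
	addne	r1, r1, #1

   For Thumb-2 the same decision is expressed through an IT block, with
   arm_condexec_mask/arm_condexec_count describing which of the following
   instructions take the condition and which take its inverse.  */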
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */

static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case E_CC_DNEmode: code = ARM_NE; goto dominance;
    case E_CC_DEQmode: code = ARM_EQ; goto dominance;
    case E_CC_DGEmode: code = ARM_GE; goto dominance;
    case E_CC_DGTmode: code = ARM_GT; goto dominance;
    case E_CC_DLEmode: code = ARM_LE; goto dominance;
    case E_CC_DLTmode: code = ARM_LT; goto dominance;
    case E_CC_DGEUmode: code = ARM_CS; goto dominance;
    case E_CC_DGTUmode: code = ARM_HI; goto dominance;
    case E_CC_DLEUmode: code = ARM_LS; goto dominance;
    case E_CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case E_CC_NZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case E_CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case E_CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case E_CCFPEmode:
    case E_CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case E_CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case E_CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_NVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	default: return ARM_NV;
	}

    case E_CC_Bmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_Vmode:
      switch (comp_code)
	{
	case NE: return ARM_VS;
	case EQ: return ARM_VC;
	default: return ARM_NV;
	}

    case E_CC_ADCmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CCmode:
    case E_CC_RSBmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}

/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targetting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.cc assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5t)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Check the instruction is explicitly marked as predicable.
		 Instructions using or affecting the condition codes are not.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_predicable (this_insn) != PREDICABLE_YES
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}

      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.cc assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
		  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
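
/* Worked example (illustrative only): for a Thumb-2 block of three
   conditional instructions where the second one uses the inverse condition,
   the loop above fills buff with "tet", so the code prints e.g.

	itet	eq

   and the three following instructions are then emitted with their own
   "eq"/"ne" suffixes by the normal output machinery.  */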
/* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
   UNITS_PER_WORD bytes wide.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (IS_VPR_REGNUM (regno))
    return CEIL (GET_MODE_SIZE (mode), 2);

  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  return ARM_NUM_REGS (mode);
}
/* Implement TARGET_HARD_REGNO_MODE_OK.  */
static bool
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_VFP_BASE
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (IS_VPR_REGNUM (regno))
    return VALID_MVE_PRED_MODE (mode);

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
    {
      if (mode == DFmode || mode == DImode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode || mode == BFmode || mode == HImode
	  || mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
      if (TARGET_HAVE_MVE)
	return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
		|| (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == V2x16QImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == V2x8HImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == V2x4SImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == V2x8HFmode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == V2x4SFmode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
		|| (mode == V4x16QImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
		|| (mode == V4x8HImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
		|| (mode == V4x4SImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
		|| (mode == V4x8HFmode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
		|| (mode == V4x4SFmode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  The same restriction applies for MVE
     in order to support Armv8.1-M Mainline instructions.
     Do not allow very large Neon structure opaque modes in general
     registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
	return true;

      return !((TARGET_LDRD || TARGET_CDE)
	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
/* Implement TARGET_MODES_TIEABLE_P.  */

static bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  if (TARGET_HAVE_MVE
      && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if ((TARGET_NEON
       && (VALID_NEON_DREG_MODE (mode1)
	   || VALID_NEON_QREG_MODE (mode1)
	   || VALID_NEON_STRUCT_MODE (mode1))
       && (VALID_NEON_DREG_MODE (mode2)
	   || VALID_NEON_QREG_MODE (mode2)
	   || VALID_NEON_STRUCT_MODE (mode2)))
      || (TARGET_HAVE_MVE
	  && (VALID_MVE_MODE (mode1)
	      || VALID_MVE_STRUCT_MODE (mode1))
	  && (VALID_MVE_MODE (mode2)
	      || VALID_MVE_STRUCT_MODE (mode2))))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (IS_VPR_REGNUM (regno))
    return VPR_REG;

  if (IS_PAC_REGNUM (regno))
    return PAC_REG;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}

/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
	/* We can calculate either in 16-bit range and precision or
	   32-bit range and precision.  Make that decision based on whether
	   we have native support for the ARMv8.2-A 16-bit floating-point
	   instructions or not.  */
	return (TARGET_VFP_FP16INST
		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
      case EXCESS_PRECISION_TYPE_FLOAT16:
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
	gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
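
/* Illustrative example (not from the sources): for

	_Float16 a, b, c;
	c = a * b + c;

   FLT_EVAL_METHOD_PROMOTE_TO_FLOAT evaluates the product and sum in 32-bit
   float and narrows on assignment, whereas with the ARMv8.2-A FP16
   instructions (FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16) the whole expression
   can be computed directly in 16-bit precision.  */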
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
	return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit().  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
	  || IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* The PC is never popped directly; instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of epilogue_insns when
   updating Armv8-M Baseline Security Extensions register clearing
   sequences).  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* For Armv8.1-M, this is cleared as part of the CLRM instruction
	     emitted by cmse_nonsecure_entry_clear_before_return ().  */
	  if (!TARGET_HAVE_FPCXT_CMSE)
	    asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
			 reg_containing_return_addr);
	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
	}
      else
	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return
	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
	 address.  It may therefore contain information that we might not want
	 to leak, hence it must be cleared.  The value in R0 will never be a
	 secret at this point, so it is safe to use it, see the clearing code
	 in cmse_nonsecure_entry_clear_before_return ().  */
      if (reg_containing_return_addr != LR_REGNUM)
	asm_fprintf (f, "\tmov\tlr, r0\n");

      /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
	 by cmse_nonsecure_entry_clear_before_return ().  */
      if (!TARGET_HAVE_FPCXT_CMSE)
	asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NZmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
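
/* Examples (illustrative only): 0x000000ff, 0x0000ff00 and 0xff000000 all
   satisfy the test above, since their set bits fit within one 8-bit window
   shifted left by 0-24 bits; 0x00000101 and 0x80000001 do not.  */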
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't very useful.

     Following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll-back the decision of not to use far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

  return false;
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  if (high_regs_pushed == 0 && regno >= 0)
	    mask &= ~((1 << regno) - 1);

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
		       next_hi_reg--)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  machine->static_chain_stack_bytes = -1;
  machine->pacspval_needed = 0;

  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}

/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
27219 /* Generate the function's prologue. */
27222 thumb1_expand_prologue (void)
27226 HOST_WIDE_INT amount
;
27227 HOST_WIDE_INT size
;
27228 arm_stack_offsets
*offsets
;
27229 unsigned long func_type
;
27231 unsigned long live_regs_mask
;
27232 unsigned long l_mask
;
27233 unsigned high_regs_pushed
= 0;
27234 bool lr_needs_saving
;
27236 func_type
= arm_current_func_type ();
27238 /* Naked functions don't have prologues. */
27239 if (IS_NAKED (func_type
))
27241 if (flag_stack_usage_info
)
27242 current_function_static_stack_size
= 0;
27246 if (IS_INTERRUPT (func_type
))
27248 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27252 if (is_called_in_ARM_mode (current_function_decl
))
27253 emit_insn (gen_prologue_thumb1_interwork ());
27255 offsets
= arm_get_frame_offsets ();
27256 live_regs_mask
= offsets
->saved_regs_mask
;
27257 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
27259 /* The AAPCS requires the callee to widen integral types narrower
27260 than 32 bits to the full width of the register; but when handling
27261 calls to non-secure space, we cannot trust the callee to have
27262 correctly done so. So forcibly re-widen the result here. */
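	 Illustrative note, not from the original source: for a
	 cmse_nonsecure_entry function taking, say, an unsigned char in
	 r0, the loop below emits a zero-extend of that QImode argument
	 register to SImode, so the code generated for the body never
	 relies on the untrusted non-secure caller having widened it.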
27263 if (IS_CMSE_ENTRY (func_type
))
27265 function_args_iterator args_iter
;
27266 CUMULATIVE_ARGS args_so_far_v
;
27267 cumulative_args_t args_so_far
;
27268 bool first_param
= true;
27270 tree fndecl
= current_function_decl
;
27271 tree fntype
= TREE_TYPE (fndecl
);
27272 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
27273 args_so_far
= pack_cumulative_args (&args_so_far_v
);
27274 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
27278 if (VOID_TYPE_P (arg_type
))
27281 function_arg_info
arg (arg_type
, /*named=*/true);
27283 /* We should advance after processing the argument and pass
27284 the argument we're advancing past. */
27285 arm_function_arg_advance (args_so_far
, arg
);
27286 first_param
= false;
27287 arg_rtx
= arm_function_arg (args_so_far
, arg
);
27288 gcc_assert (REG_P (arg_rtx
));
27289 if ((TREE_CODE (arg_type
) == INTEGER_TYPE
27290 || TREE_CODE (arg_type
) == ENUMERAL_TYPE
27291 || TREE_CODE (arg_type
) == BOOLEAN_TYPE
)
27292 && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx
)), 4))
27294 rtx res_reg
= gen_rtx_REG (SImode
, REGNO (arg_rtx
));
27295 if (TYPE_UNSIGNED (arg_type
))
27296 emit_set_insn (res_reg
, gen_rtx_ZERO_EXTEND (SImode
, arg_rtx
));
27299 /* Signed-extension is a special case because of
27300 thumb1_extendhisi2. */
27301 if (known_eq (GET_MODE_SIZE (GET_MODE (arg_rtx
)), 2))
27302 emit_insn (gen_thumb1_extendhisi2 (res_reg
, arg_rtx
));
27304 emit_set_insn (res_reg
,
27305 gen_rtx_SIGN_EXTEND (SImode
, arg_rtx
));
27311 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27312 l_mask
= live_regs_mask
& 0x40ff;
27313 /* Then count how many other high registers will need to be pushed. */
27314 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
27316 if (crtl
->args
.pretend_args_size
)
27318 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
27320 if (cfun
->machine
->uses_anonymous_args
)
27322 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
27323 unsigned long mask
;
27325 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
27326 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
27328 insn
= thumb1_emit_multi_reg_push (mask
, 0);
27332 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27333 stack_pointer_rtx
, x
));
27335 RTX_FRAME_RELATED_P (insn
) = 1;
27338 if (TARGET_BACKTRACE
)
27340 HOST_WIDE_INT offset
= 0;
27341 unsigned work_register
;
27342 rtx work_reg
, x
, arm_hfp_rtx
;
27344 /* We have been asked to create a stack backtrace structure.
27345 The code looks like this:
27349 0 sub SP, #16 Reserve space for 4 registers.
27350 2 push {R7} Push low registers.
27351 4 add R7, SP, #20 Get the stack pointer before the push.
27352 6 str R7, [SP, #8] Store the stack pointer
27353 (before reserving the space).
27354 8 mov R7, PC Get hold of the start of this code + 12.
27355 10 str R7, [SP, #16] Store it.
27356 12 mov R7, FP Get hold of the current frame pointer.
27357 14 str R7, [SP, #4] Store it.
27358 16 mov R7, LR Get hold of the current return address.
27359 18 str R7, [SP, #12] Store it.
27360 20 add R7, SP, #16 Point at the start of the
27361 backtrace structure.
27362 22 mov FP, R7 Put this value into the frame pointer. */
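      /* Sketch of the resulting layout (inferred from the sequence
	 above, not stated explicitly in the original): the new FP ends
	 up pointing at the slot holding the saved PC, with the saved
	 LR, the old SP and the old FP in the three words below it,
	 i.e. the standard APCS backtrace entry.  */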
27364 work_register
= thumb_find_work_register (live_regs_mask
);
27365 work_reg
= gen_rtx_REG (SImode
, work_register
);
27366 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
27368 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27369 stack_pointer_rtx
, GEN_INT (-16)));
27370 RTX_FRAME_RELATED_P (insn
) = 1;
27374 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
27375 RTX_FRAME_RELATED_P (insn
) = 1;
27376 lr_needs_saving
= false;
27378 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
27381 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
27382 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27384 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
27385 x
= gen_frame_mem (SImode
, x
);
27386 emit_move_insn (x
, work_reg
);
27388 /* Make sure that the instruction fetching the PC is in the right place
27389 to calculate "start of backtrace creation code + 12". */
27390 /* ??? The stores using the common WORK_REG ought to be enough to
27391 prevent the scheduler from doing anything weird. Failing that
27392 we could always move all of the following into an UNSPEC_VOLATILE. */
27395 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27396 emit_move_insn (work_reg
, x
);
27398 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27399 x
= gen_frame_mem (SImode
, x
);
27400 emit_move_insn (x
, work_reg
);
27402 emit_move_insn (work_reg
, arm_hfp_rtx
);
27404 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27405 x
= gen_frame_mem (SImode
, x
);
27406 emit_move_insn (x
, work_reg
);
27410 emit_move_insn (work_reg
, arm_hfp_rtx
);
27412 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27413 x
= gen_frame_mem (SImode
, x
);
27414 emit_move_insn (x
, work_reg
);
27416 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27417 emit_move_insn (work_reg
, x
);
27419 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27420 x
= gen_frame_mem (SImode
, x
);
27421 emit_move_insn (x
, work_reg
);
27424 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
27425 emit_move_insn (work_reg
, x
);
27427 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
27428 x
= gen_frame_mem (SImode
, x
);
27429 emit_move_insn (x
, work_reg
);
27431 x
= GEN_INT (offset
+ 12);
27432 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27434 emit_move_insn (arm_hfp_rtx
, work_reg
);
27436 /* Optimization: If we are not pushing any low registers but we are going
27437 to push some high registers then delay our first push. This will just
27438 be a push of LR and we can combine it with the push of the first high
27440 else if ((l_mask
& 0xff) != 0
27441 || (high_regs_pushed
== 0 && lr_needs_saving
))
27443 unsigned long mask
= l_mask
;
27444 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
27445 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
27446 RTX_FRAME_RELATED_P (insn
) = 1;
27447 lr_needs_saving
= false;
27450 if (high_regs_pushed
)
27452 unsigned pushable_regs
;
27453 unsigned next_hi_reg
;
27454 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
27455 : crtl
->args
.info
.nregs
;
27456 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
27458 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
27459 if (live_regs_mask
& (1 << next_hi_reg
))
27462 /* Here we need to mask out registers used for passing arguments
27463 even if they can be pushed. This is to avoid using them to
27464 stash the high registers. Such kind of stash may clobber the
27465 use of arguments. */
27466 pushable_regs
= l_mask
& (~arg_regs_mask
);
27467 pushable_regs
|= thumb1_prologue_unused_call_clobbered_lo_regs ();
27469 /* Normally, LR can be used as a scratch register once it has been
27470 saved; but if the function examines its own return address then
27471 the value is still live and we need to avoid using it. */
27472 bool return_addr_live
27473 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
27476 if (lr_needs_saving
|| return_addr_live
)
27477 pushable_regs
&= ~(1 << LR_REGNUM
);
27479 if (pushable_regs
== 0)
27480 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
27482 while (high_regs_pushed
> 0)
27484 unsigned long real_regs_mask
= 0;
27485 unsigned long push_mask
= 0;
27487 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
27489 if (pushable_regs
& (1 << regno
))
27491 emit_move_insn (gen_rtx_REG (SImode
, regno
),
27492 gen_rtx_REG (SImode
, next_hi_reg
));
27494 high_regs_pushed
--;
27495 real_regs_mask
|= (1 << next_hi_reg
);
27496 push_mask
|= (1 << regno
);
27498 if (high_regs_pushed
)
27500 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
27502 if (live_regs_mask
& (1 << next_hi_reg
))
27510 /* If we had to find a work register and we have not yet
27511 saved the LR then add it to the list of regs to push. */
27512 if (lr_needs_saving
)
27514 push_mask
|= 1 << LR_REGNUM
;
27515 real_regs_mask
|= 1 << LR_REGNUM
;
27516 lr_needs_saving
= false;
27517 /* If the return address is not live at this point, we
27518 can add LR to the list of registers that we can use
27520 if (!return_addr_live
)
27521 pushable_regs
|= 1 << LR_REGNUM
;
27524 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
27525 RTX_FRAME_RELATED_P (insn
) = 1;
27529 /* Load the pic register before setting the frame pointer,
27530 so we can use r7 as a temporary work register. */
27531 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
27532 arm_load_pic_register (live_regs_mask
, NULL_RTX
);
27534 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
27535 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
27536 stack_pointer_rtx
);
27538 size
= offsets
->outgoing_args
- offsets
->saved_args
;
27539 if (flag_stack_usage_info
)
27540 current_function_static_stack_size
= size
;
27542 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27543 if ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27544 || flag_stack_clash_protection
)
27546 sorry ("%<-fstack-check=specific%> for Thumb-1");
27548 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27549 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
27554 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27555 GEN_INT (- amount
)));
27556 RTX_FRAME_RELATED_P (insn
) = 1;
27562 /* The stack decrement is too big for an immediate value in a single
27563 insn. In theory we could issue multiple subtracts, but after
27564 three of them it becomes more space efficient to place the full
27565 value in the constant pool and load into a register. (Also the
27566 ARM debugger really likes to see only one stack decrement per
27567 function). So instead we look for a scratch register into which
27568 we can load the decrement, and then we subtract this from the
27569 stack pointer. Unfortunately on the thumb the only available
27570 scratch registers are the argument registers, and we cannot use
27571 these as they may hold arguments to the function. Instead we
27572 attempt to locate a call preserved register which is used by this
27573 function. If we can find one, then we know that it will have
27574 been pushed at the start of the prologue and so we can corrupt
27576 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
27577 if (live_regs_mask
& (1 << regno
))
27580 gcc_assert(regno
<= LAST_LO_REGNUM
);
27582 reg
= gen_rtx_REG (SImode
, regno
);
27584 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
27586 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27587 stack_pointer_rtx
, reg
));
27589 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
27590 plus_constant (Pmode
, stack_pointer_rtx
,
27592 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27593 RTX_FRAME_RELATED_P (insn
) = 1;
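	  /* Illustrative sketch, an assumption rather than a quote of
	     the generated assembly: for a large frame this path
	     typically produces something like

		 ldr	r4, .Lpool_entry	@ r4 = -amount
		 add	sp, sp, r4

	     where r4 stands for whichever call-saved low register the
	     loop above found; it was already pushed by the prologue,
	     so clobbering it here is safe.  */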
27597 if (frame_pointer_needed
)
27598 thumb_set_frame_pointer (offsets
);
27600 /* If we are profiling, make sure no instructions are scheduled before
27601 the call to mcount. Similarly if the user has requested no
27602 scheduling in the prolog. Similarly if we want non-call exceptions
27603 using the EABI unwinder, to prevent faulting instructions from being
27604 swapped with a stack adjustment. */
27605 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
27606 || (arm_except_unwind_info (&global_options
) == UI_TARGET
27607 && cfun
->can_throw_non_call_exceptions
))
27608 emit_insn (gen_blockage ());
27610 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
27611 if (live_regs_mask
& 0xff)
27612 cfun
->machine
->lr_save_eliminated
= 0;
27615 /* Clear caller saved registers not used to pass return values and leaked
27616 condition flags before exiting a cmse_nonsecure_entry function. */
27619 cmse_nonsecure_entry_clear_before_return (void)
27621 bool clear_vfpregs
= TARGET_HARD_FLOAT
|| TARGET_HAVE_FPCXT_CMSE
;
27622 int regno
, maxregno
= clear_vfpregs
? LAST_VFP_REGNUM
: IP_REGNUM
;
27623 uint32_t padding_bits_to_clear
= 0;
27624 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
27625 rtx r1_reg
, result_rtl
, clearing_reg
= NULL_RTX
;
27628 bitmap_clear (to_clear_bitmap
);
27629 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
27630 bitmap_set_bit (to_clear_bitmap
, IP_REGNUM
);
27632 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27636 int float_bits
= D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1;
27638 bitmap_set_range (to_clear_bitmap
, FIRST_VFP_REGNUM
, float_bits
);
27640 if (!TARGET_HAVE_FPCXT_CMSE
)
27642 /* Make sure we don't clear the two scratch registers used to clear
27643 the relevant FPSCR bits in output_return_instruction. */
27644 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
27645 bitmap_clear_bit (to_clear_bitmap
, IP_REGNUM
);
27646 emit_use (gen_rtx_REG (SImode
, 4));
27647 bitmap_clear_bit (to_clear_bitmap
, 4);
27651 /* If the user has defined registers to be caller saved, these are no longer
27652 restored by the function before returning and must thus be cleared for
27653 security purposes. */
27654 for (regno
= NUM_ARG_REGS
; regno
<= maxregno
; regno
++)
27656 /* We do not touch registers that can be used to pass arguments as per
27657 the AAPCS, since these should never be made callee-saved by user
27659 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
27661 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
27663 if (!callee_saved_reg_p (regno
)
27664 && (!IN_RANGE (regno
, FIRST_VFP_REGNUM
, LAST_VFP_REGNUM
)
27665 || TARGET_HARD_FLOAT
))
27666 bitmap_set_bit (to_clear_bitmap
, regno
);
27669 /* Make sure we do not clear the registers used to return the result in. */
27670 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
27671 if (!VOID_TYPE_P (result_type
))
27673 uint64_t to_clear_return_mask
;
27674 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
27676 /* No need to check that we return in registers, because we don't
27677 support returning on stack yet. */
27678 gcc_assert (REG_P (result_rtl
));
27679 to_clear_return_mask
27680 = compute_not_to_clear_mask (result_type
, result_rtl
, 0,
27681 &padding_bits_to_clear
);
27682 if (to_clear_return_mask
)
27684 gcc_assert ((unsigned) maxregno
< sizeof (long long) * __CHAR_BIT__
);
27685 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
27687 if (to_clear_return_mask
& (1ULL << regno
))
27688 bitmap_clear_bit (to_clear_bitmap
, regno
);
27693 if (padding_bits_to_clear
!= 0)
27695 int to_clear_bitmap_size
= SBITMAP_SIZE ((sbitmap
) to_clear_bitmap
);
27696 auto_sbitmap
to_clear_arg_regs_bitmap (to_clear_bitmap_size
);
27698 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27699 returning a composite type, which only uses r0. Let's make sure that
27700 r1-r3 is cleared too. */
27701 bitmap_clear (to_clear_arg_regs_bitmap
);
27702 bitmap_set_range (to_clear_arg_regs_bitmap
, R1_REGNUM
, NUM_ARG_REGS
- 1);
27703 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap
, to_clear_bitmap
));
27706 /* Clear full registers that leak before returning. */
27707 clearing_reg
= gen_rtx_REG (SImode
, TARGET_THUMB1
? R0_REGNUM
: LR_REGNUM
);
27708 r1_reg
= gen_rtx_REG (SImode
, R0_REGNUM
+ 1);
27709 cmse_clear_registers (to_clear_bitmap
, &padding_bits_to_clear
, 1, r1_reg
,
27713 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27714 POP instruction can be generated. LR should be replaced by PC. All
27715 the checks required are already done by USE_RETURN_INSN (). Hence,
27716 all we really need to check here is if single register is to be
27717 returned, or multiple register return. */
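/* Illustrative note (an addition, not from the original source): when
   several registers were saved and PAC is not enabled, the code below
   turns the restore into a single "pop {..., pc}" by replacing LR with
   PC in the mask; the single-register case instead builds the
   equivalent PARALLEL of a return and a post-increment load of PC.  */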
27719 thumb2_expand_return (bool simple_return
)
27722 unsigned long saved_regs_mask
;
27723 arm_stack_offsets
*offsets
;
27725 offsets
= arm_get_frame_offsets ();
27726 saved_regs_mask
= offsets
->saved_regs_mask
;
27728 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27729 if (saved_regs_mask
& (1 << i
))
27732 if (!simple_return
&& saved_regs_mask
)
27734 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27735 functions or adapt code to handle according to ACLE. This path should
27736 not be reachable for cmse_nonsecure_entry functions though we prefer
27737 to assert it for now to ensure that future code changes do not silently
27738 change this behavior. */
27739 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27740 if (arm_current_function_pac_enabled_p ())
27742 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
27743 arm_emit_multi_reg_pop (saved_regs_mask
);
27744 emit_insn (gen_aut_nop ());
27745 emit_jump_insn (simple_return_rtx
);
27747 else if (num_regs
== 1)
27749 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27750 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27751 rtx addr
= gen_rtx_MEM (SImode
,
27752 gen_rtx_POST_INC (SImode
,
27753 stack_pointer_rtx
));
27754 set_mem_alias_set (addr
, get_frame_alias_set ());
27755 XVECEXP (par
, 0, 0) = ret_rtx
;
27756 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
27757 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27758 emit_jump_insn (par
);
27762 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27763 saved_regs_mask
|= (1 << PC_REGNUM
);
27764 arm_emit_multi_reg_pop (saved_regs_mask
);
27769 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27770 cmse_nonsecure_entry_clear_before_return ();
27771 emit_jump_insn (simple_return_rtx
);
27776 thumb1_expand_epilogue (void)
27778 HOST_WIDE_INT amount
;
27779 arm_stack_offsets
*offsets
;
27782 /* Naked functions don't have epilogues. */
27783 if (IS_NAKED (arm_current_func_type ()))
27786 offsets
= arm_get_frame_offsets ();
27787 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27789 if (frame_pointer_needed
)
27791 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27792 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27794 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27796 gcc_assert (amount
>= 0);
27799 emit_insn (gen_blockage ());
27802 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27803 GEN_INT (amount
)));
27806 /* r3 is always free in the epilogue. */
27807 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27809 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27810 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27814 /* Emit a USE (stack_pointer_rtx), so that
27815 the stack adjustment will not be deleted. */
27816 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27818 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27819 emit_insn (gen_blockage ());
27821 /* Emit a clobber for each insn that will be restored in the epilogue,
27822 so that flow2 will get register lifetimes correct. */
27823 for (regno
= 0; regno
< 13; regno
++)
27824 if (reg_needs_saving_p (regno
))
27825 emit_clobber (gen_rtx_REG (SImode
, regno
));
27827 if (! df_regs_ever_live_p (LR_REGNUM
))
27828 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27830 /* Clear all caller-saved regs that are not used to return. */
27831 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27832 cmse_nonsecure_entry_clear_before_return ();
27835 /* Epilogue code for APCS frame. */
27837 arm_expand_epilogue_apcs_frame (bool really_return
)
27839 unsigned long func_type
;
27840 unsigned long saved_regs_mask
;
27843 int floats_from_frame
= 0;
27844 arm_stack_offsets
*offsets
;
27846 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27847 func_type
= arm_current_func_type ();
27849 /* Get frame offsets for ARM. */
27850 offsets
= arm_get_frame_offsets ();
27851 saved_regs_mask
= offsets
->saved_regs_mask
;
27853 /* Find the offset of the floating-point save area in the frame. */
27855 = (offsets
->saved_args
27856 + arm_compute_static_chain_stack_bytes ()
27859 /* Compute how many core registers saved and how far away the floats are. */
27860 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27861 if (saved_regs_mask
& (1 << i
))
27864 floats_from_frame
+= 4;
27867 if (TARGET_VFP_BASE
)
27870 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27872 /* The offset is from IP_REGNUM. */
27873 int saved_size
= arm_get_vfp_saved_size ();
27874 if (saved_size
> 0)
27877 floats_from_frame
+= saved_size
;
27878 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27879 hard_frame_pointer_rtx
,
27880 GEN_INT (-floats_from_frame
)));
27881 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27882 ip_rtx
, hard_frame_pointer_rtx
);
27885 /* Generate VFP register multi-pop. */
27886 start_reg
= FIRST_VFP_REGNUM
;
27888 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27889 /* Look for a case where a reg does not need restoring. */
27890 if (!reg_needs_saving_p (i
) && !reg_needs_saving_p (i
+ 1))
27892 if (start_reg
!= i
)
27893 arm_emit_vfp_multi_reg_pop (start_reg
,
27894 (i
- start_reg
) / 2,
27895 gen_rtx_REG (SImode
,
27900 /* Restore the remaining regs that we have discovered (or possibly
27901 even all of them, if the conditional in the for loop never
27903 if (start_reg
!= i
)
27904 arm_emit_vfp_multi_reg_pop (start_reg
,
27905 (i
- start_reg
) / 2,
27906 gen_rtx_REG (SImode
, IP_REGNUM
));
27911 /* The frame pointer is guaranteed to be non-double-word aligned, as
27912 it is set to double-word-aligned old_stack_pointer - 4. */
27914 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27916 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27917 if (reg_needs_saving_p (i
))
27919 rtx addr
= gen_frame_mem (V2SImode
,
27920 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27922 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27923 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27924 gen_rtx_REG (V2SImode
, i
),
27930 /* saved_regs_mask should contain IP which contains old stack pointer
27931 at the time of activation creation. Since SP and IP are adjacent registers,
27932 we can restore the value directly into SP. */
27933 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27934 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27935 saved_regs_mask
|= (1 << SP_REGNUM
);
27937 /* There are two registers left in saved_regs_mask - LR and PC. We
27938 only need to restore LR (the return address), but to
27939 save time we can load it directly into PC, unless we need a
27940 special function exit sequence, or we are not really returning. */
27942 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27943 && !crtl
->calls_eh_return
)
27944 /* Delete LR from the register mask, so that LR on
27945 the stack is loaded into the PC in the register mask. */
27946 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27948 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27950 num_regs
= bit_count (saved_regs_mask
);
27951 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27954 emit_insn (gen_blockage ());
27955 /* Unwind the stack to just below the saved registers. */
27956 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27957 hard_frame_pointer_rtx
,
27958 GEN_INT (- 4 * num_regs
)));
27960 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27961 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27964 arm_emit_multi_reg_pop (saved_regs_mask
);
27966 if (IS_INTERRUPT (func_type
))
27968 /* Interrupt handlers will have pushed the
27969 IP onto the stack, so restore it now. */
27971 rtx addr
= gen_rtx_MEM (SImode
,
27972 gen_rtx_POST_INC (SImode
,
27973 stack_pointer_rtx
));
27974 set_mem_alias_set (addr
, get_frame_alias_set ());
27975 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27976 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27977 gen_rtx_REG (SImode
, IP_REGNUM
),
27981 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27984 if (crtl
->calls_eh_return
)
27985 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27987 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27989 if (IS_STACKALIGN (func_type
))
27990 /* Restore the original stack pointer. Before prologue, the stack was
27991 realigned and the original stack pointer saved in r0. For details,
27992 see comment in arm_expand_prologue. */
27993 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
27995 emit_jump_insn (simple_return_rtx
);
27998 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27999 function is not a sibcall. */
28001 arm_expand_epilogue (bool really_return
)
28003 unsigned long func_type
;
28004 unsigned long saved_regs_mask
;
28008 arm_stack_offsets
*offsets
;
28010 func_type
= arm_current_func_type ();
28012 /* Naked functions don't have epilogues. Hence, generate return pattern, and
28013 let output_return_instruction take care of instruction emission if any. */
28014 if (IS_NAKED (func_type
)
28015 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
28018 emit_jump_insn (simple_return_rtx
);
28022 /* If we are throwing an exception, then we really must be doing a
28023 return, so we can't tail-call. */
28024 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
28026 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
28028 arm_expand_epilogue_apcs_frame (really_return
);
28032 /* Get frame offsets for ARM. */
28033 offsets
= arm_get_frame_offsets ();
28034 saved_regs_mask
= offsets
->saved_regs_mask
;
28035 num_regs
= bit_count (saved_regs_mask
);
28037 if (frame_pointer_needed
)
28040 /* Restore stack pointer if necessary. */
28043 /* In ARM mode, frame pointer points to first saved register.
28044 Restore stack pointer to last saved register. */
28045 amount
= offsets
->frame
- offsets
->saved_regs
;
28047 /* Force out any pending memory operations that reference stacked data
28048 before stack de-allocation occurs. */
28049 emit_insn (gen_blockage ());
28050 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
28051 hard_frame_pointer_rtx
,
28052 GEN_INT (amount
)));
28053 arm_add_cfa_adjust_cfa_note (insn
, amount
,
28055 hard_frame_pointer_rtx
);
28057 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
28059 emit_insn (gen_force_register_use (stack_pointer_rtx
));
28063 /* In Thumb-2 mode, the frame pointer points to the last saved
28065 amount
= offsets
->locals_base
- offsets
->saved_regs
;
28068 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
28069 hard_frame_pointer_rtx
,
28070 GEN_INT (amount
)));
28071 arm_add_cfa_adjust_cfa_note (insn
, amount
,
28072 hard_frame_pointer_rtx
,
28073 hard_frame_pointer_rtx
);
28076 /* Force out any pending memory operations that reference stacked data
28077 before stack de-allocation occurs. */
28078 emit_insn (gen_blockage ());
28079 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
28080 hard_frame_pointer_rtx
));
28081 arm_add_cfa_adjust_cfa_note (insn
, 0,
28083 hard_frame_pointer_rtx
);
28084 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
28086 emit_insn (gen_force_register_use (stack_pointer_rtx
));
28091 /* Pop off outgoing args and local frame to adjust stack pointer to
28092 last saved register. */
28093 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
28097 /* Force out any pending memory operations that reference stacked data
28098 before stack de-allocation occurs. */
28099 emit_insn (gen_blockage ());
28100 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
28102 GEN_INT (amount
)));
28103 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
28104 stack_pointer_rtx
, stack_pointer_rtx
);
28105 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
28107 emit_insn (gen_force_register_use (stack_pointer_rtx
));
28111 if (TARGET_VFP_BASE
)
28113 /* Generate VFP register multi-pop. */
28114 int end_reg
= LAST_VFP_REGNUM
+ 1;
28116 /* Scan the registers in reverse order. We need to match
28117 any groupings made in the prologue and generate matching
28118 vldm operations. The need to match groups is because,
28119 unlike pop, vldm can only do consecutive regs. */
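      /* Illustrative example (an assumption, not from the original
	 source): if the prologue saved d8-d9 and d11-d12 as two
	 separate groups, this loop emits two matching vldm
	 instructions rather than a single one spanning d8-d12.  */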
28120 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
28121 /* Look for a case where a reg does not need restoring. */
28122 if (!reg_needs_saving_p (i
) && !reg_needs_saving_p (i
+ 1))
28124 /* Restore the regs discovered so far (from reg+2 to
28126 if (end_reg
> i
+ 2)
28127 arm_emit_vfp_multi_reg_pop (i
+ 2,
28128 (end_reg
- (i
+ 2)) / 2,
28129 stack_pointer_rtx
);
28133 /* Restore the remaining regs that we have discovered (or possibly
28134 even all of them, if the conditional in the for loop never
28136 if (end_reg
> i
+ 2)
28137 arm_emit_vfp_multi_reg_pop (i
+ 2,
28138 (end_reg
- (i
+ 2)) / 2,
28139 stack_pointer_rtx
);
28143 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
28144 if (reg_needs_saving_p (i
))
28147 rtx addr
= gen_rtx_MEM (V2SImode
,
28148 gen_rtx_POST_INC (SImode
,
28149 stack_pointer_rtx
));
28150 set_mem_alias_set (addr
, get_frame_alias_set ());
28151 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
28152 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
28153 gen_rtx_REG (V2SImode
, i
),
28155 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
28156 stack_pointer_rtx
, stack_pointer_rtx
);
28159 if (saved_regs_mask
)
28162 bool return_in_pc
= false;
28164 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
28165 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
28166 && !IS_CMSE_ENTRY (func_type
)
28167 && !IS_STACKALIGN (func_type
)
28169 && crtl
->args
.pretend_args_size
== 0
28170 && saved_regs_mask
& (1 << LR_REGNUM
)
28171 && !crtl
->calls_eh_return
28172 && !arm_current_function_pac_enabled_p ())
28174 saved_regs_mask
&= ~(1 << LR_REGNUM
);
28175 saved_regs_mask
|= (1 << PC_REGNUM
);
28176 return_in_pc
= true;
28179 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
28181 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
28182 if (saved_regs_mask
& (1 << i
))
28184 rtx addr
= gen_rtx_MEM (SImode
,
28185 gen_rtx_POST_INC (SImode
,
28186 stack_pointer_rtx
));
28187 set_mem_alias_set (addr
, get_frame_alias_set ());
28189 if (i
== PC_REGNUM
)
28191 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
28192 XVECEXP (insn
, 0, 0) = ret_rtx
;
28193 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
28195 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
28196 insn
= emit_jump_insn (insn
);
28200 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
28202 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
28203 gen_rtx_REG (SImode
, i
),
28205 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
28207 stack_pointer_rtx
);
28214 && current_tune
->prefer_ldrd_strd
28215 && !optimize_function_for_size_p (cfun
))
28218 thumb2_emit_ldrd_pop (saved_regs_mask
);
28219 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
28220 arm_emit_ldrd_pop (saved_regs_mask
);
28222 arm_emit_multi_reg_pop (saved_regs_mask
);
28225 arm_emit_multi_reg_pop (saved_regs_mask
);
28233 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
28237 rtx dwarf
= NULL_RTX
;
28239 emit_insn (gen_addsi3 (stack_pointer_rtx
,
28241 GEN_INT (amount
)));
28243 RTX_FRAME_RELATED_P (tmp
) = 1;
28245 if (cfun
->machine
->uses_anonymous_args
)
28247 /* Restore pretend args. Refer arm_expand_prologue on how to save
28248 pretend_args in stack. */
28249 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
28250 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
28251 for (j
= 0, i
= 0; j
< num_regs
; i
++)
28252 if (saved_regs_mask
& (1 << i
))
28254 rtx reg
= gen_rtx_REG (SImode
, i
);
28255 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
28258 REG_NOTES (tmp
) = dwarf
;
28260 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
28261 stack_pointer_rtx
, stack_pointer_rtx
);
28264 if (IS_CMSE_ENTRY (func_type
))
28266 /* CMSE_ENTRY always returns. */
28267 gcc_assert (really_return
);
28268 /* Clear all caller-saved regs that are not used to return. */
28269 cmse_nonsecure_entry_clear_before_return ();
28271 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28273 if (TARGET_HAVE_FPCXT_CMSE
)
28277 insn
= emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx
,
28278 GEN_INT (FPCXTNS_ENUM
)));
28279 rtx dwarf
= gen_rtx_SET (stack_pointer_rtx
,
28280 plus_constant (Pmode
, stack_pointer_rtx
, 4));
28281 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
28282 RTX_FRAME_RELATED_P (insn
) = 1;
28286 if (arm_current_function_pac_enabled_p ())
28287 emit_insn (gen_aut_nop ());
28289 if (!really_return
)
28292 if (crtl
->calls_eh_return
)
28293 emit_insn (gen_addsi3 (stack_pointer_rtx
,
28295 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
28297 if (IS_STACKALIGN (func_type
))
28298 /* Restore the original stack pointer. Before prologue, the stack was
28299 realigned and the original stack pointer saved in r0. For details,
28300 see comment in arm_expand_prologue. */
28301 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
28303 emit_jump_insn (simple_return_rtx
);
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
28343 /* Handle the case of a double word load into a low register from
28344 a computed memory address. The computed address may involve a
28345 register which is overwritten by the load. */
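/* Illustrative example (an assumption, not from the original source):
   for a destination pair starting at r2 and an address of the form
   r0 + r1, the REG + REG case below emits roughly

       add	r3, r0, r1
       ldr	r2, [r3, #0]
       ldr	r3, [r3, #4]

   using the high half of the destination pair as the temporary.  */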
28347 thumb_load_double_from_address (rtx
*operands
)
28355 gcc_assert (REG_P (operands
[0]));
28356 gcc_assert (MEM_P (operands
[1]));
28358 /* Get the memory address. */
28359 addr
= XEXP (operands
[1], 0);
28361 /* Work out how the memory address is computed. */
28362 switch (GET_CODE (addr
))
28365 if (reg_overlap_mentioned_p (addr
, operands
[0]))
28366 output_asm_insn ("ldmia\t%m1, {%0, %H0}", operands
);
28369 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28370 output_asm_insn ("ldr\t%0, %1", operands
);
28371 output_asm_insn ("ldr\t%H0, %2", operands
);
28376 /* Compute <address> + 4 for the high order load. */
28377 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28379 output_asm_insn ("ldr\t%0, %1", operands
);
28380 output_asm_insn ("ldr\t%H0, %2", operands
);
28384 arg1
= XEXP (addr
, 0);
28385 arg2
= XEXP (addr
, 1);
28387 if (CONSTANT_P (arg1
))
28388 base
= arg2
, offset
= arg1
;
28390 base
= arg1
, offset
= arg2
;
28392 gcc_assert (REG_P (base
));
28394 /* Catch the case of <address> = <reg> + <reg> */
28395 if (REG_P (offset
))
28397 int reg_offset
= REGNO (offset
);
28398 int reg_base
= REGNO (base
);
28399 int reg_dest
= REGNO (operands
[0]);
28401 /* Add the base and offset registers together into the
28402 higher destination register. */
28403 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
28404 reg_dest
+ 1, reg_base
, reg_offset
);
28406 /* Load the lower destination register from the address in
28407 the higher destination register. */
28408 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
28409 reg_dest
, reg_dest
+ 1);
28411 /* Load the higher destination register from its own address
28413 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
28414 reg_dest
+ 1, reg_dest
+ 1);
28418 /* Compute <address> + 4 for the high order load. */
28419 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28421 /* If the computed address is held in the low order register
28422 then load the high order register first, otherwise always
28423 load the low order register first. */
28424 if (REGNO (operands
[0]) == REGNO (base
))
28426 output_asm_insn ("ldr\t%H0, %2", operands
);
28427 output_asm_insn ("ldr\t%0, %1", operands
);
28431 output_asm_insn ("ldr\t%0, %1", operands
);
28432 output_asm_insn ("ldr\t%H0, %2", operands
);
28438 /* With no registers to worry about we can just load the value
28440 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28442 output_asm_insn ("ldr\t%H0, %2", operands
);
28443 output_asm_insn ("ldr\t%0, %1", operands
);
28447 gcc_unreachable ();
28454 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
28459 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28460 std::swap (operands
[4], operands
[5]);
28462 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
28463 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
28467 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28468 std::swap (operands
[4], operands
[5]);
28469 if (REGNO (operands
[5]) > REGNO (operands
[6]))
28470 std::swap (operands
[5], operands
[6]);
28471 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28472 std::swap (operands
[4], operands
[5]);
28474 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
28475 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
28479 gcc_unreachable ();
28485 /* Output a call-via instruction for thumb state. */
28487 thumb_call_via_reg (rtx reg
)
28489 int regno
= REGNO (reg
);
28492 gcc_assert (regno
< LR_REGNUM
);
28494 /* If we are in the normal text section we can use a single instance
28495 per compilation unit. If we are doing function sections, then we need
28496 an entry per section, since we can't rely on reachability. */
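  /* Illustrative sketch (the label name is hypothetical): a call
     through r3 becomes
	 bl	.Lcall_via_r3
     and arm_file_end later emits the shared stub
     .Lcall_via_r3:
	 bx	r3
     once for the text section, or once per section when using
     -ffunction-sections.  */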
28497 if (in_section
== text_section
)
28499 thumb_call_reg_needed
= 1;
28501 if (thumb_call_via_label
[regno
] == NULL
)
28502 thumb_call_via_label
[regno
] = gen_label_rtx ();
28503 labelp
= thumb_call_via_label
+ regno
;
28507 if (cfun
->machine
->call_via
[regno
] == NULL
)
28508 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
28509 labelp
= cfun
->machine
->call_via
+ regno
;
28512 output_asm_insn ("bl\t%a0", labelp
);
28516 /* Routines for generating rtl. */
28518 thumb_expand_cpymemqi (rtx
*operands
)
28520 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
28521 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
28522 HOST_WIDE_INT len
= INTVAL (operands
[2]);
28523 HOST_WIDE_INT offset
= 0;
28527 emit_insn (gen_cpymem12b (out
, in
, out
, in
));
28533 emit_insn (gen_cpymem8b (out
, in
, out
, in
));
28539 rtx reg
= gen_reg_rtx (SImode
);
28540 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
28541 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
28548 rtx reg
= gen_reg_rtx (HImode
);
28549 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
28550 plus_constant (Pmode
, in
,
28552 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
28561 rtx reg
= gen_reg_rtx (QImode
);
28562 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
28563 plus_constant (Pmode
, in
,
28565 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
28572 thumb_reload_out_hi (rtx
*operands
)
28574 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
28623 /* This function is used to emit an EABI tag and its associated value.
28624 We emit the numerical value of the tag in case the assembler does not
28625 support textual tags (e.g. gas prior to 2.20). If requested we include
28626 the tag name in a comment so that anyone reading the assembler output
28627 will know which tag is being set.
28629 This function is not static because arm-c.cc needs it too. */
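/* Example of the directive this emits (assuming -fverbose-asm), e.g.
   for Tag_ABI_VFP_args:

       .eabi_attribute 28, 1	@ Tag_ABI_VFP_args

   Only the numerical tag and value are significant; the name appears
   purely as an assembler comment.  */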
28632 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
28634 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
28635 if (flag_verbose_asm
|| flag_debug_asm
)
28636 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
28637 asm_fprintf (asm_out_file
, "\n");
28640 /* This function is used to print CPU tuning information as comment
28641 in assembler file. Pointers are not printed for now. */
28644 arm_print_tune_info (void)
28646 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
28647 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
28648 current_tune
->constant_limit
);
28649 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28650 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
28651 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28652 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
28653 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28654 "prefetch.l1_cache_size:\t%d\n",
28655 current_tune
->prefetch
.l1_cache_size
);
28656 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28657 "prefetch.l1_cache_line_size:\t%d\n",
28658 current_tune
->prefetch
.l1_cache_line_size
);
28659 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28660 "prefer_constant_pool:\t%d\n",
28661 (int) current_tune
->prefer_constant_pool
);
28662 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28663 "branch_cost:\t(s:speed, p:predictable)\n");
28664 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
28665 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
28666 current_tune
->branch_cost (false, false));
28667 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
28668 current_tune
->branch_cost (false, true));
28669 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
28670 current_tune
->branch_cost (true, false));
28671 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
28672 current_tune
->branch_cost (true, true));
28673 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28674 "prefer_ldrd_strd:\t%d\n",
28675 (int) current_tune
->prefer_ldrd_strd
);
28676 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28677 "logical_op_non_short_circuit:\t[%d,%d]\n",
28678 (int) current_tune
->logical_op_non_short_circuit_thumb
,
28679 (int) current_tune
->logical_op_non_short_circuit_arm
);
28680 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28681 "disparage_flag_setting_t16_encodings:\t%d\n",
28682 (int) current_tune
->disparage_flag_setting_t16_encodings
);
28683 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28684 "string_ops_prefer_neon:\t%d\n",
28685 (int) current_tune
->string_ops_prefer_neon
);
28686 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28687 "max_insns_inline_memset:\t%d\n",
28688 current_tune
->max_insns_inline_memset
);
28689 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
28690 current_tune
->fusible_ops
);
28691 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
28692 (int) current_tune
->sched_autopref
);
28695 /* The last set of target options used to emit .arch directives, etc. This
28696 could be a function-local static if it were not required to expose it as a
28697 root to the garbage collector. */
28698 static GTY(()) cl_target_option
*last_asm_targ_options
= NULL
;
28700 /* Print .arch and .arch_extension directives corresponding to the
28701 current architecture configuration. */
28703 arm_print_asm_arch_directives (FILE *stream
, cl_target_option
*targ_options
)
28705 arm_build_target build_target
;
28706 /* If the target options haven't changed since the last time we were called
28707 there is nothing to do. This should be sufficient to suppress the
28708 majority of redundant work. */
28709 if (last_asm_targ_options
== targ_options
)
28712 last_asm_targ_options
= targ_options
;
28714 build_target
.isa
= sbitmap_alloc (isa_num_bits
);
28715 arm_configure_build_target (&build_target
, targ_options
, false);
28717 if (build_target
.core_name
28718 && !bitmap_bit_p (build_target
.isa
, isa_bit_quirk_no_asmcpu
))
28720 const char* truncated_name
28721 = arm_rewrite_selected_cpu (build_target
.core_name
);
28722 asm_fprintf (stream
, "\t.cpu %s\n", truncated_name
);
28725 const arch_option
*arch
28726 = arm_parse_arch_option_name (all_architectures
, "-march",
28727 build_target
.arch_name
);
28728 auto_sbitmap
opt_bits (isa_num_bits
);
28732 if (strcmp (build_target
.arch_name
, "armv7ve") == 0)
28734 /* Keep backward compatibility for assemblers which don't support
28735 armv7ve. Fortunately, none of the following extensions are reset
28736 by a .fpu directive. */
28737 asm_fprintf (stream
, "\t.arch armv7-a\n");
28738 asm_fprintf (stream
, "\t.arch_extension virt\n");
28739 asm_fprintf (stream
, "\t.arch_extension idiv\n");
28740 asm_fprintf (stream
, "\t.arch_extension sec\n");
28741 asm_fprintf (stream
, "\t.arch_extension mp\n");
28744 asm_fprintf (stream
, "\t.arch %s\n", build_target
.arch_name
);
28746 /* The .fpu directive will reset any architecture extensions from the
28747 assembler that relate to the fp/vector extensions. So put this out before
28748 any .arch_extension directives. */
28749 const char *fpu_name
= (TARGET_SOFT_FLOAT
28751 : arm_identify_fpu_from_isa (build_target
.isa
));
28752 asm_fprintf (stream
, "\t.fpu %s\n", fpu_name
);
28754 if (!arch
->common
.extensions
)
28757 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
28763 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
28765 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28766 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
28767 floating point instructions is disabled. So the following check
28768 restricts the printing of ".arch_extension mve" and
28769 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28770 this special behaviour because the feature bit "mve" and
28771 "mve_float" are not part of "fpu bits", so they are not cleared
28772 when -mfloat-abi=soft (i.e. nofp) but the macro TARGET_HAVE_MVE and
28773 TARGET_HAVE_MVE_FLOAT are disabled. */
28774 if ((bitmap_bit_p (opt_bits
, isa_bit_mve
) && !TARGET_HAVE_MVE
)
28775 || (bitmap_bit_p (opt_bits
, isa_bit_mve_float
)
28776 && !TARGET_HAVE_MVE_FLOAT
))
28779 /* If every feature bit of this option is set in the target ISA
28780 specification, print out the option name. However, don't print
28781 anything if all the bits are part of the FPU specification. */
28782 if (bitmap_subset_p (opt_bits
, build_target
.isa
)
28783 && !bitmap_subset_p (opt_bits
, isa_all_fpubits_internal
))
28784 asm_fprintf (stream
, "\t.arch_extension %s\n", opt
->name
);
28790 arm_file_start (void)
28793 bool pac
= (aarch_ra_sign_scope
!= AARCH_FUNCTION_NONE
);
28794 bool bti
= (aarch_enable_bti
== 1);
28796 arm_print_asm_arch_directives
28797 (asm_out_file
, TREE_TARGET_OPTION (target_option_default_node
));
28801 /* If we have a named cpu, but the assembler does not support that
28802 name via .cpu, put out a cpu name attribute; but don't do this if the
28803 name starts with the fictitious prefix, 'generic'. */
28804 if (arm_active_target
.core_name
28805 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_asmcpu
)
28806 && !startswith (arm_active_target
.core_name
, "generic"))
28808 const char* truncated_name
28809 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
28810 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_asmcpu
))
28811 asm_fprintf (asm_out_file
, "\t.eabi_attribute 5, \"%s\"\n",
28815 if (print_tune_info
)
28816 arm_print_tune_info ();
28818 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
28819 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28821 if (TARGET_HARD_FLOAT_ABI
)
28822 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28824 /* Some of these attributes only apply when the corresponding features
28825 are used. However we don't have any easy way of figuring this out.
28826 Conservatively record the setting that would have been used. */
28828 if (flag_rounding_math
)
28829 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28831 if (!flag_unsafe_math_optimizations
)
28833 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28834 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28836 if (flag_signaling_nans
)
28837 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28839 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28840 flag_finite_math_only
? 1 : 3);
28842 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28843 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28844 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28845 flag_short_enums
? 1 : 2);
28847 /* Tag_ABI_optimization_goals. */
28850 else if (optimize
>= 2)
28856 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
28858 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28861 if (arm_fp16_format
)
28862 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28863 (int) arm_fp16_format
);
28865 if (TARGET_HAVE_PACBTI
)
28867 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28868 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28870 else if (pac
|| bti
)
28872 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28873 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28877 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28879 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28881 if (arm_lang_output_object_attributes_hook
)
28882 arm_lang_output_object_attributes_hook();
28885 default_file_start ();
28889 arm_file_end (void)
28893 /* Just in case the last function output in the assembler had non-default
28894 architecture directives, we force the assembler state back to the default
28895 set, so that any 'calculated' build attributes are based on the default
28896 options rather than the special options for that function. */
28897 arm_print_asm_arch_directives
28898 (asm_out_file
, TREE_TARGET_OPTION (target_option_default_node
));
28900 if (NEED_INDICATE_EXEC_STACK
)
28901 /* Add .note.GNU-stack. */
28902 file_end_indicate_exec_stack ();
28904 if (! thumb_call_reg_needed
)
28907 switch_to_section (text_section
);
28908 asm_fprintf (asm_out_file
, "\t.code 16\n");
28909 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28911 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28913 rtx label
= thumb_call_via_label
[regno
];
28917 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28918 CODE_LABEL_NUMBER (label
));
28919 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28924 /* Symbols in the text segment can be accessed without indirecting via the
28925 constant pool; it may take an extra binary operation, but this is still
28926 faster than indirecting via memory. Don't do this when not optimizing,
28927 since we won't be calculating all of the offsets necessary to do this
28931 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28933 if (optimize
> 0 && TREE_CONSTANT (decl
))
28934 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28936 default_encode_section_info (decl
, rtl
, first
);
28940 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28942 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28943 && !strcmp (prefix
, "L"))
28945 arm_ccfsm_state
= 0;
28946 arm_target_insn
= NULL
;
28948 default_internal_label (stream
, prefix
, labelno
);
28951 /* Define classes to generate code as RTL or output asm to a file.
28952 Using templates then allows to use the same code to output code
28953 sequences in the two formats. */
28954 class thumb1_const_rtl
28957 thumb1_const_rtl (rtx dst
) : dst (dst
) {}
28959 void mov (HOST_WIDE_INT val
)
28961 emit_set_insn (dst
, GEN_INT (val
));
28964 void add (HOST_WIDE_INT val
)
28966 emit_set_insn (dst
, gen_rtx_PLUS (SImode
, dst
, GEN_INT (val
)));
28969 void ashift (HOST_WIDE_INT shift
)
28971 emit_set_insn (dst
, gen_rtx_ASHIFT (SImode
, dst
, GEN_INT (shift
)));
28976 emit_set_insn (dst
, gen_rtx_NEG (SImode
, dst
));
28983 class thumb1_const_print
28986 thumb1_const_print (FILE *f
, int regno
)
28989 dst_regname
= reg_names
[regno
];
28992 void mov (HOST_WIDE_INT val
)
28994 asm_fprintf (t_file
, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28998 void add (HOST_WIDE_INT val
)
29000 asm_fprintf (t_file
, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
29004 void ashift (HOST_WIDE_INT shift
)
29006 asm_fprintf (t_file
, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
29007 dst_regname
, shift
);
29012 asm_fprintf (t_file
, "\trsbs\t%s, #0\n", dst_regname
);
29017 const char *dst_regname
;
29020 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
29021 Avoid generating useless code when one of the bytes is zero. */
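/* Worked example (illustrative, not taken from the original comments):
   for op1 == 0x120034 the sequence below reduces to

       movs	rd, #0x12
       lsls	rd, #16
       adds	rd, #0x34

   because the zero middle byte only accumulates shift amount instead
   of costing an adds/lsls pair.  */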
29024 thumb1_gen_const_int_1 (T dst
, HOST_WIDE_INT op1
)
29026 bool mov_done_p
= false;
29027 unsigned HOST_WIDE_INT val
= op1
;
29031 gcc_assert (op1
== trunc_int_for_mode (op1
, SImode
));
29039 /* For negative numbers with the first nine bits set, build the
29040 opposite of OP1, then negate it, it's generally shorter and not
29042 if ((val
& 0xFF800000) == 0xFF800000)
29044 thumb1_gen_const_int_1 (dst
, -op1
);
29049 /* In the general case, we need 7 instructions to build
29050 a 32 bits constant (1 movs, 3 lsls, 3 adds). We can
29051 do better if VAL is small enough, or
29052 right-shiftable by a suitable amount. If the
29053 right-shift enables to encode at least one less byte,
29054 it's worth it: we save a adds and a lsls at the
29055 expense of a final lsls. */
29056 int final_shift
= number_of_first_bit_set (val
);
29058 int leading_zeroes
= clz_hwi (val
);
29059 int number_of_bytes_needed
29060 = ((HOST_BITS_PER_WIDE_INT
- 1 - leading_zeroes
)
29061 / BITS_PER_UNIT
) + 1;
29062 int number_of_bytes_needed2
29063 = ((HOST_BITS_PER_WIDE_INT
- 1 - leading_zeroes
- final_shift
)
29064 / BITS_PER_UNIT
) + 1;
29066 if (number_of_bytes_needed2
< number_of_bytes_needed
)
29067 val
>>= final_shift
;
29071 /* If we are in a very small range, we can use either a single movs
29077 unsigned HOST_WIDE_INT high
= val
- 255;
29085 if (final_shift
> 0)
29086 dst
.ashift (final_shift
);
29090 /* General case, emit upper 3 bytes as needed. */
29091 for (i
= 0; i
< 3; i
++)
29093 unsigned HOST_WIDE_INT byte
= (val
>> (8 * (3 - i
))) & 0xff;
/* We are about to emit new bits; stop accumulating a
   shift amount, and left-shift only if we have already
   emitted some upper bits.  */
29102 dst
.ashift (shift
);
29108 /* Stop accumulating shift amount since we've just
29109 emitted some bits. */
29119 /* Emit lower byte. */
29121 dst
.mov (val
& 0xff);
29124 dst
.ashift (shift
);
29126 dst
.add (val
& 0xff);
29129 if (final_shift
> 0)
29130 dst
.ashift (final_shift
);
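/* Illustrative standalone sketch (not part of the compiler): a plain C model
   of the movs/lsls/adds strategy implemented above, counting how many
   Thumb-1 instructions the byte-by-byte expansion needs for a given 32-bit
   value.  It models the accumulated-shift trick for zero bytes, but not the
   negation or final right-shift optimizations; the function name is
   hypothetical.  */
#include <stdint.h>

static int
count_thumb1_const_insns (uint32_t val)
{
  if (val == 0)
    return 1;			/* A single "movs rd, #0".  */

  int insns = 0;
  int started = 0;		/* Has the initial movs been emitted?  */
  int shift = 0;		/* Accumulated shift, flushed as one lsls.  */

  for (int i = 3; i >= 0; i--)
    {
      uint32_t byte = (val >> (8 * i)) & 0xff;
      if (byte == 0)
	{
	  if (started)
	    shift += 8;		/* Defer the shift; it may be merged.  */
	  continue;
	}
      if (!started)
	{
	  insns++;		/* movs rd, #byte  */
	  started = 1;
	}
      else
	{
	  insns++;		/* lsls rd, #(shift + 8)  */
	  shift = 0;
	  insns++;		/* adds rd, #byte  */
	}
    }
  if (shift > 0)
    insns++;			/* Trailing lsls for low-order zero bytes.  */

  /* Examples: 0x12345678 -> 7 (1 movs, 3 lsls, 3 adds);
     0x00340078 -> 3 (movs #0x34; lsls #16; adds #0x78).  */
  return insns;
}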
29134 /* Proxies for thumb1.md, since the thumb1_const_print and
29135 thumb1_const_rtl classes are not exported. */
29137 thumb1_gen_const_int_rtl (rtx dst
, HOST_WIDE_INT op1
)
29139 thumb1_const_rtl
t (dst
);
29140 thumb1_gen_const_int_1 (t
, op1
);
29144 thumb1_gen_const_int_print (rtx dst
, HOST_WIDE_INT op1
)
29146 thumb1_const_print
t (asm_out_file
, REGNO (dst
));
29147 thumb1_gen_const_int_1 (t
, op1
);
29150 /* Output code to add DELTA to the first argument, and then jump
29151 to FUNCTION. Used for C++ multiple inheritance. */
29154 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
29155 HOST_WIDE_INT
, tree function
)
29157 static int thunk_label
= 0;
29160 int mi_delta
= delta
;
29161 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
29163 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
29166 mi_delta
= - mi_delta
;
29168 final_start_function (emit_barrier (), file
, 1);
29172 int labelno
= thunk_label
++;
29173 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
29174 /* Thunks are entered in arm mode when available. */
29175 if (TARGET_THUMB1_ONLY
)
29177 /* push r3 so we can use it as a temporary. */
29178 /* TODO: Omit this save if r3 is not used. */
29179 fputs ("\tpush {r3}\n", file
);
29181 /* With -mpure-code, we cannot load the address from the
29182 constant pool: we build it explicitly. */
29183 if (target_pure_code
)
29185 fputs ("\tmovs\tr3, #:upper8_15:#", file
);
29186 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29187 fputc ('\n', file
);
29188 fputs ("\tlsls r3, #8\n", file
);
29189 fputs ("\tadds\tr3, #:upper0_7:#", file
);
29190 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29191 fputc ('\n', file
);
29192 fputs ("\tlsls r3, #8\n", file
);
29193 fputs ("\tadds\tr3, #:lower8_15:#", file
);
29194 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29195 fputc ('\n', file
);
29196 fputs ("\tlsls r3, #8\n", file
);
29197 fputs ("\tadds\tr3, #:lower0_7:#", file
);
29198 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29199 fputc ('\n', file
);
29202 fputs ("\tldr\tr3, ", file
);
29206 fputs ("\tldr\tr12, ", file
);
29209 if (!target_pure_code
)
29211 assemble_name (file
, label
);
29212 fputc ('\n', file
);
29217 /* If we are generating PIC, the ldr instruction below loads
29218 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29219 the address of the add + 8, so we have:
29221 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29224 Note that we have "+ 1" because some versions of GNU ld
29225 don't set the low bit of the result for R_ARM_REL32
29226 relocations against thumb function symbols.
29227 On ARMv6M this is +4, not +8. */
29228 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
29229 assemble_name (file
, labelpc
);
29230 fputs (":\n", file
);
29231 if (TARGET_THUMB1_ONLY
)
29233 /* This is 2 insns after the start of the thunk, so we know it
29234 is 4-byte aligned. */
29235 fputs ("\tadd\tr3, pc, r3\n", file
);
29236 fputs ("\tmov r12, r3\n", file
);
29239 fputs ("\tadd\tr12, pc, r12\n", file
);
29241 else if (TARGET_THUMB1_ONLY
)
29242 fputs ("\tmov r12, r3\n", file
);
29244 if (TARGET_THUMB1_ONLY
)
29246 if (mi_delta
> 255)
29248 /* With -mpure-code, we cannot load MI_DELTA from the
29249 constant pool: we build it explicitly. */
29250 if (target_pure_code
)
29252 thumb1_const_print
r3 (file
, 3);
29253 thumb1_gen_const_int_1 (r3
, mi_delta
);
29257 fputs ("\tldr\tr3, ", file
);
29258 assemble_name (file
, label
);
29259 fputs ("+4\n", file
);
29261 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
29262 mi_op
, this_regno
, this_regno
);
29264 else if (mi_delta
!= 0)
/* Thumb-1 unified syntax requires the s suffix in the instruction name when
   one of the operands is an immediate.  */
29268 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
29269 mi_op
, this_regno
, this_regno
,
29275 /* TODO: Use movw/movt for large constants when available. */
29276 while (mi_delta
!= 0)
29278 if ((mi_delta
& (3 << shift
)) == 0)
29282 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
29283 mi_op
, this_regno
, this_regno
,
29284 mi_delta
& (0xff << shift
));
29285 mi_delta
&= ~(0xff << shift
);
29292 if (TARGET_THUMB1_ONLY
)
29293 fputs ("\tpop\t{r3}\n", file
);
29295 fprintf (file
, "\tbx\tr12\n");
29297 /* With -mpure-code, we don't need to emit literals for the
29298 function address and delta since we emitted code to build
29300 if (!target_pure_code
)
29302 ASM_OUTPUT_ALIGN (file
, 2);
29303 assemble_name (file
, label
);
29304 fputs (":\n", file
);
29307 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29308 rtx tem
= XEXP (DECL_RTL (function
), 0);
29309 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29310 pipeline offset is four rather than eight. Adjust the offset
29312 tem
= plus_constant (GET_MODE (tem
), tem
,
29313 TARGET_THUMB1_ONLY
? -3 : -7);
29314 tem
= gen_rtx_MINUS (GET_MODE (tem
),
29316 gen_rtx_SYMBOL_REF (Pmode
,
29317 ggc_strdup (labelpc
)));
29318 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
29321 /* Output ".word .LTHUNKn". */
29322 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
29324 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
29325 assemble_integer (GEN_INT (mi_delta
), 4, BITS_PER_WORD
, 1);
29330 fputs ("\tb\t", file
);
29331 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29332 if (NEED_PLT_RELOC
)
29333 fputs ("(PLT)", file
);
29334 fputc ('\n', file
);
29337 final_end_function ();
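/* Illustrative standalone sketch (not part of the compiler): the -mpure-code
   path above materialises the function address byte by byte with
   movs/lsls/adds and the :upper8_15:/:upper0_7:/:lower8_15:/:lower0_7:
   operators.  Assuming those operators select bytes 3..0 of the address (as
   the shift order implies), the arithmetic is the following; the function
   name is hypothetical.  */
#include <stdint.h>

static uint32_t
rebuild_address_from_chunks (uint32_t addr)
{
  uint32_t upper8_15 = (addr >> 24) & 0xff;	/* movs r3, #:upper8_15:sym  */
  uint32_t upper0_7  = (addr >> 16) & 0xff;	/* adds r3, #:upper0_7:sym   */
  uint32_t lower8_15 = (addr >> 8) & 0xff;	/* adds r3, #:lower8_15:sym  */
  uint32_t lower0_7  = addr & 0xff;		/* adds r3, #:lower0_7:sym   */

  uint32_t r3 = upper8_15;	/* movs  */
  r3 <<= 8;			/* lsls r3, #8  */
  r3 += upper0_7;		/* adds  */
  r3 <<= 8;			/* lsls r3, #8  */
  r3 += lower8_15;		/* adds  */
  r3 <<= 8;			/* lsls r3, #8  */
  r3 += lower0_7;		/* adds  */
  return r3;			/* r3 == addr for any 32-bit address.  */
}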
29340 /* MI thunk handling for TARGET_32BIT. */
29343 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
29344 HOST_WIDE_INT vcall_offset
, tree function
)
29346 const bool long_call_p
= arm_is_long_call_p (function
);
29348 /* On ARM, this_regno is R0 or R1 depending on
29349 whether the function returns an aggregate or not.
29351 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
29353 ? R1_REGNUM
: R0_REGNUM
);
29355 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
29356 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
29357 reload_completed
= 1;
29358 emit_note (NOTE_INSN_PROLOGUE_END
);
29360 /* Add DELTA to THIS_RTX. */
29362 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
29363 delta
, this_rtx
, this_rtx
, false);
29365 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29366 if (vcall_offset
!= 0)
29368 /* Load *THIS_RTX. */
29369 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
29370 /* Compute *THIS_RTX + VCALL_OFFSET. */
29371 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
29373 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29374 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
29375 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
29378 /* Generate a tail call to the target function. */
29379 if (!TREE_USED (function
))
29381 assemble_external (function
);
29382 TREE_USED (function
) = 1;
29384 rtx funexp
= XEXP (DECL_RTL (function
), 0);
29387 emit_move_insn (temp
, funexp
);
29390 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
29391 rtx_insn
*insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
29392 SIBLING_CALL_P (insn
) = 1;
29395 /* Indirect calls require a bit of fixup in PIC mode. */
29398 split_all_insns_noflow ();
29402 insn
= get_insns ();
29403 shorten_branches (insn
);
29404 final_start_function (insn
, file
, 1);
29405 final (insn
, file
, 1);
29406 final_end_function ();
29408 /* Stop pretending this is a post-reload pass. */
29409 reload_completed
= 0;
29412 /* Output code to add DELTA to the first argument, and then jump
29413 to FUNCTION. Used for C++ multiple inheritance. */
29416 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
29417 HOST_WIDE_INT vcall_offset
, tree function
)
29419 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
29421 assemble_start_function (thunk
, fnname
);
29422 if (aarch_bti_enabled ())
29423 emit_insn (aarch_gen_bti_c ());
29425 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29427 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29428 assemble_end_function (thunk
, fnname
);
29432 arm_emit_vector_const (FILE *file
, rtx x
)
29435 const char * pattern
;
29437 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
29439 switch (GET_MODE (x
))
29441 case E_V2SImode
: pattern
= "%08x"; break;
29442 case E_V4HImode
: pattern
= "%04x"; break;
29443 case E_V8QImode
: pattern
= "%02x"; break;
29444 default: gcc_unreachable ();
29447 fprintf (file
, "0x");
29448 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
29452 element
= CONST_VECTOR_ELT (x
, i
);
29453 fprintf (file
, pattern
, INTVAL (element
));
29459 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29460 HFmode constant pool entries are actually loaded with ldr. */
29462 arm_emit_fp16_const (rtx c
)
29466 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
29467 if (WORDS_BIG_ENDIAN
)
29468 assemble_zeros (2);
29469 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
29470 if (!WORDS_BIG_ENDIAN
)
29471 assemble_zeros (2);
29475 arm_output_load_gr (rtx
*operands
)
29482 if (!MEM_P (operands
[1])
29483 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
29484 || !REG_P (reg
= XEXP (sum
, 0))
29485 || !CONST_INT_P (offset
= XEXP (sum
, 1))
29486 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
29487 return "wldrw%?\t%0, %1";
29489 /* Fix up an out-of-range load of a GR register. */
29490 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
29491 wcgr
= operands
[0];
29493 output_asm_insn ("ldr%?\t%0, %1", operands
);
29495 operands
[0] = wcgr
;
29497 output_asm_insn ("tmcr%?\t%0, %1", operands
);
29498 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
29503 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29505 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29506 named arg and all anonymous args onto the stack.
29507 XXX I know the prologue shouldn't be pushing registers, but it is faster
29511 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
29512 const function_arg_info
&arg
,
29514 int second_time ATTRIBUTE_UNUSED
)
29516 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
29519 cfun
->machine
->uses_anonymous_args
= 1;
29520 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
29522 nregs
= pcum
->aapcs_ncrn
;
29523 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl
))
29526 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
29527 if (res
< 0 && warn_psabi
)
29528 inform (input_location
, "parameter passing for argument of "
29529 "type %qT changed in GCC 7.1", arg
.type
);
29533 if (res
> 1 && warn_psabi
)
29534 inform (input_location
,
29535 "parameter passing for argument of type "
29536 "%qT changed in GCC 9.1", arg
.type
);
29541 nregs
= pcum
->nregs
;
29543 if (nregs
< NUM_ARG_REGS
)
29544 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
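/* Illustrative standalone sketch (not part of the compiler): the pretend-args
   size computed above.  The constants are the usual AAPCS values (four
   argument registers r0-r3, 4-byte words) and are stated here as assumptions;
   the function name is hypothetical.  */
static int
varargs_pretend_bytes (int named_arg_regs_used)
{
  const int num_arg_regs = 4;		/* r0-r3  */
  const int units_per_word = 4;		/* bytes  */

  if (named_arg_regs_used >= num_arg_regs)
    return 0;			/* All anonymous args are already on the stack.  */
  return (num_arg_regs - named_arg_regs_used) * units_per_word;
}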
29547 /* We can't rely on the caller doing the proper promotion when
29548 using APCS or ATPCS. */
29551 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
29553 return !TARGET_AAPCS_BASED
;
29556 static machine_mode
29557 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
29559 int *punsignedp ATTRIBUTE_UNUSED
,
29560 const_tree fntype ATTRIBUTE_UNUSED
,
29561 int for_return ATTRIBUTE_UNUSED
)
29563 if (GET_MODE_CLASS (mode
) == MODE_INT
29564 && GET_MODE_SIZE (mode
) < 4)
29572 arm_default_short_enums (void)
29574 return ARM_DEFAULT_SHORT_ENUMS
;
29578 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29581 arm_align_anon_bitfield (void)
29583 return TARGET_AAPCS_BASED
;
29587 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29590 arm_cxx_guard_type (void)
29592 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
29596 /* The EABI says test the least significant bit of a guard variable. */
29599 arm_cxx_guard_mask_bit (void)
29601 return TARGET_AAPCS_BASED
;
29605 /* The EABI specifies that all array cookies are 8 bytes long. */
29608 arm_get_cookie_size (tree type
)
29612 if (!TARGET_AAPCS_BASED
)
29613 return default_cxx_get_cookie_size (type
);
29615 size
= build_int_cst (sizetype
, 8);
29620 /* The EABI says that array cookies should also contain the element size. */
29623 arm_cookie_has_size (void)
29625 return TARGET_AAPCS_BASED
;
29629 /* The EABI says constructors and destructors should return a pointer to
29630 the object constructed/destroyed. */
29633 arm_cxx_cdtor_returns_this (void)
29635 return TARGET_AAPCS_BASED
;
29638 /* The EABI says that an inline function may never be the key
29642 arm_cxx_key_method_may_be_inline (void)
29644 return !TARGET_AAPCS_BASED
;
29648 arm_cxx_determine_class_data_visibility (tree decl
)
29650 if (!TARGET_AAPCS_BASED
29651 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
29654 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29655 is exported. However, on systems without dynamic vague linkage,
29656 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29657 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
29658 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
29660 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
29661 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
29665 arm_cxx_class_data_always_comdat (void)
29667 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29668 vague linkage if the class has no key function. */
29669 return !TARGET_AAPCS_BASED
;
29673 /* The EABI says __aeabi_atexit should be used to register static
29677 arm_cxx_use_aeabi_atexit (void)
29679 return TARGET_AAPCS_BASED
;
29684 arm_set_return_address (rtx source
, rtx scratch
)
29686 arm_stack_offsets
*offsets
;
29687 HOST_WIDE_INT delta
;
29689 unsigned long saved_regs
;
29691 offsets
= arm_get_frame_offsets ();
29692 saved_regs
= offsets
->saved_regs_mask
;
29694 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
29695 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
29698 if (frame_pointer_needed
)
29699 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
29702 /* LR will be the first saved register. */
29703 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
29708 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
29709 GEN_INT (delta
& ~4095)));
29714 addr
= stack_pointer_rtx
;
29716 addr
= plus_constant (Pmode
, addr
, delta
);
29719 /* The store needs to be marked to prevent DSE from deleting
29720 it as dead if it is based on fp. */
29721 mem
= gen_frame_mem (Pmode
, addr
);
29722 MEM_VOLATILE_P (mem
) = true;
29723 emit_move_insn (mem
, source
);
29729 thumb_set_return_address (rtx source
, rtx scratch
)
29731 arm_stack_offsets
*offsets
;
29732 HOST_WIDE_INT delta
;
29733 HOST_WIDE_INT limit
;
29736 unsigned long mask
;
29740 offsets
= arm_get_frame_offsets ();
29741 mask
= offsets
->saved_regs_mask
;
29742 if (mask
& (1 << LR_REGNUM
))
29745 /* Find the saved regs. */
29746 if (frame_pointer_needed
)
29748 delta
= offsets
->soft_frame
- offsets
->saved_args
;
29749 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
29755 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
29758 /* Allow for the stack frame. */
29759 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
29761 /* The link register is always the first saved register. */
29764 /* Construct the address. */
29765 addr
= gen_rtx_REG (SImode
, reg
);
29768 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
29769 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
29773 addr
= plus_constant (Pmode
, addr
, delta
);
29775 /* The store needs to be marked to prevent DSE from deleting
29776 it as dead if it is based on fp. */
29777 mem
= gen_frame_mem (Pmode
, addr
);
29778 MEM_VOLATILE_P (mem
) = true;
29779 emit_move_insn (mem
, source
);
29782 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
29785 /* Implements target hook vector_mode_supported_p. */
29787 arm_vector_mode_supported_p (machine_mode mode
)
29789 /* Neon also supports V2SImode, etc. listed in the clause below. */
29790 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
29791 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
29792 || mode
== V2DImode
|| mode
== V8HFmode
|| mode
== V4BFmode
29793 || mode
== V8BFmode
))
29796 if ((TARGET_NEON
|| TARGET_IWMMXT
)
29797 && ((mode
== V2SImode
)
29798 || (mode
== V4HImode
)
29799 || (mode
== V8QImode
)))
29802 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
29803 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
29804 || mode
== V2HAmode
))
29807 if (TARGET_HAVE_MVE
29808 && (VALID_MVE_SI_MODE (mode
) || VALID_MVE_PRED_MODE (mode
)))
29811 if (TARGET_HAVE_MVE_FLOAT
29812 && (mode
== V2DFmode
|| mode
== V4SFmode
|| mode
== V8HFmode
))
29818 /* Implements target hook array_mode. */
29819 static opt_machine_mode
29820 arm_array_mode (machine_mode mode
, unsigned HOST_WIDE_INT nelems
)
29822 if (TARGET_HAVE_MVE
29823 /* MVE accepts only tuples of 2 or 4 vectors. */
29827 machine_mode struct_mode
;
29828 FOR_EACH_MODE_IN_CLASS (struct_mode
, GET_MODE_CLASS (mode
))
29830 if (GET_MODE_INNER (struct_mode
) == GET_MODE_INNER (mode
)
29831 && known_eq (GET_MODE_NUNITS (struct_mode
),
29832 GET_MODE_NUNITS (mode
) * nelems
))
29833 return struct_mode
;
29836 return opt_machine_mode ();
29839 /* Implements target hook array_mode_supported_p. */
29842 arm_array_mode_supported_p (machine_mode mode
,
29843 unsigned HOST_WIDE_INT nelems
)
29845 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29846 for now, as the lane-swapping logic needs to be extended in the expanders.
29847 See PR target/82518. */
29848 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
29849 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
29850 && (nelems
>= 2 && nelems
<= 4))
29853 if (TARGET_HAVE_MVE
&& !BYTES_BIG_ENDIAN
29854 && VALID_MVE_MODE (mode
) && (nelems
== 2 || nelems
== 4))
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */
29864 static machine_mode
29865 arm_preferred_simd_mode (scalar_mode mode
)
29871 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HFmode
: V8HFmode
;
29873 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
29875 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
29877 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
29879 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
29881 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
29888 if (TARGET_REALLY_IWMMXT
)
29901 if (TARGET_HAVE_MVE
)
29914 if (TARGET_HAVE_MVE_FLOAT
)
29928 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29930 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29931 using r0-r4 for function arguments, r7 for the stack frame and don't have
29932 enough left over to do doubleword arithmetic. For Thumb-2 all the
29933 potentially problematic instructions accept high registers so this is not
29934 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29935 that require many low registers. */
29937 arm_class_likely_spilled_p (reg_class_t rclass
)
29939 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
29940 || rclass
== CC_REG
)
29943 return default_class_likely_spilled_p (rclass
);
29946 /* Implements target hook small_register_classes_for_mode_p. */
29948 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
29950 return TARGET_THUMB1
;
29953 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29954 ARM insns and therefore guarantee that the shift count is modulo 256.
29955 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29956 guarantee no particular behavior for out-of-range counts. */
29958 static unsigned HOST_WIDE_INT
29959 arm_shift_truncation_mask (machine_mode mode
)
29961 return mode
== SImode
? 255 : 0;
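/* Illustrative standalone sketch (not part of the compiler): what a shift
   truncation mask of 255 promises.  Register-specified ARM shifts use only
   the bottom eight bits of the count, and SImode logical left shifts by 32 or
   more clear the register; a mask of 0 (the DImode case) promises nothing.
   The function name is hypothetical.  */
#include <stdint.h>

static uint32_t
simode_lsl_by_register (uint32_t x, uint32_t count)
{
  count &= 255;			/* Count is taken modulo 256.  */
  return count >= 32 ? 0 : x << count;
}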
29965 /* Map internal gcc register numbers to DWARF2 register numbers. */
29968 arm_debugger_regno (unsigned int regno
)
29973 if (IS_VFP_REGNUM (regno
))
29975 /* See comment in arm_dwarf_register_span. */
29976 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29977 return 64 + regno
- FIRST_VFP_REGNUM
;
29979 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
29982 if (IS_IWMMXT_GR_REGNUM (regno
))
29983 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
29985 if (IS_IWMMXT_REGNUM (regno
))
29986 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
29988 if (IS_PAC_REGNUM (regno
))
29989 return DWARF_PAC_REGNUM
;
29991 return DWARF_FRAME_REGISTERS
;
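/* Illustrative standalone sketch (not part of the compiler): the VFP part of
   the DWARF numbering above, written on the offset of a register within the
   VFP bank (counted in S registers) so that GCC's FIRST_VFP_REGNUM base is
   not needed.  The function name is hypothetical.  */
static int
vfp_dwarf_regno (int s_offset, int ok_for_single)
{
  if (ok_for_single)
    return 64 + s_offset;		/* S0 -> 64, ..., S31 -> 95.  */
  return 256 + s_offset / 2;		/* D16 (offset 32) -> 272, etc.  */
}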
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
29998 arm_dwarf_register_span (rtx rtl
)
30006 regno
= REGNO (rtl
);
30007 if (!IS_VFP_REGNUM (regno
))
30010 /* XXX FIXME: The EABI defines two VFP register ranges:
30011 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
30013 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
30014 corresponding D register. Until GDB supports this, we shall use the
30015 legacy encodings. We also use these encodings for D0-D15 for
30016 compatibility with older debuggers. */
30017 mode
= GET_MODE (rtl
);
30018 if (GET_MODE_SIZE (mode
) < 8)
30021 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
30023 nregs
= GET_MODE_SIZE (mode
) / 4;
30024 for (i
= 0; i
< nregs
; i
+= 2)
30025 if (TARGET_BIG_END
)
30027 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
30028 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
30032 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
30033 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
30038 nregs
= GET_MODE_SIZE (mode
) / 8;
30039 for (i
= 0; i
< nregs
; i
++)
30040 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
30043 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
30046 #if ARM_UNWIND_INFO
30047 /* Emit unwind directives for a store-multiple instruction or stack pointer
30048 push during alignment.
30049 These should only ever be generated by the function prologue code, so
30050 expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before the first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
30059 arm_unwind_emit_sequence (FILE * out_file
, rtx p
)
30062 HOST_WIDE_INT offset
;
30063 HOST_WIDE_INT nregs
;
30067 unsigned padfirst
= 0, padlast
= 0;
30070 e
= XVECEXP (p
, 0, 0);
30071 gcc_assert (GET_CODE (e
) == SET
);
30073 /* First insn will adjust the stack pointer. */
30074 gcc_assert (GET_CODE (e
) == SET
30075 && REG_P (SET_DEST (e
))
30076 && REGNO (SET_DEST (e
)) == SP_REGNUM
30077 && GET_CODE (SET_SRC (e
)) == PLUS
);
30079 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
30080 nregs
= XVECLEN (p
, 0) - 1;
30081 gcc_assert (nregs
);
30083 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
30084 if (reg
< 16 || IS_PAC_REGNUM (reg
))
30086 /* For -Os dummy registers can be pushed at the beginning to
30087 avoid separate stack pointer adjustment. */
30088 e
= XVECEXP (p
, 0, 1);
30089 e
= XEXP (SET_DEST (e
), 0);
30090 if (GET_CODE (e
) == PLUS
)
30091 padfirst
= INTVAL (XEXP (e
, 1));
30092 gcc_assert (padfirst
== 0 || optimize_size
);
30093 /* The function prologue may also push pc, but not annotate it as it is
30094 never restored. We turn this into a stack pointer adjustment. */
30095 e
= XVECEXP (p
, 0, nregs
);
30096 e
= XEXP (SET_DEST (e
), 0);
30097 if (GET_CODE (e
) == PLUS
)
30098 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
30100 padlast
= offset
- 4;
30101 gcc_assert (padlast
== 0 || padlast
== 4);
30103 fprintf (out_file
, "\t.pad #4\n");
30105 fprintf (out_file
, "\t.save {");
30107 else if (IS_VFP_REGNUM (reg
))
30110 fprintf (out_file
, "\t.vsave {");
30113 /* Unknown register type. */
30114 gcc_unreachable ();
30116 /* If the stack increment doesn't match the size of the saved registers,
30117 something has gone horribly wrong. */
30118 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
30122 /* The remaining insns will describe the stores. */
30123 for (i
= 1; i
<= nregs
; i
++)
30125 /* Expect (set (mem <addr>) (reg)).
30126 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
30127 e
= XVECEXP (p
, 0, i
);
30128 gcc_assert (GET_CODE (e
) == SET
30129 && MEM_P (SET_DEST (e
))
30130 && REG_P (SET_SRC (e
)));
30132 reg
= REGNO (SET_SRC (e
));
30133 gcc_assert (reg
>= lastreg
);
30136 fprintf (out_file
, ", ");
30137 /* We can't use %r for vfp because we need to use the
30138 double precision register names. */
30139 if (IS_VFP_REGNUM (reg
))
30140 asm_fprintf (out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
30141 else if (IS_PAC_REGNUM (reg
))
30142 asm_fprintf (asm_out_file
, "ra_auth_code");
30144 asm_fprintf (out_file
, "%r", reg
);
30148 /* Check that the addresses are consecutive. */
30149 e
= XEXP (SET_DEST (e
), 0);
30150 if (GET_CODE (e
) == PLUS
)
30151 gcc_assert (REG_P (XEXP (e
, 0))
30152 && REGNO (XEXP (e
, 0)) == SP_REGNUM
30153 && CONST_INT_P (XEXP (e
, 1))
30154 && offset
== INTVAL (XEXP (e
, 1)));
30158 && REGNO (e
) == SP_REGNUM
);
30159 offset
+= reg_size
;
30162 fprintf (out_file
, "}\n");
30164 fprintf (out_file
, "\t.pad #%d\n", padfirst
);
30167 /* Emit unwind directives for a SET. */
30170 arm_unwind_emit_set (FILE * out_file
, rtx p
)
30178 switch (GET_CODE (e0
))
30181 /* Pushing a single register. */
30182 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
30183 || !REG_P (XEXP (XEXP (e0
, 0), 0))
30184 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
30187 asm_fprintf (out_file
, "\t.save ");
30188 if (IS_VFP_REGNUM (REGNO (e1
)))
30189 asm_fprintf(out_file
, "{d%d}\n",
30190 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
30192 asm_fprintf(out_file
, "{%r}\n", REGNO (e1
));
30196 if (REGNO (e0
) == SP_REGNUM
)
30198 /* A stack increment. */
30199 if (GET_CODE (e1
) != PLUS
30200 || !REG_P (XEXP (e1
, 0))
30201 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
30202 || !CONST_INT_P (XEXP (e1
, 1)))
30205 asm_fprintf (out_file
, "\t.pad #%wd\n",
30206 -INTVAL (XEXP (e1
, 1)));
30208 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
30210 HOST_WIDE_INT offset
;
30212 if (GET_CODE (e1
) == PLUS
)
30214 if (!REG_P (XEXP (e1
, 0))
30215 || !CONST_INT_P (XEXP (e1
, 1)))
30217 reg
= REGNO (XEXP (e1
, 0));
30218 offset
= INTVAL (XEXP (e1
, 1));
30219 asm_fprintf (out_file
, "\t.setfp %r, %r, #%wd\n",
30220 HARD_FRAME_POINTER_REGNUM
, reg
,
30223 else if (REG_P (e1
))
30226 asm_fprintf (out_file
, "\t.setfp %r, %r\n",
30227 HARD_FRAME_POINTER_REGNUM
, reg
);
30232 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
30234 /* Move from sp to reg. */
30235 asm_fprintf (out_file
, "\t.movsp %r\n", REGNO (e0
));
30237 else if (GET_CODE (e1
) == PLUS
30238 && REG_P (XEXP (e1
, 0))
30239 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
30240 && CONST_INT_P (XEXP (e1
, 1)))
30242 /* Set reg to offset from sp. */
30243 asm_fprintf (out_file
, "\t.movsp %r, #%d\n",
30244 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
30246 else if (REGNO (e0
) == IP_REGNUM
&& arm_current_function_pac_enabled_p ())
30248 if (cfun
->machine
->pacspval_needed
)
30249 asm_fprintf (out_file
, "\t.pacspval\n");
30261 /* Emit unwind directives for the given insn. */
30264 arm_unwind_emit (FILE * out_file
, rtx_insn
*insn
)
30267 bool handled_one
= false;
30269 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
30272 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
30273 && (TREE_NOTHROW (current_function_decl
)
30274 || crtl
->all_throwers_are_sibcalls
))
30277 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
30280 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
30282 switch (REG_NOTE_KIND (note
))
30284 case REG_FRAME_RELATED_EXPR
:
30285 pat
= XEXP (note
, 0);
30288 case REG_CFA_REGISTER
:
30289 pat
= XEXP (note
, 0);
30292 pat
= PATTERN (insn
);
30293 if (GET_CODE (pat
) == PARALLEL
)
30294 pat
= XVECEXP (pat
, 0, 0);
30297 /* Only emitted for IS_STACKALIGN re-alignment. */
30302 src
= SET_SRC (pat
);
30303 dest
= SET_DEST (pat
);
30305 gcc_assert (src
== stack_pointer_rtx
30306 || IS_PAC_REGNUM (REGNO (src
)));
30307 reg
= REGNO (dest
);
30309 if (IS_PAC_REGNUM (REGNO (src
)))
30310 arm_unwind_emit_set (out_file
, PATTERN (insn
));
30312 asm_fprintf (out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30315 handled_one
= true;
/* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
   to get correct DWARF information for shrink-wrap.  We should not
   emit unwind information for it because these are used either for
   pretend arguments or notes to adjust sp and restore registers from
30323 case REG_CFA_DEF_CFA
:
30324 case REG_CFA_ADJUST_CFA
:
30325 case REG_CFA_RESTORE
:
30328 case REG_CFA_EXPRESSION
:
30329 case REG_CFA_OFFSET
:
30330 /* ??? Only handling here what we actually emit. */
30331 gcc_unreachable ();
30339 pat
= PATTERN (insn
);
30342 switch (GET_CODE (pat
))
30345 arm_unwind_emit_set (out_file
, pat
);
30349 /* Store multiple. */
30350 arm_unwind_emit_sequence (out_file
, pat
);
30359 /* Output a reference from a function exception table to the type_info
30360 object X. The EABI specifies that the symbol should be relocated by
30361 an R_ARM_TARGET2 relocation. */
30364 arm_output_ttype (rtx x
)
30366 fputs ("\t.word\t", asm_out_file
);
30367 output_addr_const (asm_out_file
, x
);
30368 /* Use special relocations for symbol references. */
30369 if (!CONST_INT_P (x
))
30370 fputs ("(TARGET2)", asm_out_file
);
30371 fputc ('\n', asm_out_file
);
30376 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30379 arm_asm_emit_except_personality (rtx personality
)
30381 fputs ("\t.personality\t", asm_out_file
);
30382 output_addr_const (asm_out_file
, personality
);
30383 fputc ('\n', asm_out_file
);
30385 #endif /* ARM_UNWIND_INFO */
30387 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30390 arm_asm_init_sections (void)
30392 #if ARM_UNWIND_INFO
30393 exception_section
= get_unnamed_section (0, output_section_asm_op
,
30395 #endif /* ARM_UNWIND_INFO */
30397 #ifdef OBJECT_FORMAT_ELF
30398 if (target_pure_code
)
30399 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
30403 /* Output unwind directives for the start/end of a function. */
30406 arm_output_fn_unwind (FILE * f
, bool prologue
)
30408 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
30412 fputs ("\t.fnstart\n", f
);
/* If this function will never be unwound, then mark it as such.
   The same condition is used in arm_unwind_emit to suppress
   the frame annotations.  */
30418 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
30419 && (TREE_NOTHROW (current_function_decl
)
30420 || crtl
->all_throwers_are_sibcalls
))
30421 fputs("\t.cantunwind\n", f
);
30423 fputs ("\t.fnend\n", f
);
30428 arm_emit_tls_decoration (FILE *fp
, rtx x
)
30430 enum tls_reloc reloc
;
30433 val
= XVECEXP (x
, 0, 0);
30434 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
30436 output_addr_const (fp
, val
);
30441 fputs ("(tlsgd)", fp
);
30443 case TLS_GD32_FDPIC
:
30444 fputs ("(tlsgd_fdpic)", fp
);
30447 fputs ("(tlsldm)", fp
);
30449 case TLS_LDM32_FDPIC
:
30450 fputs ("(tlsldm_fdpic)", fp
);
30453 fputs ("(tlsldo)", fp
);
30456 fputs ("(gottpoff)", fp
);
30458 case TLS_IE32_FDPIC
:
30459 fputs ("(gottpoff_fdpic)", fp
);
30462 fputs ("(tpoff)", fp
);
30465 fputs ("(tlsdesc)", fp
);
30468 gcc_unreachable ();
30477 fputs (" + (. - ", fp
);
30478 output_addr_const (fp
, XVECEXP (x
, 0, 2));
30479 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30480 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
30481 output_addr_const (fp
, XVECEXP (x
, 0, 3));
30491 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30494 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
30496 gcc_assert (size
== 4);
30497 fputs ("\t.word\t", file
);
30498 output_addr_const (file
, x
);
30499 fputs ("(tlsldo)", file
);
30502 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30505 arm_output_addr_const_extra (FILE *fp
, rtx x
)
30507 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
30508 return arm_emit_tls_decoration (fp
, x
);
30509 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
30512 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
30514 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
30515 assemble_name_raw (fp
, label
);
30519 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
30521 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
30525 output_addr_const (fp
, XVECEXP (x
, 0, 0));
30529 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
30531 output_addr_const (fp
, XVECEXP (x
, 0, 0));
30535 output_addr_const (fp
, XVECEXP (x
, 0, 1));
30539 else if (GET_CODE (x
) == CONST_VECTOR
)
30540 return arm_emit_vector_const (fp
, x
);
30545 /* Output assembly for a shift instruction.
30546 SET_FLAGS determines how the instruction modifies the condition codes.
30547 0 - Do not set condition codes.
30548 1 - Set condition codes.
30549 2 - Use smallest instruction. */
30551 arm_output_shift(rtx
* operands
, int set_flags
)
30554 static const char flag_chars
[3] = {'?', '.', '!'};
30559 c
= flag_chars
[set_flags
];
30560 shift
= shift_op(operands
[3], &val
);
30564 operands
[2] = GEN_INT(val
);
30565 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
30568 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
30570 output_asm_insn (pattern
, operands
);
30574 /* Output assembly for a WMMX immediate shift instruction. */
30576 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
30578 int shift
= INTVAL (operands
[2]);
30580 machine_mode opmode
= GET_MODE (operands
[0]);
30582 gcc_assert (shift
>= 0);
30584 /* If the shift value in the register versions is > 63 (for D qualifier),
30585 31 (for W qualifier) or 15 (for H qualifier). */
30586 if (((opmode
== V4HImode
) && (shift
> 15))
30587 || ((opmode
== V2SImode
) && (shift
> 31))
30588 || ((opmode
== DImode
) && (shift
> 63)))
30592 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
30593 output_asm_insn (templ
, operands
);
30594 if (opmode
== DImode
)
30596 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
30597 output_asm_insn (templ
, operands
);
30602 /* The destination register will contain all zeros. */
30603 sprintf (templ
, "wzero\t%%0");
30604 output_asm_insn (templ
, operands
);
30609 if ((opmode
== DImode
) && (shift
> 32))
30611 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
30612 output_asm_insn (templ
, operands
);
30613 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
30614 output_asm_insn (templ
, operands
);
30618 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
30619 output_asm_insn (templ
, operands
);
30624 /* Output assembly for a WMMX tinsr instruction. */
30626 arm_output_iwmmxt_tinsr (rtx
*operands
)
30628 int mask
= INTVAL (operands
[3]);
30631 int units
= mode_nunits
[GET_MODE (operands
[0])];
30632 gcc_assert ((mask
& (mask
- 1)) == 0);
30633 for (i
= 0; i
< units
; ++i
)
30635 if ((mask
& 0x01) == 1)
30641 gcc_assert (i
< units
);
30643 switch (GET_MODE (operands
[0]))
30646 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
30649 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
30652 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
30655 gcc_unreachable ();
30658 output_asm_insn (templ
, operands
);
30663 /* Output an arm casesi dispatch sequence. Used by arm_casesi_internal insn.
30664 Responsible for the handling of switch statements in arm. */
30666 arm_output_casesi (rtx
*operands
)
30669 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
30670 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30671 output_asm_insn ("cmp\t%0, %1", operands
);
30672 output_asm_insn ("bhi\t%l3", operands
);
30673 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
30674 switch (GET_MODE (diff_vec
))
30677 if (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
)
30678 output_asm_insn ("ldrb\t%4, [%5, %0]", operands
);
30680 output_asm_insn ("ldrsb\t%4, [%5, %0]", operands
);
30681 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands
);
30684 if (REGNO (operands
[4]) != REGNO (operands
[5]))
30686 output_asm_insn ("add\t%4, %0, %0", operands
);
30687 if (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
)
30688 output_asm_insn ("ldrh\t%4, [%5, %4]", operands
);
30690 output_asm_insn ("ldrsh\t%4, [%5, %4]", operands
);
30694 output_asm_insn ("add\t%4, %5, %0", operands
);
30695 if (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
)
30696 output_asm_insn ("ldrh\t%4, [%4, %0]", operands
);
30698 output_asm_insn ("ldrsh\t%4, [%4, %0]", operands
);
30700 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands
);
30705 output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands
);
30706 output_asm_insn ("add\t%|pc, %|pc, %4", operands
);
30709 output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands
);
30712 gcc_unreachable ();
30714 assemble_label (asm_out_file
, label
);
30715 output_asm_insn ("nop", operands
);
30719 /* Output a Thumb-1 casesi dispatch sequence. */
30721 thumb1_output_casesi (rtx
*operands
)
30723 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
30725 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30727 switch (GET_MODE(diff_vec
))
30730 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
30731 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30733 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
30734 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30736 return "bl\t%___gnu_thumb1_case_si";
30738 gcc_unreachable ();
30742 /* Output a Thumb-2 casesi instruction. */
30744 thumb2_output_casesi (rtx
*operands
)
30746 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
30748 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30750 output_asm_insn ("cmp\t%0, %1", operands
);
30751 output_asm_insn ("bhi\t%l3", operands
);
30752 switch (GET_MODE(diff_vec
))
30755 return "tbb\t[%|pc, %0]";
30757 return "tbh\t[%|pc, %0, lsl #1]";
30761 output_asm_insn ("adr\t%4, %l2", operands
);
30762 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
30763 output_asm_insn ("add\t%4, %4, %5", operands
);
30768 output_asm_insn ("adr\t%4, %l2", operands
);
30769 return "ldr\t%|pc, [%4, %0, lsl #2]";
30772 gcc_unreachable ();
/* Implement TARGET_SCHED_ISSUE_RATE.  Look up the issue rate in the
   per-core tuning structs.  */
30779 arm_issue_rate (void)
30781 return current_tune
->issue_rate
;
30784 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30786 arm_sched_variable_issue (FILE *, int, rtx_insn
*insn
, int more
)
30788 if (DEBUG_INSN_P (insn
))
30791 rtx_code code
= GET_CODE (PATTERN (insn
));
30792 if (code
== USE
|| code
== CLOBBER
)
30795 if (get_attr_type (insn
) == TYPE_NO_INSN
)
/* Return how many instructions the scheduler should look ahead to choose the
30804 arm_first_cycle_multipass_dfa_lookahead (void)
30806 int issue_rate
= arm_issue_rate ();
30808 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
30811 /* Enable modeling of L2 auto-prefetcher. */
30813 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
30815 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
30819 arm_mangle_type (const_tree type
)
/* The ARM ABI documents (10th October 2008) say that "__va_list"
   has to be mangled as if it is in the "std" namespace.  */
30823 if (TARGET_AAPCS_BASED
30824 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
30825 return "St9__va_list";
30827 /* Half-precision floating point types. */
30828 if (SCALAR_FLOAT_TYPE_P (type
) && TYPE_PRECISION (type
) == 16)
30830 if (TYPE_MAIN_VARIANT (type
) == float16_type_node
)
30832 if (TYPE_MODE (type
) == BFmode
)
/* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30840 if (TYPE_NAME (type
) != NULL
)
30841 return arm_mangle_builtin_type (type
);
30843 /* Use the default mangling. */
30847 /* Order of allocation of core registers for Thumb: this allocation is
30848 written over the corresponding initial entries of the array
30849 initialized with REG_ALLOC_ORDER. We allocate all low registers
30850 first. Saving and restoring a low register is usually cheaper than
30851 using a call-clobbered high register. */
30853 static const int thumb_core_reg_alloc_order
[] =
30855 3, 2, 1, 0, 4, 5, 6, 7,
30856 12, 14, 8, 9, 10, 11
30859 /* Adjust register allocation order when compiling for Thumb. */
30862 arm_order_regs_for_local_alloc (void)
30864 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
30865 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
30867 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
30868 sizeof (thumb_core_reg_alloc_order
));
30871 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30874 arm_frame_pointer_required (void)
30876 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
30879 /* If the function receives nonlocal gotos, it needs to save the frame
30880 pointer in the nonlocal_goto_save_area object. */
30881 if (cfun
->has_nonlocal_label
)
30884 /* The frame pointer is required for non-leaf APCS frames. */
30885 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
30888 /* If we are probing the stack in the prologue, we will have a faulting
30889 instruction prior to the stack adjustment and this requires a frame
30890 pointer if we want to catch the exception using the EABI unwinder. */
30891 if (!IS_INTERRUPT (arm_current_func_type ())
30892 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
30893 || flag_stack_clash_protection
)
30894 && arm_except_unwind_info (&global_options
) == UI_TARGET
30895 && cfun
->can_throw_non_call_exceptions
)
30897 HOST_WIDE_INT size
= get_frame_size ();
30899 /* That's irrelevant if there is no stack adjustment. */
30903 /* That's relevant only if there is a stack probe. */
30904 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
30906 /* We don't have the final size of the frame so adjust. */
30907 size
+= 32 * UNITS_PER_WORD
;
30908 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
30918 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30919 All modes except THUMB1 have conditional execution.
30920 If we have conditional arithmetic, return false before reload to
30921 enable some ifcvt transformations. */
30923 arm_have_conditional_execution (void)
30925 bool has_cond_exec
, enable_ifcvt_trans
;
30927 /* Only THUMB1 cannot support conditional execution. */
30928 has_cond_exec
= !TARGET_THUMB1
;
30930 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30932 enable_ifcvt_trans
= TARGET_COND_ARITH
&& !reload_completed
;
30934 return has_cond_exec
&& !enable_ifcvt_trans
;
30937 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30938 static HOST_WIDE_INT
30939 arm_vector_alignment (const_tree type
)
30941 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
30943 if (TARGET_AAPCS_BASED
)
30944 align
= MIN (align
, 64);
30949 static unsigned int
30950 arm_autovectorize_vector_modes (vector_modes
*modes
, bool)
30952 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
30954 modes
->safe_push (V16QImode
);
30955 modes
->safe_push (V8QImode
);
30961 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
30963 /* Vectors which aren't in packed structures will not be less aligned than
30964 the natural alignment of their element type, so this is safe. */
30965 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30968 return default_builtin_vector_alignment_reachable (type
, is_packed
);
30972 arm_builtin_support_vector_misalignment (machine_mode mode
,
30973 const_tree type
, int misalignment
,
30976 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30978 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
30983 /* If the misalignment is unknown, we should be able to handle the access
30984 so long as it is not to a member of a packed data structure. */
30985 if (misalignment
== -1)
30988 /* Return true if the misalignment is a multiple of the natural alignment
30989 of the vector's element type. This is probably always going to be
30990 true in practice, since we've already established that this isn't a
30992 return ((misalignment
% align
) == 0);
30995 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
31000 arm_conditional_register_usage (void)
31004 if (TARGET_THUMB1
&& optimize_size
)
31006 /* When optimizing for size on Thumb-1, it's better not
31007 to use the HI regs, because of the overhead of
31009 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
31010 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
31013 /* The link register can be clobbered by any branch insn,
31014 but we have no way to track that at present, so mark
31015 it as unavailable. */
31017 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
31019 if (TARGET_32BIT
&& TARGET_VFP_BASE
)
31021 /* VFPv3 registers are disabled when earlier VFP
31022 versions are selected due to the definition of
31023 LAST_VFP_REGNUM. */
31024 for (regno
= FIRST_VFP_REGNUM
;
31025 regno
<= LAST_VFP_REGNUM
; ++ regno
)
31027 fixed_regs
[regno
] = 0;
31028 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
31029 || regno
>= FIRST_VFP_REGNUM
+ 32;
31031 if (TARGET_HAVE_MVE
)
31032 fixed_regs
[VPR_REGNUM
] = 0;
31035 if (TARGET_REALLY_IWMMXT
&& !TARGET_GENERAL_REGS_ONLY
)
31037 regno
= FIRST_IWMMXT_GR_REGNUM
;
31038 /* The 2002/10/09 revision of the XScale ABI has wCG0
31039 and wCG1 as call-preserved registers. The 2002/11/21
31040 revision changed this so that all wCG registers are
31041 scratch registers. */
31042 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
31043 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
31044 fixed_regs
[regno
] = 0;
31045 /* The XScale ABI has wR0 - wR9 as scratch registers,
31046 the rest as call-preserved registers. */
31047 for (regno
= FIRST_IWMMXT_REGNUM
;
31048 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
31050 fixed_regs
[regno
] = 0;
31051 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
31055 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
31057 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
31058 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
31060 else if (TARGET_APCS_STACK
)
31062 fixed_regs
[10] = 1;
31063 call_used_regs
[10] = 1;
31065 /* -mcaller-super-interworking reserves r11 for calls to
31066 _interwork_r11_call_via_rN(). Making the register global
31067 is an easy way of ensuring that it remains valid for all
31069 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
31070 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
31072 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
31073 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
31074 if (TARGET_CALLER_INTERWORKING
)
31075 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
31078 /* The Q and GE bits are only accessed via special ACLE patterns. */
31079 CLEAR_HARD_REG_BIT (operand_reg_set
, APSRQ_REGNUM
);
31080 CLEAR_HARD_REG_BIT (operand_reg_set
, APSRGE_REGNUM
);
31082 SUBTARGET_CONDITIONAL_REGISTER_USAGE
31086 arm_preferred_rename_class (reg_class_t rclass
)
/* Thumb-2 instructions using LO_REGS may be smaller than instructions
   using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS,
   and code size can be reduced.  */
31091 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
31097 /* Compute the attribute "length" of insn "*push_multi".
31098 So this function MUST be kept in sync with that insn pattern. */
31100 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
31102 int i
, regno
, hi_reg
;
31103 int num_saves
= XVECLEN (parallel_op
, 0);
31113 regno
= REGNO (first_op
);
/* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
   list is 8-bit.  Normally this means all registers in the list must be
   LO_REGS, that is (R0-R7).  If any HI_REGS are used, then we must use 32-bit
   encodings.  There is one exception for PUSH: LR, although in HI_REGS, can be
   used with the 16-bit encoding.  */
31119 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
31120 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
31122 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
31123 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
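/* Illustrative standalone sketch (not part of the compiler): the encoding
   rule restated above.  A Thumb-2 PUSH can use the 16-bit encoding only when
   every register in the list is a low register (r0-r7), with LR (r14) as the
   single permitted high register.  Architectural register numbers are used
   here, not GCC's internal ones; the function name is hypothetical.  */
static int
push_list_fits_16bit_encoding (const int *regs, int nregs)
{
  for (int i = 0; i < nregs; i++)
    if (regs[i] > 7 && regs[i] != 14)
      return 0;		/* A high register other than LR: 32-bit encoding.  */
  return 1;		/* The 16-bit PUSH encoding is sufficient.  */
}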
31131 /* Compute the attribute "length" of insn. Currently, this function is used
31132 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
31133 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
31134 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
31135 true if OPERANDS contains insn which explicit updates base register. */
31138 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
31147 rtx parallel_op
= operands
[0];
31148 /* Initialize to elements number of PARALLEL. */
31149 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
31150 /* Initialize the value to base register. */
31151 unsigned regno
= REGNO (operands
[1]);
31152 /* Skip return and write back pattern.
31153 We only need register pop pattern for later analysis. */
31154 unsigned first_indx
= 0;
31155 first_indx
+= return_pc
? 1 : 0;
31156 first_indx
+= write_back_p
? 1 : 0;
/* A pop operation can be done through LDM or POP.  If the base register is SP
   and it has write back, then an LDM will be an alias of POP.  */
31160 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
31161 bool ldm_p
= !pop_p
;
31163 /* Check base register for LDM. */
31164 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
31167 /* Check each register in the list. */
31168 for (; indx
>= first_indx
; indx
--)
31170 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
31171 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
31172 comment in arm_attr_length_push_multi. */
31173 if (REGNO_REG_CLASS (regno
) == HI_REGS
31174 && (regno
!= PC_REGNUM
|| ldm_p
))
31181 /* Compute the number of instructions emitted by output_move_double. */
31183 arm_count_output_move_double_insns (rtx
*operands
)
31187 /* output_move_double may modify the operands array, so call it
31188 here on a copy of the array. */
31189 ops
[0] = operands
[0];
31190 ops
[1] = operands
[1];
31191 output_move_double (ops
, false, &count
);
31195 /* Same as above, but operands are a register/memory pair in SImode.
31196 Assumes operands has the base register in position 0 and memory in position
31197 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
31199 arm_count_ldrdstrd_insns (rtx
*operands
, bool load
)
31203 int regnum
, memnum
;
31205 regnum
= 0, memnum
= 1;
31207 regnum
= 1, memnum
= 0;
31208 ops
[regnum
] = gen_rtx_REG (DImode
, REGNO (operands
[0]));
31209 ops
[memnum
] = adjust_address (operands
[2], DImode
, 0);
31210 output_move_double (ops
, false, &count
);
31216 vfp3_const_double_for_fract_bits (rtx operand
)
31218 REAL_VALUE_TYPE r0
;
31220 if (!CONST_DOUBLE_P (operand
))
31223 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
31224 if (exact_real_inverse (DFmode
, &r0
)
31225 && !REAL_VALUE_NEGATIVE (r0
))
31227 if (exact_real_truncate (DFmode
, &r0
))
31229 HOST_WIDE_INT value
= real_to_integer (&r0
);
31230 value
= value
& 0xffffffff;
31231 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
31233 int ret
= exact_log2 (value
);
31234 gcc_assert (IN_RANGE (ret
, 0, 31));
31242 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
31243 log2 is in [1, 32], return that log2. Otherwise return -1.
31244 This is used in the patterns for vcvt.s32.f32 floating-point to
31245 fixed-point conversions. */
31248 vfp3_const_double_for_bits (rtx x
)
31250 const REAL_VALUE_TYPE
*r
;
31252 if (!CONST_DOUBLE_P (x
))
31255 r
= CONST_DOUBLE_REAL_VALUE (x
);
31257 if (REAL_VALUE_NEGATIVE (*r
)
31258 || REAL_VALUE_ISNAN (*r
)
31259 || REAL_VALUE_ISINF (*r
)
31260 || !real_isinteger (r
, SFmode
))
31263 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
31265 /* The exact_log2 above will have returned -1 if this is
31266 not an exact log2. */
31267 if (!IN_RANGE (hwint
, 1, 32))
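/* Illustrative standalone sketch (not part of the compiler): the check
   described above, in plain C.  Return the log2 of a double that is an exact
   power of two with log2 in [1, 32], otherwise -1.  The function name is
   hypothetical; link with -lm.  */
#include <math.h>

static int
power_of_two_bits (double d)
{
  if (!(d > 0) || isinf (d) || d != floor (d))
    return -1;			/* Negative, NaN, infinite or not an integer.  */

  double l = log2 (d);
  int bits = (int) l;
  if (l != (double) bits || bits < 1 || bits > 32)
    return -1;			/* Not an exact power of two in range.  */
  return bits;			/* e.g. 4.0 -> 2, 4294967296.0 -> 32.  */
}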
31274 /* Emit a memory barrier around an atomic sequence according to MODEL. */
31277 arm_pre_atomic_barrier (enum memmodel model
)
31279 if (need_atomic_barrier_p (model
, true))
31280 emit_insn (gen_memory_barrier ());
31284 arm_post_atomic_barrier (enum memmodel model
)
31286 if (need_atomic_barrier_p (model
, false))
31287 emit_insn (gen_memory_barrier ());
31290 /* Emit the load-exclusive and store-exclusive instructions.
31291 Use acquire and release versions if necessary. */
31294 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
31296 rtx (*gen
) (rtx
, rtx
);
31302 case E_QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
31303 case E_HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
31304 case E_SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
31305 case E_DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
31307 gcc_unreachable ();
31314 case E_QImode
: gen
= gen_arm_load_exclusiveqi
; break;
31315 case E_HImode
: gen
= gen_arm_load_exclusivehi
; break;
31316 case E_SImode
: gen
= gen_arm_load_exclusivesi
; break;
31317 case E_DImode
: gen
= gen_arm_load_exclusivedi
; break;
31319 gcc_unreachable ();
31323 emit_insn (gen (rval
, mem
));
31327 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
31330 rtx (*gen
) (rtx
, rtx
, rtx
);
31336 case E_QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
31337 case E_HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
31338 case E_SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
31339 case E_DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
31341 gcc_unreachable ();
31348 case E_QImode
: gen
= gen_arm_store_exclusiveqi
; break;
31349 case E_HImode
: gen
= gen_arm_store_exclusivehi
; break;
31350 case E_SImode
: gen
= gen_arm_store_exclusivesi
; break;
31351 case E_DImode
: gen
= gen_arm_store_exclusivedi
; break;
31353 gcc_unreachable ();
31357 emit_insn (gen (bval
, rval
, mem
));
31360 /* Mark the previous jump instruction as unlikely. */
31363 emit_unlikely_jump (rtx insn
)
31365 rtx_insn
*jump
= emit_jump_insn (insn
);
31366 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
31369 /* Expand a compare and swap pattern. */
31372 arm_expand_compare_and_swap (rtx operands
[])
31374 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
31375 machine_mode mode
, cmp_mode
;
31377 bval
= operands
[0];
31378 rval
= operands
[1];
31380 oldval
= operands
[3];
31381 newval
= operands
[4];
31382 is_weak
= operands
[5];
31383 mod_s
= operands
[6];
31384 mod_f
= operands
[7];
31385 mode
= GET_MODE (mem
);
31387 /* Normally the succ memory model must be stronger than fail, but in the
31388 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31389 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31391 if (TARGET_HAVE_LDACQ
31392 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
31393 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
31394 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
31400 /* For narrow modes, we're going to perform the comparison in SImode,
31401 so do the zero-extension now. */
31402 rval
= gen_reg_rtx (SImode
);
31403 oldval
= convert_modes (SImode
, mode
, oldval
, true);
31407 /* Force the value into a register if needed. We waited until after
31408 the zero-extension above to do this properly. */
31409 if (!arm_add_operand (oldval
, SImode
))
31410 oldval
= force_reg (SImode
, oldval
);
31414 if (!cmpdi_operand (oldval
, mode
))
31415 oldval
= force_reg (mode
, oldval
);
31419 gcc_unreachable ();
31423 cmp_mode
= E_SImode
;
31425 cmp_mode
= CC_Zmode
;
31427 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
31428 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode
, mode
, bdst
, rval
, mem
,
31429 oldval
, newval
, is_weak
, mod_s
, mod_f
));
31431 if (mode
== QImode
|| mode
== HImode
)
31432 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
31434 /* In all cases, we arrange for success to be signaled by Z set.
31435 This arrangement allows for the boolean result to be used directly
31436 in a subsequent branch, post optimization. For Thumb-1 targets, the
31437 boolean negation of the result is also stored in bval because Thumb-1
31438 backend lacks dependency tracking for CC flag due to flag-setting not
31439 being represented at RTL level. */
31441 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
31444 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
31445 emit_insn (gen_rtx_SET (bval
, x
));
31449 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31450 another memory store between the load-exclusive and store-exclusive can
31451 reset the monitor from Exclusive to Open state. This means we must wait
31452 until after reload to split the pattern, lest we get a register spill in
31453 the middle of the atomic sequence. Success of the compare and swap is
31454 indicated by the Z flag set for 32bit targets and by neg_bval being zero
31455 for Thumb-1 targets (ie. negation of the boolean value returned by
31456 atomic_compare_and_swapmode standard pattern in operand 0). */
31459 arm_split_compare_and_swap (rtx operands
[])
31461 rtx rval
, mem
, oldval
, newval
, neg_bval
, mod_s_rtx
;
31463 enum memmodel mod_s
, mod_f
;
31465 rtx_code_label
*label1
, *label2
;
31468 rval
= operands
[1];
31470 oldval
= operands
[3];
31471 newval
= operands
[4];
31472 is_weak
= (operands
[5] != const0_rtx
);
31473 mod_s_rtx
= operands
[6];
31474 mod_s
= memmodel_from_int (INTVAL (mod_s_rtx
));
31475 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
31476 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
31477 mode
= GET_MODE (mem
);
31479 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
31481 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (mod_s_rtx
);
31482 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (mod_s_rtx
);
31484 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31485 a full barrier is emitted after the store-release. */
31487 use_acquire
= false;
31489 /* Checks whether a barrier is needed and emits one accordingly. */
31490 if (!(use_acquire
|| use_release
))
31491 arm_pre_atomic_barrier (mod_s
);
31496 label1
= gen_label_rtx ();
31497 emit_label (label1
);
31499 label2
= gen_label_rtx ();
31501 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
31503 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31504 as required to communicate with arm_expand_compare_and_swap. */
31507 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
31508 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
31509 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
31510 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
31511 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
31515 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
31516 if (thumb1_cmpneg_operand (oldval
, SImode
))
31519 if (!satisfies_constraint_L (oldval
))
31521 gcc_assert (satisfies_constraint_J (oldval
));
31523 /* For such immediates, ADDS needs the source and destination regs
31526 Normally this would be handled by RA, but this is all happening
31528 emit_move_insn (neg_bval
, rval
);
31532 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval
, src
, oldval
,
31537 emit_move_insn (neg_bval
, const1_rtx
);
31538 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
31542 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
31544 /* Weak or strong, we want EQ to be true for success, so that we
31545 match the flags that we got from the compare above. */
31548 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
31549 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
31550 emit_insn (gen_rtx_SET (cond
, x
));
31555 /* Z is set to boolean value of !neg_bval, as required to communicate
31556 with arm_expand_compare_and_swap. */
31557 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
31558 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
31561 if (!is_mm_relaxed (mod_f
))
31562 emit_label (label2
);
31564 /* Checks whether a barrier is needed and emits one accordingly. */
31566 || !(use_acquire
|| use_release
))
31567 arm_post_atomic_barrier (mod_s
);
31569 if (is_mm_relaxed (mod_f
))
31570 emit_label (label2
);
31573 /* Split an atomic operation pattern. Operation is given by CODE and is one
31574 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31575 operation). Operation is performed on the content at MEM and on VALUE
31576 following the memory model MODEL_RTX. The content at MEM before and after
31577 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31578 success of the operation is returned in COND. Using a scratch register or
31579 an operand register for these determines what result is returned for that
31583 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
31584 rtx value
, rtx model_rtx
, rtx cond
)
31586 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
31587 machine_mode mode
= GET_MODE (mem
);
31588 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
31589 rtx_code_label
*label
;
31590 bool all_low_regs
, bind_old_new
;
31593 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
31595 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (model_rtx
);
31596 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (model_rtx
);
31598 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31599 a full barrier is emitted after the store-release. */
31601 use_acquire
= false;
31603 /* Checks whether a barrier is needed and emits one accordingly. */
31604 if (!(use_acquire
|| use_release
))
31605 arm_pre_atomic_barrier (model
);
31607 label
= gen_label_rtx ();
31608 emit_label (label
);
31611 new_out
= gen_lowpart (wmode
, new_out
);
31613 old_out
= gen_lowpart (wmode
, old_out
);
31616 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
31618 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
31620 /* Does the operation require destination and first operand to use the same
31621 register? This is decided by register constraints of relevant insn
31622 patterns in thumb1.md. */
31623 gcc_assert (!new_out
|| REG_P (new_out
));
31624 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
31625 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
31626 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
31631 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
31633 /* We want to return the old value while putting the result of the operation
31634 in the same register as the old value so copy the old value over to the
31635 destination register and use that register for the operation. */
31636 if (old_out
&& bind_old_new
)
31638 emit_move_insn (new_out
, old_out
);
31649 x
= gen_rtx_AND (wmode
, old_out
, value
);
31650 emit_insn (gen_rtx_SET (new_out
, x
));
31651 x
= gen_rtx_NOT (wmode
, new_out
);
31652 emit_insn (gen_rtx_SET (new_out
, x
));
31656 if (CONST_INT_P (value
))
31658 value
= gen_int_mode (-INTVAL (value
), wmode
);
31664 if (mode
== DImode
)
31666 /* DImode plus/minus need to clobber flags. */
31667 /* The adddi3 and subdi3 patterns are incorrectly written so that
31668 they require matching operands, even when we could easily support
31669 three operands. Thankfully, this can be fixed up post-splitting,
31670 as the individual add+adc patterns do accept three operands and
31671 post-reload cprop can make these moves go away. */
31672 emit_move_insn (new_out
, old_out
);
31674 x
= gen_adddi3 (new_out
, new_out
, value
);
31676 x
= gen_subdi3 (new_out
, new_out
, value
);
31683 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
31684 emit_insn (gen_rtx_SET (new_out
, x
));
31688 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
31691 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
31692 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
31694 /* Checks whether a barrier is needed and emits one accordingly. */
31696 || !(use_acquire
|| use_release
))
31697 arm_post_atomic_barrier (model
);
31700 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31702 arm_mode_to_pred_mode (machine_mode mode
)
31704 switch (GET_MODE_NUNITS (mode
))
31706 case 16: return V16BImode
;
31707 case 8: return V8BImode
;
31708 case 4: return V4BImode
;
31709 case 2: return V2QImode
;
31711 return opt_machine_mode ();
31714 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31715 If CAN_INVERT, store either the result or its inverse in TARGET
31716 and return true if TARGET contains the inverse. If !CAN_INVERT,
31717 always store the result in TARGET, never its inverse.
31719 Note that the handling of floating-point comparisons is not
31723 arm_expand_vector_compare (rtx target
, rtx_code code
, rtx op0
, rtx op1
,
31726 machine_mode cmp_result_mode
= GET_MODE (target
);
31727 machine_mode cmp_mode
= GET_MODE (op0
);
31731 /* MVE supports more comparisons than Neon. */
31732 if (TARGET_HAVE_MVE
)
31737 /* For these we need to compute the inverse of the requested
31746 code
= reverse_condition_maybe_unordered (code
);
31749 /* Recursively emit the inverted comparison into a temporary
31750 and then store its inverse in TARGET. This avoids reusing
31751 TARGET (which for integer NE could be one of the inputs). */
31752 rtx tmp
= gen_reg_rtx (cmp_result_mode
);
31753 if (arm_expand_vector_compare (tmp
, code
, op0
, op1
, true))
31754 gcc_unreachable ();
31755 emit_insn (gen_rtx_SET (target
, gen_rtx_NOT (cmp_result_mode
, tmp
)));
31768 /* These are natively supported by Neon for zero comparisons, but otherwise
31769 require the operands to be swapped. For MVE, we can only compare
31773 if (!TARGET_HAVE_MVE
)
31774 if (op1
!= CONST0_RTX (cmp_mode
))
31776 code
= swap_condition (code
);
31777 std::swap (op0
, op1
);
31779 /* Fall through. */
31781 /* These are natively supported by Neon for both register and zero
31782 operands. MVE supports registers only. */
31787 if (TARGET_HAVE_MVE
)
31789 switch (GET_MODE_CLASS (cmp_mode
))
31791 case MODE_VECTOR_INT
:
31792 emit_insn (gen_mve_vcmpq (code
, cmp_mode
, target
,
31793 op0
, force_reg (cmp_mode
, op1
)));
31795 case MODE_VECTOR_FLOAT
:
31796 if (TARGET_HAVE_MVE_FLOAT
)
31797 emit_insn (gen_mve_vcmpq_f (code
, cmp_mode
, target
,
31798 op0
, force_reg (cmp_mode
, op1
)));
31800 gcc_unreachable ();
31803 gcc_unreachable ();
31807 emit_insn (gen_neon_vc (code
, cmp_mode
, target
, op0
, op1
));
31810 /* These are natively supported for register operands only.
31811 Comparisons with zero aren't useful and should be folded
31812 or canonicalized by target-independent code. */
31815 if (TARGET_HAVE_MVE
)
31816 emit_insn (gen_mve_vcmpq (code
, cmp_mode
, target
,
31817 op0
, force_reg (cmp_mode
, op1
)));
31819 emit_insn (gen_neon_vc (code
, cmp_mode
, target
,
31820 op0
, force_reg (cmp_mode
, op1
)));
31823 /* These require the operands to be swapped and likewise do not
31824 support comparisons with zero. */
31827 if (TARGET_HAVE_MVE
)
31828 emit_insn (gen_mve_vcmpq (swap_condition (code
), cmp_mode
, target
,
31829 force_reg (cmp_mode
, op1
), op0
));
31831 emit_insn (gen_neon_vc (swap_condition (code
), cmp_mode
,
31832 target
, force_reg (cmp_mode
, op1
), op0
));
31835 /* These need a combination of two comparisons. */
31839 /* Operands are LTGT iff (a > b || a > b).
31840 Operands are ORDERED iff (a > b || a <= b). */
31841 rtx gt_res
= gen_reg_rtx (cmp_result_mode
);
31842 rtx alt_res
= gen_reg_rtx (cmp_result_mode
);
31843 rtx_code alt_code
= (code
== LTGT
? LT
: LE
);
31844 if (arm_expand_vector_compare (gt_res
, GT
, op0
, op1
, true)
31845 || arm_expand_vector_compare (alt_res
, alt_code
, op0
, op1
, true))
31846 gcc_unreachable ();
31847 emit_insn (gen_rtx_SET (target
, gen_rtx_IOR (cmp_result_mode
,
31848 gt_res
, alt_res
)));
31853 gcc_unreachable ();
31858 #define MAX_VECT_LEN 16
31860 struct expand_vec_perm_d
31862 rtx target
, op0
, op1
;
31863 vec_perm_indices perm
;
31864 machine_mode vmode
;
31869 /* Generate a variable permutation. */
31872 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31874 machine_mode vmode
= GET_MODE (target
);
31875 bool one_vector_p
= rtx_equal_p (op0
, op1
);
31877 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
31878 gcc_checking_assert (GET_MODE (op0
) == vmode
);
31879 gcc_checking_assert (GET_MODE (op1
) == vmode
);
31880 gcc_checking_assert (GET_MODE (sel
) == vmode
);
31881 gcc_checking_assert (TARGET_NEON
);
31885 if (vmode
== V8QImode
)
31886 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
31888 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
31894 if (vmode
== V8QImode
)
31896 pair
= gen_reg_rtx (V16QImode
);
31897 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
31898 pair
= gen_lowpart (TImode
, pair
);
31899 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
31903 pair
= gen_reg_rtx (OImode
);
31904 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
31905 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
31911 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31913 machine_mode vmode
= GET_MODE (target
);
31914 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
31915 bool one_vector_p
= rtx_equal_p (op0
, op1
);
31918 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31919 numbering of elements for big-endian, we must reverse the order. */
31920 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
31922 /* The VTBL instruction does not use a modulo index, so we must take care
31923 of that ourselves. */
31924 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31925 mask
= gen_const_vec_duplicate (vmode
, mask
);
31926 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
31928 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
31931 /* Map lane ordering between architectural lane order, and GCC lane order,
31932 taking into account ABI. See comment above output_move_neon for details. */
31935 neon_endian_lane_map (machine_mode mode
, int lane
)
31937 if (BYTES_BIG_ENDIAN
)
31939 int nelems
= GET_MODE_NUNITS (mode
);
31940 /* Reverse lane order. */
31941 lane
= (nelems
- 1 - lane
);
31942 /* Reverse D register order, to match ABI. */
31943 if (GET_MODE_SIZE (mode
) == 16)
31944 lane
= lane
^ (nelems
/ 2);
31949 /* Some permutations index into pairs of vectors, this is a helper function
31950 to map indexes into those pairs of vectors. */
31953 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
31955 int nelem
= GET_MODE_NUNITS (mode
);
31956 if (BYTES_BIG_ENDIAN
)
31958 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
31962 /* Generate or test for an insn that supports a constant permutation. */
31964 /* Recognize patterns for the VUZP insns. */
31967 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
31969 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
31970 rtx out0
, out1
, in0
, in1
;
31974 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
31977 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31978 big endian pattern on 64 bit vectors, so we correct for that. */
31979 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
31980 && GET_MODE_SIZE (d
->vmode
) == 8 ? nelt
: 0;
31982 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
31984 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
31986 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
31990 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31992 for (i
= 0; i
< nelt
; i
++)
31995 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
31996 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
32006 if (swap_nelt
!= 0)
32007 std::swap (in0
, in1
);
32010 out1
= gen_reg_rtx (d
->vmode
);
32012 std::swap (out0
, out1
);
32014 emit_insn (gen_neon_vuzp_internal (d
->vmode
, out0
, in0
, in1
, out1
));
32018 /* Recognize patterns for the VZIP insns. */
32021 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
32023 unsigned int i
, high
, mask
, nelt
= d
->perm
.length ();
32024 rtx out0
, out1
, in0
, in1
;
32028 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
32031 is_swapped
= BYTES_BIG_ENDIAN
;
32033 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
32036 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
32038 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
32042 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
32044 for (i
= 0; i
< nelt
/ 2; i
++)
32047 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
32048 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
32052 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
32053 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
32065 std::swap (in0
, in1
);
32068 out1
= gen_reg_rtx (d
->vmode
);
32070 std::swap (out0
, out1
);
32072 emit_insn (gen_neon_vzip_internal (d
->vmode
, out0
, in0
, in1
, out1
));
32076 /* Recognize patterns for the VREV insns. */
32078 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
32080 unsigned int i
, j
, diff
, nelt
= d
->perm
.length ();
32081 rtx (*gen
) (machine_mode
, rtx
, rtx
);
32083 if (!d
->one_vector_p
)
32094 gen
= gen_neon_vrev64
;
32105 gen
= gen_neon_vrev32
;
32111 gen
= gen_neon_vrev64
;
32122 gen
= gen_neon_vrev16
;
32126 gen
= gen_neon_vrev32
;
32132 gen
= gen_neon_vrev64
;
32142 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
32143 for (j
= 0; j
<= diff
; j
+= 1)
32145 /* This is guaranteed to be true as the value of diff
32146 is 7, 3, 1 and we should have enough elements in the
32147 queue to generate this. Getting a vector mask with a
32148 value of diff other than these values implies that
32149 something is wrong by the time we get here. */
32150 gcc_assert (i
+ j
< nelt
);
32151 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
32159 emit_insn (gen (d
->vmode
, d
->target
, d
->op0
));
32163 /* Recognize patterns for the VTRN insns. */
32166 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
32168 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
32169 rtx out0
, out1
, in0
, in1
;
32171 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
32174 /* Note that these are little-endian tests. Adjust for big-endian later. */
32175 if (d
->perm
[0] == 0)
32177 else if (d
->perm
[0] == 1)
32181 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
32183 for (i
= 0; i
< nelt
; i
+= 2)
32185 if (d
->perm
[i
] != i
+ odd
)
32187 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
32197 if (BYTES_BIG_ENDIAN
)
32199 std::swap (in0
, in1
);
32204 out1
= gen_reg_rtx (d
->vmode
);
32206 std::swap (out0
, out1
);
32208 emit_insn (gen_neon_vtrn_internal (d
->vmode
, out0
, in0
, in1
, out1
));
32212 /* Recognize patterns for the VEXT insns. */
32215 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
32217 unsigned int i
, nelt
= d
->perm
.length ();
32220 unsigned int location
;
32222 unsigned int next
= d
->perm
[0] + 1;
32224 /* TODO: Handle GCC's numbering of elements for big-endian. */
32225 if (BYTES_BIG_ENDIAN
)
32228 /* Check if the extracted indexes are increasing by one. */
32229 for (i
= 1; i
< nelt
; next
++, i
++)
32231 /* If we hit the most significant element of the 2nd vector in
32232 the previous iteration, no need to test further. */
32233 if (next
== 2 * nelt
)
32236 /* If we are operating on only one vector: it could be a
32237 rotation. If there are only two elements of size < 64, let
32238 arm_evpc_neon_vrev catch it. */
32239 if (d
->one_vector_p
&& (next
== nelt
))
32241 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
32247 if (d
->perm
[i
] != next
)
32251 location
= d
->perm
[0];
32257 offset
= GEN_INT (location
);
32259 if(d
->vmode
== E_DImode
)
32262 emit_insn (gen_neon_vext (d
->vmode
, d
->target
, d
->op0
, d
->op1
, offset
));
32266 /* The NEON VTBL instruction is a fully variable permuation that's even
32267 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32268 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32269 can do slightly better by expanding this as a constant where we don't
32270 have to apply a mask. */
32273 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
32275 rtx rperm
[MAX_VECT_LEN
], sel
;
32276 machine_mode vmode
= d
->vmode
;
32277 unsigned int i
, nelt
= d
->perm
.length ();
32279 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32280 numbering of elements for big-endian, we must reverse the order. */
32281 if (BYTES_BIG_ENDIAN
)
32287 /* Generic code will try constant permutation twice. Once with the
32288 original mode and again with the elements lowered to QImode.
32289 So wait and don't do the selector expansion ourselves. */
32290 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
32293 for (i
= 0; i
< nelt
; ++i
)
32294 rperm
[i
] = GEN_INT (d
->perm
[i
]);
32295 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
32296 sel
= force_reg (vmode
, sel
);
32298 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
32303 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
32305 /* Check if the input mask matches vext before reordering the
32308 if (arm_evpc_neon_vext (d
))
32311 /* The pattern matching functions above are written to look for a small
32312 number to begin the sequence (0, 1, N/2). If we begin with an index
32313 from the second operand, we can swap the operands. */
32314 unsigned int nelt
= d
->perm
.length ();
32315 if (d
->perm
[0] >= nelt
)
32317 d
->perm
.rotate_inputs (1);
32318 std::swap (d
->op0
, d
->op1
);
32323 if (arm_evpc_neon_vuzp (d
))
32325 if (arm_evpc_neon_vzip (d
))
32327 if (arm_evpc_neon_vrev (d
))
32329 if (arm_evpc_neon_vtrn (d
))
32331 return arm_evpc_neon_vtbl (d
);
32336 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32339 arm_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
32340 rtx target
, rtx op0
, rtx op1
,
32341 const vec_perm_indices
&sel
)
32343 if (vmode
!= op_mode
)
32346 struct expand_vec_perm_d d
;
32347 int i
, nelt
, which
;
32349 if (!VALID_NEON_DREG_MODE (vmode
) && !VALID_NEON_QREG_MODE (vmode
))
32355 rtx nop0
= force_reg (vmode
, op0
);
32361 op1
= force_reg (vmode
, op1
);
32366 gcc_assert (VECTOR_MODE_P (d
.vmode
));
32367 d
.testing_p
= !target
;
32369 nelt
= GET_MODE_NUNITS (d
.vmode
);
32370 for (i
= which
= 0; i
< nelt
; ++i
)
32372 int ei
= sel
[i
] & (2 * nelt
- 1);
32373 which
|= (ei
< nelt
? 1 : 2);
32382 d
.one_vector_p
= false;
32383 if (d
.testing_p
|| !rtx_equal_p (op0
, op1
))
32386 /* The elements of PERM do not suggest that only the first operand
32387 is used, but both operands are identical. Allow easier matching
32388 of the permutation by folding the permutation into the single
32393 d
.one_vector_p
= true;
32398 d
.one_vector_p
= true;
32402 d
.perm
.new_vector (sel
.encoding (), d
.one_vector_p
? 1 : 2, nelt
);
32405 return arm_expand_vec_perm_const_1 (&d
);
32407 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
32408 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
32409 if (!d
.one_vector_p
)
32410 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
32413 bool ret
= arm_expand_vec_perm_const_1 (&d
);
32420 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
32422 /* If we are soft float and we do not have ldrd
32423 then all auto increment forms are ok. */
32424 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
32429 /* Post increment and Pre Decrement are supported for all
32430 instruction forms except for vector forms. */
32433 if (VECTOR_MODE_P (mode
))
32435 if (code
!= ARM_PRE_DEC
)
32445 /* Without LDRD and mode size greater than
32446 word size, there is no point in auto-incrementing
32447 because ldm and stm will not have these forms. */
32448 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
32451 /* Vector and floating point modes do not support
32452 these auto increment forms. */
32453 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
32466 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32467 on ARM, since we know that shifts by negative amounts are no-ops.
32468 Additionally, the default expansion code is not available or suitable
32469 for post-reload insn splits (this can occur when the register allocator
32470 chooses not to do a shift in NEON).
32472 This function is used in both initial expand and post-reload splits, and
32473 handles all kinds of 64-bit shifts.
32475 Input requirements:
32476 - It is safe for the input and output to be the same register, but
32477 early-clobber rules apply for the shift amount and scratch registers.
32478 - Shift by register requires both scratch registers. In all other cases
32479 the scratch registers may be NULL.
32480 - Ashiftrt by a register also clobbers the CC register. */
32482 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
32483 rtx amount
, rtx scratch1
, rtx scratch2
)
32485 rtx out_high
= gen_highpart (SImode
, out
);
32486 rtx out_low
= gen_lowpart (SImode
, out
);
32487 rtx in_high
= gen_highpart (SImode
, in
);
32488 rtx in_low
= gen_lowpart (SImode
, in
);
32491 in = the register pair containing the input value.
32492 out = the destination register pair.
32493 up = the high- or low-part of each pair.
32494 down = the opposite part to "up".
32495 In a shift, we can consider bits to shift from "up"-stream to
32496 "down"-stream, so in a left-shift "up" is the low-part and "down"
32497 is the high-part of each register pair. */
32499 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
32500 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
32501 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
32502 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
32504 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
32506 && (REG_P (out
) || SUBREG_P (out
))
32507 && GET_MODE (out
) == DImode
);
32509 && (REG_P (in
) || SUBREG_P (in
))
32510 && GET_MODE (in
) == DImode
);
32512 && (((REG_P (amount
) || SUBREG_P (amount
))
32513 && GET_MODE (amount
) == SImode
)
32514 || CONST_INT_P (amount
)));
32515 gcc_assert (scratch1
== NULL
32516 || (GET_CODE (scratch1
) == SCRATCH
)
32517 || (GET_MODE (scratch1
) == SImode
32518 && REG_P (scratch1
)));
32519 gcc_assert (scratch2
== NULL
32520 || (GET_CODE (scratch2
) == SCRATCH
)
32521 || (GET_MODE (scratch2
) == SImode
32522 && REG_P (scratch2
)));
32523 gcc_assert (!REG_P (out
) || !REG_P (amount
)
32524 || !HARD_REGISTER_P (out
)
32525 || (REGNO (out
) != REGNO (amount
)
32526 && REGNO (out
) + 1 != REGNO (amount
)));
32528 /* Macros to make following code more readable. */
32529 #define SUB_32(DEST,SRC) \
32530 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32531 #define RSB_32(DEST,SRC) \
32532 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32533 #define SUB_S_32(DEST,SRC) \
32534 gen_addsi3_compare0 ((DEST), (SRC), \
32536 #define SET(DEST,SRC) \
32537 gen_rtx_SET ((DEST), (SRC))
32538 #define SHIFT(CODE,SRC,AMOUNT) \
32539 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32540 #define LSHIFT(CODE,SRC,AMOUNT) \
32541 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32542 SImode, (SRC), (AMOUNT))
32543 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32544 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32545 SImode, (SRC), (AMOUNT))
32547 gen_rtx_IOR (SImode, (A), (B))
32548 #define BRANCH(COND,LABEL) \
32549 gen_arm_cond_branch ((LABEL), \
32550 gen_rtx_ ## COND (CCmode, cc_reg, \
32554 /* Shifts by register and shifts by constant are handled separately. */
32555 if (CONST_INT_P (amount
))
32557 /* We have a shift-by-constant. */
32559 /* First, handle out-of-range shift amounts.
32560 In both cases we try to match the result an ARM instruction in a
32561 shift-by-register would give. This helps reduce execution
32562 differences between optimization levels, but it won't stop other
32563 parts of the compiler doing different things. This is "undefined
32564 behavior, in any case. */
32565 if (INTVAL (amount
) <= 0)
32566 emit_insn (gen_movdi (out
, in
));
32567 else if (INTVAL (amount
) >= 64)
32569 if (code
== ASHIFTRT
)
32571 rtx const31_rtx
= GEN_INT (31);
32572 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
32573 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
32576 emit_insn (gen_movdi (out
, const0_rtx
));
32579 /* Now handle valid shifts. */
32580 else if (INTVAL (amount
) < 32)
32582 /* Shifts by a constant less than 32. */
32583 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
32585 /* Clearing the out register in DImode first avoids lots
32586 of spilling and results in less stack usage.
32587 Later this redundant insn is completely removed.
32588 Do that only if "in" and "out" are different registers. */
32589 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
32590 emit_insn (SET (out
, const0_rtx
));
32591 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
32592 emit_insn (SET (out_down
,
32593 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
32595 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
32599 /* Shifts by a constant greater than 31. */
32600 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
32602 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
32603 emit_insn (SET (out
, const0_rtx
));
32604 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
32605 if (code
== ASHIFTRT
)
32606 emit_insn (gen_ashrsi3 (out_up
, in_up
,
32609 emit_insn (SET (out_up
, const0_rtx
));
32614 /* We have a shift-by-register. */
32615 rtx cc_reg
= gen_rtx_REG (CC_NZmode
, CC_REGNUM
);
32617 /* This alternative requires the scratch registers. */
32618 gcc_assert (scratch1
&& REG_P (scratch1
));
32619 gcc_assert (scratch2
&& REG_P (scratch2
));
32621 /* We will need the values "amount-32" and "32-amount" later.
32622 Swapping them around now allows the later code to be more general. */
32626 emit_insn (SUB_32 (scratch1
, amount
));
32627 emit_insn (RSB_32 (scratch2
, amount
));
32630 emit_insn (RSB_32 (scratch1
, amount
));
32631 /* Also set CC = amount > 32. */
32632 emit_insn (SUB_S_32 (scratch2
, amount
));
32635 emit_insn (RSB_32 (scratch1
, amount
));
32636 emit_insn (SUB_32 (scratch2
, amount
));
32639 gcc_unreachable ();
32642 /* Emit code like this:
32645 out_down = in_down << amount;
32646 out_down = (in_up << (amount - 32)) | out_down;
32647 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32648 out_up = in_up << amount;
32651 out_down = in_down >> amount;
32652 out_down = (in_up << (32 - amount)) | out_down;
32654 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32655 out_up = in_up << amount;
32658 out_down = in_down >> amount;
32659 out_down = (in_up << (32 - amount)) | out_down;
32661 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32662 out_up = in_up << amount;
32664 The ARM and Thumb2 variants are the same but implemented slightly
32665 differently. If this were only called during expand we could just
32666 use the Thumb2 case and let combine do the right thing, but this
32667 can also be called from post-reload splitters. */
32669 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
32671 if (!TARGET_THUMB2
)
32673 /* Emit code for ARM mode. */
32674 emit_insn (SET (out_down
,
32675 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
32676 if (code
== ASHIFTRT
)
32678 rtx_code_label
*done_label
= gen_label_rtx ();
32679 emit_jump_insn (BRANCH (LT
, done_label
));
32680 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
32682 emit_label (done_label
);
32685 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
32690 /* Emit code for Thumb2 mode.
32691 Thumb2 can't do shift and or in one insn. */
32692 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
32693 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
32695 if (code
== ASHIFTRT
)
32697 rtx_code_label
*done_label
= gen_label_rtx ();
32698 emit_jump_insn (BRANCH (LT
, done_label
));
32699 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
32700 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
32701 emit_label (done_label
);
32705 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
32706 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
32710 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
32724 /* Returns true if the pattern is a valid symbolic address, which is either a
32725 symbol_ref or (symbol_ref + addend).
32727 According to the ARM ELF ABI, the initial addend of REL-type relocations
32728 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32729 literal field of the instruction as a 16-bit signed value in the range
32730 -32768 <= A < 32768.
32732 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32733 unsigned range of 0 <= A < 256 as described in the AAELF32
32734 relocation handling documentation: REL-type relocations are encoded
32735 as unsigned in this case. */
32738 arm_valid_symbolic_address_p (rtx addr
)
32740 rtx xop0
, xop1
= NULL_RTX
;
32743 if (target_word_relocations
)
32746 if (SYMBOL_REF_P (tmp
) || LABEL_REF_P (tmp
))
32749 /* (const (plus: symbol_ref const_int)) */
32750 if (GET_CODE (addr
) == CONST
)
32751 tmp
= XEXP (addr
, 0);
32753 if (GET_CODE (tmp
) == PLUS
)
32755 xop0
= XEXP (tmp
, 0);
32756 xop1
= XEXP (tmp
, 1);
32758 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
32760 if (TARGET_THUMB1
&& !TARGET_HAVE_MOVT
)
32761 return IN_RANGE (INTVAL (xop1
), 0, 0xff);
32763 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
32770 /* Returns true if a valid comparison operation and makes
32771 the operands in a form that is valid. */
32773 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
32775 enum rtx_code code
= GET_CODE (*comparison
);
32777 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
32778 ? GET_MODE (*op2
) : GET_MODE (*op1
);
32780 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
32782 if (code
== UNEQ
|| code
== LTGT
)
32785 code_int
= (int)code
;
32786 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
32787 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
32792 if (!arm_add_operand (*op1
, mode
))
32793 *op1
= force_reg (mode
, *op1
);
32794 if (!arm_add_operand (*op2
, mode
))
32795 *op2
= force_reg (mode
, *op2
);
32799 /* gen_compare_reg() will sort out any invalid operands. */
32803 if (!TARGET_VFP_FP16INST
)
32805 /* FP16 comparisons are done in SF mode. */
32807 *op1
= convert_to_mode (mode
, *op1
, 1);
32808 *op2
= convert_to_mode (mode
, *op2
, 1);
32809 /* Fall through. */
32812 if (!vfp_compare_operand (*op1
, mode
))
32813 *op1
= force_reg (mode
, *op1
);
32814 if (!vfp_compare_operand (*op2
, mode
))
32815 *op2
= force_reg (mode
, *op2
);
32825 /* Maximum number of instructions to set block of memory. */
32827 arm_block_set_max_insns (void)
32829 if (optimize_function_for_size_p (cfun
))
32832 return current_tune
->max_insns_inline_memset
;
32835 /* Return TRUE if it's profitable to set block of memory for
32836 non-vectorized case. VAL is the value to set the memory
32837 with. LENGTH is the number of bytes to set. ALIGN is the
32838 alignment of the destination memory in bytes. UNALIGNED_P
32839 is TRUE if we can only set the memory with instructions
32840 meeting alignment requirements. USE_STRD_P is TRUE if we
32841 can use strd to set the memory. */
32843 arm_block_set_non_vect_profit_p (rtx val
,
32844 unsigned HOST_WIDE_INT length
,
32845 unsigned HOST_WIDE_INT align
,
32846 bool unaligned_p
, bool use_strd_p
)
32849 /* For leftovers in bytes of 0-7, we can set the memory block using
32850 strb/strh/str with minimum instruction number. */
32851 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32855 num
= arm_const_inline_cost (SET
, val
);
32856 num
+= length
/ align
+ length
% align
;
32858 else if (use_strd_p
)
32860 num
= arm_const_double_inline_cost (val
);
32861 num
+= (length
>> 3) + leftover
[length
& 7];
32865 num
= arm_const_inline_cost (SET
, val
);
32866 num
+= (length
>> 2) + leftover
[length
& 3];
32869 /* We may be able to combine last pair STRH/STRB into a single STR
32870 by shifting one byte back. */
32871 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
32874 return (num
<= arm_block_set_max_insns ());
32877 /* Return TRUE if it's profitable to set block of memory for
32878 vectorized case. LENGTH is the number of bytes to set.
32879 ALIGN is the alignment of destination memory in bytes.
32880 MODE is the vector mode used to set the memory. */
32882 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
32883 unsigned HOST_WIDE_INT align
,
32887 bool unaligned_p
= ((align
& 3) != 0);
32888 unsigned int nelt
= GET_MODE_NUNITS (mode
);
32890 /* Instruction loading constant value. */
32892 /* Instructions storing the memory. */
32893 num
+= (length
+ nelt
- 1) / nelt
;
32894 /* Instructions adjusting the address expression. Only need to
32895 adjust address expression if it's 4 bytes aligned and bytes
32896 leftover can only be stored by mis-aligned store instruction. */
32897 if (!unaligned_p
&& (length
& 3) != 0)
32900 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32901 if (!unaligned_p
&& mode
== V16QImode
)
32904 return (num
<= arm_block_set_max_insns ());
32907 /* Set a block of memory using vectorization instructions for the
32908 unaligned case. We fill the first LENGTH bytes of the memory
32909 area starting from DSTBASE with byte constant VALUE. ALIGN is
32910 the alignment requirement of memory. Return TRUE if succeeded. */
32912 arm_block_set_unaligned_vect (rtx dstbase
,
32913 unsigned HOST_WIDE_INT length
,
32914 unsigned HOST_WIDE_INT value
,
32915 unsigned HOST_WIDE_INT align
)
32917 unsigned int i
, nelt_v16
, nelt_v8
, nelt_mode
;
32920 rtx (*gen_func
) (rtx
, rtx
);
32922 unsigned HOST_WIDE_INT v
= value
;
32923 unsigned int offset
= 0;
32924 gcc_assert ((align
& 0x3) != 0);
32925 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
32926 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
32927 if (length
>= nelt_v16
)
32930 gen_func
= gen_movmisalignv16qi
;
32935 gen_func
= gen_movmisalignv8qi
;
32937 nelt_mode
= GET_MODE_NUNITS (mode
);
32938 gcc_assert (length
>= nelt_mode
);
32939 /* Skip if it isn't profitable. */
32940 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
32943 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32944 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32946 v
= sext_hwi (v
, BITS_PER_WORD
);
32948 reg
= gen_reg_rtx (mode
);
32949 val_vec
= gen_const_vec_duplicate (mode
, GEN_INT (v
));
32950 /* Emit instruction loading the constant value. */
32951 emit_move_insn (reg
, val_vec
);
32953 /* Handle nelt_mode bytes in a vector. */
32954 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
32956 emit_insn ((*gen_func
) (mem
, reg
));
32957 if (i
+ 2 * nelt_mode
<= length
)
32959 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
32960 offset
+= nelt_mode
;
32961 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32965 /* If there are not less than nelt_v8 bytes leftover, we must be in
32967 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
32969 /* Handle (8, 16) bytes leftover. */
32970 if (i
+ nelt_v8
< length
)
32972 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
32973 offset
+= length
- i
;
32974 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32976 /* We are shifting bytes back, set the alignment accordingly. */
32977 if ((length
& 1) != 0 && align
>= 2)
32978 set_mem_align (mem
, BITS_PER_UNIT
);
32980 emit_insn (gen_movmisalignv16qi (mem
, reg
));
32982 /* Handle (0, 8] bytes leftover. */
32983 else if (i
< length
&& i
+ nelt_v8
>= length
)
32985 if (mode
== V16QImode
)
32986 reg
= gen_lowpart (V8QImode
, reg
);
32988 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
32989 + (nelt_mode
- nelt_v8
))));
32990 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
32991 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
32993 /* We are shifting bytes back, set the alignment accordingly. */
32994 if ((length
& 1) != 0 && align
>= 2)
32995 set_mem_align (mem
, BITS_PER_UNIT
);
32997 emit_insn (gen_movmisalignv8qi (mem
, reg
));
33003 /* Set a block of memory using vectorization instructions for the
33004 aligned case. We fill the first LENGTH bytes of the memory area
33005 starting from DSTBASE with byte constant VALUE. ALIGN is the
33006 alignment requirement of memory. Return TRUE if succeeded. */
33008 arm_block_set_aligned_vect (rtx dstbase
,
33009 unsigned HOST_WIDE_INT length
,
33010 unsigned HOST_WIDE_INT value
,
33011 unsigned HOST_WIDE_INT align
)
33013 unsigned int i
, nelt_v8
, nelt_v16
, nelt_mode
;
33014 rtx dst
, addr
, mem
;
33017 unsigned int offset
= 0;
33019 gcc_assert ((align
& 0x3) == 0);
33020 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
33021 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
33022 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
33027 nelt_mode
= GET_MODE_NUNITS (mode
);
33028 gcc_assert (length
>= nelt_mode
);
33029 /* Skip if it isn't profitable. */
33030 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
33033 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
33035 reg
= gen_reg_rtx (mode
);
33036 val_vec
= gen_const_vec_duplicate (mode
, gen_int_mode (value
, QImode
));
33037 /* Emit instruction loading the constant value. */
33038 emit_move_insn (reg
, val_vec
);
33041 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
33042 if (mode
== V16QImode
)
33044 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
33045 emit_insn (gen_movmisalignv16qi (mem
, reg
));
33047 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
33048 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
33050 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
33051 offset
+= length
- nelt_mode
;
33052 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
33053 /* We are shifting bytes back, set the alignment accordingly. */
33054 if ((length
& 0x3) == 0)
33055 set_mem_align (mem
, BITS_PER_UNIT
* 4);
33056 else if ((length
& 0x1) == 0)
33057 set_mem_align (mem
, BITS_PER_UNIT
* 2);
33059 set_mem_align (mem
, BITS_PER_UNIT
);
33061 emit_insn (gen_movmisalignv16qi (mem
, reg
));
33064 /* Fall through for bytes leftover. */
33066 nelt_mode
= GET_MODE_NUNITS (mode
);
33067 reg
= gen_lowpart (V8QImode
, reg
);
33070 /* Handle 8 bytes in a vector. */
33071 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
33073 addr
= plus_constant (Pmode
, dst
, i
);
33074 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
33075 if (MEM_ALIGN (mem
) >= 2 * BITS_PER_WORD
)
33076 emit_move_insn (mem
, reg
);
33078 emit_insn (gen_unaligned_storev8qi (mem
, reg
));
33081 /* Handle single word leftover by shifting 4 bytes back. We can
33082 use aligned access for this case. */
33083 if (i
+ UNITS_PER_WORD
== length
)
33085 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
33086 offset
+= i
- UNITS_PER_WORD
;
33087 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
33088 /* We are shifting 4 bytes back, set the alignment accordingly. */
33089 if (align
> UNITS_PER_WORD
)
33090 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
33092 emit_insn (gen_unaligned_storev8qi (mem
, reg
));
33094 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
33095 We have to use unaligned access for this case. */
33096 else if (i
< length
)
33098 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
33099 offset
+= length
- nelt_mode
;
33100 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
33101 /* We are shifting bytes back, set the alignment accordingly. */
33102 if ((length
& 1) == 0)
33103 set_mem_align (mem
, BITS_PER_UNIT
* 2);
33105 set_mem_align (mem
, BITS_PER_UNIT
);
33107 emit_insn (gen_movmisalignv8qi (mem
, reg
));
33113 /* Set a block of memory using plain strh/strb instructions, only
33114 using instructions allowed by ALIGN on processor. We fill the
33115 first LENGTH bytes of the memory area starting from DSTBASE
33116 with byte constant VALUE. ALIGN is the alignment requirement
33119 arm_block_set_unaligned_non_vect (rtx dstbase
,
33120 unsigned HOST_WIDE_INT length
,
33121 unsigned HOST_WIDE_INT value
,
33122 unsigned HOST_WIDE_INT align
)
33125 rtx dst
, addr
, mem
;
33126 rtx val_exp
, val_reg
, reg
;
33128 HOST_WIDE_INT v
= value
;
33130 gcc_assert (align
== 1 || align
== 2);
33133 v
|= (value
<< BITS_PER_UNIT
);
33135 v
= sext_hwi (v
, BITS_PER_WORD
);
33136 val_exp
= GEN_INT (v
);
33137 /* Skip if it isn't profitable. */
33138 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
33139 align
, true, false))
33142 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
33143 mode
= (align
== 2 ? HImode
: QImode
);
33144 val_reg
= force_reg (SImode
, val_exp
);
33145 reg
= gen_lowpart (mode
, val_reg
);
33147 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
33149 addr
= plus_constant (Pmode
, dst
, i
);
33150 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
33151 emit_move_insn (mem
, reg
);
33154 /* Handle single byte leftover. */
33155 if (i
+ 1 == length
)
33157 reg
= gen_lowpart (QImode
, val_reg
);
33158 addr
= plus_constant (Pmode
, dst
, i
);
33159 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
33160 emit_move_insn (mem
, reg
);
33164 gcc_assert (i
== length
);
33168 /* Set a block of memory using plain strd/str/strh/strb instructions,
33169 to permit unaligned copies on processors which support unaligned
33170 semantics for those instructions. We fill the first LENGTH bytes
33171 of the memory area starting from DSTBASE with byte constant VALUE.
33172 ALIGN is the alignment requirement of memory. */
33174 arm_block_set_aligned_non_vect (rtx dstbase
,
33175 unsigned HOST_WIDE_INT length
,
33176 unsigned HOST_WIDE_INT value
,
33177 unsigned HOST_WIDE_INT align
)
33180 rtx dst
, addr
, mem
;
33181 rtx val_exp
, val_reg
, reg
;
33182 unsigned HOST_WIDE_INT v
;
33185 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
33186 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
33188 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
33189 if (length
< UNITS_PER_WORD
)
33190 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
33193 v
|= (v
<< BITS_PER_WORD
);
33195 v
= sext_hwi (v
, BITS_PER_WORD
);
33197 val_exp
= GEN_INT (v
);
33198 /* Skip if it isn't profitable. */
33199 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
33200 align
, false, use_strd_p
))
33205 /* Try without strd. */
33206 v
= (v
>> BITS_PER_WORD
);
33207 v
= sext_hwi (v
, BITS_PER_WORD
);
33208 val_exp
= GEN_INT (v
);
33209 use_strd_p
= false;
33210 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
33211 align
, false, use_strd_p
))
33216 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
33217 /* Handle double words using strd if possible. */
33220 val_reg
= force_reg (DImode
, val_exp
);
33222 for (; (i
+ 8 <= length
); i
+= 8)
33224 addr
= plus_constant (Pmode
, dst
, i
);
33225 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
33226 if (MEM_ALIGN (mem
) >= 2 * BITS_PER_WORD
)
33227 emit_move_insn (mem
, reg
);
33229 emit_insn (gen_unaligned_storedi (mem
, reg
));
33233 val_reg
= force_reg (SImode
, val_exp
);
33235 /* Handle words. */
33236 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
33237 for (; (i
+ 4 <= length
); i
+= 4)
33239 addr
= plus_constant (Pmode
, dst
, i
);
33240 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
33241 if ((align
& 3) == 0)
33242 emit_move_insn (mem
, reg
);
33244 emit_insn (gen_unaligned_storesi (mem
, reg
));
33247 /* Merge last pair of STRH and STRB into a STR if possible. */
33248 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
33250 addr
= plus_constant (Pmode
, dst
, i
- 1);
33251 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
33252 /* We are shifting one byte back, set the alignment accordingly. */
33253 if ((align
& 1) == 0)
33254 set_mem_align (mem
, BITS_PER_UNIT
);
33256 /* Most likely this is an unaligned access, and we can't tell at
33257 compilation time. */
33258 emit_insn (gen_unaligned_storesi (mem
, reg
));
33262 /* Handle half word leftover. */
33263 if (i
+ 2 <= length
)
33265 reg
= gen_lowpart (HImode
, val_reg
);
33266 addr
= plus_constant (Pmode
, dst
, i
);
33267 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
33268 if ((align
& 1) == 0)
33269 emit_move_insn (mem
, reg
);
33271 emit_insn (gen_unaligned_storehi (mem
, reg
));
33276 /* Handle single byte leftover. */
33277 if (i
+ 1 == length
)
33279 reg
= gen_lowpart (QImode
, val_reg
);
33280 addr
= plus_constant (Pmode
, dst
, i
);
33281 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
33282 emit_move_insn (mem
, reg
);
33288 /* Set a block of memory using vectorization instructions for both
33289 aligned and unaligned cases. We fill the first LENGTH bytes of
33290 the memory area starting from DSTBASE with byte constant VALUE.
33291 ALIGN is the alignment requirement of memory. */
33293 arm_block_set_vect (rtx dstbase
,
33294 unsigned HOST_WIDE_INT length
,
33295 unsigned HOST_WIDE_INT value
,
33296 unsigned HOST_WIDE_INT align
)
33298 /* Check whether we need to use unaligned store instruction. */
33299 if (((align
& 3) != 0 || (length
& 3) != 0)
33300 /* Check whether unaligned store instruction is available. */
33301 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
33304 if ((align
& 3) == 0)
33305 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
33307 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
33310 /* Expand string store operation. Firstly we try to do that by using
33311 vectorization instructions, then try with ARM unaligned access and
33312 double-word store if profitable. OPERANDS[0] is the destination,
33313 OPERANDS[1] is the number of bytes, operands[2] is the value to
33314 initialize the memory, OPERANDS[3] is the known alignment of the
33317 arm_gen_setmem (rtx
*operands
)
33319 rtx dstbase
= operands
[0];
33320 unsigned HOST_WIDE_INT length
;
33321 unsigned HOST_WIDE_INT value
;
33322 unsigned HOST_WIDE_INT align
;
33324 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
33327 length
= UINTVAL (operands
[1]);
33331 value
= (UINTVAL (operands
[2]) & 0xFF);
33332 align
= UINTVAL (operands
[3]);
33333 if (TARGET_NEON
&& length
>= 8
33334 && current_tune
->string_ops_prefer_neon
33335 && arm_block_set_vect (dstbase
, length
, value
, align
))
33338 if (!unaligned_access
&& (align
& 3) != 0)
33339 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
33341 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
33346 arm_macro_fusion_p (void)
33348 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
33351 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33352 for MOVW / MOVT macro fusion. */
33355 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
33357 /* We are trying to fuse
33358 movw imm / movt imm
33359 instructions as a group that gets scheduled together. */
33361 rtx set_dest
= SET_DEST (curr_set
);
33363 if (GET_MODE (set_dest
) != SImode
)
33366 /* We are trying to match:
33367 prev (movw) == (set (reg r0) (const_int imm16))
33368 curr (movt) == (set (zero_extract (reg r0)
33371 (const_int imm16_1))
33373 prev (movw) == (set (reg r1)
33374 (high (symbol_ref ("SYM"))))
33375 curr (movt) == (set (reg r0)
33377 (symbol_ref ("SYM")))) */
33379 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
33381 if (CONST_INT_P (SET_SRC (curr_set
))
33382 && CONST_INT_P (SET_SRC (prev_set
))
33383 && REG_P (XEXP (set_dest
, 0))
33384 && REG_P (SET_DEST (prev_set
))
33385 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
33389 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
33390 && REG_P (SET_DEST (curr_set
))
33391 && REG_P (SET_DEST (prev_set
))
33392 && GET_CODE (SET_SRC (prev_set
)) == HIGH
33393 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
33400 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
33402 rtx prev_set
= single_set (prev
);
33403 rtx curr_set
= single_set (curr
);
33409 if (any_condjump_p (curr
))
33412 if (!arm_macro_fusion_p ())
33415 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
33416 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
33422 /* Return true iff the instruction fusion described by OP is enabled. */
33424 arm_fusion_enabled_p (tune_params::fuse_ops op
)
33426 return current_tune
->fusible_ops
& op
;
33429 /* Return TRUE if return address signing mechanism is enabled. */
33431 arm_current_function_pac_enabled_p (void)
33433 return (aarch_ra_sign_scope
== AARCH_FUNCTION_ALL
33434 || (aarch_ra_sign_scope
== AARCH_FUNCTION_NON_LEAF
33435 && !crtl
->is_leaf
));
33438 /* Raise an error if the current target arch is not bti compatible. */
33439 void aarch_bti_arch_check (void)
33441 if (!arm_arch8m_main
)
33442 error ("This architecture does not support branch protection instructions");
33445 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33447 aarch_bti_enabled (void)
33449 return aarch_enable_bti
!= 0;
33452 /* Check if INSN is a BTI J insn. */
33454 aarch_bti_j_insn_p (rtx_insn
*insn
)
33456 if (!insn
|| !INSN_P (insn
))
33459 rtx pat
= PATTERN (insn
);
33460 return GET_CODE (pat
) == UNSPEC_VOLATILE
&& XINT (pat
, 1) == VUNSPEC_BTI_NOP
;
33463 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33465 aarch_pac_insn_p (rtx x
)
33467 if (!x
|| !INSN_P (x
))
33470 rtx pat
= PATTERN (x
);
33472 if (GET_CODE (pat
) == SET
)
33474 rtx tmp
= XEXP (pat
, 1);
33476 && ((GET_CODE (tmp
) == UNSPEC
33477 && XINT (tmp
, 1) == UNSPEC_PAC_NOP
)
33478 || (GET_CODE (tmp
) == UNSPEC_VOLATILE
33479 && XINT (tmp
, 1) == VUNSPEC_PACBTI_NOP
)))
33486 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33487 For Arm, both of these map to a simple BTI instruction. */
33490 aarch_gen_bti_c (void)
33492 return gen_bti_nop ();
33496 aarch_gen_bti_j (void)
33498 return gen_bti_nop ();
33501 /* For AArch32, we always return false because indirect_return attribute
33502 is only supported on AArch64 targets. */
33505 aarch_fun_is_indirect_return (rtx_insn
*)
33510 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33511 scheduled for speculative execution. Reject the long-running division
33512 and square-root instructions. */
33515 arm_sched_can_speculate_insn (rtx_insn
*insn
)
33517 switch (get_attr_type (insn
))
33525 case TYPE_NEON_FP_SQRT_S
:
33526 case TYPE_NEON_FP_SQRT_D
:
33527 case TYPE_NEON_FP_SQRT_S_Q
:
33528 case TYPE_NEON_FP_SQRT_D_Q
:
33529 case TYPE_NEON_FP_DIV_S
:
33530 case TYPE_NEON_FP_DIV_D
:
33531 case TYPE_NEON_FP_DIV_S_Q
:
33532 case TYPE_NEON_FP_DIV_D_Q
:
33539 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33541 static unsigned HOST_WIDE_INT
33542 arm_asan_shadow_offset (void)
33544 return HOST_WIDE_INT_1U
<< 29;
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == UNSPEC)
    return true;
  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((VAR_P (decl_op1)
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (VAR_P (decl_op0)
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }
	}

      return true;
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */

bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}
/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, &global_options_set,
			    TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);

  arm_override_options_after_change_1 (&global_options, &global_options_set);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
	     ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
	     ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
	     ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
			  : target_option_default_node);

  struct cl_target_option *callee_opts
    = TREE_TARGET_OPTION (callee_tree ? callee_tree
			  : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, false);
  arm_configure_build_target (&callee_target, callee_opts, false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Functions with mode-specific instructions, e.g. using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
33758 /* Inner function to process the attribute((target(...))), take an argument and
33759 set the current options from the argument. If we have a list, recursively
33760 go over the list. */
33763 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
33765 if (TREE_CODE (args
) == TREE_LIST
)
33769 for (; args
; args
= TREE_CHAIN (args
))
33770 if (TREE_VALUE (args
)
33771 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
33776 else if (TREE_CODE (args
) != STRING_CST
)
33778 error ("attribute %<target%> argument not a string");
33782 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
33785 while ((q
= strtok (argstr
, ",")) != NULL
)
33788 if (!strcmp (q
, "thumb"))
33790 opts
->x_target_flags
|= MASK_THUMB
;
33791 if (TARGET_FDPIC
&& !arm_arch_thumb2
)
33792 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33795 else if (!strcmp (q
, "arm"))
33796 opts
->x_target_flags
&= ~MASK_THUMB
;
33798 else if (!strcmp (q
, "general-regs-only"))
33799 opts
->x_target_flags
|= MASK_GENERAL_REGS_ONLY
;
33801 else if (startswith (q
, "fpu="))
33804 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+ 4,
33805 &fpu_index
, CL_TARGET
))
33807 error ("invalid fpu for target attribute or pragma %qs", q
);
33810 if (fpu_index
== TARGET_FPU_auto
)
	      /* This doesn't really make sense until we support
		 general dynamic selection of the architecture and all
		 of its features.  */
33815 sorry ("auto fpu selection not currently permitted here");
33818 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
33820 else if (startswith (q
, "arch="))
33822 char *arch
= q
+ 5;
33823 const arch_option
*arm_selected_arch
33824 = arm_parse_arch_option_name (all_architectures
, "arch", arch
);
33826 if (!arm_selected_arch
)
33828 error ("invalid architecture for target attribute or pragma %qs",
33833 opts
->x_arm_arch_string
= xstrndup (arch
, strlen (arch
));
33835 else if (q
[0] == '+')
33837 opts
->x_arm_arch_string
33838 = xasprintf ("%s%s", opts
->x_arm_arch_string
, q
);
33842 error ("unknown target attribute or pragma %qs", q
);
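/* Illustrative usage (not compiler code): the tokens handled above are the
   ones a user may write in a target attribute or pragma, one branch of the
   loop per comma-separated token.  The option values below are examples
   only and depend on the configured target:

     int fast_path (int x) __attribute__ ((target ("thumb,fpu=vfpv4")));

     #pragma GCC target ("arch=armv7-a")
     void arm_only (void);
*/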
33850 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33853 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
33854 struct gcc_options
*opts_set
)
33856 struct cl_target_option cl_opts
;
33858 if (!arm_valid_target_attribute_rec (args
, opts
))
33861 cl_target_option_save (&cl_opts
, opts
, opts_set
);
33862 arm_configure_build_target (&arm_active_target
, &cl_opts
, false);
33863 arm_option_check_internal (opts
);
33864 /* Do any overrides, such as global options arch=xxx.
33865 We do this since arm_active_target was overridden. */
33866 arm_option_reconfigure_globals ();
33867 arm_options_perform_arch_sanity_checks ();
33868 arm_option_override_internal (opts
, opts_set
);
33870 return build_target_option_node (opts
, opts_set
);
33874 add_attribute (const char * mode
, tree
*attributes
)
33876 size_t len
= strlen (mode
);
33877 tree value
= build_string (len
, mode
);
33879 TREE_TYPE (value
) = build_array_type (char_type_node
,
33880 build_index_type (size_int (len
)));
33882 *attributes
= tree_cons (get_identifier ("target"),
33883 build_tree_list (NULL_TREE
, value
),
33887 /* For testing. Insert thumb or arm modes alternatively on functions. */
33890 arm_insert_attributes (tree fndecl
, tree
* attributes
)
33894 if (! TARGET_FLIP_THUMB
)
33897 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
33898 || fndecl_built_in_p (fndecl
) || DECL_ARTIFICIAL (fndecl
))
33901 /* Nested definitions must inherit mode. */
33902 if (current_function_decl
)
33904 mode
= TARGET_THUMB
? "thumb" : "arm";
33905 add_attribute (mode
, attributes
);
33909 /* If there is already a setting don't change it. */
33910 if (lookup_attribute ("target", *attributes
) != NULL
)
33913 mode
= thumb_flipper
? "thumb" : "arm";
33914 add_attribute (mode
, attributes
);
33916 thumb_flipper
= !thumb_flipper
;
33919 /* Hook to validate attribute((target("string"))). */
33922 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
33923 tree args
, int ARG_UNUSED (flags
))
33926 struct gcc_options func_options
, func_options_set
;
33927 tree cur_tree
, new_optimize
;
33928 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
33930 /* Get the optimization options of the current function. */
33931 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
33933 /* If the function changed the optimization levels as well as setting target
33934 options, start with the optimizations specified. */
33935 if (!func_optimize
)
33936 func_optimize
= optimization_default_node
;
33938 /* Init func_options. */
33939 memset (&func_options
, 0, sizeof (func_options
));
33940 init_options_struct (&func_options
, NULL
);
33941 lang_hooks
.init_options_struct (&func_options
);
33942 memset (&func_options_set
, 0, sizeof (func_options_set
));
33944 /* Initialize func_options to the defaults. */
33945 cl_optimization_restore (&func_options
, &func_options_set
,
33946 TREE_OPTIMIZATION (func_optimize
));
33948 cl_target_option_restore (&func_options
, &func_options_set
,
33949 TREE_TARGET_OPTION (target_option_default_node
));
33951 /* Set func_options flags with new target mode. */
33952 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
33953 &func_options_set
);
33955 if (cur_tree
== NULL_TREE
)
33958 new_optimize
= build_optimization_node (&func_options
, &func_options_set
);
33960 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
33962 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
33967 /* Match an ISA feature bitmap to a named FPU. We always use the
33968 first entry that exactly matches the feature set, so that we
33969 effectively canonicalize the FPU name for the assembler. */
33971 arm_identify_fpu_from_isa (sbitmap isa
)
33973 auto_sbitmap
fpubits (isa_num_bits
);
33974 auto_sbitmap
cand_fpubits (isa_num_bits
);
33976 bitmap_and (fpubits
, isa
, isa_all_fpubits_internal
);
33978 /* If there are no ISA feature bits relating to the FPU, we must be
33979 doing soft-float. */
33980 if (bitmap_empty_p (fpubits
))
33983 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
33985 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
33986 if (bitmap_equal_p (fpubits
, cand_fpubits
))
33987 return all_fpus
[i
].name
;
33989 /* We must find an entry, or things have gone wrong. */
33990 gcc_unreachable ();
33993 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33994 by the function fndecl. */
33996 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
33998 tree target_parts
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
34000 struct cl_target_option
*targ_options
;
34002 targ_options
= TREE_TARGET_OPTION (target_parts
);
34004 targ_options
= TREE_TARGET_OPTION (target_option_current_node
);
34005 gcc_assert (targ_options
);
34007 arm_print_asm_arch_directives (stream
, targ_options
);
34009 fprintf (stream
, "\t.syntax unified\n");
34013 if (is_called_in_ARM_mode (decl
)
34014 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
34015 && cfun
->is_thunk
))
34016 fprintf (stream
, "\t.code 32\n");
34017 else if (TARGET_THUMB1
)
34018 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
34020 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
34023 fprintf (stream
, "\t.arm\n");
34025 if (TARGET_POKE_FUNCTION_NAME
)
34026 arm_poke_function_name (stream
, (const char *) name
);
/* If MEM is in the form of [base+offset], extract the two parts of the
   address and store them in BASE and OFFSET; otherwise return false
   after clearing BASE and OFFSET.  */
34034 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
34038 gcc_assert (MEM_P (mem
));
34040 addr
= XEXP (mem
, 0);
34042 /* Strip off const from addresses like (const (addr)). */
34043 if (GET_CODE (addr
) == CONST
)
34044 addr
= XEXP (addr
, 0);
34049 *offset
= const0_rtx
;
34053 if (GET_CODE (addr
) == PLUS
34054 && GET_CODE (XEXP (addr
, 0)) == REG
34055 && CONST_INT_P (XEXP (addr
, 1)))
34057 *base
= XEXP (addr
, 0);
34058 *offset
= XEXP (addr
, 1);
34063 *offset
= NULL_RTX
;
34068 /* If INSN is a load or store of address in the form of [base+offset],
34069 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
34070 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
34071 otherwise return FALSE. */
34074 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
34078 gcc_assert (INSN_P (insn
));
34079 x
= PATTERN (insn
);
34080 if (GET_CODE (x
) != SET
)
34084 dest
= SET_DEST (x
);
34085 if (REG_P (src
) && MEM_P (dest
))
34088 extract_base_offset_in_addr (dest
, base
, offset
);
34090 else if (MEM_P (src
) && REG_P (dest
))
34093 extract_base_offset_in_addr (src
, base
, offset
);
34098 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
34101 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
   Currently we only support fusing ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.
34108 It's important that irrelevant instructions get the largest FUSION_PRI. */
34111 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
34112 int *fusion_pri
, int *pri
)
34118 gcc_assert (INSN_P (insn
));
34121 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
34128 /* Load goes first. */
34130 *fusion_pri
= tmp
- 1;
34132 *fusion_pri
= tmp
- 2;
34136 /* INSN with smaller base register goes first. */
34137 tmp
-= ((REGNO (base
) & 0xff) << 20);
34139 /* INSN with smaller offset goes first. */
34140 off_val
= (int)(INTVAL (offset
));
34142 tmp
-= (off_val
& 0xfffff);
34144 tmp
+= ((- off_val
) & 0xfffff);
34151 /* Construct and return a PARALLEL RTX vector with elements numbering the
34152 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
34153 the vector - from the perspective of the architecture. This does not
34154 line up with GCC's perspective on lane numbers, so we end up with
34155 different masks depending on our target endian-ness. The diagram
34156 below may help. We must draw the distinction when building masks
34157 which select one half of the vector. An instruction selecting
34158 architectural low-lanes for a big-endian target, must be described using
34159 a mask selecting GCC high-lanes.
34161 Big-Endian Little-Endian
34163 GCC 0 1 2 3 3 2 1 0
34164 | x | x | x | x | | x | x | x | x |
34165 Architecture 3 2 1 0 3 2 1 0
34167 Low Mask: { 2, 3 } { 0, 1 }
34168 High Mask: { 0, 1 } { 2, 3 }
34172 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
34174 int nunits
= GET_MODE_NUNITS (mode
);
34175 rtvec v
= rtvec_alloc (nunits
/ 2);
34176 int high_base
= nunits
/ 2;
34182 if (BYTES_BIG_ENDIAN
)
34183 base
= high
? low_base
: high_base
;
34185 base
= high
? high_base
: low_base
;
34187 for (i
= 0; i
< nunits
/ 2; i
++)
34188 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
34190 t1
= gen_rtx_PARALLEL (mode
, v
);
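/* Worked example (illustrative): for a 4-lane vector NUNITS == 4, so
   low_base == 0 and high_base == 2.  On little-endian, HIGH selects GCC
   lanes { 2, 3 } and !HIGH selects { 0, 1 }; on big-endian the two masks
   swap, e.g. HIGH yields (parallel [(const_int 0) (const_int 1)]),
   matching the table in the comment above.  */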
34194 /* Check OP for validity as a PARALLEL RTX vector with elements
34195 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
34196 from the perspective of the architecture. See the diagram above
34197 arm_simd_vect_par_cnst_half_p for more details. */
34200 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
34203 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
34204 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
34205 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
34208 if (!VECTOR_MODE_P (mode
))
34211 if (count_op
!= count_ideal
)
34214 for (i
= 0; i
< count_ideal
; i
++)
34216 rtx elt_op
= XVECEXP (op
, 0, i
);
34217 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
34219 if (!CONST_INT_P (elt_op
)
34220 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
34226 /* Can output mi_thunk for all cases except for non-zero vcall_offset
34229 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
34233 if (vcall_offset
&& TARGET_THUMB1
)
34236 /* Otherwise ok. */
34240 /* Generate RTL for a conditional branch with rtx comparison CODE in
34241 mode CC_MODE. The destination of the unlikely conditional branch
34245 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
34249 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
34250 gen_rtx_REG (cc_mode
, CC_REGNUM
),
34253 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
34254 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
34256 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
34259 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34261 For pure-code sections there is no letter code for this attribute, so
34262 output all the section flags numerically when this is needed. */
34265 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
34268 if (flags
& SECTION_ARM_PURECODE
)
34272 if (!(flags
& SECTION_DEBUG
))
34274 if (flags
& SECTION_EXCLUDE
)
34275 *num
|= 0x80000000;
34276 if (flags
& SECTION_WRITE
)
34278 if (flags
& SECTION_CODE
)
34280 if (flags
& SECTION_MERGE
)
34282 if (flags
& SECTION_STRINGS
)
34284 if (flags
& SECTION_TLS
)
34286 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
34295 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34297 If pure-code is passed as an option, make sure all functions are in
34298 sections that have the SHF_ARM_PURECODE attribute. */
34301 arm_function_section (tree decl
, enum node_frequency freq
,
34302 bool startup
, bool exit
)
34304 const char * section_name
;
34307 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
34308 return default_function_section (decl
, freq
, startup
, exit
);
34310 if (!target_pure_code
)
34311 return default_function_section (decl
, freq
, startup
, exit
);
34314 section_name
= DECL_SECTION_NAME (decl
);
34316 /* If a function is not in a named section then it falls under the 'default'
34317 text section, also known as '.text'. We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
34322 section
*default_sec
= default_function_section (decl
, freq
, startup
,
34325 /* If default_sec is not null, then it must be a special section like for
34326 example .text.startup. We set the pure-code attribute and return the
34327 same section to preserve existing behavior. */
34329 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
34330 return default_sec
;
34333 /* Otherwise look whether a section has already been created with
34335 sec
= get_named_section (decl
, section_name
, 0);
34337 /* If that is not the case passing NULL as the section's name to
34338 'get_named_section' will create a section with the declaration's
34340 sec
= get_named_section (decl
, NULL
, 0);
34342 /* Set the SHF_ARM_PURECODE attribute. */
34343 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
34348 /* Implements the TARGET_SECTION_FLAGS hook.
34350 If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer may
34353 contain runtime relocations. */
34355 static unsigned int
34356 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
34358 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
34360 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
34361 flags
|= SECTION_ARM_PURECODE
;
34366 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34369 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
34371 rtx
*quot_p
, rtx
*rem_p
)
34373 if (mode
== SImode
)
34374 gcc_assert (!TARGET_IDIV
);
34376 scalar_int_mode libval_mode
34377 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode
)).require ();
34379 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
34380 libval_mode
, op0
, mode
, op1
, mode
);
34382 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
34383 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
34384 GET_MODE_SIZE (mode
));
34386 gcc_assert (quotient
);
34387 gcc_assert (remainder
);
34389 *quot_p
= quotient
;
34390 *rem_p
= remainder
;
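/* Illustrative analogue (not compiler code): for SImode the libcall emitted
   above corresponds roughly to the AEABI interface below, where the
   quotient/remainder pair comes back as one double-width value and the two
   halves are then split out as subregs.  The struct name is made up for the
   example.

     typedef struct { int quot; int rem; } example_idiv_t;
     extern example_idiv_t __aeabi_idivmod (int numerator, int denominator);

     example_idiv_t qr = __aeabi_idivmod (n, d);   // one call, two results
*/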
34393 /* This function checks for the availability of the coprocessor builtin passed
34394 in BUILTIN for the current target. Returns true if it is available and
34395 false otherwise. If a BUILTIN is passed for which this function has not
34396 been implemented it will cause an exception. */
34399 arm_coproc_builtin_available (enum unspecv builtin
)
34401 /* None of these builtins are available in Thumb mode if the target only
34402 supports Thumb-1. */
34420 case VUNSPEC_LDC2L
:
34422 case VUNSPEC_STC2L
:
34425 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34432 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34434 if (arm_arch6
|| arm_arch5te
)
34437 case VUNSPEC_MCRR2
:
34438 case VUNSPEC_MRRC2
:
34443 gcc_unreachable ();
34448 /* This function returns true if OP is a valid memory operand for the ldc and
34449 stc coprocessor instructions and false otherwise. */
34452 arm_coproc_ldc_stc_legitimate_address (rtx op
)
34454 HOST_WIDE_INT range
;
34455 /* Has to be a memory operand. */
34461 /* We accept registers. */
34465 switch GET_CODE (op
)
34469 /* Or registers with an offset. */
34470 if (!REG_P (XEXP (op
, 0)))
34475 /* The offset must be an immediate though. */
34476 if (!CONST_INT_P (op
))
34479 range
= INTVAL (op
);
34481 /* Within the range of [-1020,1020]. */
34482 if (!IN_RANGE (range
, -1020, 1020))
34485 /* And a multiple of 4. */
34486 return (range
% 4) == 0;
34492 return REG_P (XEXP (op
, 0));
34494 gcc_unreachable ();
34499 /* Return true if OP is a valid memory operand for LDRD/STRD without any
34500 register overlap restrictions. Allow [base] and [base, imm] for now. */
34502 arm_ldrd_legitimate_address (rtx op
)
34511 if (GET_CODE (op
) != PLUS
)
34513 if (!REG_P (XEXP (op
, 0)) || !CONST_INT_P (XEXP (op
, 1)))
34516 HOST_WIDE_INT val
= INTVAL (XEXP (op
, 1));
34519 return IN_RANGE (val
, -255, 255);
34520 return IN_RANGE (val
, -1020, 1020) && (val
& 3) == 0;
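/* Examples (illustrative only): [r0] and [r0, #248] are accepted by the
   checks above; an offset outside the checked range (e.g. [r0, #-2000]) is
   rejected, and for the wider [-1020, 1020] case the offset must also be a
   multiple of 4, so [r0, #1020] is fine while [r0, #1018] is not.  */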
34523 /* Return the diagnostic message string if conversion from FROMTYPE to
34524 TOTYPE is not allowed, NULL otherwise. */
34526 static const char *
34527 arm_invalid_conversion (const_tree fromtype
, const_tree totype
)
34529 if (element_mode (fromtype
) != element_mode (totype
))
  /* Do not allow conversions to/from BFmode scalar types.  */
34532 if (TYPE_MODE (fromtype
) == BFmode
)
34533 return N_("invalid conversion from type %<bfloat16_t%>");
34534 if (TYPE_MODE (totype
) == BFmode
)
34535 return N_("invalid conversion to type %<bfloat16_t%>");
34538 /* Conversion allowed. */
34542 /* Return the diagnostic message string if the unary operation OP is
34543 not permitted on TYPE, NULL otherwise. */
34545 static const char *
34546 arm_invalid_unary_op (int op
, const_tree type
)
34548 /* Reject all single-operand operations on BFmode except for &. */
34549 if (element_mode (type
) == BFmode
&& op
!= ADDR_EXPR
)
34550 return N_("operation not permitted on type %<bfloat16_t%>");
34552 /* Operation allowed. */
34556 /* Return the diagnostic message string if the binary operation OP is
34557 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34559 static const char *
34560 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
,
34563 /* Reject all 2-operand operations on BFmode. */
34564 if (element_mode (type1
) == BFmode
34565 || element_mode (type2
) == BFmode
)
34566 return N_("operation not permitted on type %<bfloat16_t%>");
34568 /* Operation allowed. */
34572 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34574 In VFPv1, VFP registers could only be accessed in the mode they were
34575 set, so subregs would be invalid there. However, we don't support
34576 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34578 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34579 VFP registers in little-endian order. We can't describe that accurately to
34580 GCC, so avoid taking subregs of such values.
34582 The only exception is going from a 128-bit to a 64-bit type. In that
34583 case the data layout happens to be consistent for big-endian, so we
34584 explicitly allow that case. */
34587 arm_can_change_mode_class (machine_mode from
, machine_mode to
,
34588 reg_class_t rclass
)
34591 && !(GET_MODE_SIZE (from
) == 16 && GET_MODE_SIZE (to
) == 8)
34592 && (GET_MODE_SIZE (from
) > UNITS_PER_WORD
34593 || GET_MODE_SIZE (to
) > UNITS_PER_WORD
)
34594 && reg_classes_intersect_p (VFP_REGS
, rclass
))
34599 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34600 strcpy from constants will be faster. */
34602 static HOST_WIDE_INT
34603 arm_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
34605 unsigned int factor
= (TARGET_THUMB
|| ! arm_tune_xscale
? 1 : 2);
34606 if (TREE_CODE (exp
) == STRING_CST
&& !optimize_size
)
34607 return MAX (align
, BITS_PER_WORD
* factor
);
34611 /* Emit a speculation barrier on target architectures that do not have
34612 DSB/ISB directly. Such systems probably don't need a barrier
34613 themselves, but if the code is ever run on a later architecture, it
34614 might become a problem. */
34616 arm_emit_speculation_barrier_function ()
34618 emit_library_call (speculation_barrier_libfunc
, LCT_NORMAL
, VOIDmode
);
/* Have we recorded an explicit access to the Q bit of APSR?  */
34623 arm_q_bit_access (void)
34625 if (cfun
&& cfun
->decl
)
34626 return lookup_attribute ("acle qbit",
34627 DECL_ATTRIBUTES (cfun
->decl
));
/* Have we recorded an explicit access to the GE bits of PSTATE?  */
34633 arm_ge_bits_access (void)
34635 if (cfun
&& cfun
->decl
)
34636 return lookup_attribute ("acle gebits",
34637 DECL_ATTRIBUTES (cfun
->decl
));
34641 /* NULL if insn INSN is valid within a low-overhead loop.
34642 Otherwise return why doloop cannot be applied. */
34644 static const char *
34645 arm_invalid_within_doloop (const rtx_insn
*insn
)
34647 if (!TARGET_HAVE_LOB
)
34648 return default_invalid_within_doloop (insn
);
34651 return "Function call in the loop.";
34653 if (reg_mentioned_p (gen_rtx_REG (SImode
, LR_REGNUM
), insn
))
34654 return "LR is used inside loop.";
34660 arm_target_bb_ok_for_lob (basic_block bb
)
34662 /* Make sure the basic block is a simple latch having as the single
34663 predecessor and successor the body of the loop itself.
34664 Only simple loops with a single basic block as body are supported for
     low-overhead loops, making sure that the LE target is above the LE
     instruction in the generated code.  */
34667 return (single_succ_p (bb
)
34668 && single_pred_p (bb
)
34669 && single_succ_edge (bb
)->dest
== single_pred_edge (bb
)->src
);
/* Utility function: Given a VCTP or a VCTP_M insn, return the number of MVE
34673 lanes based on the machine mode being used. */
34676 arm_mve_get_vctp_lanes (rtx_insn
*insn
)
34678 rtx insn_set
= single_set (insn
);
34680 && GET_CODE (SET_SRC (insn_set
)) == UNSPEC
34681 && (XINT (SET_SRC (insn_set
), 1) == VCTP
34682 || XINT (SET_SRC (insn_set
), 1) == VCTP_M
))
34684 machine_mode mode
= GET_MODE (SET_SRC (insn_set
));
34685 return ((VECTOR_MODE_P (mode
) && VALID_MVE_PRED_MODE (mode
))
34686 ? GET_MODE_NUNITS (mode
) : 0);
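/* For example (illustrative): the predicate generated by a vctp32 has a
   4-lane predicate mode, so this returns 4; vctp16 gives 8 and vctp8 gives
   16.  For anything that is not a VCTP/VCTP_M single set, the function
   falls through and returns 0.  */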
34691 enum arm_dl_usage_type
{ DL_USAGE_ANY
= 0,
34693 DL_USAGE_WRITE
= 2 };
34695 /* Check if INSN requires the use of the VPR reg, if it does, return the
34696 sub-rtx of the VPR reg. The TYPE argument controls whether
34697 this function should:
34698 * For TYPE == DL_USAGE_ANY, check all operands, including the OUT operands,
34699 and return the first occurrence of the VPR reg.
34700 * For TYPE == DL_USAGE_READ, only check the input operands.
34701 * For TYPE == DL_USAGE_WRITE, only check the output operands.
34702 (INOUT operands are considered both as input and output operands)
34705 arm_get_required_vpr_reg (rtx_insn
*insn
,
34706 arm_dl_usage_type type
= DL_USAGE_ANY
)
34708 gcc_assert (type
< 3);
34709 if (!NONJUMP_INSN_P (insn
))
34713 extract_constrain_insn (insn
);
34714 int n_operands
= recog_data
.n_operands
;
34715 if (recog_data
.n_alternatives
== 0)
34718 /* Fill in recog_op_alt with information about the constraints of
34720 preprocess_constraints (insn
);
34722 for (int op
= 0; op
< n_operands
; op
++)
34724 requires_vpr
= true;
34725 if (type
== DL_USAGE_READ
34726 && recog_data
.operand_type
[op
] == OP_OUT
)
34728 else if (type
== DL_USAGE_WRITE
34729 && recog_data
.operand_type
[op
] == OP_IN
)
34732 /* Iterate through alternatives of operand "op" in recog_op_alt and
34733 identify if the operand is required to be the VPR. */
34734 for (int alt
= 0; alt
< recog_data
.n_alternatives
; alt
++)
34736 const operand_alternative
*op_alt
34737 = &recog_op_alt
[alt
* n_operands
];
34738 /* Fetch the reg_class for each entry and check it against the
34739 VPR_REG reg_class. */
34740 if (alternative_class (op_alt
, op
) != VPR_REG
)
34741 requires_vpr
= false;
34743 /* If all alternatives of the insn require the VPR reg for this operand,
	 it means that either this is a VPR-generating instruction, like a vctp,
	 vcmp, etc., or it is a VPT-predicated instruction.  Return the subrtx
34746 of the VPR reg operand. */
34748 return recog_data
.operand
[op
];
34753 /* Wrapper function of arm_get_required_vpr_reg with TYPE == DL_USAGE_READ,
34754 so return the VPR only if it is an input operand to the insn. */
34757 arm_get_required_vpr_reg_param (rtx_insn
*insn
)
34759 return arm_get_required_vpr_reg (insn
, DL_USAGE_READ
);
34762 /* Wrapper function of arm_get_required_vpr_reg with TYPE == DL_USAGE_WRITE,
34763 so return the VPR only if it is the return value, an output of, or is
34764 clobbered by the insn. */
34767 arm_get_required_vpr_reg_ret_val (rtx_insn
*insn
)
34769 return arm_get_required_vpr_reg (insn
, DL_USAGE_WRITE
);
34772 /* Return the first VCTP instruction in BB, if it exists, or NULL otherwise. */
34775 arm_mve_get_loop_vctp (basic_block bb
)
34777 rtx_insn
*insn
= BB_HEAD (bb
);
34779 /* Now scan through all the instruction patterns and pick out the VCTP
34780 instruction. We require arm_get_required_vpr_reg_param to be false
34781 to make sure we pick up a VCTP, rather than a VCTP_M. */
34782 FOR_BB_INSNS (bb
, insn
)
34783 if (NONDEBUG_INSN_P (insn
))
34784 if (arm_get_required_vpr_reg_ret_val (insn
)
34785 && (arm_mve_get_vctp_lanes (insn
) != 0)
34786 && !arm_get_required_vpr_reg_param (insn
))
34791 /* Return true if INSN is a MVE instruction that is VPT-predicable and is
34792 predicated on VPR_REG. */
34795 arm_mve_insn_predicated_by (rtx_insn
*insn
, rtx vpr_reg
)
34797 rtx insn_vpr_reg_operand
= (MVE_VPT_PREDICATED_INSN_P (insn
)
34798 ? arm_get_required_vpr_reg_param (insn
)
34800 return (insn_vpr_reg_operand
34801 && rtx_equal_p (vpr_reg
, insn_vpr_reg_operand
));
34804 /* Utility function to identify if INSN is an MVE instruction that performs
34805 some across lane operation (and as a result does not align with normal
34806 lane predication rules). All such instructions give one only scalar
34807 output, except for vshlcq which gives a PARALLEL of a vector and a scalar
34808 (one vector result and one carry output). */
34811 arm_mve_across_lane_insn_p (rtx_insn
* insn
)
34813 df_ref insn_defs
= NULL
;
34814 if (!MVE_VPT_PREDICABLE_INSN_P (insn
))
34817 FOR_EACH_INSN_DEF (insn_defs
, insn
)
34818 if (!VALID_MVE_MODE (GET_MODE (DF_REF_REG (insn_defs
)))
34819 && !arm_get_required_vpr_reg_ret_val (insn
))
34825 /* Utility function to identify if INSN is an MVE load or store instruction.
34826 * For TYPE == DL_USAGE_ANY, check all operands. If the function returns
34827 true, INSN is a load or a store insn.
34828 * For TYPE == DL_USAGE_READ, only check the input operands. If the
34829 function returns true, INSN is a load insn.
34830 * For TYPE == DL_USAGE_WRITE, only check the output operands. If the
34831 function returns true, INSN is a store insn. */
34834 arm_mve_load_store_insn_p (rtx_insn
* insn
,
34835 arm_dl_usage_type type
= DL_USAGE_ANY
)
34837 gcc_assert (type
< 3);
34838 int n_operands
= recog_data
.n_operands
;
34839 extract_insn (insn
);
34841 for (int op
= 0; op
< n_operands
; op
++)
34843 if (type
== DL_USAGE_READ
&& recog_data
.operand_type
[op
] == OP_OUT
)
34845 else if (type
== DL_USAGE_WRITE
&& recog_data
.operand_type
[op
] == OP_IN
)
34847 if (mve_memory_operand (recog_data
.operand
[op
],
34848 GET_MODE (recog_data
.operand
[op
])))
34854 /* Return TRUE if INSN is validated for implicit predication by how its outputs
   If INSN is an MVE operation across lanes that is not predicated by
   VCTP_VPR_GENERATED it cannot be validated by the use of its outputs.
34860 Any other INSN is safe to implicit predicate if we don't use its outputs
34861 outside the loop. The instructions that use this INSN's outputs will be
34862 validated as we go through the analysis. */
34865 arm_mve_impl_pred_on_outputs_p (rtx_insn
*insn
, rtx vctp_vpr_generated
)
34867 /* Reject any unpredicated across lane operation. */
34868 if (!arm_mve_insn_predicated_by (insn
, vctp_vpr_generated
)
34869 && arm_mve_across_lane_insn_p (insn
))
34872 /* Next, scan forward to the various USEs of the DEFs in this insn. */
34873 df_ref insn_def
= NULL
;
34874 basic_block insn_bb
= BLOCK_FOR_INSN (insn
);
34875 FOR_EACH_INSN_DEF (insn_def
, insn
)
34877 for (df_ref use
= DF_REG_USE_CHAIN (DF_REF_REGNO (insn_def
));
34879 use
= DF_REF_NEXT_REG (use
))
34881 rtx_insn
*next_use_insn
= DF_REF_INSN (use
);
34882 if (!INSN_P (next_use_insn
) || DEBUG_INSN_P (next_use_insn
))
34885 if (insn_bb
!= BLOCK_FOR_INSN (next_use_insn
))
34893 /* Returns the prevailing definition of OP before CUR_INSN in the same
34894 basic block as CUR_INSN, if one exists, returns NULL otherwise. */
34897 arm_last_vect_def_insn (rtx op
, rtx_insn
*cur_insn
)
34900 || !BLOCK_FOR_INSN (cur_insn
))
34904 rtx_insn
*last_def
= NULL
;
34905 for (def_insns
= DF_REG_DEF_CHAIN (REGNO (op
));
34907 def_insns
= DF_REF_NEXT_REG (def_insns
))
34909 rtx_insn
*def_insn
= DF_REF_INSN (def_insns
);
34910 /* Definition not in the loop body or after the current insn. */
34911 if (DF_REF_BB (def_insns
) != BLOCK_FOR_INSN (cur_insn
)
34912 || INSN_UID (def_insn
) >= INSN_UID (cur_insn
))
34915 if (!last_def
|| INSN_UID (def_insn
) > INSN_UID (last_def
))
34916 last_def
= def_insn
;
34922 /* This function returns TRUE if we can validate the implicit predication of
34923 INSN_IN with VCTP_VPR_GENERATED based on the definition of the instruction's
34926 If INSN_IN is a MVE operation across lanes then all of its MVE vector
   operands must have their tail-predicated lanes be zeroes.  We keep track of any
34928 instructions that define vector operands for which this is true in
34931 For any other INSN_IN, the definition of all its operands must be defined
34932 inside the loop body by an instruction that comes before INSN_IN and not be
34933 a MVE load predicated by a different VPR. These instructions have all been
34934 validated for explicit or implicit predication.
34938 arm_mve_impl_pred_on_inputs_p (vec
<rtx_insn
*> *props_zero_set
,
34939 rtx_insn
*insn_in
, rtx vctp_vpr_generated
)
34941 /* If all inputs come from instructions that are explicitly or
34942 implicitly predicated by the same predicate then it is safe to
34943 implicitly predicate this instruction. */
34944 df_ref insn_uses
= NULL
;
34945 bool across_lane
= arm_mve_across_lane_insn_p (insn_in
);
34946 FOR_EACH_INSN_USE (insn_uses
, insn_in
)
34948 rtx op
= DF_REF_REG (insn_uses
);
34949 rtx_insn
*def_insn
= arm_last_vect_def_insn (op
, insn_in
);
34952 if (!VALID_MVE_MODE (GET_MODE (op
)))
34954 if (!def_insn
|| !props_zero_set
->contains (def_insn
))
34961 || (!arm_mve_insn_predicated_by (def_insn
, vctp_vpr_generated
)
34962 && arm_mve_load_store_insn_p (def_insn
, DL_USAGE_READ
)))
34970 /* Determine whether INSN_IN is safe to implicitly predicate based on the type
34971 of instruction and where needed the definition of its inputs and the uses of
34973 Return TRUE if it is safe to implicitly predicate and FALSE otherwise.
34975 * If INSN_IN is a store, then it is always unsafe to implicitly predicate it.
34976 * If INSN_IN is a load, only reject implicit predication if its uses
34977 directly invalidate it.
34978 * If INSN_IN operates across vector lanes and does not have the
34979 "mve_safe_imp_xlane_pred" attribute, then it is always unsafe to implicitly
34981 * If INSN_IN operates on Floating Point elements and we are not compiling
34982 with -Ofast, then it is unsafe to implicitly predicate it as we may be
34983 changing exception and cumulative bits behaviour.
34984 * If INSN_IN is a VCTP instruction, then it is safe to implicitly predicate,
34985 but instructions that use this predicate will need to be checked
34986 just like any other UNPREDICATED MVE instruction.
34987 * Otherwise check if INSN_IN's inputs or uses of outputs can validate its
34988 implicit predication.
34990 * If all inputs come from instructions that are explicitly or implicitly
34991 predicated by the same predicate then it is safe to implicitly predicate
34993 * If INSN_IN is an operation across lanes with the "mve_safe_imp_xlane_pred"
     attribute, then all its operands must have zeroed falsely predicated tail
34997 * Otherwise, check if the implicit predication of INSN_IN can be validated
34998 based on its inputs, and if not check whether it can be validated based on
34999 how its outputs are used. */
35002 arm_mve_impl_predicated_p (vec
<rtx_insn
*> *props_zero_set
,
35003 rtx_insn
*insn_in
, rtx vctp_vpr_generated
)
35006 /* If INSN_IN is a store, then it is always unsafe to implicitly
35008 if (arm_mve_load_store_insn_p (insn_in
, DL_USAGE_WRITE
))
35011 /* If INSN_IN is a load, only reject implicit predication if its uses
35012 directly invalidate it. */
35013 if (arm_mve_load_store_insn_p (insn_in
, DL_USAGE_READ
))
35015 if (!arm_mve_impl_pred_on_outputs_p (insn_in
, vctp_vpr_generated
))
35020 /* If INSN_IN operates across vector lanes and does not have the
35021 "mve_safe_imp_xlane_pred" attribute, then it is always unsafe to implicitly
35023 if (arm_mve_across_lane_insn_p (insn_in
)
35024 && (get_attr_mve_safe_imp_xlane_pred (insn_in
)
35025 != MVE_SAFE_IMP_XLANE_PRED_YES
))
35028 /* If INSN_IN operates on Floating Point elements and we are not compiling
35029 with -Ofast, then it is unsafe to implicitly predicate it as we may be
35030 changing exception and cumulative bits behaviour. */
35031 if (!flag_unsafe_math_optimizations
35032 && flag_trapping_math
35033 && MVE_VPT_UNPREDICATED_INSN_P (insn_in
))
35036 FOR_EACH_INSN_DEF (def
, insn_in
)
35037 if (DF_REF_TYPE (def
) == DF_REF_REG_DEF
35038 && FLOAT_MODE_P (GET_MODE (DF_REF_REG (def
))))
35040 FOR_EACH_INSN_USE (def
, insn_in
)
35041 if (DF_REF_TYPE (def
) == DF_REF_REG_DEF
35042 && FLOAT_MODE_P (GET_MODE (DF_REF_REG (def
))))
35046 /* If INSN_IN is a VCTP instruction, then it is safe to implicitly predicate,
35047 but instructions that use this predicate will need to be checked
35048 just like any other UNPREDICATED MVE instruction. */
35049 if (arm_get_required_vpr_reg_ret_val (insn_in
)
35050 && (arm_mve_get_vctp_lanes (insn_in
) != 0))
35053 /* Otherwise, check if the implicit predication of INSN_IN can be validated
35054 based on its inputs, and if not check whether it can be validated based on
35055 how its outputs are used. */
35056 return (arm_mve_impl_pred_on_inputs_p (props_zero_set
, insn_in
, vctp_vpr_generated
)
35057 || arm_mve_impl_pred_on_outputs_p (insn_in
, vctp_vpr_generated
));
35060 /* Helper function to `arm_mve_dlstp_check_inc_counter` and to
35061 `arm_mve_dlstp_check_dec_counter`. In the situations where the loop counter
35062 is incrementing by 1 or decrementing by 1 in each iteration, ensure that the
35063 number of iterations, the value of REG, going into the loop, was calculated
35065 REG = (N + [1, VCTP_STEP - 1]) / VCTP_STEP
35067 where N is equivalent to the VCTP_REG.
35071 arm_mve_check_reg_origin_is_num_elems (loop
*loop
, rtx reg
, rtx vctp_step
,
35074 df_ref counter_max_last_def
= NULL
;
35076 /* More than one reaching definition. */
35077 if (DF_REG_DEF_COUNT (REGNO (reg
)) > 2)
  /* Look for a single definition of REG going into the loop.  The DEF_CHAIN will
35081 have at least two values, as this is a loop induction variable that is
35082 defined outside the loop. */
35083 for (df_ref def
= DF_REG_DEF_CHAIN (REGNO (reg
));
35085 def
= DF_REF_NEXT_REG (def
))
35087 /* Skip the update inside the loop, this has already been checked by the
35088 iv_analyze call earlier. */
35089 if (DF_REF_BB (def
) == loop
->header
)
35092 counter_max_last_def
= def
;
35096 if (!counter_max_last_def
)
35099 rtx counter_max_last_set
= single_set (DF_REF_INSN (counter_max_last_def
));
35101 if (!counter_max_last_set
)
35104 /* If we encounter a simple SET from a REG, follow it through. */
35105 if (REG_P (SET_SRC (counter_max_last_set
)))
35107 if (DF_REG_DEF_COUNT (REGNO (SET_SRC (counter_max_last_set
))) != 1)
35110 counter_max_last_def
35111 = DF_REG_DEF_CHAIN (REGNO (SET_SRC (counter_max_last_set
)));
35112 counter_max_last_set
35113 = single_set (DF_REF_INSN (counter_max_last_def
));
35115 if (!counter_max_last_set
)
35119 /* We are looking for:
35120 COUNTER_MAX_LAST_SET = (N + VCTP_STEP - 1) / VCTP_STEP.
35121 We currently only support the unsigned VCTP_OP case. */
35122 rtx division
= SET_SRC (counter_max_last_set
);
35123 if (GET_CODE (division
) != LSHIFTRT
)
35126 /* Now check that we are dividing by VCTP_STEP, i.e. the number of lanes. */
35127 rtx divisor
= XEXP (division
, 1);
35128 unsigned vctp_step_cst
= abs_hwi (INTVAL (vctp_step
));
35129 if (!CONST_INT_P (divisor
)
35130 || (1U << INTVAL (divisor
) != vctp_step_cst
))
35133 rtx dividend
= XEXP (division
, 0);
35134 if (!REG_P (dividend
))
35138 /* For now only support the simple case, this only works for unsigned N, any
35139 signed N will have further computations to deal with overflow. */
35140 if (DF_REG_DEF_COUNT (REGNO (dividend
)) != 1)
35143 rtx_insn
*dividend_insn
= DF_REF_INSN (DF_REG_DEF_CHAIN (REGNO (dividend
)));
35144 rtx dividend_op
= single_set (dividend_insn
);
35146 && GET_CODE (SET_SRC (dividend_op
)) != PLUS
)
35149 /* Check if PLUS_OP is (VCTP_OP + VAL), where VAL = [1, VCTP_STEP - 1]. */
35150 rtx plus_op
= SET_SRC (dividend_op
);
35151 if (!REG_P (XEXP (plus_op
, 0))
35152 || !CONST_INT_P (XEXP (plus_op
, 1))
35153 || !IN_RANGE (INTVAL (XEXP (plus_op
, 1)), 1, vctp_step_cst
- 1))
35156 /* VCTP_REG may have been copied before entering the loop, let's see if we can
35157 trace such a copy back. If we have more than one reaching definition then
35158 bail out as analysis will be too difficult. */
35159 if (DF_REG_DEF_COUNT (REGNO (vctp_reg
)) > 2)
35162 /* Look for the definition of N. */
35163 for (df_ref def
= DF_REG_DEF_CHAIN (REGNO (vctp_reg
));
35165 def
= DF_REF_NEXT_REG (def
))
35167 if (DF_REF_BB (def
) == loop
->header
)
35169 rtx set
= single_set (DF_REF_INSN (def
));
35171 && REG_P (SET_SRC (set
))
35172 && !HARD_REGISTER_P (SET_SRC (set
)))
35173 vctp_reg
= SET_SRC (set
);
35176 return rtx_equal_p (vctp_reg
, XEXP (plus_op
, 0));
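/* Illustrative source-level shape of the pattern matched above (assuming a
   vctp32, so VCTP_STEP == 4; variable names are made up):

     unsigned int n = num_of_elem;
     unsigned int iters = (n + 3) / 4;   // REG entering the loop

   which, for an unsigned N, is lowered to an LSHIFTRT by 2 of (n + 3),
   exactly the (N + [1, VCTP_STEP - 1]) / VCTP_STEP form checked for.  */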
35179 /* If we have identified the loop to have an incrementing counter, we need to
35180 make sure that it increments by 1 and that the loop is structured correctly:
35181 * The counter starts from 0
35182 * The counter terminates at (num_of_elem + num_of_lanes - 1) / num_of_lanes
35183 * The vctp insn uses a reg that decrements appropriately in each iteration.
35187 arm_mve_dlstp_check_inc_counter (loop
*loop
, rtx_insn
* vctp_insn
,
35188 rtx condconst
, rtx condcount
)
35190 rtx vctp_reg
= XVECEXP (XEXP (PATTERN (vctp_insn
), 1), 0, 0);
35191 /* The loop latch has to be empty. When compiling all the known MVE LoLs in
35192 user applications, none of those with incrementing counters had any real
35193 insns in the loop latch. As such, this function has only been tested with
35194 an empty latch and may misbehave or ICE if we somehow get here with an
35195 increment in the latch, so, for correctness, error out early. */
35196 if (!empty_block_p (loop
->latch
))
35199 class rtx_iv vctp_reg_iv
;
35200 /* For loops of DLSTP_TYPE_B, the loop counter is independent of the decrement
35201 of the reg used in the vctp_insn. So run iv analysis on that reg. This
35202 has to succeed for such loops to be supported. */
35203 if (!iv_analyze (vctp_insn
, as_a
<scalar_int_mode
> (GET_MODE (vctp_reg
)),
35204 vctp_reg
, &vctp_reg_iv
))
35207 /* Extract the decrementnum of the vctp reg from the iv. This decrementnum
35208 is the number of lanes/elements it decrements from the remaining number of
35209 lanes/elements to process in the loop, for this reason this is always a
     negative number, but to simplify later checks we use its absolute value.  */
35211 HOST_WIDE_INT decrementnum
= INTVAL (vctp_reg_iv
.step
);
35212 if (decrementnum
>= 0)
35214 decrementnum
= abs_hwi (decrementnum
);
35216 /* Find where both of those are modified in the loop header bb. */
35217 df_ref condcount_reg_set_df
= df_bb_regno_only_def_find (loop
->header
,
35218 REGNO (condcount
));
35219 df_ref vctp_reg_set_df
= df_bb_regno_only_def_find (loop
->header
,
35221 if (!condcount_reg_set_df
|| !vctp_reg_set_df
)
35223 rtx condcount_reg_set
= single_set (DF_REF_INSN (condcount_reg_set_df
));
35224 rtx vctp_reg_set
= single_set (DF_REF_INSN (vctp_reg_set_df
));
35225 if (!condcount_reg_set
|| !vctp_reg_set
)
35228 /* Ensure the modification of the vctp reg from df is consistent with
35229 the iv and the number of lanes on the vctp insn. */
35230 if (GET_CODE (SET_SRC (vctp_reg_set
)) != PLUS
35231 || !REG_P (SET_DEST (vctp_reg_set
))
35232 || !REG_P (XEXP (SET_SRC (vctp_reg_set
), 0))
35233 || REGNO (SET_DEST (vctp_reg_set
))
35234 != REGNO (XEXP (SET_SRC (vctp_reg_set
), 0))
35235 || !CONST_INT_P (XEXP (SET_SRC (vctp_reg_set
), 1))
35236 || INTVAL (XEXP (SET_SRC (vctp_reg_set
), 1)) >= 0
35237 || decrementnum
!= abs_hwi (INTVAL (XEXP (SET_SRC (vctp_reg_set
), 1)))
35238 || decrementnum
!= arm_mve_get_vctp_lanes (vctp_insn
))
35241 if (REG_P (condcount
) && REG_P (condconst
))
35243 /* First we need to prove that the loop is going 0..condconst with an
35244 inc of 1 in each iteration. */
35245 if (GET_CODE (SET_SRC (condcount_reg_set
)) == PLUS
35246 && CONST_INT_P (XEXP (SET_SRC (condcount_reg_set
), 1))
35247 && INTVAL (XEXP (SET_SRC (condcount_reg_set
), 1)) == 1)
35249 rtx counter_reg
= SET_DEST (condcount_reg_set
);
35250 /* Check that the counter did indeed start from zero. */
35251 df_ref this_set
= DF_REG_DEF_CHAIN (REGNO (counter_reg
));
35254 df_ref last_set_def
= DF_REF_NEXT_REG (this_set
);
35257 rtx_insn
* last_set_insn
= DF_REF_INSN (last_set_def
);
35258 rtx last_set
= single_set (last_set_insn
);
35261 rtx counter_orig_set
;
35262 counter_orig_set
= SET_SRC (last_set
);
35263 if (!CONST_INT_P (counter_orig_set
)
35264 || (INTVAL (counter_orig_set
) != 0))
35266 /* And finally check that the target value of the counter,
35267 condconst, is of the correct shape. */
35268 if (!arm_mve_check_reg_origin_is_num_elems (loop
, condconst
,
35279 /* Everything looks valid. */
35283 /* Helper function to 'arm_mve_dlstp_check_dec_counter' to make sure DEC_INSN
35284 is of the expected form:
35285 (set (reg a) (plus (reg a) (const_int)))
35286 where (reg a) is the same as CONDCOUNT.
35287 Return a rtx with the set if it is in the right format or NULL_RTX
35291 check_dec_insn (rtx_insn
*dec_insn
, rtx condcount
)
35293 if (!NONDEBUG_INSN_P (dec_insn
))
35295 rtx dec_set
= single_set (dec_insn
);
35297 || !REG_P (SET_DEST (dec_set
))
35298 || GET_CODE (SET_SRC (dec_set
)) != PLUS
35299 || !REG_P (XEXP (SET_SRC (dec_set
), 0))
35300 || !CONST_INT_P (XEXP (SET_SRC (dec_set
), 1))
35301 || REGNO (SET_DEST (dec_set
))
35302 != REGNO (XEXP (SET_SRC (dec_set
), 0))
35303 || REGNO (SET_DEST (dec_set
)) != REGNO (condcount
))
35309 /* Helper function to `arm_mve_loop_valid_for_dlstp`. In the case of a
35310 counter that is decrementing, ensure that it is decrementing by the
35311 right amount in each iteration and that the target condition is what
35315 arm_mve_dlstp_check_dec_counter (loop
*loop
, rtx_insn
* vctp_insn
,
35316 rtx condconst
, rtx condcount
)
35318 rtx vctp_reg
= XVECEXP (XEXP (PATTERN (vctp_insn
), 1), 0, 0);
35319 class rtx_iv vctp_reg_iv
;
35320 HOST_WIDE_INT decrementnum
;
35321 /* For decrementing loops of DLSTP_TYPE_A, the counter is usually present in the
35322 loop latch. Here we simply need to verify that this counter is the same
35323 reg that is also used in the vctp_insn and that it is not otherwise
35325 rtx dec_set
= check_dec_insn (BB_END (loop
->latch
), condcount
);
35326 /* If not in the loop latch, try to find the decrement in the loop header. */
35327 if (dec_set
== NULL_RTX
)
35329 df_ref temp
= df_bb_regno_only_def_find (loop
->header
, REGNO (condcount
));
35330 /* If we haven't been able to find the decrement, bail out. */
35333 dec_set
= check_dec_insn (DF_REF_INSN (temp
), condcount
);
35335 if (dec_set
== NULL_RTX
)
35339 decrementnum
= INTVAL (XEXP (SET_SRC (dec_set
), 1));
35341 /* This decrementnum is the number of lanes/elements it decrements from the
35342 remaining number of lanes/elements to process in the loop, for this reason
     this is always a negative number, but to simplify later checks we use its
     absolute value.  */
35345 if (decrementnum
>= 0)
35347 decrementnum
= -decrementnum
;
35349 /* If the decrementnum is a 1, then we need to look at the loop vctp_reg and
35350 verify that it also decrements correctly.
35351 Then, we need to establish that the starting value of the loop decrement
35352 originates from the starting value of the vctp decrement. */
35353 if (decrementnum
== 1)
35355 class rtx_iv vctp_reg_iv
, condcount_reg_iv
;
35356 /* The loop counter is found to be independent of the decrement
35357 of the reg used in the vctp_insn, again. Ensure that IV analysis
35358 succeeds and check the step. */
35359 if (!iv_analyze (vctp_insn
, as_a
<scalar_int_mode
> (GET_MODE (vctp_reg
)),
35360 vctp_reg
, &vctp_reg_iv
))
35362 /* Ensure it matches the number of lanes of the vctp instruction. */
35363 if (abs (INTVAL (vctp_reg_iv
.step
))
35364 != arm_mve_get_vctp_lanes (vctp_insn
))
35367 if (!arm_mve_check_reg_origin_is_num_elems (loop
, condcount
,
35372 /* If the decrements are the same, then the situation is simple: either they
35373 are also the same reg, which is safe, or they are different registers, in
     which case make sure that there is only a simple SET from one to the
     other inside the loop.  */
35376 else if (decrementnum
== arm_mve_get_vctp_lanes (vctp_insn
))
35378 if (REGNO (condcount
) != REGNO (vctp_reg
))
	  /* It wasn't the same reg, but it could be behind a
35381 (set (vctp_reg) (condcount)), so instead find where
35382 the VCTP insn is DEF'd inside the loop. */
35383 rtx_insn
*vctp_reg_insn
35384 = DF_REF_INSN (df_bb_regno_only_def_find (loop
->header
,
35385 REGNO (vctp_reg
)));
35386 rtx vctp_reg_set
= single_set (vctp_reg_insn
);
35387 /* This must just be a simple SET from the condcount. */
35389 || !REG_P (SET_DEST (vctp_reg_set
))
35390 || !REG_P (SET_SRC (vctp_reg_set
))
35391 || REGNO (SET_SRC (vctp_reg_set
)) != REGNO (condcount
))
35398 /* We now only need to find out that the loop terminates with a LE
35399 zero condition. If condconst is a const_int, then this is easy.
35400 If its a REG, look at the last condition+jump in a bb before
35401 the loop, because that usually will have a branch jumping over
35402 the loop header. */
35403 rtx_insn
*jump_insn
= BB_END (loop
->header
);
35404 if (CONST_INT_P (condconst
)
35405 && !(INTVAL (condconst
) == 0 && JUMP_P (jump_insn
)
35406 && GET_CODE (XEXP (PATTERN (jump_insn
), 1)) == IF_THEN_ELSE
35407 && (GET_CODE (XEXP (XEXP (PATTERN (jump_insn
), 1), 0)) == NE
35408 ||GET_CODE (XEXP (XEXP (PATTERN (jump_insn
), 1), 0)) == GT
)))
35410 else if (REG_P (condconst
))
35412 basic_block preheader_b
= loop_preheader_edge (loop
)->src
;
35413 if (!single_pred_p (preheader_b
))
35415 basic_block pre_loop_bb
= single_pred (preheader_b
);
35417 rtx initial_compare
= NULL_RTX
;
35418 if (!(prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb
))
35419 && INSN_P (prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb
)))))
35423 = single_set (prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb
)));
35424 if (!(initial_compare
35425 && cc_register (SET_DEST (initial_compare
), VOIDmode
)
35426 && GET_CODE (SET_SRC (initial_compare
)) == COMPARE
35427 && CONST_INT_P (XEXP (SET_SRC (initial_compare
), 1))
35428 && INTVAL (XEXP (SET_SRC (initial_compare
), 1)) == 0))
35431 /* Usually this is a LE condition, but it can also just be a GT or an EQ
     condition (if the value is unsigned or the compiler knows it's not negative).  */
35433 rtx_insn
*loop_jumpover
= BB_END (pre_loop_bb
);
35434 if (!(JUMP_P (loop_jumpover
)
35435 && GET_CODE (XEXP (PATTERN (loop_jumpover
), 1)) == IF_THEN_ELSE
35436 && (GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover
), 1), 0)) == LE
35437 || GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover
), 1), 0)) == GT
35438 || GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover
), 1), 0)) == EQ
)))
35442 /* Everything looks valid. */
35446 /* Function to check a loop's structure to see if it is a valid candidate for
35447 an MVE Tail Predicated Low-Overhead Loop. Returns the loop's VCTP_INSN if
35448 it is valid, or NULL if it isn't. */
35451 arm_mve_loop_valid_for_dlstp (loop
*loop
)
35453 /* Doloop can only be done "elementwise" with predicated dlstp/letp if it
35454 contains a VCTP on the number of elements processed by the loop.
35455 Find the VCTP predicate generation inside the loop body BB. */
35456 rtx_insn
*vctp_insn
= arm_mve_get_loop_vctp (loop
->header
);
35460 /* We only support two loop forms for tail predication:
35461 DLSTP_TYPE_A) Loops of the form:
35462 int num_of_lanes = 128 / elem_size;
35463 while (num_of_elem > 0)
35465 p = vctp<size> (num_of_elem);
35466 num_of_elem -= num_of_lanes;
35468 DLSTP_TYPE_B) Loops of the form:
35469 int num_of_lanes = 128 / elem_size;
35470 int num_of_iters = (num_of_elem + num_of_lanes - 1) / num_of_lanes;
35471 for (i = 0; i < num_of_iters; i++)
35473 p = vctp<size> (num_of_elem);
35474 num_of_elem -= num_of_lanes;
     Then, depending on the type of loop above we will need to do
35478 different sets of checks. */
35479 iv_analysis_loop_init (loop
);
35481 /* In order to find out if the loop is of DLSTP_TYPE_A or DLSTP_TYPE_B above
35482 look for the loop counter: it will either be incrementing by one per
35483 iteration or it will be decrementing by num_of_lanes. We can find the
35484 loop counter in the condition at the end of the loop. */
35485 rtx_insn
*loop_cond
= prev_nonnote_nondebug_insn_bb (BB_END (loop
->header
));
35486 if (!(cc_register (XEXP (PATTERN (loop_cond
), 0), VOIDmode
)
35487 && GET_CODE (XEXP (PATTERN (loop_cond
), 1)) == COMPARE
))
35490 /* The operands in the condition: Try to identify which one is the
35491 constant and which is the counter and run IV analysis on the latter. */
35492 rtx cond_arg_1
= XEXP (XEXP (PATTERN (loop_cond
), 1), 0);
35493 rtx cond_arg_2
= XEXP (XEXP (PATTERN (loop_cond
), 1), 1);
35495 rtx loop_cond_constant
;
35497 class rtx_iv cond_counter_iv
, cond_temp_iv
;

  if (CONST_INT_P (cond_arg_1))
    {
      /* cond_arg_1 is the constant and cond_arg_2 is the counter.  */
      loop_cond_constant = cond_arg_1;
      loop_counter = cond_arg_2;
      iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_2)),
                  cond_arg_2, &cond_counter_iv);
    }
  else if (CONST_INT_P (cond_arg_2))
    {
      /* cond_arg_2 is the constant and cond_arg_1 is the counter.  */
      loop_cond_constant = cond_arg_2;
      loop_counter = cond_arg_1;
      iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_1)),
                  cond_arg_1, &cond_counter_iv);
    }
  else if (REG_P (cond_arg_1) && REG_P (cond_arg_2))
    {
      /* If both operands to the compare are REGs, we can safely
         run IV analysis on both and then determine which is the
         constant by looking at the step.
         First assume cond_arg_1 is the counter.  */
      loop_counter = cond_arg_1;
      loop_cond_constant = cond_arg_2;
      iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_1)),
                  cond_arg_1, &cond_counter_iv);
      iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_2)),
                  cond_arg_2, &cond_temp_iv);

      /* Look at the steps and swap around the rtx's if needed.  Error out if
         one of them cannot be identified as constant.  */
      if (!CONST_INT_P (cond_counter_iv.step) || !CONST_INT_P (cond_temp_iv.step))
        return NULL;
      if (INTVAL (cond_counter_iv.step) != 0 && INTVAL (cond_temp_iv.step) != 0)
        return NULL;
      if (INTVAL (cond_counter_iv.step) == 0 && INTVAL (cond_temp_iv.step) != 0)
        {
          loop_counter = cond_arg_2;
          loop_cond_constant = cond_arg_1;
          cond_counter_iv = cond_temp_iv;
        }
    }
  else
    return NULL;

  if (!REG_P (loop_counter))
    return NULL;
  if (!(REG_P (loop_cond_constant) || CONST_INT_P (loop_cond_constant)))
    return NULL;

  /* Now we have extracted the IV step of the loop counter, call the
     appropriate checking function.  */
  if (INTVAL (cond_counter_iv.step) > 0)
    return arm_mve_dlstp_check_inc_counter (loop, vctp_insn,
                                            loop_cond_constant, loop_counter);
  else if (INTVAL (cond_counter_iv.step) < 0)
    return arm_mve_dlstp_check_dec_counter (loop, vctp_insn,
                                            loop_cond_constant, loop_counter);
  else
    return NULL;
}

/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass.  It could be argued that turning large enough loops
   into low-overhead loops would not show a significant performance boost.
   However, in the case of tail predication we would still avoid using VPT/VPST
   instructions inside the loop, and in either case using low-overhead loops
   would not be detrimental, so we decided to not consider size, avoiding the
   need of a heuristic to determine what an appropriate size boundary is.  */

static bool
arm_predict_doloop_p (struct loop *loop)
{
  /* On arm, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost.  Ensure the loop is innermost,
     it is valid and as per arm_target_bb_ok_for_lob and the
     correct architecture flags are enabled.  */
  if (!(TARGET_HAVE_LOB && optimize > 0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Predict doloop failure due to"
                            " target architecture or optimisation flags.\n");
      return false;
    }
  else if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Predict doloop failure due to"
                            " loop nesting.\n");
      return false;
    }
  else if (!arm_target_bb_ok_for_lob (loop->header->next_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Predict doloop failure due to"
                            " loop bb complexity.\n");
      return false;
    }
  else
    {
      gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
      while (!gsi_end_p (gsi))
        {
          if (is_gimple_call (gsi_stmt (gsi))
              && !gimple_call_builtin_p (gsi_stmt (gsi)))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Predict doloop failure due to"
                                    " call in loop.\n");
              return false;
            }
          gsi_next (&gsi);
        }
    }

  return true;
}

/* Implement targetm.loop_unroll_adjust.  Use this to block unrolling of loops
   that may later be turned into MVE Tail Predicated Low Overhead Loops.  The
   performance benefit of an MVE LoL is likely to be much higher than that of
   the unrolling.  */
static unsigned
arm_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  if (TARGET_HAVE_MVE
      && arm_target_bb_ok_for_lob (loop->latch)
      && arm_mve_loop_valid_for_dlstp (loop))
    return 0;

  return nunroll;
}

/* Function to handle emitting a VPT-unpredicated version of a VPT-predicated
   insn to a sequence.  */
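
/* For instance (illustrative, using assembly mnemonics rather than the insn
   patterns themselves): a "vaddt.i32 q0, q1, q2" executed under a VPT block
   maps to the unpredicated "vadd.i32 q0, q1, q2"; the VPR predicate operand
   (and, for merging variants, the "vuninit" merge operand) is dropped, since
   the dlstp/letp loop makes the predication implicit.  */
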
static bool
arm_emit_mve_unpredicated_insn_to_seq (rtx_insn *insn)
{
  rtx insn_vpr_reg_operand = arm_get_required_vpr_reg_param (insn);
  int new_icode = get_attr_mve_unpredicated_insn (insn);
  if (!in_sequence_p ()
      || !MVE_VPT_PREDICATED_INSN_P (insn)
      || (!insn_vpr_reg_operand))
    return false;

  extract_insn (insn);
  rtx arr[8];
  int j = 0;

  /* When transforming a VPT-predicated instruction into its unpredicated
     equivalent we need to drop the VPR operand and we may need to also drop a
     merge "vuninit" input operand, depending on the instruction pattern.  Here
     ensure that we have at most a two-operand difference between the two
     instructions.  */
  int n_operands_diff
      = recog_data.n_operands - insn_data[new_icode].n_operands;
  if (!(n_operands_diff > 0 && n_operands_diff <= 2))
    return false;

  rtx move = NULL_RTX;
  /* Then, loop through the operands of the predicated
     instruction, and retain the ones that map to the
     unpredicated instruction.  */
  for (int i = 0; i < recog_data.n_operands; i++)
    {
      /* Ignore the VPR and, if needed, the vuninit operand.  */
      if (insn_vpr_reg_operand == recog_data.operand[i])
        continue;
      if (n_operands_diff == 2
          && !strcmp (recog_data.constraints[i], "0"))
        {
          move = gen_rtx_SET (arr[0], recog_data.operand[i]);
          arr[0] = recog_data.operand[i];
        }
      else
        arr[j++] = recog_data.operand[i];
    }

  /* Finally, emit the unpredicated instruction.  */
  rtx_insn *new_insn;
  switch (j)
    {
    case 1:
      new_insn = emit_insn (GEN_FCN (new_icode) (arr[0]));
      break;
    case 2:
      new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1]));
      break;
    case 3:
      new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2]));
      break;
    case 4:
      new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
                                                 arr[3]));
      break;
    case 5:
      new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
                                                 arr[3], arr[4]));
      break;
    case 6:
      new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
                                                 arr[3], arr[4], arr[5]));
      break;
    case 7:
      new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
                                                 arr[3], arr[4], arr[5],
                                                 arr[6]));
      break;
    default:
      gcc_unreachable ();
    }
  INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
  if (move)
    {
      new_insn = emit_insn (move);
      INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
    }
  return true;
}

/* Return TRUE if INSN defines a MVE vector operand that has zeroed
   tail-predicated lanes.  This is either true if:
   * INSN is predicated by VCTP_VPR_GENERATED and the 'invalid lanes' operand
     is in the PROPS_ZERO_SET,
   * all MVE vector operands are in the PROPS_ZERO_SET.  */
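
/* For example (illustrative reasoning, not an exhaustive rule): a
   VCTP-predicated VLDR writes zero to its inactive lanes, so if both inputs
   of an unpredicated VADD come from such loads then the VADD's inactive
   lanes are zero as well and the insn may be added to PROPS_ZERO_SET.  */
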
static bool
arm_mve_propagate_zero_pred_p (vec<rtx_insn *> *props_zero_set,
                               rtx_insn *insn, rtx vctp_vpr_generated)
{
  if (arm_mve_load_store_insn_p (insn, DL_USAGE_READ))
    return true;
  if (arm_mve_load_store_insn_p (insn, DL_USAGE_WRITE))
    return false;

  int inactive_idx = -1;

  extract_insn (insn);
  /* If INSN is predicated by VCTP_VPR_GENERATED, then all tail-predicated
     lanes will keep the value that is in the 'invalid lanes' register which we
     identify by the "0" constraint, to ensure it is the same as the 'result'
     register of this instruction.  */
  if (arm_mve_insn_predicated_by (insn, vctp_vpr_generated))
    {
      for (int i = 0; i < recog_data.n_operands; i++)
        {
          if (strcmp (recog_data.constraints[i], "0") == 0
              && VALID_MVE_MODE (GET_MODE (recog_data.operand[i])))
            {
              inactive_idx = i;
              break;
            }
        }
    }

  if (inactive_idx > 0)
    {
      rtx op = recog_data.operand[inactive_idx];
      rtx_insn *def_insn = arm_last_vect_def_insn (op, insn);
      return def_insn != NULL_RTX && props_zero_set->contains (def_insn);
    }

  /* If this instruction is not predicated by VCTP_VPR_GENERATED, then we must
     check that all vector operands have zeroed tail-predicated lanes, and that
     it has at least one vector operand.  */
  bool at_least_one_vector = false;
  df_ref insn_uses;
  FOR_EACH_INSN_USE (insn_uses, insn)
    {
      rtx reg = DF_REF_REG (insn_uses);
      if (!VALID_MVE_MODE (GET_MODE (reg)))
        continue;

      rtx_insn *def_insn = arm_last_vect_def_insn (reg, insn);
      if (def_insn && props_zero_set->contains (def_insn))
        at_least_one_vector |= true;
      else
        return false;
    }
  return at_least_one_vector;
}

/* Attempt to transform the loop contents of loop basic block from VPT
   predicated insns into unpredicated insns for a dlstp/letp loop.  Returns
   the number to decrement from the total number of elements each iteration.
   Returns 1 if tail predication cannot be performed and falls back to
   scalar low-overhead loops.  */
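
/* Roughly, and purely for illustration (register numbers, labels and the
   surrounding code are made up), a predicated loop of the form:

	dls	lr, r3
   .L1:
	vctp.16	r2
	vpst
	vldrht.16	q0, [r0]
	...
	subs	r2, r2, #8
	le	lr, .L1

   becomes the tail-predicated form below, where the predication is implicit:

	dlstp.16	lr, r2
   .L1:
	vldrh.16	q0, [r0]
	...
	letp	lr, .L1

   and the per-iteration decrement (8 for .16) is what this function
   returns.  */
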
static int
arm_attempt_dlstp_transform (rtx label)
{
  if (!dlstp_enabled)
    return 1;

  basic_block body = single_succ (BLOCK_FOR_INSN (label));

  /* Ensure that the bb is within a loop that has all required metadata.  */
  if (!body->loop_father || !body->loop_father->header
      || !body->loop_father->simple_loop_desc)
    return 1;

  loop *loop = body->loop_father;
  /* Instruction that sets the predicate mask depending on how many elements
     are left to process.  */
  rtx_insn *vctp_insn = arm_mve_loop_valid_for_dlstp (loop);
  if (!vctp_insn)
    return 1;

  gcc_assert (single_set (vctp_insn));

  rtx vctp_vpr_generated = single_set (vctp_insn);
  if (!vctp_vpr_generated)
    return 1;

  vctp_vpr_generated = SET_DEST (vctp_vpr_generated);

  if (!vctp_vpr_generated || !REG_P (vctp_vpr_generated)
      || !VALID_MVE_PRED_MODE (GET_MODE (vctp_vpr_generated)))
    return 1;

  /* decrementnum is already known to be valid at this point.  */
  int decrementnum = arm_mve_get_vctp_lanes (vctp_insn);

  rtx_insn *insn = 0;
  rtx_insn *cur_insn = 0;

  auto_vec<rtx_insn *> props_zero_set;

  /* Scan through the insns in the loop bb and emit the transformed bb
     insns to a sequence.  */
  start_sequence ();
  FOR_BB_INSNS (body, insn)
    {
      if (GET_CODE (insn) == CODE_LABEL || NOTE_INSN_BASIC_BLOCK_P (insn))
        continue;
      else if (NOTE_P (insn))
        emit_note ((enum insn_note)NOTE_KIND (insn));
      else if (DEBUG_INSN_P (insn))
        emit_debug_insn (PATTERN (insn));
      else if (!INSN_P (insn))
        {
          end_sequence ();
          return 1;
        }
      /* If the transformation is successful we no longer need the vctp
         insn.  */
      else if (insn == vctp_insn)
        continue;
      /* If the insn pattern requires the use of the VPR value from the
         vctp as an input parameter for predication.  */
      else if (arm_mve_insn_predicated_by (insn, vctp_vpr_generated))
        {
          /* Check whether this INSN propagates the zeroed tail-predication
             lanes.  */
          if (arm_mve_propagate_zero_pred_p (&props_zero_set, insn,
                                             vctp_vpr_generated))
            props_zero_set.safe_push (insn);
          bool success = arm_emit_mve_unpredicated_insn_to_seq (insn);
          if (!success)
            {
              end_sequence ();
              return 1;
            }
        }
      /* If the insn isn't VPT predicated on vctp_vpr_generated, we need to
         make sure that it is still valid within the dlstp/letp loop.  */
      else
        {
          /* If this instruction USE-s the vctp_vpr_generated other than for
             predication, this blocks the transformation as we are not allowed
             to optimise the VPR value away.  */
          df_ref insn_uses = NULL;
          FOR_EACH_INSN_USE (insn_uses, insn)
            {
              if (reg_overlap_mentioned_p (vctp_vpr_generated,
                                           DF_REF_REG (insn_uses)))
                {
                  end_sequence ();
                  return 1;
                }
            }

          /* If within the loop we have an MVE vector instruction that is
             unpredicated, the dlstp/letp looping will add implicit
             predication to it.  This will result in a change in behaviour
             of the instruction, so we need to find out if any instructions
             that feed into the current instruction were implicitly
             predicated.  */
          if (MVE_VPT_PREDICABLE_INSN_P (insn)
              && !arm_mve_impl_predicated_p (&props_zero_set, insn,
                                             vctp_vpr_generated))
            {
              end_sequence ();
              return 1;
            }

          emit_insn (PATTERN (insn));
        }
    }
  rtx_insn *seq = get_insns ();
  end_sequence ();

  /* Re-write the entire BB contents with the transformed
     sequence.  */
  FOR_BB_INSNS_SAFE (body, insn, cur_insn)
    if (!(GET_CODE (insn) == CODE_LABEL || NOTE_INSN_BASIC_BLOCK_P (insn)))
      delete_insn (insn);

  emit_insn_after (seq, BB_END (body));

  /* The transformation has succeeded, so now modify the "count"
     (a.k.a. niter_expr) for the middle-end.  Also set noloop_assumptions
     to NULL to stop the middle-end from making assumptions about the
     number of iterations.  */
  simple_loop_desc (body->loop_father)->niter_expr
    = XVECEXP (SET_SRC (PATTERN (vctp_insn)), 0, 0);
  simple_loop_desc (body->loop_father)->noloop_assumptions = NULL_RTX;
  return decrementnum;
}

#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, cpu->common.isa_bits);
      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }
}

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
      bitmap_clear (isa_all_fpubits_internal);
      bitmap_copy (isa_all_fpubits_internal, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits_internal))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
               " group that are not defined by any FPU.\n"
               "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
    }
}

static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */

/* Implement TARGET_STACK_PROTECT_GUARD.  In case of a
   global variable based guard use the default else
   return a null tree.  */
static tree
arm_stack_protect_guard (void)
{
  if (arm_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}

/* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
   Unlike the arm version, we do NOT implement asm flag outputs.  */

static rtx_insn *
thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
                      vec<machine_mode> & /*input_modes*/,
                      vec<const char *> &constraints,
                      vec<rtx> &, vec<rtx> & /*clobbers*/,
                      HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    if (startswith (constraints[i], "=@cc"))
      {
        sorry ("%<asm%> flags not supported in thumb1 mode");
        break;
      }
  return NULL;
}

/* Generate code to enable conditional branches in functions over 1 MiB.

   operands: is the operands list of the asm insn (see arm_cond_branch or
     arm_cond_branch_reversed).
   pos_label: is an index into the operands array where operands[pos_label] is
     the asm label of the final jump destination.
   dest: is a string which is used to generate the asm label of the intermediate
     destination.
   branch_format: is a string denoting the intermediate branch format, e.g.
     "beq", "bne", etc.  */
const char *
arm_gen_far_branch (rtx *operands, int pos_label, const char *dest,
                    const char *branch_format)
{
  rtx_code_label *tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[256];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest, \
                               CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = arm_strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);

  return "";
}

/* If given mode matches, load from memory to LO_REGS.
   (i.e. [Rn], Rn <= LO_REGS).  */
enum reg_class
arm_mode_base_reg_class (machine_mode mode)
{
  if (TARGET_HAVE_MVE
      && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
    return LO_REGS;

  return MODE_BASE_REG_REG_CLASS (mode);
}

#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "ARM"

struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */

static opt_machine_mode
arm_get_mask_mode (machine_mode mode)
{
  if (TARGET_HAVE_MVE)
    return arm_mode_to_pred_mode (mode);

  return default_get_mask_mode (mode);
}

/* Helper function to determine whether SEQ represents a sequence of
   instructions representing the vsel<cond> floating point instructions.
   This is a heuristic to check whether the proposed optimisation is desired;
   the choice has no consequence for correctness.  */
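
/* The shape being looked for is roughly (modes and register numbers are only
   illustrative):

	(set (reg:SF 117) (reg:SF 119))			;; optional reg copies
	(set (reg:CC CC_REG) (compare:CC ...))		;; the compare
	(set (reg:SF 118)
	     (if_then_else:SF (ge (reg:CC CC_REG) (const_int 0))
			      (reg:SF 117) (reg:SF 119)))

   which can later be implemented with a VFP vsel<cond> instruction.  */
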
static bool
arm_is_vsel_fp_insn (rtx_insn *seq)
{
  rtx_insn *curr_insn = seq;
  rtx set = NULL_RTX;
  /* The pattern may start with a simple set with register operands.  Skip
     through any of those.  */
  while (curr_insn)
    {
      set = single_set (curr_insn);
      if (!set
          || !REG_P (SET_DEST (set)))
        return false;

      if (!REG_P (SET_SRC (set)))
        break;
      curr_insn = NEXT_INSN (curr_insn);
    }

  if (!curr_insn)
    return false;

  /* The next instruction should be a compare.  */
  if (!REG_P (SET_DEST (set))
      || GET_CODE (SET_SRC (set)) != COMPARE)
    return false;

  curr_insn = NEXT_INSN (curr_insn);
  if (!curr_insn)
    return false;

  /* And the last instruction should be an IF_THEN_ELSE.  */
  set = single_set (curr_insn);
  if (!set
      || !REG_P (SET_DEST (set))
      || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
    return false;

  return !NEXT_INSN (curr_insn);
}

/* Helper function to determine whether SEQ represents a sequence of
   instructions representing the Armv8.1-M Mainline conditional arithmetic
   instructions: csinc, csneg and csinv.  The cinc instruction is generated
   using a different mechanism.
   This is a heuristic to check whether the proposed optimisation is desired;
   the choice has no consequence for correctness.  */

static bool
arm_is_v81m_cond_insn (rtx_insn *seq)
{
  rtx_insn *curr_insn = seq;
  rtx set = NULL_RTX;
  /* The pattern may start with a simple set with register operands.  Skip
     through any of those.  */
  while (curr_insn)
    {
      set = single_set (curr_insn);
      if (!set
          || !REG_P (SET_DEST (set)))
        return false;

      if (!REG_P (SET_SRC (set)))
        break;
      curr_insn = NEXT_INSN (curr_insn);
    }

  if (!curr_insn)
    return false;

  /* The next instruction should be one of:
     NEG: for csneg,
     PLUS: for csinc,
     NOT: for csinv.  */
  if (GET_CODE (SET_SRC (set)) != NEG
      && GET_CODE (SET_SRC (set)) != PLUS
      && GET_CODE (SET_SRC (set)) != NOT)
    return false;

  curr_insn = NEXT_INSN (curr_insn);
  if (!curr_insn)
    return false;

  /* The next instruction should be a COMPARE.  */
  set = single_set (curr_insn);
  if (!set
      || !REG_P (SET_DEST (set))
      || GET_CODE (SET_SRC (set)) != COMPARE)
    return false;

  curr_insn = NEXT_INSN (curr_insn);
  if (!curr_insn)
    return false;

  /* And the last instruction should be an IF_THEN_ELSE.  */
  set = single_set (curr_insn);
  if (!set
      || !REG_P (SET_DEST (set))
      || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
    return false;

  return !NEXT_INSN (curr_insn);
}

/* For Armv8.1-M Mainline we have both conditional execution through IT blocks
   and conditional arithmetic instructions controlled by
   TARGET_COND_ARITH.  To generate the latter we rely on a special part of the
   "ce" pass that generates code for targets that don't support conditional
   execution of general instructions, known as "noce".  These transformations
   happen before 'reload_completed'.  However, "noce" also triggers for some
   unwanted patterns [PR 116444] that prevent "ce" optimisations after reload.
   To make sure we can get both we use the TARGET_NOCE_CONVERSION_PROFITABLE_P
   hook to only allow "noce" to generate the patterns that are profitable.  */
static bool
arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
{
  if (!TARGET_COND_ARITH
      || reload_completed)
    return default_noce_conversion_profitable_p (seq, if_info);

  if (arm_is_v81m_cond_insn (seq))
    return true;

  /* Look for vsel<cond> opportunities as we still want to codegen these for
     Armv8.1-M Mainline targets.  */
  if (arm_is_vsel_fp_insn (seq))
    return true;

  return false;
}

/* Output assembly to read the thread pointer from the appropriate TPIDR
   register into DEST.  If PRED_P also emit the %? that can be used to
   output the predication code.  */
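
/* For example (illustrative operands): with -mtp=tpidruro and r0 as DEST this
   emits

	mrc	p15, 0, r0, c13, c0, 3	@ load_tp_hard

   TPIDRURW and TPIDRPRW use opcode2 values 2 and 4 respectively, which is
   what tpidr_coproc_num selects below.  */
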
const char *
arm_output_load_tpidr (rtx dst, bool pred_p)
{
  char buf[64];
  int tpidr_coproc_num = -1;
  switch (target_thread_pointer)
    {
    case TP_TPIDRURW:
      tpidr_coproc_num = 2;
      break;
    case TP_TPIDRURO:
      tpidr_coproc_num = 3;
      break;
    case TP_TPIDRPRW:
      tpidr_coproc_num = 4;
      break;
    default:
      gcc_unreachable ();
    }
  snprintf (buf, sizeof (buf),
            "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
            pred_p ? "%?" : "", tpidr_coproc_num);
  output_asm_insn (buf, &dst);
  return "";
}

/* Return the MVE vector mode that has NUNITS elements of mode INNER_MODE.  */
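
/* For example, arm_mve_data_mode (HImode, 8) yields V8HImode and
   arm_mve_data_mode (SFmode, 4) yields V4SFmode; a combination with no
   matching vector mode yields an empty opt_machine_mode.  */
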
opt_machine_mode
arm_mve_data_mode (scalar_mode inner_mode, poly_uint64 nunits)
{
  enum mode_class mclass
    = (SCALAR_FLOAT_MODE_P (inner_mode) ? MODE_VECTOR_FLOAT : MODE_VECTOR_INT);
  machine_mode mode;
  FOR_EACH_MODE_IN_CLASS (mode, mclass)
    if (inner_mode == GET_MODE_INNER (mode)
        && known_eq (nunits, GET_MODE_NUNITS (mode)))
      return mode;

  return opt_machine_mode ();
}

#include "gt-arm.h"